I have solr cloud cluster and it is throwing the following exception, it is happening when solr does something with the replicas
null:org.apache.solr.common.SolrException: org.apache.solr.client.solrj.SolrServerException: IOException occured when talking to server at: http://10.14.0.72:8080/solr/IE_big_index_shard1_0_replica2
at org.apache.solr.handler.component.SearchHandler.handleRequestBody(SearchHandler.java:407)
at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:155)
at org.apache.solr.core.SolrCore.execute(SolrCore.java:2033)
at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:652)
at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:460)
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:229)
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:184)
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1668)
at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:581)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226)
at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1160)
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:511)
at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185)
at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1092)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213)
at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134)
at org.eclipse.jetty.server.Server.handle(Server.java:518)
at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:308)
at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:244)
at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273)
at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95)
at org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93)
at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceAndRun(ExecuteProduceConsume.java:246)
at org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:156)
at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:654)
at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:572)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.solr.client.solrj.SolrServerException: IOException occured when talking to server at: http://10.14.0.72:8080/solr/IE_big_index_shard1_0_replica2
at org.apache.solr.client.solrj.impl.HttpSolrClient.executeMethod(HttpSolrClient.java:591)
at org.apache.solr.client.solrj.impl.HttpSolrClient.request(HttpSolrClient.java:241)
at org.apache.solr.client.solrj.impl.HttpSolrClient.request(HttpSolrClient.java:230)
at org.apache.solr.client.solrj.SolrClient.request(SolrClient.java:1219)
at org.apache.solr.handler.component.HttpShardHandler$1.call(HttpShardHandler.java:198)
at org.apache.solr.handler.component.HttpShardHandler$1.call(HttpShardHandler.java:163)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at org.apache.solr.common.util.ExecutorUtil$MDCAwareThreadPoolExecutor.lambda$execute$0(ExecutorUtil.java:229)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
... 1 more
Caused by: org.apache.http.conn.ConnectionPoolTimeoutException: Timeout waiting for connection from pool
at org.apache.http.impl.conn.PoolingClientConnectionManager.leaseConnection(PoolingClientConnectionManager.java:226)
at org.apache.http.impl.conn.PoolingClientConnectionManager$1.getConnection(PoolingClientConnectionManager.java:195)
at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:423)
at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:882)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:107)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55)
at org.apache.solr.client.solrj.impl.HttpSolrClient.executeMethod(HttpSolrClient.java:482)
... 12 more
Related
I am trying to read from hdfs location and save into an AWS s3 Bucket as follows :
spark.sql("select * from sometbl")
.coalesce(1)
.write.mode(SaveMode.Overwrite)
.format("csv")
.save(s"s3a://mybucket/foldername")
But i get this Exception :
Caused by: org.apache.spark.SparkException: Task failed while writing rows.
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:257)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:170)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:169)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: com.amazonaws.SdkClientException: Unable to execute HTTP request: null
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleRetryableException(AmazonHttpClient.java:1113)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1063)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:743)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:717)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667)
at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4229)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4176)
at com.amazonaws.services.s3.AmazonS3Client.copyObject(AmazonS3Client.java:1852)
at com.amazonaws.services.s3.transfer.internal.CopyCallable.copyInOneChunk(CopyCallable.java:146)
at com.amazonaws.services.s3.transfer.internal.CopyCallable.call(CopyCallable.java:134)
at com.amazonaws.services.s3.transfer.internal.CopyMonitor.call(CopyMonitor.java:133)
at com.amazonaws.services.s3.transfer.internal.CopyMonitor.call(CopyMonitor.java:44)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
... 3 more
Caused by: com.amazonaws.thirdparty.apache.http.client.ClientProtocolException
at com.amazonaws.thirdparty.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:186)
at com.amazonaws.thirdparty.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82)
at com.amazonaws.thirdparty.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55)
at com.amazonaws.http.apache.client.impl.SdkHttpClient.execute(SdkHttpClient.java:72)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1235)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1055)
... 17 more
Caused by: com.amazonaws.thirdparty.apache.http.ProtocolException: Content-Length header already present
at com.amazonaws.thirdparty.apache.http.protocol.RequestContent.process(RequestContent.java:96)
at com.amazonaws.thirdparty.apache.http.protocol.ImmutableHttpProcessor.process(ImmutableHttpProcessor.java:132)
at com.amazonaws.thirdparty.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:182)
at com.amazonaws.thirdparty.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:184)
... 22 more
Is there a bug in s3 client ? these are versions used
"com.amazonaws" % "aws-java-sdk" % "1.7.4",
"org.apache.hadoop" % "hadoop-aws" % "2.7.3"
Using IntegrationStudio I have created an ESB project.Then I created a proxy service and added a dblookup mediator. I have configured it to use postgresql DB. I configured as mentioned below.
connection_type as DB_CONNECTION
In database configuration window, I choose connection type as postgresql.
I chose "get from server" radio button and selected "42.2.5" from the combo list and entered the connection parameters.
Connection DB Driver: com.postgres.jdbc.Driver (tried with org.postgresql.Driver)
jdbc url connection: jdbc:postgresql://localhost:5432/EDH_DATABASE
connection username: postgres
password: entered
The test connection works fine. But when running the proxy through IntegrationStudio it gives the following error.
NOTE: I COPIED postgresql-42.2.5.jar to IntegrationStudio.app/Contents/Eclipse/runtime/microesb/lib
[2020-01-08 17:39:25,612] ERROR {org.apache.synapse.mediators.db.DBLookupMediator} - SQL Exception occurred while executing statement : select * from teacher; against DataSource : jdbc:postgresql://localhost:5432/EDH_DATABASE org.apache.commons.dbcp.SQLNestedException: Cannot load JDBC driver class 'com.postgres.jdbc.Driver'
at org.apache.commons.dbcp.BasicDataSource.createConnectionFactory(BasicDataSource.java:1429)
at org.apache.commons.dbcp.BasicDataSource.createDataSource(BasicDataSource.java:1371)
at org.apache.commons.dbcp.BasicDataSource.getConnection(BasicDataSource.java:1044)
at org.apache.synapse.mediators.db.DBLookupMediator.processStatement(DBLookupMediator.java:58)
at org.apache.synapse.mediators.db.AbstractDBMediator.mediate(AbstractDBMediator.java:243)
at org.apache.synapse.mediators.AbstractListMediator.mediate(AbstractListMediator.java:109)
at org.apache.synapse.mediators.AbstractListMediator.mediate(AbstractListMediator.java:71)
at org.apache.synapse.mediators.base.SequenceMediator.mediate(SequenceMediator.java:158)
at org.apache.synapse.core.axis2.ProxyServiceMessageReceiver.receive(ProxyServiceMessageReceiver.java:224)
at org.apache.axis2.engine.AxisEngine.receive(AxisEngine.java:180)
at org.apache.synapse.transport.passthru.ServerWorker.processNonEntityEnclosingRESTHandler(ServerWorker.java:367)
at org.apache.synapse.transport.passthru.ServerWorker.processEntityEnclosingRequest(ServerWorker.java:412)
at org.apache.synapse.transport.passthru.ServerWorker.run(ServerWorker.java:181)
at org.apache.axis2.transport.base.threads.NativeWorkerPool$1.run(NativeWorkerPool.java:172)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ClassNotFoundException: com.postgres.jdbc.Driver
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:349)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at org.eclipse.osgi.internal.framework.ContextFinder.loadClass(ContextFinder.java:139)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at org.apache.commons.dbcp.BasicDataSource.createConnectionFactory(BasicDataSource.java:1420)
... 16 more
The postgresql-42.2.5.jar does not have the path "com.postgresql.jdbc.Driver".
So tried "org.postgresql.Driver" as the Connection DB Driver field value. But still it gives the same error. INSPITE OF USING"org.postgresql.Driver", using the configurable field, it still gives the same error.
[2020-01-08 17:39:25,612] ERROR {org.apache.synapse.mediators.db.DBLookupMediator} - SQL Exception occurred while executing statement : select * from teacher; against DataSource : jdbc:postgresql://localhost:5432/EDH_DATABASE org.apache.commons.dbcp.SQLNestedException:
Cannot load JDBC driver class 'com.postgres.jdbc.Driver'
at
org.apache.commons.dbcp.BasicDataSource.createConnectionFactory(BasicDataSource.java:1429)
at org.apache.commons.dbcp.BasicDataSource.createDataSource(BasicDataSource.java:1371)
at org.apache.commons.dbcp.BasicDataSource.getConnection(BasicDataSource.java:1044)
at
org.apache.synapse.mediators.db.DBLookupMediator.processStatement(DBLookupMediator.java:58)
at org.apache.synapse.mediators.db.AbstractDBMediator.mediate(AbstractDBMediator.java:243)
at org.apache.synapse.mediators.AbstractListMediator.mediate(AbstractListMediator.java:109)
at org.apache.synapse.mediators.AbstractListMediator.mediate(AbstractListMediator.java:71)
at org.apache.synapse.mediators.base.SequenceMediator.mediate(SequenceMediator.java:158)
at Org.apache.synapse.core.axis2.ProxyServiceMessageReceiver.receive(ProxyServiceMessageReceiver.java:224)
at org.apache.axis2.engine.AxisEngine.receive(AxisEngine.java:180)
at org.apache.synapse.transport.passthru.ServerWorker.processNonEntityEnclosingRESTHandler(ServerWorker.java:367)
at
org.apache.synapse.transport.passthru.ServerWorker.processEntityEnclosingRequest(ServerWorker.java:412)
at org.apache.synapse.transport.passthru.ServerWorker.run(ServerWorker.java:181)
at
org.apache.axis2.transport.base.threads.NativeWorkerPool$1.run(NativeWorkerPool.java:172)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ClassNotFoundException: com.postgres.jdbc.Driver
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:349)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at org.eclipse.osgi.internal.framework.ContextFinder.loadClass(ContextFinder.java:139)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at
org.apache.commons.dbcp.BasicDataSource.createConnectionFactory(BasicDataSource.java:1420)
... 16 more
Any help would be greatly appreciated.
It seems the correct Driver name is "org.postgresql.Driver". In your error logs, even though you have changed the Driver name from "com.postgres.jdbc.Driver" to "org.postgresql.Driver", it seems to be looking for the former one. Please make sure that, you have stopped the running Micro Integrator by clicking the stop button and run the proxy again by clicking on "Run As -> Run on Micro Integrator".
After following this tutorial https://docs.wso2.com/display/IS410/Connect%2Bthe%2BWSO2%2BAPI%2BManager%2Bto%2Ban%2BExternal%2BLDAP%2BUser%2BStore
to enable the LDAP authentication in WSO2 API Manager I restart the server and appear this error:
TID: [-1] [] [2018-09-30 14:26:39,274] ERROR {org.wso2.carbon.databridge.agent.endpoint.DataEndpointConnectionWorker} - Error while trying to login to data receiver :/xxx.xx.xx.xxx:9711 {org.wso2.carbon.databridge.agent.endpoint.DataEndpointConnectionWorker}
org.wso2.carbon.databridge.agent.exception.DataEndpointAuthenticationException: Error while trying to login to data receiver :/xxx.xx.xx.xxx:9711
at org.wso2.carbon.databridge.agent.endpoint.binary.BinaryDataEndpoint.login(BinaryDataEndpoint.java:47)
at org.wso2.carbon.databridge.agent.endpoint.DataEndpointConnectionWorker.connect(DataEndpointConnectionWorker.java:93)
at org.wso2.carbon.databridge.agent.endpoint.DataEndpointConnectionWorker.run(DataEndpointConnectionWorker.java:42)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.wso2.carbon.databridge.commons.exception.AuthenticationException: wrong userName or password
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.wso2.carbon.databridge.agent.endpoint.binary.BinaryEventSender.processResponse(BinaryEventSender.java:162)
at org.wso2.carbon.databridge.agent.endpoint.binary.BinaryDataEndpoint.login(BinaryDataEndpoint.java:42)
... 7 more
TID: [-1] [] [2018-09-30 14:26:39,274] ERROR {org.wso2.carbon.databridge.agent.endpoint.DataEndpointConnectionWorker} - Error while trying to connect to the endpoint. Cannot borrow client for ssl://xxx.xx.xx.xxx:9711 {org.wso2.carbon.databridge.agent.endpoint.DataEndpointConnectionWorker}
org.wso2.carbon.databridge.agent.exception.DataEndpointAuthenticationException: Cannot borrow client for ssl://xxx.xx.xx.xxx:9711
at org.wso2.carbon.databridge.agent.endpoint.DataEndpointConnectionWorker.connect(DataEndpointConnectionWorker.java:99)
at org.wso2.carbon.databridge.agent.endpoint.DataEndpointConnectionWorker.run(DataEndpointConnectionWorker.java:42)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.wso2.carbon.databridge.agent.exception.DataEndpointAuthenticationException: Error while trying to login to data receiver :/xxx.xx.xx.xxx:9711
at org.wso2.carbon.databridge.agent.endpoint.binary.BinaryDataEndpoint.login(BinaryDataEndpoint.java:47)
at org.wso2.carbon.databridge.agent.endpoint.DataEndpointConnectionWorker.connect(DataEndpointConnectionWorker.java:93)
... 6 more
Caused by: org.wso2.carbon.databridge.commons.exception.AuthenticationException: wrong userName or password
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.wso2.carbon.databridge.agent.endpoint.binary.BinaryEventSender.processResponse(BinaryEventSender.java:162)
at org.wso2.carbon.databridge.agent.endpoint.binary.BinaryDataEndpoint.login(BinaryDataEndpoint.java:42)
... 7 more
But the server start correctly and I can login with the LDAP users. So, how can I control that message or edit the user/password that is referencing in the error because I think that is checking the default wso2 authentication.
This error can happen when using binary data publisher for throttling and the username or password is incorrect. Related configuration is located inside "ThrottlingConfigurations" section of "repository/conf/api-manager.xml" file. Try to update the Username and Password inside the "DataPublisher" configuration to the LDAP user.
<ThrottlingConfigurations>
<EnableAdvanceThrottling>true</EnableAdvanceThrottling>
<DataPublisher>
<Enabled>true</Enabled>
<Type>Binary</Type>
<ReceiverUrlGroup>tcp://${carbon.local.ip}:${receiver.url.port}</ReceiverUrlGroup>
<AuthUrlGroup>ssl://${carbon.local.ip}:${auth.url.port}</AuthUrlGroup>
<Username>user_name</Username>
<Password>password</Password>
....
</ThrottlingConfigurations>
JDBC interpreter for hive in Zeppelin works fine on non MR queries. In case of MR, getting the below error
%Hive
select * from table where month=2;
getting the below exception :
java.sql.SQLException: Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask
at org.apache.hive.jdbc.HiveStatement.execute(HiveStatement.java:296)
at org.apache.commons.dbcp2.DelegatingStatement.execute(DelegatingStatement.java:291)
at org.apache.commons.dbcp2.DelegatingStatement.execute(DelegatingStatement.java:291)
at org.apache.zeppelin.jdbc.JDBCInterpreter.executeSql(JDBCInterpreter.java:577)
at org.apache.zeppelin.jdbc.JDBCInterpreter.interpret(JDBCInterpreter.java:660)
at org.apache.zeppelin.interpreter.LazyOpenInterpreter.interpret(LazyOpenInterpreter.java:94)
at org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer$InterpretJob.jobRun(RemoteInterpreterServer.java:489)
at org.apache.zeppelin.scheduler.Job.run(Job.java:175)
at org.apache.zeppelin.scheduler.ParallelScheduler$JobRunner.run(ParallelScheduler.java:162)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
I have a Spark job which works fine for 150G dataset. However, when I tried to increase the amount of data to around 600G, I kept getting the following errors, and it seems to be failing at: myRDD.count() at line:
at com.myproject.myJob.MyProcessor$.process(MyProcessor.scala:45)
Does anyone have any suggestion about how to resolve this problem? I am running on AWS-EMR-4.1.0-Spark 1.5.0. Thanks!
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1280)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1268)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1267)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1267)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1493)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1455)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1444)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1813)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1826)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1839)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1910)
at org.apache.spark.rdd.RDD.count(RDD.scala:1121)
at com.myproject.myJob.MyProcessor$.process(MyProcessor.scala:45)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:525)
Caused by: java.io.IOException: Failed to connect to ip-10-153-139-23.ec2.internal:48632
at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:193)
at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:156)
at org.apache.spark.network.netty.NettyBlockTransferService$$anon$1.createAndStart(NettyBlockTransferService.scala:88)
at org.apache.spark.network.shuffle.RetryingBlockFetcher.fetchAllOutstanding(RetryingBlockFetcher.java:140)
at org.apache.spark.network.shuffle.RetryingBlockFetcher.access$200(RetryingBlockFetcher.java:43)
at org.apache.spark.network.shuffle.RetryingBlockFetcher$1.run(RetryingBlockFetcher.java:170)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.nio.channels.UnresolvedAddressException
at sun.nio.ch.Net.checkAddress(Net.java:107)
at sun.nio.ch.SocketChannelImpl.connect(SocketChannelImpl.java:649)
at io.netty.channel.socket.nio.NioSocketChannel.doConnect(NioSocketChannel.java:209)
at io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.connect(AbstractNioChannel.java:207)
at io.netty.channel.DefaultChannelPipeline$HeadContext.connect(DefaultChannelPipeline.java:1097)
at io.netty.channel.AbstractChannelHandlerContext.invokeConnect(AbstractChannelHandlerContext.java:471)
at io.netty.channel.AbstractChannelHandlerContext.connect(AbstractChannelHandlerContext.java:456)
at io.netty.channel.ChannelOutboundHandlerAdapter.connect(ChannelOutboundHandlerAdapter.java:47)
at io.netty.channel.AbstractChannelHandlerContext.invokeConnect(AbstractChannelHandlerContext.java:471)
at io.netty.channel.AbstractChannelHandlerContext.connect(AbstractChannelHandlerContext.java:456)
at io.netty.channel.ChannelDuplexHandler.connect(ChannelDuplexHandler.java:50)
at io.netty.channel.AbstractChannelHandlerContext.invokeConnect(AbstractChannelHandlerContext.java:471)
at io.netty.channel.AbstractChannelHandlerContext.connect(AbstractChannelHandlerContext.java:456)
at io.netty.channel.AbstractChannelHandlerContext.connect(AbstractChannelHandlerContext.java:438)
at io.netty.channel.DefaultChannelPipeline.connect(DefaultChannelPipeline.java:908)
at io.netty.channel.AbstractChannel.connect(AbstractChannel.java:203)
at io.netty.bootstrap.Bootstrap$2.run(Bootstrap.java:166)