Spark: AWSClientIOException in XmlResponsesSaxParser ListBucketHandler while writing parquet to S3

I have a Spark streaming application that continuously writes parquet files to an S3 location in append mode. Recently it has been failing often with the following error:
org.apache.hadoop.fs.s3a.AWSClientIOException: getFileStatus on writePath/_temporary/: com.amazonaws.SdkClientException: Failed to parse XML document with handler class com.amazonaws.services.s3.model.transform.XmlResponsesSaxParser$ListBucketHandler: Failed to parse XML document with handler class com.amazonaws.services.s3.model.transform.XmlResponsesSaxParser$ListBucketHandler
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:128)
at org.apache.hadoop.fs.s3a.S3AFileSystem.getFileStatus(S3AFileSystem.java:1638)
at org.apache.hadoop.fs.s3a.S3AFileSystem.innerMkdirs(S3AFileSystem.java:1518)
at org.apache.hadoop.fs.s3a.S3AFileSystem.mkdirs(S3AFileSystem.java:1482)
at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:1961)
at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.setupJob(FileOutputCommitter.java:339)
at org.apache.spark.internal.io.HadoopMapReduceCommitProtocol.setupJob(HadoopMapReduceCommitProtocol.scala:162)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:176)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:154)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:104)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:102)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.doExecute(commands.scala:122)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:654)
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:654)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:654)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:273)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:267)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:225)
at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:547)
at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:628)
at org.apache.spark.streaming.dstream.DStream$$anonfun$foreachRDD$1$$anonfun$apply$mcV$sp$3.apply(DStream.scala:628)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ForEachDStream.scala:51)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:51)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:51)
at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:416)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply$mcV$sp(ForEachDStream.scala:50)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:50)
at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:50)
at scala.util.Try$.apply(Try.scala:192)
at org.apache.spark.streaming.scheduler.Job.run(Job.scala:39)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply$mcV$sp(JobScheduler.scala:257)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:257)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:257)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler.run(JobScheduler.scala:256)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: com.amazonaws.SdkClientException: Failed to parse XML document with handler class com.amazonaws.services.s3.model.transform.XmlResponsesSaxParser$ListBucketHandler
at com.amazonaws.services.s3.model.transform.XmlResponsesSaxParser.parseXmlInputStream(XmlResponsesSaxParser.java:161)
at com.amazonaws.services.s3.model.transform.XmlResponsesSaxParser.parseListBucketObjectsResponse(XmlResponsesSaxParser.java:317)
at com.amazonaws.services.s3.model.transform.Unmarshallers$ListObjectsUnmarshaller.unmarshall(Unmarshallers.java:70)
at com.amazonaws.services.s3.model.transform.Unmarshallers$ListObjectsUnmarshaller.unmarshall(Unmarshallers.java:59)
at com.amazonaws.services.s3.internal.S3XmlResponseHandler.handle(S3XmlResponseHandler.java:62)
at com.amazonaws.services.s3.internal.S3XmlResponseHandler.handle(S3XmlResponseHandler.java:31)
at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:70)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1545)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1270)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1056)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:743)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:717)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667)
at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4330)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4277)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4271)
at com.amazonaws.services.s3.AmazonS3Client.listObjects(AmazonS3Client.java:835)
at org.apache.hadoop.fs.s3a.S3AFileSystem.listObjects(S3AFileSystem.java:918)
at org.apache.hadoop.fs.s3a.S3AFileSystem.getFileStatus(S3AFileSystem.java:1611)
... 47 more
Caused by: org.xml.sax.SAXParseException; lineNumber: 1; columnNumber: 5; XML document structures must start and end within the same entity.
at org.apache.xerces.util.ErrorHandlerWrapper.createSAXParseException(Unknown Source)
at org.apache.xerces.util.ErrorHandlerWrapper.fatalError(Unknown Source)
at org.apache.xerces.impl.XMLErrorReporter.reportError(Unknown Source)
at org.apache.xerces.impl.XMLErrorReporter.reportError(Unknown Source)
at org.apache.xerces.impl.XMLErrorReporter.reportError(Unknown Source)
at org.apache.xerces.impl.XMLScanner.reportFatalError(Unknown Source)
at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.endEntity(Unknown Source)
at org.apache.xerces.impl.XMLDocumentScannerImpl.endEntity(Unknown Source)
at org.apache.xerces.impl.XMLEntityManager.endEntity(Unknown Source)
at org.apache.xerces.impl.XMLEntityScanner.load(Unknown Source)
at org.apache.xerces.impl.XMLEntityScanner.skipSpaces(Unknown Source)
at org.apache.xerces.impl.XMLScanner.scanPIData(Unknown Source)
at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanPIData(Unknown Source)
at org.apache.xerces.impl.XMLScanner.scanPI(Unknown Source)
at org.apache.xerces.impl.XMLDocumentScannerImpl$PrologDispatcher.dispatch(Unknown Source)
at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanDocument(Unknown Source)
at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source)
at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source)
at org.apache.xerces.parsers.XMLParser.parse(Unknown Source)
at org.apache.xerces.parsers.AbstractSAXParser.parse(Unknown Source)
at com.amazonaws.services.s3.model.transform.XmlResponsesSaxParser.parseXmlInputStream(XmlResponsesSaxParser.java:147)
... 68 more
But I never call the S3 ListBuckets operation explicitly in my code. This error occurs every day or two. I have also noticed that the number of files written has recently increased, and that the number of delete markers in the S3 paths has grown significantly (I have versioning enabled). I also tried increasing the timeout spark.hadoop.fs.s3a.connection.timeout=120000 as suggested here: https://community.cloudera.com/t5/Support-Questions/Hive-to-S3-Error-timeout/td-p/208042 but this didn't help. Versions used:
sparkVersion = "2.3.0"
hadoopVersion = "2.8.3"
awsJavaSDKVersion = "1.11.297"
mapreduce.fileoutputcommitter.algorithm.version = 2
Can someone help with this?

This is happening with a versioned bucket where you've deleted a lot (tombstone markers), and you are using an s3a library/AWS SDK combination that uses the v1 list API, which always returns 5000 entries on a long list; the scan can time out if there are lots of tombstones and old versions to skip. It surfaces as an XML parser error, e.g. HADOOP-13811.
Fix: upgrade Spark to the Hadoop 3.1 JARs (everywhere, not just hadoop-aws) and use its (default) v2 listing API. See HADOOP-13421.
June 2021 update: Hadoop 3.3.1 lets you disable those marker delete calls by setting fs.s3a.directory.marker.retention to keep:
<property>
  <name>fs.s3a.directory.marker.retention</name>
  <value>keep</value>
</property>
Keeping markers improves speed and scalability. Spark can also now be built with the Hadoop 3.1 JARs.
Note that the marker retention = keep option is not backwards compatible with older Hadoop branches that do not have the compatibility patch. Check the documentation.
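For reference, here is a minimal PySpark sketch of how these settings could be passed to a job, assuming a Spark build that bundles the Hadoop 3.3.1+ JARs; the spark.hadoop. prefix simply forwards the options to the S3A connector, and the application name is a placeholder:
from pyspark.sql import SparkSession

# fs.s3a.list.version=2 selects the v2 listing API (already the default in recent Hadoop releases,
# shown explicitly here); fs.s3a.directory.marker.retention=keep needs the Hadoop 3.3.1+ JARs on
# the classpath and stops S3A from issuing the marker deletes that create tombstones.
spark = SparkSession \
    .builder \
    .appName("s3a-parquet-writer") \
    .config("spark.hadoop.fs.s3a.list.version", "2") \
    .config("spark.hadoop.fs.s3a.directory.marker.retention", "keep") \
    .getOrCreate()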

Related

Spark read from S3 working, but I am unable to write using the same session [duplicate]

This question already has answers here: UnsatisfiedLinkError while writing to S3 using Staging S3A Committer on Windows.
I am using a PySpark test script to read and write files to S3. Here is how I initialize the Spark session:
import findspark
findspark.init()  # must run before importing pyspark so it can be located on sys.path

import boto3  # assumption: `session` below is a boto3 Session; it was not shown in the original snippet
from pyspark.sql import SparkSession

session = boto3.Session()

spark = SparkSession \
    .builder \
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") \
    .config("fs.s3a.fast.upload", "true") \
    .config("fs.s3a.access.key", session.get_credentials().access_key) \
    .config("fs.s3a.secret.key", session.get_credentials().secret_key) \
    .master("local") \
    .getOrCreate()
I have Spark 3.0.3 pre-built for Apache Hadoop 3.2 and later, so I haven't installed Hadoop independently (I understand this version of Spark does not need it).
I have downloaded the following jar files in the spark jar folder:
hadoop-aws-3.2.0.jar -> Retrieved from https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.2.0
aws-java-sdk-bundle-1.11.375.jar -> Found in the dependencies of the above jar.
I selected these two based on the Hadoop version I am using and on the dependencies listed on the Maven repository page.
With this configuration, this piece of code works when I read a file located in S3 using the S3A filesystem:
s3read_path = 's3a://' + path-to-file-in-s3
df = spark.read.parquet(s3read_path)
df.count()
This returns some content inside the file as expected. However, when I try to write a different dataframe to the same location, I keep getting an error. This is the code:
key_name = 's3a://' + key_name.replace('s3://', '') + file
df.repartition(1).write.parquet(key_name, mode='overwrite')
which results in the following error:
22/02/22 17:22:00 ERROR Executor: Exception in task 0.0 in stage 3.0 (TID 3)
java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method)
at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:645)
at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:1230)
at org.apache.hadoop.util.DiskChecker.checkAccessByFileMethods(DiskChecker.java:160)
at org.apache.hadoop.util.DiskChecker.checkDirInternal(DiskChecker.java:100)
at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:77)
at org.apache.hadoop.util.BasicDiskValidator.checkStatus(BasicDiskValidator.java:32)
at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.confChanged(LocalDirAllocator.java:331)
at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:394)
at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.createTmpFileForWrite(LocalDirAllocator.java:477)
at org.apache.hadoop.fs.LocalDirAllocator.createTmpFileForWrite(LocalDirAllocator.java:213)
at org.apache.hadoop.fs.s3a.S3AFileSystem.createTmpFileForWrite(S3AFileSystem.java:575)
at org.apache.hadoop.fs.s3a.S3ADataBlocks$DiskBlockFactory.create(S3ADataBlocks.java:811)
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.createBlockIfNeeded(S3ABlockOutputStream.java:190)
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.<init>(S3ABlockOutputStream.java:168)
at org.apache.hadoop.fs.s3a.S3AFileSystem.create(S3AFileSystem.java:781)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1118)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1098)
at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:248)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:390)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:349)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:126)
at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:111)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:269)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$15(FileFormatWriter.scala:210)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:463)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:466)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
22/02/22 17:22:00 WARN TaskSetManager: Lost task 0.0 in stage 3.0 (TID 3, AT-5CG9513X0N.mshome.net, executor driver): java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method)
at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:645)
at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:1230)
at org.apache.hadoop.util.DiskChecker.checkAccessByFileMethods(DiskChecker.java:160)
at org.apache.hadoop.util.DiskChecker.checkDirInternal(DiskChecker.java:100)
at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:77)
at org.apache.hadoop.util.BasicDiskValidator.checkStatus(BasicDiskValidator.java:32)
at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.confChanged(LocalDirAllocator.java:331)
at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:394)
at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.createTmpFileForWrite(LocalDirAllocator.java:477)
at org.apache.hadoop.fs.LocalDirAllocator.createTmpFileForWrite(LocalDirAllocator.java:213)
at org.apache.hadoop.fs.s3a.S3AFileSystem.createTmpFileForWrite(S3AFileSystem.java:575)
at org.apache.hadoop.fs.s3a.S3ADataBlocks$DiskBlockFactory.create(S3ADataBlocks.java:811)
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.createBlockIfNeeded(S3ABlockOutputStream.java:190)
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.<init>(S3ABlockOutputStream.java:168)
at org.apache.hadoop.fs.s3a.S3AFileSystem.create(S3AFileSystem.java:781)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1118)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1098)
at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:248)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:390)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:349)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:126)
at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:111)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:269)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$15(FileFormatWriter.scala:210)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:463)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:466)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
22/02/22 17:22:00 ERROR TaskSetManager: Task 0 in stage 3.0 failed 1 times; aborting job
22/02/22 17:22:00 ERROR FileFormatWriter: Aborting job ccd33513-a067-4468-9706-99c136e45805.
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 3.0 failed 1 times, most recent failure: Lost task 0.0 in stage 3.0 (TID 3, AT-5CG9513X0N.mshome.net, executor driver): java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method)
at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:645)
at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:1230)
at org.apache.hadoop.util.DiskChecker.checkAccessByFileMethods(DiskChecker.java:160)
at org.apache.hadoop.util.DiskChecker.checkDirInternal(DiskChecker.java:100)
at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:77)
at org.apache.hadoop.util.BasicDiskValidator.checkStatus(BasicDiskValidator.java:32)
at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.confChanged(LocalDirAllocator.java:331)
at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:394)
at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.createTmpFileForWrite(LocalDirAllocator.java:477)
at org.apache.hadoop.fs.LocalDirAllocator.createTmpFileForWrite(LocalDirAllocator.java:213)
at org.apache.hadoop.fs.s3a.S3AFileSystem.createTmpFileForWrite(S3AFileSystem.java:575)
at org.apache.hadoop.fs.s3a.S3ADataBlocks$DiskBlockFactory.create(S3ADataBlocks.java:811)
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.createBlockIfNeeded(S3ABlockOutputStream.java:190)
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.<init>(S3ABlockOutputStream.java:168)
at org.apache.hadoop.fs.s3a.S3AFileSystem.create(S3AFileSystem.java:781)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1118)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1098)
at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:248)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:390)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:349)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:126)
at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:111)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:269)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$15(FileFormatWriter.scala:210)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:463)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:466)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2059)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2008)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2007)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2007)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:973)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:973)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:973)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2239)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2188)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2177)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:775)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2114)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:200)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:178)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:108)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:106)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.doExecute(commands.scala:131)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:127)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:126)
at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:962)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:767)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:962)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:414)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:398)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:287)
at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:847)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Unknown Source)
Caused by: java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method)
at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:645)
at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:1230)
at org.apache.hadoop.util.DiskChecker.checkAccessByFileMethods(DiskChecker.java:160)
at org.apache.hadoop.util.DiskChecker.checkDirInternal(DiskChecker.java:100)
at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:77)
at org.apache.hadoop.util.BasicDiskValidator.checkStatus(BasicDiskValidator.java:32)
at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.confChanged(LocalDirAllocator.java:331)
at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:394)
at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.createTmpFileForWrite(LocalDirAllocator.java:477)
at org.apache.hadoop.fs.LocalDirAllocator.createTmpFileForWrite(LocalDirAllocator.java:213)
at org.apache.hadoop.fs.s3a.S3AFileSystem.createTmpFileForWrite(S3AFileSystem.java:575)
at org.apache.hadoop.fs.s3a.S3ADataBlocks$DiskBlockFactory.create(S3ADataBlocks.java:811)
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.createBlockIfNeeded(S3ABlockOutputStream.java:190)
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.<init>(S3ABlockOutputStream.java:168)
at org.apache.hadoop.fs.s3a.S3AFileSystem.create(S3AFileSystem.java:781)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1118)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1098)
at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:248)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:390)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:349)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:150)
at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:126)
at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.<init>(FileFormatDataWriter.scala:111)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:269)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$15(FileFormatWriter.scala:210)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:463)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:466)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
... 1 more
Can anyone please help me? I think it might be some kind of error due to the jar versions, but I am not able to find the proper configuration.
Thanks in advance!
That's the issue:
UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
From the javadoc:
Thrown if the Java Virtual Machine cannot find an appropriate native-language definition of a method declared native.
That suggests double-checking the classpath, as there are missing libraries.
Given that you "downloaded the following jar files in the spark jar folder", you seem to have introduced a class incompatibility.
I'd avoid using those two jars and find a proven way to write to an S3A filesystem.
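If you still want to experiment with S3A locally, one common alternative to copying jars by hand is to let Spark resolve a consistent hadoop-aws build (and its matching aws-java-sdk-bundle) itself via spark.jars.packages. A minimal sketch, assuming Spark 3.0.x built against Hadoop 3.2 and with placeholder credentials; this keeps the jar versions consistent with each other, though it is not a guaranteed fix for the Windows-native access0 error:
from pyspark.sql import SparkSession

# spark.jars.packages pulls hadoop-aws and its transitive aws-java-sdk-bundle from Maven
# at session start-up, so the SDK version always matches the hadoop-aws build.
spark = SparkSession \
    .builder \
    .config("spark.jars.packages", "org.apache.hadoop:hadoop-aws:3.2.0") \
    .config("spark.hadoop.fs.s3a.access.key", "<access-key>") \
    .config("spark.hadoop.fs.s3a.secret.key", "<secret-key>") \
    .master("local[*]") \
    .getOrCreate()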

Debugging the error [Vertica][VJDBC](100172) One or more rows were rejected by the server

I am getting this error when Sqooping records from HDFS to the database.
I have around 350 columns in that table; what's the ideal way to find out what is causing the issue?
2021-04-16 01:41:02,160 INFO [IPC Server handler 27 on 34142] org.apache.hadoop.mapred.TaskAttemptListenerImpl: Diagnostics report from attempt_1618264489948_81800_m_000000_0: Error: java.io.IOException: java.sql.SQLException: [Vertica][VJDBC](100172) One or more rows were rejected by the server.
at org.apache.sqoop.mapreduce.AsyncSqlRecordWriter.close(AsyncSqlRecordWriter.java:205)
at org.apache.hadoop.mapred.MapTask$NewDirectOutputCollector.close(MapTask.java:670)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:793)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:170)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1869)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:164)
Caused by: java.sql.SQLException: [Vertica][VJDBC](100172) One or more rows were rejected by the server.
at com.vertica.dsi.dataengine.impl.DSIErrorResult.<init>(Unknown Source)
at com.vertica.dataengine.VQueryExecutor.registerBatchInsertResults(Unknown Source)
at com.vertica.dataengine.VQueryExecutor.readCopyDataResponse(Unknown Source)
at com.vertica.dataengine.VQueryExecutor.handleExecuteResponse(Unknown Source)
at com.vertica.dataengine.VQueryExecutor.execute(Unknown Source)
at com.vertica.jdbc.common.SPreparedStatement.executeBatch(Unknown Source)
at com.vertica.jdbc.VerticaJdbc4PreparedStatementImpl.executeBatch(Unknown Source)
at org.apache.sqoop.mapreduce.AsyncSqlOutputFormat$AsyncSqlExecThread.run(AsyncSqlOutputFormat.java:231)
Caused by: com.vertica.support.exceptions.ErrorException: [Vertica][VJDBC](100172) One or more rows were rejected by the server.

java.lang.NoClassDefFoundError while running TestNG xml file with configured ReportNG

I am new to Selenium and am running my first TestNG suite with ReportNG configured. I have performed all the following required steps. Please help with your inputs. Thanks!
Added the following JARs into the project
guice 3.0
Velocity 1.4
reportng 1.1.4
Disabled default listeners for TestNG
Added ReportNG listeners to the xml
XML file (shown below, after the error):
After running the .xml as a TestNG suite, I get the following error. The class definition looks fine; a snapshot is below:
package SeleniumProject;
<<Import statements go here>>
public class Test1DemoautBookTicket {
--------
}
Error:
java.lang.NoClassDefFoundError: org/apache/commons/collections/ExtendedProperties
at org.apache.velocity.runtime.RuntimeInstance.<init>(RuntimeInstance.java:160)
at org.apache.velocity.runtime.RuntimeSingleton.<clinit>(RuntimeSingleton.java:95)
at org.apache.velocity.app.Velocity.setProperty(Velocity.java:117)
at org.uncommons.reportng.AbstractReporter.<init>(AbstractReporter.java:62)
at org.uncommons.reportng.HTMLReporter.<init>(HTMLReporter.java:83)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(Unknown Source)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(Unknown Source)
at java.lang.reflect.Constructor.newInstance(Unknown Source)
at java.lang.Class.newInstance(Unknown Source)
at org.testng.internal.ClassHelper.newInstance(ClassHelper.java:50)
at org.testng.TestNG.initializeConfiguration(TestNG.java:914)
at org.testng.TestNG.run(TestNG.java:1022)
at org.testng.remote.AbstractRemoteTestNG.run(AbstractRemoteTestNG.java:114)
at org.testng.remote.RemoteTestNG.initAndRun(RemoteTestNG.java:251)
at org.testng.remote.RemoteTestNG.main(RemoteTestNG.java:77)
Caused by: java.lang.ClassNotFoundException: org.apache.commons.collections.ExtendedProperties
at java.net.URLClassLoader.findClass(Unknown Source)
at java.lang.ClassLoader.loadClass(Unknown Source)
at sun.misc.Launcher$AppClassLoader.loadClass(Unknown Source)
at java.lang.ClassLoader.loadClass(Unknown Source)
... 16 more
<!DOCTYPE suite SYSTEM "http://testng.org/testng-1.0.dtd" >
<suite name="FirstSelenium">
  <listeners>
    <listener class-name="org.uncommons.reportng.HTMLReporter"/>
    <listener class-name="org.uncommons.reportng.JUnitXMLReporter"/>
  </listeners>
  <test name="Test1">
    <classes>
      <class name="SeleniumProject.Test1DemoautBookTicket"/>
    </classes>
  </test>
</suite>
I downloaded the ReportNG jars from jar-download.com.
Among the files there was also one named velocity-dep-1.4.jar. Before I added it to the project, I got the same error as you.
(Disabling the default listeners doesn't seem necessary.)

WSO2 BAM REST-API Sample Execution

I have WSO2 BAM v2.2.0 installed on Windows 7 with Cygwin 1.7.18. When I try to run the REST-API sample, I get the following error in the BAM console.
Can anyone please let me know what could be wrong and how to resolve the issue?
TID: [0] [BAM] [2011-05-14 20:46:05,603] INFO {org.apache.cassandra.service.GCInspector} - GC for MarkSweepCompact: 408 ms for 1 collections, 94079960 used; max is 1037959168 {org.apache.cassandra.service.GCInspector}
TID: [0] [BAM] [2011-05-14 20:46:05,808] ERROR {org.wso2.carbon.analytics.hive.impl.HiveExecutorServiceImpl} - Error during query execution.. {org.wso2.carbon.analytics.hive.impl.HiveExecutorServiceImpl}
java.util.concurrent.ExecutionException: java.lang.NullPointerException
at java.util.concurrent.FutureTask$Sync.innerGet(Unknown Source)
at java.util.concurrent.FutureTask.get(Unknown Source)
at org.wso2.carbon.analytics.hive.impl.HiveExecutorServiceImpl.execute(HiveExecutorServiceImpl.java:91)
at org.wso2.carbon.analytics.hive.task.HiveScriptExecutorTask.execute(HiveScriptExecutorTask.java:60)
at org.wso2.carbon.ntask.core.impl.TaskQuartzJobAdapter.execute(TaskQuartzJobAdapter.java:56)
at org.quartz.core.JobRunShell.run(JobRunShell.java:213)
at java.util.concurrent.Executors$RunnableAdapter.call(Unknown Source)
at java.util.concurrent.FutureTask$Sync.innerRun(Unknown Source)
at java.util.concurrent.FutureTask.run(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
Caused by: java.lang.NullPointerException
at javax.jdo.spi.JDOImplHelper.checkAuthorizedStateManager(JDOImplHelper.java:609)
at org.apache.hadoop.hive.metastore.model.MDatabase.jdoReplaceStateManager(MDatabase.java)
at org.datanucleus.state.AbstractStateManager$1.run(AbstractStateManager.java:298)
at java.security.AccessController.doPrivileged(Native Method)
at org.datanucleus.state.AbstractStateManager.replaceStateManager(AbstractStateManager.java:294)
at org.datanucleus.state.JDOStateManagerImpl.updateLevel2CacheForFields(JDOStateManagerImpl.java:1264)
at org.datanucleus.state.JDOStateManagerImpl.loadUnloadedFields(JDOStateManagerImpl.java:1374)
at org.datanucleus.api.jdo.state.Hollow.transitionRetrieve(Hollow.java:168)
at org.datanucleus.state.AbstractStateManager.retrieve(AbstractStateManager.java:751)
at org.datanucleus.ObjectManagerImpl.retrieveObject(ObjectManagerImpl.java:1472)
at org.datanucleus.MultithreadedObjectManager.retrieveObject(MultithreadedObjectManager.java:280)
at org.datanucleus.api.jdo.JDOPersistenceManager.jdoRetrieve(JDOPersistenceManager.java:621)
at org.datanucleus.api.jdo.JDOPersistenceManager.retrieve(JDOPersistenceManager.java:638)
at org.datanucleus.api.jdo.JDOPersistenceManager.retrieve(JDOPersistenceManager.java:647)
at org.apache.hadoop.hive.metastore.ObjectStore.getMDatabase(ObjectStore.java:396)
at org.apache.hadoop.hive.metastore.ObjectStore.getDatabase(ObjectStore.java:414)
at org.apache.hadoop.hive.metastore.HiveContext.getCurrentContext(HiveContext.java:130)
at org.apache.hadoop.hive.jdbc.HiveConnection.<init>(HiveConnection.java:63)
at org.apache.hadoop.hive.jdbc.HiveDriver.connect(HiveDriver.java:104)
at java.sql.DriverManager.getConnection(Unknown Source)
at java.sql.DriverManager.getConnection(Unknown Source)
at org.wso2.carbon.analytics.hive.impl.HiveExecutorServiceImpl$ScriptCallable.call(HiveExecutorServiceImpl.java:234)
at org.wso2.carbon.analytics.hive.impl.HiveExecutorServiceImpl$ScriptCallable.call(HiveExecutorServiceImpl.java:217)
... 5 more
TID: [0] [BAM] [2011-05-14 20:46:05,813] ERROR {org.wso2.carbon.analytics.hive.task.HiveScriptExecutorTask} - Error while executing script : jmx_toolbox_823 {org.wso2.carbon.analytics.hive.task.HiveScriptExecutorTask}
org.wso2.carbon.analytics.hive.exception.HiveExecutionException: Error during query execution..
at org.wso2.carbon.analytics.hive.impl.HiveExecutorServiceImpl.execute(HiveExecutorServiceImpl.java:97)
at org.wso2.carbon.analytics.hive.task.HiveScriptExecutorTask.execute(HiveScriptExecutorTask.java:60)
at org.wso2.carbon.ntask.core.impl.TaskQuartzJobAdapter.execute(TaskQuartzJobAdapter.java:56)
at org.quartz.core.JobRunShell.run(JobRunShell.java:213)
at java.util.concurrent.Executors$RunnableAdapter.call(Unknown Source)
at java.util.concurrent.FutureTask$Sync.innerRun(Unknown Source)
at java.util.concurrent.FutureTask.run(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
Caused by: java.util.concurrent.ExecutionException: java.lang.NullPointerException
at java.util.concurrent.FutureTask$Sync.innerGet(Unknown Source)
at java.util.concurrent.FutureTask.get(Unknown Source)
at org.wso2.carbon.analytics.hive.impl.HiveExecutorServiceImpl.execute(HiveExecutorServiceImpl.java:91)
... 9 more
Caused by: java.lang.NullPointerException
at javax.jdo.spi.JDOImplHelper.checkAuthorizedStateManager(JDOImplHelper.java:609)
at org.apache.hadoop.hive.metastore.model.MDatabase.jdoReplaceStateManager(MDatabase.java)
at org.datanucleus.state.AbstractStateManager$1.run(AbstractStateManager.java:298)
at java.security.AccessController.doPrivileged(Native Method)
at org.datanucleus.state.AbstractStateManager.replaceStateManager(AbstractStateManager.java:294)
at org.datanucleus.state.JDOStateManagerImpl.updateLevel2CacheForFields(JDOStateManagerImpl.java:1264)
at org.datanucleus.state.JDOStateManagerImpl.loadUnloadedFields(JDOStateManagerImpl.java:1374)
at org.datanucleus.api.jdo.state.Hollow.transitionRetrieve(Hollow.java:168)
at org.datanucleus.state.AbstractStateManager.retrieve(AbstractStateManager.java:751)
at org.datanucleus.ObjectManagerImpl.retrieveObject(ObjectManagerImpl.java:1472)
at org.datanucleus.MultithreadedObjectManager.retrieveObject(MultithreadedObjectManager.java:280)
at org.datanucleus.api.jdo.JDOPersistenceManager.jdoRetrieve(JDOPersistenceManager.java:621)
at org.datanucleus.api.jdo.JDOPersistenceManager.retrieve(JDOPersistenceManager.java:638)
at org.datanucleus.api.jdo.JDOPersistenceManager.retrieve(JDOPersistenceManager.java:647)
at org.apache.hadoop.hive.metastore.ObjectStore.getMDatabase(ObjectStore.java:396)
at org.apache.hadoop.hive.metastore.ObjectStore.getDatabase(ObjectStore.java:414)
at org.apache.hadoop.hive.metastore.HiveContext.getCurrentContext(HiveContext.java:130)
at org.apache.hadoop.hive.jdbc.HiveConnection.<init>(HiveConnection.java:63)
at org.apache.hadoop.hive.jdbc.HiveDriver.connect(HiveDriver.java:104)
at java.sql.DriverManager.getConnection(Unknown Source)
at java.sql.DriverManager.getConnection(Unknown Source)
at org.wso2.carbon.analytics.hive.impl.HiveExecutorServiceImpl$ScriptCallable.call(HiveExecutorServiceImpl.java:234)
at org.wso2.carbon.analytics.hive.impl.HiveExecutorServiceImpl$ScriptCallable.call(HiveExecutorServiceImpl.java:217)
... 5 more
I am not sure, but check whether your Java version is JDK 1.7; if so, please change to JDK 1.6.
This issue should not be caused by Cygwin or by publishing data to BAM via the REST API. It seems to occur when several Hive scripts run in parallel. How many toolboxes had you installed when you got this issue?
Please try configuring it on Linux; it may work.

UnExpectedException - UnsatisfiedLinkError

I'm working on a Maven GWT project using the m2e plugin.
I implemented an RPC to access my server-side datastore.
However, starting my web client gives me an UnsatisfiedLinkError:
com.google.gwt.user.server.rpc.UnexpectedException: Service method 'public abstract java.util.List de.myPackage.client.SearchService.doSearch(java.lang.String,de.myPackage.shared.Coordinate,de.myPackage.shared.Coordinate)' threw an unexpected exception: java.lang.UnsatisfiedLinkError: org.gwtopenmaps.openlayers.client.LonLatImpl.create(DD)Lorg/gwtopenmaps/openlayers/client/util/JSObject;
at com.google.gwt.user.server.rpc.RPC.encodeResponseForFailure(RPC.java:389)
at com.google.gwt.user.server.rpc.RPC.invokeAndEncodeResponse(RPC.java:579)
at com.google.gwt.user.server.rpc.RemoteServiceServlet.processCall(RemoteServiceServlet.java:208)
at com.google.gwt.user.server.rpc.RemoteServiceServlet.processPost(RemoteServiceServlet.java:248)
at com.google.gwt.user.server.rpc.AbstractRemoteServiceServlet.doPost(AbstractRemoteServiceServlet.java:62)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:637)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:717)
at org.mortbay.jetty.servlet.ServletHolder.handle(ServletHolder.java:487)
at org.mortbay.jetty.servlet.ServletHandler.handle(ServletHandler.java:362)
at org.mortbay.jetty.security.SecurityHandler.handle(SecurityHandler.java:216)
at org.mortbay.jetty.servlet.SessionHandler.handle(SessionHandler.java:181)
at org.mortbay.jetty.handler.ContextHandler.handle(ContextHandler.java:729)
at org.mortbay.jetty.webapp.WebAppContext.handle(WebAppContext.java:405)
at org.mortbay.jetty.handler.HandlerWrapper.handle(HandlerWrapper.java:152)
at org.mortbay.jetty.handler.RequestLogHandler.handle(RequestLogHandler.java:49)
at org.mortbay.jetty.handler.HandlerWrapper.handle(HandlerWrapper.java:152)
at org.mortbay.jetty.Server.handle(Server.java:324)
at org.mortbay.jetty.HttpConnection.handleRequest(HttpConnection.java:505)
at org.mortbay.jetty.HttpConnection$RequestHandler.content(HttpConnection.java:843)
at org.mortbay.jetty.HttpParser.parseNext(HttpParser.java:647)
at org.mortbay.jetty.HttpParser.parseAvailable(HttpParser.java:211)
at org.mortbay.jetty.HttpConnection.handle(HttpConnection.java:380)
at org.mortbay.io.nio.SelectChannelEndPoint.run(SelectChannelEndPoint.java:395)
at org.mortbay.thread.QueuedThreadPool$PoolThread.run(QueuedThreadPool.java:488)
Caused by: java.lang.UnsatisfiedLinkError: org.gwtopenmaps.openlayers.client.LonLatImpl.create(DD)Lorg/gwtopenmaps/openlayers/client/util/JSObject;
at org.gwtopenmaps.openlayers.client.LonLatImpl.create(Native Method)
at org.gwtopenmaps.openlayers.client.LonLat.<init>(LonLat.java:26)
at de.iisys.sara2.sara2lightWebclient.server.SearchServiceImpl.doSearch(SearchServiceImpl.java:43)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at com.google.gwt.user.server.rpc.RPC.invokeAndEncodeResponse(RPC.java:561)
... 22 more
Does somebody know how to handle this?