Read S3 video file, process it with ffmpeg and upload to S3 - amazon-web-services

I have a video stored in s3 bucket with authenticated-read ACL.
I need to read and make a trailer with ffmpeg (nodejs)
Here's the code I use to generate the trailer
// Question code: tries to feed a signed S3 URL into ffmpeg and upload the
// resulting trailer. NOTE(review): this snippet has the defects the answer
// below addresses — kept as-is to illustrate the problem.
exports.generatePreview = (req, res) => {
const getParams = {
Bucket: S3_CREDENTIALS.bucketName,
Key: req.params.key
}
// Produces a time-limited HTTPS URL for the source object.
s3.getSignedUrl('getObject', getParams, (err, signedRequest) => {
console.log(signedRequest, err, 'getSignedUrl')
// NOTE(review): node-ffmpeg only accepts a local file path, and
// fluent-ffmpeg's command object is not a thenable — the .then() chain
// below would throw regardless of the URL issue.
ffmpeg(new URL(signedRequest))
.size('640x?')
.aspect('4:3')
.seekInput('3:00')
.duration('0:30')
.then(function (video) {
// NOTE(review): the S3 SDK expects 'Key' (capital K); lowercase 'key'
// here would be rejected as a missing required parameter.
s3.putObject({ Bucket: S3_CREDENTIALS.bucketName, key: 'preview_' + req.body.key, Body: video }, function (err, data) {
console.log(err, data)
})
});
});
}
Unfortunately, the constructor path seems not to read remote url. If I try to execute an ffmpeg command line with the same signedurl (i.e. ffmpeg -i "https://[bucketname].s3.eu-west-1.amazonaws.com/[key.mp4]?[signedParams]" -vn -acodec pcm_s16le -ar 44100 -ac 2 video.wav)
The error I get is that the signedRequest url 'The input file does not exist'
It seems fs.readFileSync does not support https URLs; trying the request over plain http gives the same result, and fs.readFileSync(signedurl) produces the same error.
How to overcome this issue?

If you're using node-ffmpeg this isn't possible because the library only accepts a string pointing to a local path, but fluent-ffmpeg does support readstreams so give that a try.
For example (untested, just spitballing):
const ffmpeg = require('fluent-ffmpeg');
const stream = require('stream');
exports.generatePreview = (req, res) => {
let params = {Bucket: S3_CREDENTIALS.bucketName, Key: req.params.key};
// Retrieve object stream
let readStream = s3.getObject(params).createReadStream();
// Set up the ffmpeg process
let ffmpegProcess = new ffmpeg(readStream)
//Add your args here
.toFormat('mp4');
ffmpegProcess.on('error', (err, stdout, stderr) => {
// Handle errors here
}).on('end', () => {
// Processing is complete
}).pipe(() => {
// Create a new stream
let pt = new stream.PassThrough();
// Reuse the same params object and set the Body to the stream
params.Key = 'preview_' + req.body.key;
params.Body = pt;
// Upload and wait for the result
s3.upload(params, (err, data) => {
if (err)
return console.error(err);
console.log("done");
})
});
});
This will have high memory requirements so if this is a Lambda function you might play around with retrieving only the first X bytes of the file and converting only that.

Related

Can't upload folder with large amount of files to google storage. I using "#ffmpeg-installer/ffmpeg" and #google-cloud/storage

I upload file to google storage using "#ffmpeg-installer/ffmpeg" and #google-cloud/storage in my node.js App.
Step 1. File uploading to fs happens in child processes — one process for each resolution type (six in total).
Step 2. Encryption (converting to a stream)
step 3. upload to google storage
I use "Upload a directory to a bucket" in order to send the video from the client to the Google Cloud Storage bucket.
This way is working fine only with small video.
For example, when I upload a video with a duration of one hour it is split into chunks, and in total I get more than three thousand files. The problem occurs when there are more than 1500 files.
So effectively I upload a folder with a large number of files, but not all of these files end up in the cloud.
Maybe someone has had a similar problem and can help fix it.
// Recursively gathers every file under `directoryPath`, then uploads them all
// to `bucketName` under a freshly generated folder name.
// NOTE(review): all uploads are fired at once via Promise.all with no
// concurrency limit — a plausible culprit when runs with >1500 files lose
// files; confirm against quota/connection limits.
const uploadFolder = async (bucketName, directoryPath, socketInstance) => {
try {
// NOTE(review): dirCtr/itemCtr are updated during the walk but never read —
// they look like leftovers from an earlier completion-detection scheme.
let dirCtr = 1;
let itemCtr = 0;
// Absolute paths of every regular file discovered by getFiles().
const fileList = [];
// Uploads the collected files, then reports progress over the socket.
const onComplete = async () => {
const folderName = nanoid(46);
await Promise.all(
fileList.map(filePath => {
// Preserve the directory layout relative to the uploaded root.
const fileName = path.relative(directoryPath, filePath);
const destination = `${ folderName }/${ fileName }`;
return storage
.bucket(bucketName)
.upload(filePath, { destination })
.then(
// NOTE(review): per-file failures are mapped into result objects, not
// rethrown — Promise.all never rejects here, so failed uploads are
// silently dropped.
uploadResp => ({ fileName: destination, status: uploadResp[0] }),
err => ({ fileName: destination, response: err })
);
})
);
if (socketInstance) socketInstance.emit('uploadProgress', {
message: `Added files to Google bucket`,
last: false,
part: false
});
return folderName;
};
// Depth-first directory walk that fills fileList.
// NOTE(review): `fs` must be the promise API (require('fs').promises) for
// these awaits to work — verify the module's import.
const getFiles = async directory => {
const items = await fs.readdir(directory);
dirCtr--;
itemCtr += items.length;
for(const item of items) {
const fullPath = path.join(directory, item);
const stat = await fs.stat(fullPath);
itemCtr--;
if (stat.isFile()) {
fileList.push(fullPath);
} else if (stat.isDirectory()) {
dirCtr++;
await getFiles(fullPath);
}
}
}
await getFiles(directoryPath);
return onComplete();
} catch (e) {
log.error(e.message);
throw new Error('Can\'t store folder.');
}
};

"InvalidParameterType" error for image files sent as blob to AWS Textract from external source

CURRENTLY
I am trying to get AWS Textract working for images supplied from a function in Google Scripts, that is sent to a Lambda resolver. I am following the documentation on https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Textract.html#analyzeDocument-property
My Google Scripts code:
// Question code (Apps Script): pushes a Drive file to the Lambda resolver.
function googleFunction(id) {
let file = DriveApp.getFileById(id);
console.log("File is a " + file.getMimeType());
// NOTE(review): JSON.stringify of an Apps Script Blob does not serialize the
// file's bytes — this is what the accepted fix replaces with getBytes().
let blob = file.getBlob();
let params = {
doc: blob,
};
var options = {
method: "PUT",
// NOTE(review): UrlFetchApp takes a `contentType` option; a top-level
// "Content-Type" key is not a documented option — verify against the docs.
"Content-Type": "application/json",
payload: JSON.stringify(params),
};
let response = UrlFetchApp.fetch("https://api-path/prod/resolver", options);
}
My Lambda resolver code:
"use strict";
const AWS = require("aws-sdk");
// Question code (Lambda resolver): parses the posted body and sends it to
// Textract for table analysis.
exports.handler = async (event) => {
let params = JSON.parse(event.body);
console.log("Parse as document...");
let textract = new AWS.Textract();
let doc = params["doc"];
let config = {
Document: {
// NOTE(review): `doc` here is whatever JSON.parse produced from the
// serialized Blob — not bytes — hence the InvalidParameterType error.
Bytes: doc,
FeatureTypes: ["TABLES"],
}
};
// NOTE(review): the handler is async but uses the callback form without
// awaiting anything — the Lambda can return before the callback ever runs.
textract.analyzeDocument(config, function (err, data) {
console.log("analyzing...");
if (err) {
console.log(err, err.stack);
}
// an error occurred
else {
// NOTE(review): JSON.stringfy is a typo (JSON.stringify) and would throw
// on the success path.
console.log("data:" + JSON.stringfy(data));
} // successful response
});
};
ISSUE
File is successfully sent from Google Scripts to Lambda, but the following error is returned:
"errorType": "InvalidParameterType",
"errorMessage": "Expected params.Document.Bytes to be a string, Buffer, Stream, Blob, or typed array object"
Questions
Is there a way of verifying what the format of the doc variable is, to ensure it meets AWS Textract's requirements?
Can anyone see a possible cause for the errors being returned?
NOTES
Textract works fine when the same file is uploaded to an S3 bucket, and supplied in the config using:
S3Object: { Bucket: 'bucket_name', Name: 'file_name' }
I have confirmed the file is a JPEG
Got it working with 2 changes:
added getBytes() to Google side code
added Buffer.from() to AWS side code
My Google Scripts code:
/**
 * Apps Script side: send the Drive file's bytes (a JSON-serializable number
 * array, via getBytes()) to the Lambda resolver.
 */
function googleFunction(id) {
  const file = DriveApp.getFileById(id);
  console.log("File is a " + file.getMimeType());
  // getBytes() is the fix: a Blob does not JSON.stringify into its contents,
  // but its byte array does.
  const blob = file.getBlob().getBytes();
  const params = {
    doc: blob,
  };
  const options = {
    method: "PUT",
    // BUG FIX: UrlFetchApp reads the `contentType` option; a top-level
    // "Content-Type" key is silently ignored.
    contentType: "application/json",
    payload: JSON.stringify(params),
  };
  // Return the response so callers can inspect it (previously assigned to an
  // unused local).
  return UrlFetchApp.fetch("https://api-path/prod/resolver", options);
}
My Lambda resolver code:
"use strict";
const AWS = require("aws-sdk");
exports.handler = async (event) => {
let params = JSON.parse(event.body);
console.log("Parse as document...");
let textract = new AWS.Textract();
let doc = params["doc"];
let config = {
Document: {
Bytes: Buffer.from(doc),
FeatureTypes: ["TABLES"],
}
};
textract.analyzeDocument(config, function (err, data) {
console.log("analyzing...");
if (err) {
console.log(err, err.stack);
}
// an error occurred
else {
console.log("data:" + JSON.stringfy(data));
} // successful response
});
};

calling imagemagick 'identify' with google cloud functions

I'm trying to use imagemagick in my Google Cloud function. The function is triggered by uploading a file to a Google Cloud Storage bucket. I have grander plans, but trying to get there one step at a time. Starting with identify.
// imagemagick_setup
const gm = require('gm').subClass({imageMagick: true});
const path = require('path');
const {Storage} = require('#google-cloud/storage');
const storage = new Storage();
// Cloud Function entry point: triggered by a Storage upload; kicks off an
// identify/resize pass on the uploaded object. (<more stuff> below is the
// author's placeholder, not runnable code.)
exports.processListingImage = (event, context) => {
const object = event.data || event; // Node 6: event.data === Node 8+: event
const filename = object.name;
console.log("Filename: ", filename);
const fullFileObject = storage.bucket(object.bucket).file(object.name);
console.log("Calling resize function");
// NOTE(review): this promise is neither returned nor awaited, so the runtime
// may tear the function down before resizeImage finishes.
let resizePromise = resizeImage( fullFileObject );
<more stuff>
};
/**
 * Download `file` from Cloud Storage to /tmp and read its metadata with
 * ImageMagick's identify.
 *
 * @param {Object} file  Cloud Storage File object (must have .name, .download).
 * @param {Array} sizes  Unused here; kept for the planned resize step.
 * @returns {Promise<Object>} resolves with identify's metadata result.
 */
function resizeImage( file, sizes ) {
  const tempLocalPath = `/tmp/${path.parse(file.name).base}`;
  return file
    .download({destination: tempLocalPath})
    .catch(err => {
      console.error('Failed to download file.', err);
      return Promise.reject(err);
    })
    .then( () => {
      // File now downloaded; read its metadata.
      return new Promise((resolve, reject) => {
        gm( tempLocalPath )
          .identify( (err, result) => {
            if (err)
            {
              console.log("Error reading metadata: ", err);
              // BUG FIX: the promise previously never settled on error,
              // leaving callers hanging — propagate the failure.
              reject(err);
            }
            else
            {
              console.log("Well, there seems to be metadata: ", result);
              // BUG FIX: resolve with the metadata instead of never settling.
              resolve(result);
            }
          });
      });
    });
} // end resizeImage()
The local file path is: "/tmp/andy-test.raw". But when the identify function runs, I get an error:
identify-im6.q16: unable to open image `/tmp/magick-12MgKrSna0qp9U.ppm': No such file or directory # error/blob.c/OpenBlob/2701.
Why is identify looking for a different file than what I (believe) I told it to look for? Eventually, I am going to resize the image and write it back out to Cloud Storage, but I wanted to get identify to run first..
Mark had the right answer - if I upload a jpg file, it works. Onto the next challenge.

Download file from Amazon S3 to lambda and extract it [duplicate]

Good day guys.
I have a simple question: How do I download an image from a S3 bucket to Lambda function temp folder for processing? Basically, I need to attach it to an email (this I can do when testing locally).
I have tried:
s3.download_file(bucket, key, '/tmp/image.png')
as well as (not sure which parameters will help me get the job done):
// Question code: fetches the object, but only reports its ContentType.
s3.getObject(params, (err, data) => {
if (err) {
console.log(err);
const message = `Error getting object ${key} from bucket ${bucket}.`;
console.log(message);
callback(message);
} else {
console.log('CONTENT TYPE:', data.ContentType);
// NOTE(review): data.Body (a Buffer) holds the image bytes — it is never
// written anywhere here, which is why nothing lands in /tmp.
callback(null, data.ContentType);
}
});
Like I said, simple question, which for some reason I can't find a solution for.
Thanks!
You can get the image using the aws s3 api, then write it to the tmp folder using fs.
// Fetch the object from S3, then persist its body to /tmp via fs.
var params = { Bucket: "BUCKET_NAME", Key: "OBJECT_KEY" };
s3.getObject(params, (getErr, data) => {
  // Bail out early if the fetch itself failed.
  if (getErr) {
    console.error(getErr.code, "-", getErr.message);
    return callback(getErr);
  }
  fs.writeFile('/tmp/filename', data.Body, (writeErr) => {
    if (writeErr) {
      console.log(writeErr.code, "-", writeErr.message);
    }
    // Invoked on success too (writeErr is undefined), matching Node's
    // error-first callback convention.
    return callback(writeErr);
  });
});
Out of curiosity, why do you need to write the file in order to attach it? It seems redundant to write the file to disk just so you can then read it back from disk.
If you're writing it straight to the filesystem you can also do it with streams. It may be a little faster/more memory friendly, especially in a memory-constrained environment like Lambda.
// Stream the S3 object straight into a file under /tmp — avoids buffering
// the whole object in memory.
var fs = require('fs');
var path = require('path');
var downloadParams = {
  Bucket: "mybucket",
  Key: "image.png"
};
var destinationPath = path.join('/tmp', 'downloadedimage.png');
var destinationStream = fs.createWriteStream(destinationPath);
s3.getObject(downloadParams).createReadStream().pipe(destinationStream);
// Using NodeJS version 10.0 or later and promises
const fsPromise = require('fs').promises;
try {
  const getParams = {
    Bucket: 's3Bucket',
    Key: 'file.txt',
  };
  // Await the object, then write just its Body to the temp file.
  const { Body } = await s3.getObject(getParams).promise();
  await fsPromise.writeFile('/tmp/file.txt', Body);
} catch(err) {
  console.log(err);
}
I was having the same problem, and the issue was that I was using Runtime.NODEJS_12_X in my AWS lambda.
When I switched over to NODEJS_14_X it started working for me :').
Also
The /tmp is required. It will directly write to /tmp/file.ext.

Google Storage and Big files

I'm using Google Cloud Functions and bucket triggers to :
Push Video on Cloud Storage bucket
Extract audio from video file with ffmpeg and store it locally
Save the audio to another bucket
Extract the text from the audio
Save it on another bucket
Translate the txt file automatically
Everything is doing fine if the file is under 5MB, but after the file is never uploaded to the other bucket. I have tried to upload it on the same bucket I get the video from but no use.
I have checked the google storage nodejs code and the code is doing a stream automatically after 5Mb of upload but it never ends.
Has anybody run into this issue before?
My code is doing :
// Imports the Google Cloud client library
const storage = require('#google-cloud/storage')();
const ffmpegPath = require('#ffmpeg-installer/ffmpeg').path
const ffmpeg = require('fluent-ffmpeg');
const path = require('path');
const fs = require('fs');
const rimraf = require('rimraf'); // rimraf directly
// Binaries for ffmpeg
ffmpeg.setFfmpegPath(ffmpegPath);
/**
* Triggered from a message on a Cloud Storage bucket.
*
* #param {!Object} event The Cloud Functions event.
* #param {!Function} The callback function.
*/
// Question code (Cloud Storage trigger): download the uploaded video to /tmp,
// extract a FLAC audio track with ffmpeg, upload the result, then clean up.
exports.processFile = function(event, callback) {
console.log(JSON.stringify(event));
console.log('Processing file: ' + event.data.name);
const bucket = storage.bucket(event.data.bucket);
// NOTE(review): bucketAudio is declared as the intended destination but is
// never used — the FLAC below is uploaded back to the source bucket.
const bucketAudio = storage.bucket('distant-bucket');
const videoFile = bucket.file(event.data.name);
const destination = '/tmp/' + path.basename( event.data.name );
// NOTE(review): wrapping an already-promise-returning download() in
// new Promise is unnecessary, and download() fulfills with a response, not
// an error — the parameter name `error` and the `error.length` check are
// misleading; verify this branch against the GCS Node docs.
return new Promise((resolve, reject) => { return videoFile.download({
destination: destination
}).then((error) => {
if (error.length > 0) {
reject(error);
} else {
resolve(destination);
}
})
})
.then((fileinfo) => {
const filename = destination.replace( '.mp4', '.flac' );
// Transcode: drop the video stream (-vn) and emit mono FLAC.
return new Promise((resolve, reject) => {
ffmpeg(fileinfo)
.videoBitrate(19200)
.inputOptions('-vn')
.format('flac')
.audioChannels(1)
// NOTE(review): 44100 looks like a sample rate; fluent-ffmpeg's
// audioBitrate() expects kbps — confirm intent.
.audioBitrate(44100)
.output(filename)
.on('end', function() {
console.log('extracted audio : '+event.data.name+" to "+filename);
resolve(filename);
})
.on('error', function(err, stdout, stderr) {
reject(err);
})
.run()
});
})
.then((flacOutput) => {
console.log('save file '+flacOutput);
const stats = fs.statSync(flacOutput)
console.log(stats.size / 1000000.0);
// TODO: upload FLAC file to Cloud Storage
return bucket
.upload( flacOutput ).catch((err) => {
console.error('Failed to upload flac.', err);
return Promise.reject(err);
});
}).then(() => {
console.log(`Audio uploaded.`);
// Delete the temporary file.
return new Promise((resolve, reject) => {
// NOTE(review): fs.unlink without a callback throws on Node 10+; this call
// also targets the source video, while the next deletes the FLAC.
fs.unlink(destination);
fs.unlink(destination.replace( '.mp4', '.flac' ), (err) => {
if (err) {
reject(err);
} else {
resolve();
}
});
});
});
};