Send file from GCP bucket to 3rd party pdf converter - google-cloud-platform

I am trying to adapt a Qwiklabs tutorial to use pdfCrowd rather than LibreOffice.
The service works by downloading a file from one GCP storage bucket for 'uploads', processing it, uploading the result to another bucket for 'processed' files, then deleting the original from the 'uploads' bucket.
This is the handler that downloads, sends the file off for conversion, uploads the result, then deletes the original. It is the code from the Qwiklabs tutorial and it works great.
app.post('/', async (req, res) => {
  try {
    const file = decodeBase64Json(req.body.message.data);
    await downloadFile(file.bucket, file.name);
    const pdfFileName = await convertFile(file.name);
    await uploadFile(process.env.PDF_BUCKET, pdfFileName);
    await deleteFile(file.bucket, file.name);
  }
  catch (ex) {
    console.log(`Error: ${ex}`);
  }
  res.set('Content-Type', 'text/plain');
  res.send('\n\nOK\n\n');
})
The original convertFile function is:
async function convertFile(fileName) {
  const cmd = 'libreoffice --headless --convert-to pdf --outdir /tmp ' +
    `"/tmp/${fileName}"`;
  console.log(cmd);
  const { stdout, stderr } = await exec(cmd);
  if (stderr) {
    throw stderr;
  }
  console.log(stdout);
  const pdfFileName = fileName.replace(/\.\w+$/, '.pdf');
  return pdfFileName;
}
The problem comes when I change my convertFile function: LibreOffice accepts file.name, but pdfCrowd wants the file path.
So I changed the function to use pdfCrowd:
async function convertFile(fileName) {
  // create the API client instance
  const _newPdfPath = `/tmp/${fileName.replace(/\.\w+$/, '.pdf')}`;
  const client = new pdfcrowd.HtmlToPdfClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");
  // run the conversion and write the result to a file
  client.convertFileToFile(`/tmp/${fileName}`, _newPdfPath, function (err, fileName) {
    if (err)
      return console.error("Pdfcrowd Error: " + err);
    console.log("Success: the file was created " + fileName);
  });
  const pdfFileName = fileName.replace(/\.\w+$/, '.pdf');
  return pdfFileName;
}
Now the PDF conversion reports success, but only after a notification that there is no such file or directory at the 'out' path I passed to convertFileToFile. The file specified by _newPdfPath doesn't exist when the rest of the pipeline looks for it:
Error: Error: ENOENT: no such file or directory, stat '/tmp/mynew.pdf'
Success: the file was created /tmp/hello (31).pdf
The pdfCrowd call should be creating a file in the /tmp directory, but is my async function actually waiting for that file to be created?
My complete code is:
const {promisify} = require('util');
const {Storage} = require('@google-cloud/storage');
const exec = promisify(require('child_process').exec);
const storage = new Storage();
const express = require('express');
const bodyParser = require('body-parser');
const app = express();
const pdfcrowd = require("pdfcrowd");
app.use(bodyParser.json());
const port = process.env.PORT || 8080;
app.listen(port, () => {
  console.log('Listening on port', port);
});
app.post('/', async (req, res) => {
  try {
    const file = decodeBase64Json(req.body.message.data);
    // console.log("FILE=========", file, req.body.message.data)
    await downloadFile(file.bucket, file.name);
    const pdfFileName = await convertFile(file.name);
    await uploadFile(process.env.PDF_BUCKET, pdfFileName);
    await deleteFile(file.bucket, file.name);
  }
  catch (ex) {
    console.log(`Error: ${ex}`);
  }
  res.set('Content-Type', 'text/plain');
  res.send('\n\nOK\n\n');
})
function decodeBase64Json(data) {
  return JSON.parse(Buffer.from(data, 'base64').toString());
}
async function downloadFile(bucketName, fileName) {
  const options = {destination: `/tmp/${fileName}`};
  await storage.bucket(bucketName).file(fileName).download(options);
}
async function convertFile(fileName) {
  // create the API client instance
  const _newPdfPath = `/tmp/${fileName.replace(/\.\w+$/, '.pdf')}`;
  const client = new pdfcrowd.HtmlToPdfClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");
  // run the conversion and write the result to a file
  client.convertFileToFile(`/tmp/${fileName}`, _newPdfPath, function (err, fileName) {
    if (err)
      return console.error("Pdfcrowd Error: " + err);
    console.log("Success: the file was created " + fileName);
  });
  const pdfFileName = fileName.replace(/\.\w+$/, '.pdf');
  return pdfFileName;
}
async function deleteFile(bucketName, fileName) {
  await storage.bucket(bucketName).file(fileName).delete();
}
async function uploadFile(bucketName, fileName) {
  await storage.bucket(bucketName).upload(`/tmp/${fileName}`);
}

The problem is that your convertFile function returns before the convertFileToFile callback is invoked.
I'd pass success and error callbacks into convertFile, e.g.:
app.post('/', async (req, res) => {
  try {
    const file = decodeBase64Json(req.body.message.data);
    // console.log("FILE=========", file, req.body.message.data)
    await downloadFile(file.bucket, file.name);
    let on_pdf_done = async function(pdfFileName) {
      await uploadFile(process.env.PDF_BUCKET, pdfFileName);
      await deleteFile(file.bucket, file.name);
      res.set('Content-Type', 'text/plain');
      res.send('\n\nOK\n\n');
    };
    let on_pdf_fail = function() {
      res.set('Content-Type', 'text/plain');
      res.send('\n\nERROR\n\n');
    };
    convertFile(file.name, on_pdf_done, on_pdf_fail);
  }
  catch (ex) {
    console.log(`Error: ${ex}`);
  }
})
function convertFile(fileName, success_callback, fail_callback) {
  // create the API client instance
  const _newPdfPath = `/tmp/${fileName.replace(/\.\w+$/, '.pdf')}`;
  const client = new pdfcrowd.HtmlToPdfClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");
  // run the conversion and write the result to a file
  client.convertFileToFile(`/tmp/${fileName}`, _newPdfPath, function (err, fileName) {
    if (err)
      return fail_callback();
    success_callback(fileName.replace(/\.\w+$/, '.pdf'));
  });
}
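Alternatively, if you want to keep the original async/await flow in the route handler, you can wrap the callback in a Promise. A minimal sketch, assuming the same pdfcrowd client and /tmp layout as in the question:
function convertFile(fileName) {
  return new Promise((resolve, reject) => {
    const pdfFileName = fileName.replace(/\.\w+$/, '.pdf');
    const client = new pdfcrowd.HtmlToPdfClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");
    // settle the promise only once pdfcrowd has written the output file
    client.convertFileToFile(`/tmp/${fileName}`, `/tmp/${pdfFileName}`, (err, outputPath) => {
      if (err) return reject(err);
      console.log("Success: the file was created " + outputPath);
      resolve(pdfFileName); // bare file name, so uploadFile can prepend /tmp/
    });
  });
}
With this shape, the original `const pdfFileName = await convertFile(file.name);` line works unchanged.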

Related

AWS: API gateway 502 error randomly with Runtime segmentation faults

I am using AWS and have an API which is called via API Gateway, which invokes a Node.js Lambda function.
Often, but seemingly at random, I get 502 responses, yet when I immediately retry the exact same request I get a normal response. So I decided to search the logs to see if I could find any issues.
The following is what I found for one of the requests:
RequestId: xxxxx Error: Runtime exited with error: signal: segmentation fault Runtime.ExitError
as well as:
xxxx ERROR Uncaught Exception
{
  "errorType": "Error",
  "errorMessage": "Quit inactivity timeout",
  "code": "PROTOCOL_SEQUENCE_TIMEOUT",
  "fatal": true,
  "timeout": 30000,
  "stack": [
    "Error: Quit inactivity timeout",
    " at Quit.<anonymous> (/opt/nodejs/node_modules/mysql/lib/protocol/Protocol.js:160:17)",
    " at Quit.emit (node:events:527:28)",
    " at Quit.emit (node:domain:475:12)",
    " at Quit._onTimeout (/opt/nodejs/node_modules/mysql/lib/protocol/sequences/Sequence.js:124:8)",
    " at Timer._onTimeout (/opt/nodejs/node_modules/mysql/lib/protocol/Timer.js:32:23)",
    " at listOnTimeout (node:internal/timers:559:17)",
    " at processTimers (node:internal/timers:502:7)"
  ]
}
The following is my reusable SQL connector:
const CustomSecret = require('../secrets/CustomSecret');
const mysql = require("mysql");

module.exports = class MySqlConnect {
  databaseCredObject;

  constructor() {
  }

  async queryDb(sql, args) {
    if (!this.databaseCredObject) {
      await this.fetchSecret();
    }
    let connection = null;
    const connection_settings = {
      host: this.databaseCredObject.host,
      user: this.databaseCredObject.username,
      password: this.databaseCredObject.password,
      database: 'logbook'
    };
    connection = mysql.createConnection(connection_settings);
    return new Promise((resolve, reject) => {
      connection.connect(function (err) {
        if (err) {
          console.log('error when connecting to db:', err);
        } else {
          console.log('Connected');
          connection.query(sql, args, function (err, result) {
            connection.end();
            if (err) {
              return reject(err);
            }
            return resolve(result);
          });
        }
      });
    });
  }

  async fetchSecret() {
    const databaseCredString = await CustomSecret.getSecret('secretname', 'eu-west-2');
    this.databaseCredObject = JSON.parse(databaseCredString);
  }
}
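As an aside, note that queryDb's connection-error branch logs but never rejects, so the returned Promise stays pending forever if connect fails, which can leave an awaiting handler hanging until the Lambda times out. A minimal sketch of that promise with the error branch settled (my assumption about a contributing factor, not a confirmed fix):
return new Promise((resolve, reject) => {
  connection.connect(function (err) {
    if (err) {
      console.log('error when connecting to db:', err);
      return reject(err); // settle the promise so callers don't hang
    }
    connection.query(sql, args, function (err, result) {
      connection.end();
      if (err) return reject(err);
      resolve(result);
    });
  });
});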
Finally, this is an example of my Lambda function (shortened version):
const {compress, decompress} = require("compress-json");
const MySqlConnect = require("customPackagePath/MySqlConnect");
const CustomJwt = require("customPackagePath/CustomJwt");
const AWS = require("aws-sdk");
const warmer = require("lambda-warmer");

exports.handler = async (event) => {
  if (await warmer(event)) {
    console.log("Warming");
    return 'warmed';
  }
  let responseCode = 200;
  let response = {};
  response.headers = {
    'Content-Type': 'application/json',
    'Access-Control-Allow-Origin': '*'
  };
  const bodyContent = JSON.parse(event.body);
  const dataType = bodyContent.dataType;
  const webAuth = new CustomJwt();
  const decodedToken = webAuth.decodeToken(event.headers.Authorization);
  const userUUID = decodedToken['uuid'];
  const connection = new MySqlConnect();
  let sql;
  switch (dataType) {
    case 'userPreferences':
      sql = await connection.queryDb('SELECT * FROM user WHERE uuid = ?', [userUUID]);
      break;
  }
  let data = [];
  for (let index in sql) {
    data.push(JSON.parse(JSON.stringify(sql[index])));
  }
  const returnData = {
    data
  };
  let compressed = compress(returnData);
  response.statusCode = responseCode;
  response.body = JSON.stringify(compressed);
  return response;
};
Now, I am new to infrastructure stuff, but it seems to me that once a Lambda function has been called, it's not closing or ending correctly. I am also using lambda-warmer to keep the functions warm, as seen in the Lambda code, and I'm not sure whether that is causing any issues.
I'd appreciate any help with this, as I can't seem to figure it out.
Thanks
After doing more research I decided to change my Lambda function's signature to:
exports.handler = async (event, context, callback) => {
and the return to:
callback(null, response);
Ever since, the issue seems to have been resolved. I am not entirely sure why, but for now it's looking good :)
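For context, a condensed sketch of what the changed handler looks like end to end (the body is trimmed to the pieces shown above; everything else is assumed unchanged):
exports.handler = async (event, context, callback) => {
  // ... same warming, auth, query, and compression logic as above ...
  const response = {
    statusCode: 200,
    headers: { 'Content-Type': 'application/json', 'Access-Control-Allow-Origin': '*' },
    body: JSON.stringify({ ok: true }) // placeholder payload
  };
  // hand the result to the runtime through the callback instead of a plain return
  callback(null, response);
};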

Cannot POST when sending request in Postman

I am having a small issue. I am trying to send a request in Postman, but I receive "Cannot POST /api/logTemp/1/25".
Here is my app.js:
const express = require('express')
const bodyParser = require('body-parser')
const cors = require('cors')
const fs = require('fs')
const path = require('path')
const morgan = require('morgan')
const router = require('./routes/route')

const app = express()
app.use(cors())
// parse application/x-www-form-urlencoded
app.use(bodyParser.urlencoded({ extended: false }))
// parse application/json
app.use(bodyParser.json())
app.use(morgan('dev'))
// create a write stream (in append mode)
var accessLogStream = fs.createWriteStream(path.join(__dirname, '/logs/access.log'), {flags: 'a'})
// setup the logger
app.use(morgan('combined', {stream: accessLogStream}))
app.use(router)

app.get('/', (req, res) => {
  res.send('Hello World!')
})

const port = 3000
//app.listen(process.env.PORT || port, (err) => {
app.listen(port, () => {
  console.log('Server started running on :' + port)
})
and here is my controller file:
const { getEnabledCategories } = require('trace_events');
const mysql = require('../database/db')

class MainController {
  async logTemp(req, res) {
    console.log(req.params.temperature)
    console.log(req.params.deviceID)
    if (req.params.deviceID != null && req.params.temperature != null) {
      let deviceID = req.params.deviceID
      let temperature = req.params.temperature;
      var sql = `insert into log_temp (log_date, device_id, temp) values (now(), ${deviceID}, ${temperature});`
      mysql.query(sql, (error, data, fields) => {
        if (error) {
          res.status(500)
          res.send(error.message)
        } else {
          console.log(data)
          res.json({
            status: 200,
            message: "Log uploaded successfully",
            affectedRows: data.affectedRows
          })
        }
      })
    } else {
      res.send('Please fill in all the fields!')
    }
  }

  async getLogs(req, res) {
    console.log("Get Logs")
    console.log(req.params.deviceID)
    if (req.params.deviceID != null) {
      let deviceID = req.params.deviceID;
      var sql = `SELECT * FROM log_temp where device_id=${deviceID}`
      mysql.query(sql, (error, data, fields) => {
        if (error) {
          res.status(500)
          res.send(error.message)
        } else {
          console.log(data)
          res.json({
            data
          })
        }
      })
    }
  }
}

const tempController = new MainController()
module.exports = tempController;
The code above was made in Visual Studio. It is odd because getLogs works but logTemp does not. What I intend logTemp to do is add a new value (the temperature) to the MySQL database. The connection to the database works just fine, as does localhost. If you need any more info in order to help me find a solution, please let me know and I will be more than happy to provide it. Also, I'm sorry for any grammar errors; English is not my first language :)
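Since routes/route.js isn't shown, a likely culprit is that the router has no POST route matching that URL. A sketch of what the route file would need for POST /api/logTemp/1/25 to reach the controller; the controller path and route strings here are assumptions based on the code above:
// routes/route.js (hypothetical)
const express = require('express')
const router = express.Router()
const tempController = require('../controllers/MainController') // assumed controller path

// POST /api/logTemp/1/25 -> req.params.deviceID = 1, req.params.temperature = 25
router.post('/api/logTemp/:deviceID/:temperature', tempController.logTemp)
// the GET route that reportedly works
router.get('/api/getLogs/:deviceID', tempController.getLogs)

module.exports = router
If the existing file declares logTemp as a GET route, or without both :deviceID and :temperature parameters, Express responds with exactly this "Cannot POST" message.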

Can't upload a folder with a large number of files to Google Storage. I am using "@ffmpeg-installer/ffmpeg" and @google-cloud/storage

I upload files to Google Storage using "@ffmpeg-installer/ffmpeg" and @google-cloud/storage in my Node.js app.
Step 1: files are written to the local filesystem in child processes, one process for each resolution (six in total).
Step 2: encoding (converting to a stream).
Step 3: upload to Google Storage.
I use the "Upload a directory to a bucket" sample in order to send the video from the client to the Google Cloud Storage bucket.
This approach works fine only with small videos.
For example, when I upload a video with a duration of one hour, it is split into chunks and in total I get more than three thousand files. The problem occurs when there are more than 1500 files.
So effectively I upload a folder with a large number of files, but not all of them end up in the cloud.
Maybe someone has had a similar problem and can help fix it.
const uploadFolder = async (bucketName, directoryPath, socketInstance) => {
  try {
    let dirCtr = 1;
    let itemCtr = 0;
    const fileList = [];
    const onComplete = async () => {
      const folderName = nanoid(46);
      await Promise.all(
        fileList.map(filePath => {
          const fileName = path.relative(directoryPath, filePath);
          const destination = `${ folderName }/${ fileName }`;
          return storage
            .bucket(bucketName)
            .upload(filePath, { destination })
            .then(
              uploadResp => ({ fileName: destination, status: uploadResp[0] }),
              err => ({ fileName: destination, response: err })
            );
        })
      );
      if (socketInstance) socketInstance.emit('uploadProgress', {
        message: `Added files to Google bucket`,
        last: false,
        part: false
      });
      return folderName;
    };
    const getFiles = async directory => {
      const items = await fs.readdir(directory);
      dirCtr--;
      itemCtr += items.length;
      for (const item of items) {
        const fullPath = path.join(directory, item);
        const stat = await fs.stat(fullPath);
        itemCtr--;
        if (stat.isFile()) {
          fileList.push(fullPath);
        } else if (stat.isDirectory()) {
          dirCtr++;
          await getFiles(fullPath);
        }
      }
    }
    await getFiles(directoryPath);
    return onComplete();
  } catch (e) {
    log.error(e.message);
    throw new Error('Can\'t store folder.');
  }
};
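One thing that stands out is that onComplete fires every upload at once with Promise.all, so a few thousand files means a few thousand concurrent requests. A sketch of the same step with uploads processed in fixed-size batches, so only a bounded number are in flight at a time (the batch size of 100 is an arbitrary assumption):
const uploadInBatches = async (bucketName, folderName, fileList, directoryPath, batchSize = 100) => {
  const results = [];
  for (let i = 0; i < fileList.length; i += batchSize) {
    const batch = fileList.slice(i, i + batchSize);
    // wait for this batch to settle before starting the next one
    const settled = await Promise.all(
      batch.map(filePath => {
        const destination = `${folderName}/${path.relative(directoryPath, filePath)}`;
        return storage
          .bucket(bucketName)
          .upload(filePath, { destination })
          .then(
            uploadResp => ({ fileName: destination, status: uploadResp[0] }),
            err => ({ fileName: destination, response: err })
          );
      })
    );
    results.push(...settled);
  }
  return results;
};
The per-file result objects also make it easy to log exactly which uploads failed instead of silently losing files.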

calling imagemagick 'identify' with google cloud functions

I'm trying to use ImageMagick in my Google Cloud Function. The function is triggered by uploading a file to a Google Cloud Storage bucket. I have grander plans, but I'm trying to get there one step at a time, starting with identify.
// imagemagick_setup
const gm = require('gm').subClass({imageMagick: true});
const path = require('path');
const {Storage} = require('@google-cloud/storage');
const storage = new Storage();

exports.processListingImage = (event, context) => {
  const object = event.data || event; // Node 6: event.data === Node 8+: event
  const filename = object.name;
  console.log("Filename: ", filename);
  const fullFileObject = storage.bucket(object.bucket).file(object.name);
  console.log("Calling resize function");
  let resizePromise = resizeImage( fullFileObject );
  <more stuff>
};
function resizeImage( file, sizes ) {
  const tempLocalPath = `/tmp/${path.parse(file.name).base}`;
  return file
    .download({destination: tempLocalPath})
    .catch(err => {
      console.error('Failed to download file.', err);
      return Promise.reject(err);
    })
    .then( () => {
      // file now downloaded, get its metadata
      return new Promise((resolve, reject) => {
        gm( tempLocalPath )
          .identify( (err, result) => {
            if (err) {
              console.log("Error reading metadata: ", err);
            } else {
              console.log("Well, there seems to be metadata: ", result);
            }
          });
      });
    });
} // end resizeImage()
The local file path is "/tmp/andy-test.raw", but when the identify function runs, I get an error:
identify-im6.q16: unable to open image `/tmp/magick-12MgKrSna0qp9U.ppm': No such file or directory # error/blob.c/OpenBlob/2701.
Why is identify looking for a different file than the one I believe I told it to look for? Eventually I am going to resize the image and write it back out to Cloud Storage, but I wanted to get identify running first.
Mark had the right answer - if I upload a jpg file, it works. Onto the next challenge.
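As an aside, the inner Promise in resizeImage above never calls resolve or reject, so anything awaiting resizePromise will hang even when identify succeeds. A minimal sketch of that step with the promise settled (an observation about the posted code, separate from the accepted answer):
// the identify step from resizeImage, with the promise settled
return new Promise((resolve, reject) => {
  gm(tempLocalPath).identify((err, result) => {
    if (err) {
      console.log("Error reading metadata: ", err);
      return reject(err); // propagate the failure to callers
    }
    console.log("Well, there seems to be metadata: ", result);
    resolve(result); // hand the metadata back to the caller
  });
});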

Google Storage and Big files

I'm using Google Cloud Functions and bucket triggers to:
Push Video on Cloud Storage bucket
Extract audio from video file with ffmpeg and store it locally
Save the audio to another bucket
Extract the text from the audio
Save it on another bucket
Translate the txt file automatically
Everything works fine if the file is under 5 MB, but beyond that the file is never uploaded to the other bucket. I have tried uploading it to the same bucket I get the video from, but no luck.
I have checked the google-cloud/storage Node.js code: it automatically switches to a streaming upload above 5 MB, but that upload never finishes.
Has anybody already had this issue?
My code does the following:
// Imports the Google Cloud client library
const storage = require('@google-cloud/storage')();
const ffmpegPath = require('@ffmpeg-installer/ffmpeg').path
const ffmpeg = require('fluent-ffmpeg');
const path = require('path');
const fs = require('fs');
const rimraf = require('rimraf'); // rimraf directly

// Binaries for ffmpeg
ffmpeg.setFfmpegPath(ffmpegPath);

/**
 * Triggered from a message on a Cloud Storage bucket.
 *
 * @param {!Object} event The Cloud Functions event.
 * @param {!Function} callback The callback function.
 */
exports.processFile = function(event, callback) {
  console.log(JSON.stringify(event));
  console.log('Processing file: ' + event.data.name);
  const bucket = storage.bucket(event.data.bucket);
  const bucketAudio = storage.bucket('distant-bucket');
  const videoFile = bucket.file(event.data.name);
  const destination = '/tmp/' + path.basename( event.data.name );
  return new Promise((resolve, reject) => {
    return videoFile.download({
      destination: destination
    }).then((error) => {
      if (error.length > 0) {
        reject(error);
      } else {
        resolve(destination);
      }
    })
  })
  .then((fileinfo) => {
    const filename = destination.replace( '.mp4', '.flac' );
    return new Promise((resolve, reject) => {
      ffmpeg(fileinfo)
        .videoBitrate(19200)
        .inputOptions('-vn')
        .format('flac')
        .audioChannels(1)
        .audioBitrate(44100)
        .output(filename)
        .on('end', function() {
          console.log('extracted audio : '+event.data.name+" to "+filename);
          resolve(filename);
        })
        .on('error', function(err, stdout, stderr) {
          reject(err);
        })
        .run()
    });
  })
  .then((flacOutput) => {
    console.log('save file '+flacOutput);
    const stats = fs.statSync(flacOutput)
    console.log(stats.size / 1000000.0);
    // TODO: upload FLAC file to Cloud Storage
    return bucket
      .upload( flacOutput ).catch((err) => {
        console.error('Failed to upload flac.', err);
        return Promise.reject(err);
      });
  }).then(() => {
    console.log(`Audio uploaded.`);
    // Delete the temporary file.
    return new Promise((resolve, reject) => {
      fs.unlink(destination);
      fs.unlink(destination.replace( '.mp4', '.flac' ), (err) => {
        if (err) {
          reject(err);
        } else {
          resolve();
        }
      });
    });
  });
};
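For what it's worth, bucket.upload switches to the library's resumable-upload path by default once a file passes roughly 5 MB, and a commonly suggested workaround in Cloud Functions is to opt out of resumable uploads for the call. A minimal sketch of that tweak against the upload step above (worth verifying against your @google-cloud/storage version):
// upload step with resumable uploads disabled
return bucket
  .upload(flacOutput, { resumable: false })
  .catch((err) => {
    console.error('Failed to upload flac.', err);
    return Promise.reject(err);
  });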