Replacing Google Doc text with Spreadsheet Data using RegEx - regex

I am making a merge code to take data from my spreadsheet and populate merge tags in a Google Doc. The part of the code I am unable to write correctly is the part that writes the tags back to the Google Doc. I have been able to locate the tags but the code doesn't replace them.
Here is what I've written so far.
function mergeApplication() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getSheetByName("Merge Data");
var range = sheet.getActiveRange();
var formSheet = ss.getSheetByName("Form Responses");
var lastRow = formSheet.getLastRow();
var lastColumn = sheet.getMaxColumns();
function checkAndComplete() {
var urlColumn = lastColumn;
var checkColumn = (urlColumn - 1);
var checkRange = sheet.getRange(2, checkColumn, (lastRow - 1), 1);
var check = checkRange.getBackgrounds();
var red = "#ff0404";
var yellow = "#ffec0a";
var green = "#3bec3b";
for (var i = 0; i < check.length; i++) {
if (check[i] == green) {
continue;
} else {
var statusCell = sheet.getRange((i+2), checkColumn, 1, 1);
var urlCell = sheet.getRange((i+2), urlColumn, 1, 1);
var dataRow = sheet.getRange((i+2), 1, 1, (lastColumn - 2));
function mergeTasks() {
function docCreator() {
// var templateConditionRange = sheet.getRange((i+2), column);
// var templateConditionCheck = templateConditionRange.getValues();
var docTemplate1 = DriveApp.getFileById(id);
// var docTemplate2 = DriveApp.getFileById(id);
// var docTemplate3 = DriveApp.getFileById(id);
var folderDestination = DriveApp.getFolderById(id);
var clientName = sheet.getRange((i+2), 3).getValue();
var date = sheet.getRange((i+2), 1).getValue();
// if (templateConditionCheck[i] == "") {
var docToUse = docTemplate1;
// }
// if (templateConditionCheck[i] == "") {
// var docToUse = docTemplate2;
// }
// if (templateConditionCheck[i] == "") {
// var docToUse = docTemplate3;
// }
var docName = "Merge Tester Doc for " + clientName + " [" + date + "]";
var docCopy = docToUse.makeCopy(docName, folderDestination);
var docId = docCopy.getId();
var docURL = DriveApp.getFileById(docId).getUrl();
var docToSend = DriveApp.getFileById(docId);
var docBody = DocumentApp.openById(docId).getBody().getText();
function tagReplace() {
var taggedArray = [docBody.match(/\<{2}[\w\d\S]+\>{2}/g)];
var headerArray = [sheet.getRange(1, 1, 1, (lastColumn - 2)).getValues()];
var dataArray = [dataRow.getValues()];
var strippedArray = [];
Logger.log("The preliminary length of taggedArray is " + taggedArray.length);
Logger.log(taggedArray);
function tagStrip() {
for (var t = 0; t < taggedArray.length; t++) {
var strippedString = taggedArray[t].slice(2, -3).toString();
strippedArray.push(strippedString);
Logger.log("The current strippedArray length is " + strippedArray.length);
}
Logger.log("The final strippedArray length is " + strippedArray.length);
Logger.log("The final taggedArray length is " + taggedArray.length);
Logger.log("The final, completed strippedArray is " + strippedArray);
}
function dataMatch() {
for (var s = 0; s < strippedArray.length;) {
for (var h = 0; h < headerArray.length;) {
if (strippedArray[s] == headerArray[h]) {
docBody.replaceText(taggedArray[s].String(), dataArray[h].String());
h=0;
s++;
} else {
h++;
}
}
}
}
tagStrip;
dataMatch;
}
function emailCreator() {
var emailTag = sheet.getRange((i+2), (urlColumn - 2)).getValue();
var emailBody = HtmlService.createHtmlOutputFromFile("Email Template").getContent();
var personalizers = clientName + " [" + date + "]";
var subject = "Merge Tester Email for " + personalizers;
MailApp.sendEmail(emailTag, subject, emailBody, {
name: "Christopher Anderson",
attachments: [docToSend],
html: emailBody,
});
}
tagReplace();
statusCell.setBackground(yellow);
emailCreator();
urlCell.setValue(docURL)
}
statusCell.setBackground(red);
docCreator();
statusCell.setBackground(green);
}
mergeTasks();
}
}
}
checkAndComplete();
}
The problem section is here:
function tagReplace() {
var taggedArray = [docBody.match(/\<{2}[\w\d\S]+\>{2}/g)];
var headerArray = [sheet.getRange(1, 1, 1, (lastColumn - 2)).getValues()];
var dataArray = [dataRow.getValues()];
var strippedArray = new Array();
Logger.log("The preliminary length of taggedArray is " + taggedArray.length);
Logger.log(taggedArray);
function tagStrip() {
for (var t = 0; t < taggedArray.length; t++) {
var strippedString = taggedArray[t].slice(2, -3).toString();
strippedArray.push(strippedString);
Logger.log("The current strippedArray length is " + strippedArray.length);
}
Logger.log("The final strippedArray length is " + strippedArray.length);
Logger.log("The final taggedArray length is " + taggedArray.length);
Logger.log("The final, completed strippedArray is " + strippedArray);
}
function dataMatch() {
for (var s = 0; s < strippedArray.length;) {
for (var h = 0; h < headerArray.length;) {
if (strippedArray[s] == headerArray[h]) {
docBody.replaceText(taggedArray[s].String(), dataArray[h].String());
h=0;
s++;
} else {
h++;
}
}
}
}
tagStrip;
dataMatch;
}
It doesn't even log anything having to do with strippedArray.
It seems to be skipping over that section entirely.
Am I using the correct method of completing this task and/or is there a simpler way of doing it?
It's worth mentioning that my tags in the doc have 2 "<>" around them. That's the reason for my RegEx looking how it does.
Also, when logging the .length of taggedArray, it returns a value of 1.

You never actually call tagStrip which is supposed to work on strippedArray.
You declare it with function tagStrip(){} and later you reference the function with tagStrip; but you never actually call it. The same is happening with dataMatch.
Try calling the two functions by writing
tagStrip();
dataMatch();
If you don't include the parentheses you don't call it you just run the function Objects as statements.

Here is a portion of code I use in my add-on Simply Send, I use the same <<>> merge tags.
//copy template
var mDocDrive = doc.makeCopy(title, folder);
var mDoc = DocumentApp.openById(mDocDrive.getId());
var mDocDriveApp = DriveApp.getFileById(mDoc.getId());
var docToAttach = mDoc.getUrl();
var driveToShare = mDocDriveApp;
// replace text inside template
var body = mDoc.getBody();
body.replaceText("<<SS Title>>", title);
body.replaceText("<<timestamp>>", lastR.timestamp);
body.replaceText("<<username>>", lastR.email);
for (i in lastR.theResponses){
var cur = lastR.theResponses[i];
var name = cur.title;
name = name.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&"); // this will allow fields that include special characters.
var response = cur.response;
var searchPattern = "<<"+name+">>";
body.replaceText(searchPattern, response);
}
// this will replace any unused tags with nothing.
var ques = getQuestionList();
for (j in ques){
var curq = ques[j].name;
curq = curq.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&");
var searchPattern2 = "<<"+curq+">>";
body.replaceText(searchPattern2, "");
}
mDoc.saveAndClose();
//create pdf
if (mmDefaults.shareAs == "pdf"){
// uncomment if you want to make the pdf in the merge folder
var asPdf = mDoc.getAs('application/pdf');
asPdf.setName(mDoc.getName()+ ".pdf");
var pdf = DriveApp.createFile(asPdf);
folder.addFile(pdf);
DriveApp.removeFile(pdf);
mDocDriveApp.setTrashed(true);
var docToAttach = pdf;
driveToShare = pdf;
}

Related

Extract pdf document with multi page

I am using Amazon's Textract service for extracting tables, Forms from pdf documnets.
The example provided at Github here is working for single page document only. But as per demo provided by AWS they are able to extract multi page pdf docs as well.
As per documentation we have to call same service for multi pages as well. But it is not working for me.
All the examples provided by them are either in python or java.
I am doing it in dotnet core.
Any help?
Here is my code.
public IActionResult FileExtract(string filename)
{
try
{
string lineText = "";
string wordText = "";
string fieldsText = "";
string fieldsText2 = "";
string tableText = "";
// Extracting file in below code.
var textractAnalysisClient = BuildTextractClient();
var document = PrepareDocument(textractAnalysisClient, "FORMS", filename);
document.Pages.ForEach(page =>
{
page.Lines.ForEach(line =>
{
lineText += "<button class='rawlabel'>" + line.Text + "</button>";
line.Words.ForEach(word =>
{
wordText += word.Text;
});
});
page.Form.Fields.ForEach(f =>
{
fieldsText += "<div><h5>" + f.Key + "</h5><p style='background-color:lightgray;width: 200px;padding: 6px;'>"
+ f.Value + "</p></div>";
});
var key = "Phone Number:";
var field = page.Form.GetFieldByKey(key);
if (field != null)
{
fieldsText2 += "Key: " + field.Key + " | Value: " + field.Value;
}
});
tableText = "<table id='customers'>";
document = PrepareDocument(textractAnalysisClient, "TABLES", filename);
document.Pages.ForEach(page =>
{
page.Tables.ForEach(table =>
{
var r = 0;
table.Rows.ForEach(row =>
{
r++;
tableText += "<tr>";
var c = 0;
row.Cells.ForEach(cell =>
{
c++;
tableText += "<td>";
tableText += cell.Text + "</td>";
});
tableText += "</tr>";
});
});
});
tableText += "</table>";
objJsonResponse.fieldsText = fieldsText;
objJsonResponse.fieldsText2 = fieldsText2;
objJsonResponse.lineText = lineText;
objJsonResponse.tableText = tableText;
objJsonResponse.wordText = wordText;
objJsonResponse.responsecode = 1;
return Json(objJsonResponse);
}
catch (Exception ex)
{
this.objJsonResponse.responsecode = -1;
this.objJsonResponse.error = "failed";
return Json(this.objJsonResponse);
}
}
static TextractTextAnalysisService BuildTextractClient()
{
var builder = new ConfigurationBuilder()
.SetBasePath(Environment.CurrentDirectory)
.AddJsonFile("appsettings.json", optional: false, reloadOnChange: true)
.AddEnvironmentVariables()
.Build();
var awsOptions = builder.GetAWSOptions();
return new TextractTextAnalysisService(awsOptions.CreateServiceClient<IAmazonTextract>());
}
static TextractDocument PrepareDocument(TextractTextAnalysisService textractAnalysisClient, string type, string FormFile)
{
var task = textractAnalysisClient.StartDocumentAnalysis(BucketName, FormFile, type);
var jobId = task.Result;
textractAnalysisClient.WaitForJobCompletion(jobId);
var results = textractAnalysisClient.GetJobResults(jobId);
return new TextractDocument(results);
}

Google Script, how can use variables with regex search?

Very inexperienced coder here, I have recently gotten a script working that uses regex to search for two different words occurring within a certain word limit. So I can search for "the" and "account" occurring within 10 words of each other, then my script prints the sentence it occurs in. However, my work requires me to search for lots of different work combinations and it has become a pain having to enter each word manually into the string /\W*(the)\W*\s+(\w+\s+){0,10}(account)|(account)\s+(\w+\s+){0,10}(the)/i; for example.
I would like to have something in the script where I can enter the words I want to search for just once, and they will be used in the string above. I have tried, what I think is, declaring variables like this:
var word1 = the
var word2 = account
/\W*(word1)\W*\s+(\w+\s+){0,10}(word2)|(word2)\s+(\w+\s+){0,10}(word1)/i;
But, again, very experienced coder so I'm a little out of my depth. Would really like something like the script snippet above to work in my full script listed below.
Here is my full working script without my attempt at declaring variables mentioned above:
var ss = SpreadsheetApp.getActiveSpreadsheet();
var historySheet = ss.getSheetByName('master');
var resultsSheet = ss.getSheetByName('results');
var totalRowsWithData = historySheet.getDataRange().getNumRows();
var data = historySheet.getRange(1, 1, totalRowsWithData, 3).getValues();
var regexp = /\W*(the)\W*\s+(\w+\s+){0,10}(account)|(account)\s+(\w+\s+){0,10}(the)/i;
var result = [];
for (var i = 0; i < data.length; i += 1) {
var row = data[i];
var column = row[0];
if (regexp.exec(column) !== null) {
result.push(row); }}
if (result.length > 0) {
var resultsSheetDataRows = resultsSheet.getDataRange().getNumRows();
resultsSheetDataRows = resultsSheetDataRows === 1 ? resultsSheetDataRows : resultsSheetDataRows + 1;
var resultsSheetRange = resultsSheet.getRange(resultsSheetDataRows, 1, result.length, 3);
resultsSheetRange.setValues(result);}}
I tried this solution but not sure I have done it correctly as it only enters results in logs and not printing in the "results" sheet:
var ss = SpreadsheetApp.getActiveSpreadsheet();
var historySheet = ss.getSheetByName('Sheet1');
var resultsSheet = ss.getSheetByName('Results1');
var totalRowsWithData = historySheet.getDataRange().getNumRows();
var data = historySheet.getRange(1, 1, totalRowsWithData, 3).getValues();
const regexpTemplate = '\W*(word1)\W*\s+(\w+\s+){0,10}(word2)|(word2)\s+(\w+\s+){0,10}(word1)';
var word1 = 'test1';
var word2 = 'test2';
var regexpString = regexpTemplate.replace(/word1/g, word1).replace(/word2/g, word2);
var regexp = new RegExp(regexpString, 'i');
Logger.log(regexp); // /W*(the)W*s+(w+s+){0,10}(account)|(account)s+(w+s+){0,10}(the)/i
var result = [];
for (var i = 0; i < data.length; i += 1) {
var row = data[i];
var column = row[0];
if (regexp.exec(column) !== null) {
result.push(row); }}
if (result.length > 0) {
var resultsSheetDataRows = resultsSheet.getDataRange().getNumRows();
resultsSheetDataRows = resultsSheetDataRows === 1 ? resultsSheetDataRows : resultsSheetDataRows + 1;
var resultsSheetRange = resultsSheet.getRange(resultsSheetDataRows, 1, result.length, 3);
resultsSheetRange.setValues(result);}}
Use the RegExp contructor.
const regexpTemplate = '\\W*(word1)\\W*\\s+(\\w+\\s+){0,10}(word2)|(word2)\\s+(\\w+\\s+){0,10}(word1)';
var word1 = 'the';
var word2 = 'account';
var regexpString = regexpTemplate.replace(/word1/g, word1).replace(/word2/g, word2);
var regexp = new RegExp(regexpString, 'i');
Logger.log(regexp); // /\W*(the)\W*\s+(\w+\s+){0,10}(account)|(account)\s+(\w+\s+){0,10}(the)/i
You can put this into a function to easily generate your new regular expression whenever you want to update the words.
/**
* Generate the regular expression with the provided words.
* #param {String} word1
* #param {String} word2
* #returns {RegExp}
*/
function generateRegExp(word1, word2) {
const regexpTemplate = '\\W*(word1)\\W*\\s+(\\w+\\s+){0,10}(word2)|(word2)\\s+(\\w+\\s+){0,10}(word1)';
var regexpString = regexpTemplate.replace(/word1/g, word1).replace(/word2/g, word2);
return new RegExp(regexpString, 'i');
}
/**
* Test the generateRegExp() function.
*/
function test_generateRegExp() {
var word1 = 'the';
var word2 = 'account';
var regexp = generateRegExp(word1, word2); // /\W*(the)\W*\s+(\w+\s+){0,10}(account)|(account)\s+(\w+\s+){0,10}(the)/i
// Use regexp just as you do in your script
// i.e. if (regexp.exec(column) !== null) { result.push(row); }
}
Your final script could look something like this.
function printSentences() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var historySheet = ss.getSheetByName('Sheet1');
var resultsSheet = ss.getSheetByName('Results1');
var totalRowsWithData = historySheet.getDataRange().getNumRows();
var data = historySheet.getRange(1, 1, totalRowsWithData, 3).getValues();
var result = [];
var regexp = generateRegExp("the", "account");
for (var i = 0; i < data.length; i += 1) {
var row = data[i];
var column = row[0];
if (regexp.exec(column) !== null) {
result.push(row);
}
}
if (result.length > 0) {
var resultsSheetDataRows = resultsSheet.getDataRange().getNumRows();
resultsSheetDataRows = resultsSheetDataRows === 1 ? resultsSheetDataRows : resultsSheetDataRows + 1;
var resultsSheetRange = resultsSheet.getRange(resultsSheetDataRows, 1, result.length, 3);
resultsSheetRange.setValues(result);
}
}
/**
* Generate the regular expression with the provided words.
* #param {String} word1
* #param {String} word2
* #returns {RegExp}
*/
function generateRegExp(word1, word2) {
const regexpTemplate = '\\W*(word1)\\W*\\s+(\\w+\\s+){0,10}(word2)|(word2)\\s+(\\w+\\s+){0,10}(word1)';
var regexpString = regexpTemplate.replace(/word1/g, word1).replace(/word2/g, word2);
return new RegExp(regexpString, 'i');
}

Import Data from Gmail into Google Spreadsheet

I have an email sent out on a daily basis that I need to pull data from to a Google Sheet. I have read on this and found a solution that worked for other's, but I've been unable to get it to work on mine. This is the code I've tried modifying:
function menu(e) {
var ui = SpreadsheetApp.getUi();
ui.createMenu('Macros')
.addItem('parse mail', 'email')
.addToUi();
}
function parseMail(body) {
var a=[];
var keystr="First Name : ,Last Name : ,Customer ID : ,Invoice : ";
var keys=keystr.split(",");
var i,p,r;
for (i in keys) {
p=keys[i]+"\n([a-zA-Z0-9\ \,\.]*)\n";
r=new RegExp(p,"m");
try {a[i]=body.match(p)[1];}
catch (err) {a[i]="no match";}
}
}
function email() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var s = ss.getActiveSheet();
var threads = GmailApp.search('subject: "Fwd: ARB Subscription*"');
var a=[];
for (var i = 0; i < threads.length; i++) {
var messages = GmailApp.getMessagesForThread(threads[i]);
for (var j = 0; j < messages.length; j++) {
a[j]=parseMail(messages[j].getPlainBody());
}
}
var nextRow=s.getDataRange().getLastRow()+1;
var numRows=a.length;
var numCols=a.length[0];
s.getRange(nextRow,1,numRows,numCols).setValues(a);
}
I've tried modifying the code, but the error I am getting is Cannot convert Array to Object[][] Line 35. any advice would be greatly appreciated
Get the dimensions of the data range as below.
Also, when you are setting the values on the range the size of two-dimensional array a must match the size of the range.
var dataRange = s.getDataRange();
var nextRow = dataRange.getLastRow() + 1;
var numRows = dataRange.getNumRows();
var numCols = dataRange.getNumColumns();
s.getRange(nextRow,1,numRows,numCols).setValues(a);

Amazon Web service - signature

I've been receiving an error from Amazon web service - InvalidParameterValue
Either Action or Operation query parameter must be present.
I believe it is most likely due to the signature being incorrect as the XML document and Header matches that of a test I did in their scratchpad.
Does anything stand out as being incorrect?
Thanks,
Clare
private static string ConstructCanonicalQueryString(SortedDictionary<string, string> sortedParameters)
{
var builder = new StringBuilder();
if (sortedParameters.Count == 0)
{
builder.Append(string.Empty);
return builder.ToString();
}
foreach (var kvp in sortedParameters)
{
builder.Append(PercentEncodeRfc3986(kvp.Key));
builder.Append("=");
builder.Append(PercentEncodeRfc3986(kvp.Value));
builder.Append("&");
}
var canonicalString = builder.ToString();
return canonicalString.Substring(0, canonicalString.Length - 1);
}
private static string PercentEncodeRfc3986(string value)
{
value = HttpUtility.UrlEncode(string.IsNullOrEmpty(value) ? string.Empty : value, Encoding.UTF8);
if (string.IsNullOrEmpty(value))
{
return string.Empty;
}
value = value.Replace("'", "%27")
.Replace("(", "%28")
.Replace(")", "%29")
.Replace("*", "%2A")
.Replace("!", "%21")
.Replace("%7e", "~")
.Replace("+", "%20")
.Replace(":", "%3A");
var sbuilder = new StringBuilder(value);
for (var i = 0; i < sbuilder.Length; i++)
{
if (sbuilder[i] != '%')
{
continue;
}
if (!char.IsLetter(sbuilder[i + 1]) && !char.IsLetter(sbuilder[i + 2]))
{
continue;
}
sbuilder[i + 1] = char.ToUpper(sbuilder[i + 1]);
sbuilder[i + 2] = char.ToUpper(sbuilder[i + 2]);
}
return sbuilder.ToString();
}
public string SignRequest(Dictionary<string, string> parametersUrl, Dictionary<string, string>
parametersSignture)
{
var secret = Encoding.UTF8.GetBytes(parametersSignture["Secret"]);
var signer = new HMACSHA256(secret);
var pc = new ParamComparer();
var sortedParameters = new SortedDictionary<string, string>(parametersUrl, pc);
var orderedParameters = ConstructCanonicalQueryString(sortedParameters);
var builder = new StringBuilder();
builder.Append(parametersSignture["RequestMethod"])
.Append(" \n")
.Append(parametersSignture["EndPoint"])
.Append("\n")
.Append("/\n")
.Append(orderedParameters);
var stringToSign = builder.ToString();
var toSign = Encoding.UTF8.GetBytes(stringToSign);
var sigBytes = signer.ComputeHash(toSign);
var signature = Convert.ToBase64String(sigBytes);
return signature.Replace("=", "%3D").Replace("/", "%2F").Replace("+", "%2B");
}
public class ParamComparer : IComparer<string>
{
public int Compare(string p1, string p2)
{
return string.CompareOrdinal(p1, p2);
}
}
The issue was that the Action wasn't included correctly into the Request

the difference the regexp result

Why ghi's result is different with abc or def?
abc's result is abc: a-b-c-d-e-f
def's result is def: a-b-c-d-e-f
ghi's result is ghi: a-{1}-c-{3}-e-{5}
What is the reason?
function abc(){
var lang = "{0}-{1}-{2}-{3}-{4}-{5}";
var args = ["a","b","c","d","e","f"];
var exp = /\{(\d+)\}/;
var matches = exp.exec(lang);
while (matches) {
var index = parseInt(matches[1], 10);
lang = lang.replace(matches[0], args[index]);
matches = exp.exec(lang);
}
console.log('abc: ' + lang);
}
abc();
function def(){
var lang = "{0}-{1}-{2}-{3}-{4}-{5}";
var args = ["a","b","c","d","e","f"];
var exp = /\{(\d+)\}/g;
var matches = exp.exec(lang);
while (matches) {
var exp = /\{(\d+)\}/g;
var index = parseInt(matches[1], 10);
lang = lang.replace(matches[0], args[index]);
matches = exp.exec(lang);
}
console.log('def: ' + lang);
}
def();
function ghi(){
var lang = "{0}-{1}-{2}-{3}-{4}-{5}";
var args = ["a","b","c","d","e","f"];
var exp = /\{(\d+)\}/g;
var matches = exp.exec(lang);
while (matches) {
var index = parseInt(matches[1], 10);
lang = lang.replace(matches[0], args[index]);
matches = exp.exec(lang);
}
console.log('ghi: ' + lang);
}
ghi();
Function ghi() needs to reset RegExp object's lastIndex inside the loop:
function ghi(){
var lang = "{0}-{1}-{2}-{3}-{4}-{5}";
var args = ["a","b","c","d","e","f"];
var exp = /\{(\d+)\}/g;
while (matches = exp.exec(lang)) {
var index = parseInt(matches[1], 10);
lang = lang.replace(matches[0], args[index]);
exp.lastIndex = 0;
}
console.log('ghi: ' + lang);
}
Because of exp.lastIndex = 0; call now output will be same.
JavaScript's RegExp object is stateful. When global flag is used, and you call a method the same RegExp object, it will start on the next index from the end of the last match.