Replacing multiple URLs in a string with links using regex - regex

I'm using an AngularJS filter to replace URLs in strings with a window.open() call, and email addresses with mailto: links:
// AngularJS filter 'parseUrl': the returned function rewrites URLs found in `text`
// into <span> elements whose onclick calls window.open(url, '_system').
// This is the version that misbehaves when `text` contains two or more URLs: each
// global replace below is executed once per match, so already-generated markup is
// matched and wrapped again.
app.filter('parseUrl', function() {
//URLs starting with http://, https://, or ftp://
// NOTE(review): the '##' in the two character classes sits where a URL pattern would
// normally list '@#' - possibly mangled in transit; confirm against the original.
var replacePattern1 = /(\b(https?|ftp):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/gim;
//URLs starting with "www." (without // before it, or it'd re-link the ones done above).
var replacePattern2 = /(^|[^\/])(www\.[\S]+(\b|$))/gim;
//Change email addresses to mailto:: links.
// NOTE(review): '#' appears where an e-mail address has '@' - presumably garbled; confirm.
var replacePattern3 = /(\w+#[a-zA-Z_]+?\.[a-zA-Z]{2,6})/gim;
// `target` and `otherProp` are accepted but not read anywhere in this function.
return function(text, target, otherProp) {
// originText is assigned but never used below.
var originText = text;
// Loose equality: undefined/null and anything that coerces to "" yield an empty string.
if(text == undefined || text == "") {
return "";
} else {
// text.match() with a /g pattern returns every match; the callback ignores `url`
// and runs a *global* replace over the whole string, so with N matches the
// replacement is applied N times (the URLs inside the generated span match again).
angular.forEach(text.match(replacePattern1), function(url) {
text = text.replace(replacePattern1, "<span onclick=\"window.open('$1', '_system');\" class='link_url'>$1</span>");
});
// Same once-per-match repetition for "www." URLs; $1 (the character captured
// before "www.") is emitted inside the span together with the URL ($2).
angular.forEach(text.match(replacePattern2), function(url) {
text = text.replace(replacePattern2, "<span onclick=\"window.open('http://$2', '_system');\" class='link_url'>$1$2</span>");
});
// Replacing a match with "$1" re-inserts the matched text unchanged.
angular.forEach(text.match(replacePattern3), function(url) {
text = text.replace(replacePattern3, "$1");
});
return text;
}
};
});
It works well with one URL or one email, but when I have two or more URLs it does not work, because the URLs are replaced multiple times by a span with the window.open() function. As you can see in this JSFiddle: http://jsfiddle.net/wps94/1/
Do you have an idea to avoid this ? Maybe by changing the regex ?
Thank you

Try using a single regex and a replacement function:
var replacePattern = /\b((http:\/\/|https:\/\/|ftp:\/\/|mailto:|news:)|www\.|ftp\.|[^ \,\;\:\!\)\(\""\'\<\>\f\n\r\t\v]+#)([^ \,\;\:\!\)\(\""\'\<\>\f\n\r\t\v]+)\b/gim;
return function(text, target, otherProp) {
var originText = text;
if(text == undefined || text == "") {
return "";
} else {
return text.replace(replacePattern, function($0, $1) {
var match = $0;
var protocol = $1;
if ((/^www\./i).test(match))
{
return "<span onclick=\"window.open('http://" + match + "', '_system');\" class='link_url'>" + match + "</span>";
}
if ((/^ftp\./i).test(match))
{
return "<span onclick=\"window.open('ftp://" + match + "', '_system');\" class='link_url'>" + match + "</span>";
}
if (protocol && protocol.charAt(0) === '#')
{
return "" + match + "";
}
return "<span onclick=\"window.open('" + match + "', '_system');\" class='link_url'>" + match + "</span>";
});
}
};
http://jsfiddle.net/wps94/2/

In the forEachs, change text = to newText = and then return that instead…like this:
// AngularJS filter 'parseUrl': turns URLs in a string into clickable spans that
// call window.open(url, '_system') and e-mail addresses into mailto links.
app.filter('parseUrl', function() {
  // URLs starting with http://, https://, or ftp://
  // ("@" restored in the character classes; the snippet showed "##", which looks
  // like an extraction artefact of "@#").
  var replacePattern1 = /(\b(https?|ftp):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/gim;
  // URLs starting with "www." (without // before it, or it'd re-link the ones done above).
  var replacePattern2 = /(^|[^\/])(www\.[\S]+(\b|$))/gim;
  // E-mail addresses ("@" restored, see above).
  var replacePattern3 = /(\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,6})/gim;

  /**
   * @param {string} text - Raw text to linkify.
   * @param {*} target - Unused; kept for signature compatibility.
   * @param {*} otherProp - Unused; kept for signature compatibility.
   * @returns {string} The linkified text, or "" for empty/undefined input.
   */
  return function(text, target, otherProp) {
    if (text == undefined || text == "") {
      return "";
    }
    // Each pattern carries the /g flag, so ONE replace() call per pattern already
    // handles every occurrence; no loop over the matches is needed. The result is
    // accumulated in newText: assigning `newText = text.replace(...)` for every
    // pattern would restart from the untouched input each time, keep only the last
    // replacement that ran, and return undefined when nothing matched at all.
    var newText = text;
    newText = newText.replace(replacePattern1, "<span onclick=\"window.open('$1', '_system');\" class='link_url'>$1</span>");
    newText = newText.replace(replacePattern2, "<span onclick=\"window.open('http://$2', '_system');\" class='link_url'>$1$2</span>");
    // The replacement was a bare "$1" (a no-op); the mailto markup described by the
    // comments appears to have been stripped from the snippet, so it is restored.
    newText = newText.replace(replacePattern3, "<a href=\"mailto:$1\">$1</a>");
    return newText;
  };
});
Each pass through the forEach was looking at the value of text with the replacement already made for the first URL rather than the initial value.

Related

Extract pdf document with multi page

I am using Amazon's Textract service to extract tables and forms from PDF documents.
The example provided at Github here is working for single page document only. But as per demo provided by AWS they are able to extract multi page pdf docs as well.
As per documentation we have to call same service for multi pages as well. But it is not working for me.
All the examples provided by them are either in python or java.
I am doing it in dotnet core.
Any help?
Here is my code.
// Runs two Textract analyses ("FORMS", then "TABLES") on the given file and returns
// the extracted lines, words, form fields and tables as HTML/text fragments in a
// JSON response. On any exception the response carries responsecode -1 instead.
// `filename` is passed straight through to PrepareDocument as the document name.
public IActionResult FileExtract(string filename)
{
try
{
// Accumulators for the fragments that are copied into the JSON response below.
string lineText = "";
string wordText = "";
string fieldsText = "";
string fieldsText2 = "";
string tableText = "";
// Extracting file in below code.
var textractAnalysisClient = BuildTextractClient();
// First analysis pass: form (key/value) data.
var document = PrepareDocument(textractAnalysisClient, "FORMS", filename);
document.Pages.ForEach(page =>
{
page.Lines.ForEach(line =>
{
// One <button> per detected line; the words of the line are appended to
// wordText with no separator between them.
lineText += "<button class='rawlabel'>" + line.Text + "</button>";
line.Words.ForEach(word =>
{
wordText += word.Text;
});
});
page.Form.Fields.ForEach(f =>
{
// Key and value are concatenated into the markup as-is (no HTML encoding here).
fieldsText += "<div><h5>" + f.Key + "</h5><p style='background-color:lightgray;width: 200px;padding: 6px;'>"
+ f.Value + "</p></div>";
});
// Direct lookup of one hard-coded key on every page.
var key = "Phone Number:";
var field = page.Form.GetFieldByKey(key);
if (field != null)
{
fieldsText2 += "Key: " + field.Key + " | Value: " + field.Value;
}
});
tableText = "<table id='customers'>";
// Second analysis pass over the same file: table data. This starts a separate
// analysis job via PrepareDocument and replaces the FORMS document.
document = PrepareDocument(textractAnalysisClient, "TABLES", filename);
document.Pages.ForEach(page =>
{
page.Tables.ForEach(table =>
{
// r and c are incremented per row/cell but never read.
var r = 0;
table.Rows.ForEach(row =>
{
r++;
tableText += "<tr>";
var c = 0;
row.Cells.ForEach(cell =>
{
c++;
tableText += "<td>";
tableText += cell.Text + "</td>";
});
tableText += "</tr>";
});
});
});
// All tables of all pages end up as rows of this single <table> element.
tableText += "</table>";
// objJsonResponse is not declared in this method; the catch block reaches it via
// `this`, so it is presumably a member of the enclosing class - confirm.
objJsonResponse.fieldsText = fieldsText;
objJsonResponse.fieldsText2 = fieldsText2;
objJsonResponse.lineText = lineText;
objJsonResponse.tableText = tableText;
objJsonResponse.wordText = wordText;
objJsonResponse.responsecode = 1;
return Json(objJsonResponse);
}
// Every failure is reported as the same generic "failed"; `ex` itself is not
// logged or returned, so the actual cause is not visible to the caller.
catch (Exception ex)
{
this.objJsonResponse.responsecode = -1;
this.objJsonResponse.error = "failed";
return Json(this.objJsonResponse);
}
}
// Creates the Textract analysis service. Configuration is read from appsettings.json
// in the current directory (required, reloaded on change) and then from environment
// variables; the AWS options found there are used to create the IAmazonTextract client.
static TextractTextAnalysisService BuildTextractClient()
{
    var configuration = new ConfigurationBuilder()
        .SetBasePath(Environment.CurrentDirectory)
        .AddJsonFile("appsettings.json", optional: false, reloadOnChange: true)
        .AddEnvironmentVariables()
        .Build();

    var textractClient = configuration.GetAWSOptions().CreateServiceClient<IAmazonTextract>();
    return new TextractTextAnalysisService(textractClient);
}
// Starts a document analysis of the requested type ("FORMS" / "TABLES" in the caller)
// for FormFile in BucketName, blocks until the job has completed, and wraps the job
// results in a TextractDocument.
static TextractDocument PrepareDocument(TextractTextAnalysisService textractAnalysisClient, string type, string FormFile)
{
    // .Result blocks the calling thread until the start request has returned a job id.
    var jobId = textractAnalysisClient.StartDocumentAnalysis(BucketName, FormFile, type).Result;
    textractAnalysisClient.WaitForJobCompletion(jobId);

    var jobResults = textractAnalysisClient.GetJobResults(jobId);
    return new TextractDocument(jobResults);
}

Google Script, how can use variables with regex search?

Very inexperienced coder here. I recently got a script working that uses regex to search for two different words occurring within a certain number of words of each other. For example, I can search for "the" and "account" occurring within 10 words of each other, and my script then prints the sentence they occur in. However, my work requires me to search for lots of different word combinations, and it has become a pain having to enter each word manually into the pattern, for example /\W*(the)\W*\s+(\w+\s+){0,10}(account)|(account)\s+(\w+\s+){0,10}(the)/i.
I would like to have something in the script where I can enter the words I want to search for just once, and they will be used in the string above. I have tried, what I think is, declaring variables like this:
// Attempt (does not work as intended): the right-hand sides are bare identifiers
// rather than quoted strings, and inside a regex literal the text "word1"/"word2"
// is matched literally - a regex literal does not read variables.
var word1 = the
var word2 = account
/\W*(word1)\W*\s+(\w+\s+){0,10}(word2)|(word2)\s+(\w+\s+){0,10}(word1)/i;
But, again, I'm a very inexperienced coder, so I'm a little out of my depth. I would really like something like the script snippet above to work in my full script listed below.
Here is my full working script without my attempt at declaring variables mentioned above:
// Copies every row of the 'master' sheet whose first column matches the hard-coded
// pattern into the 'results' sheet.
// (The snippet is the body of a function whose header is not shown; the final '}'
// closes that function.)
var ss = SpreadsheetApp.getActiveSpreadsheet();
var historySheet = ss.getSheetByName('master');
var resultsSheet = ss.getSheetByName('results');
var totalRowsWithData = historySheet.getDataRange().getNumRows();
// Read the first three columns of every populated row.
var data = historySheet.getRange(1, 1, totalRowsWithData, 3).getValues();
// "the" followed within at most 10 intervening words by "account", or the reverse
// order; case-insensitive.
var regexp = /\W*(the)\W*\s+(\w+\s+){0,10}(account)|(account)\s+(\w+\s+){0,10}(the)/i;
var result = [];
for (var i = 0; i < data.length; i += 1) {
var row = data[i];
// Only the first column is tested against the pattern.
var column = row[0];
if (regexp.exec(column) !== null) {
result.push(row); }}
if (result.length > 0) {
var resultsSheetDataRows = resultsSheet.getDataRange().getNumRows();
// Start writing at row 1 when the data range reports exactly one row, otherwise
// at the row after the last reported one.
resultsSheetDataRows = resultsSheetDataRows === 1 ? resultsSheetDataRows : resultsSheetDataRows + 1;
var resultsSheetRange = resultsSheet.getRange(resultsSheetDataRows, 1, result.length, 3);
resultsSheetRange.setValues(result);}}
I tried this solution but not sure I have done it correctly as it only enters results in logs and not printing in the "results" sheet:
// Attempt at building the pattern from variables: reads 'Sheet1', builds the regex
// from a string template, and writes matching rows to 'Results1'.
// (As above, this is a function body whose header is not shown.)
var ss = SpreadsheetApp.getActiveSpreadsheet();
var historySheet = ss.getSheetByName('Sheet1');
var resultsSheet = ss.getSheetByName('Results1');
var totalRowsWithData = historySheet.getDataRange().getNumRows();
var data = historySheet.getRange(1, 1, totalRowsWithData, 3).getValues();
// In an ordinary string literal an escape such as \W, \s or \w is not recognised,
// so the backslash is dropped and the string holds "W*", "s+", "w+" instead - see
// the logged pattern two statements below. The backslashes have to be doubled.
const regexpTemplate = '\W*(word1)\W*\s+(\w+\s+){0,10}(word2)|(word2)\s+(\w+\s+){0,10}(word1)';
var word1 = 'test1';
var word2 = 'test2';
// Substitute the placeholders "word1"/"word2" in the template by the actual words.
var regexpString = regexpTemplate.replace(/word1/g, word1).replace(/word2/g, word2);
var regexp = new RegExp(regexpString, 'i');
Logger.log(regexp); // /W*(the)W*s+(w+s+){0,10}(account)|(account)s+(w+s+){0,10}(the)/i
var result = [];
for (var i = 0; i < data.length; i += 1) {
var row = data[i];
// Only the first column is tested against the pattern.
var column = row[0];
if (regexp.exec(column) !== null) {
result.push(row); }}
// Nothing is written to the results sheet when no row matched.
if (result.length > 0) {
var resultsSheetDataRows = resultsSheet.getDataRange().getNumRows();
resultsSheetDataRows = resultsSheetDataRows === 1 ? resultsSheetDataRows : resultsSheetDataRows + 1;
var resultsSheetRange = resultsSheet.getRange(resultsSheetDataRows, 1, result.length, 3);
resultsSheetRange.setValues(result);}}
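Use the RegExp constructor. Because the pattern is now written as a string literal, every backslash has to be doubled (\\W, \\s, \\w); otherwise the backslashes are dropped before the pattern reaches RegExp.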
// The two words to search for; these are the only lines that need editing.
var word1 = 'the';
var word2 = 'account';
// Pattern template with the placeholders "word1" and "word2". Backslashes are
// doubled so that the string handed to RegExp still contains \W, \s and \w.
const regexpTemplate = '\\W*(word1)\\W*\\s+(\\w+\\s+){0,10}(word2)|(word2)\\s+(\\w+\\s+){0,10}(word1)';
// Fill in the placeholders: first every "word1", then every "word2".
var regexpString = regexpTemplate
  .split('word1').join(word1)
  .split('word2').join(word2);
var regexp = new RegExp(regexpString, 'i');
Logger.log(regexp); // /\W*(the)\W*\s+(\w+\s+){0,10}(account)|(account)\s+(\w+\s+){0,10}(the)/i
You can put this into a function to easily generate your new regular expression whenever you want to update the words.
/**
 * Generate the regular expression with the provided words.
 *
 * The resulting case-insensitive pattern matches word1 followed by word2 (or
 * word2 followed by word1) with at most 10 words in between. Both words are
 * matched literally: regex metacharacters they contain (".", "+", "$", ...) are
 * escaped, so a word such as "c++" no longer produces an invalid pattern and
 * "a.b" no longer matches "axb".
 *
 * @param {String} word1
 * @param {String} word2
 * @returns {RegExp}
 */
function generateRegExp(word1, word2) {
  // Backslash-escape every character that has a special meaning in a pattern.
  var escapeForRegExp = function (word) {
    return String(word).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  };
  var first = escapeForRegExp(word1);
  var second = escapeForRegExp(word2);
  // Concatenate instead of substituting "word1"/"word2" placeholders: a chained
  // .replace() would also rewrite a placeholder name occurring inside an inserted
  // word, and would interpret "$&"-style sequences in the words.
  var regexpString =
    '\\W*(' + first + ')\\W*\\s+(\\w+\\s+){0,10}(' + second + ')' +
    '|(' + second + ')\\s+(\\w+\\s+){0,10}(' + first + ')';
  return new RegExp(regexpString, 'i');
}
/**
 * Exercise generateRegExp() with the sample words "the" and "account".
 */
function test_generateRegExp() {
  var words = ['the', 'account'];
  // Expected: /\W*(the)\W*\s+(\w+\s+){0,10}(account)|(account)\s+(\w+\s+){0,10}(the)/i
  var regexp = generateRegExp(words[0], words[1]);
  // From here on regexp is used exactly like the literal in the original script,
  // i.e. if (regexp.exec(column) !== null) { result.push(row); }
}
Your final script could look something like this.
/**
 * Copy every row of 'Sheet1' whose first column contains "the" and "account"
 * within 10 words of each other into the 'Results1' sheet.
 */
function printSentences() {
  var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
  var sourceSheet = spreadsheet.getSheetByName('Sheet1');
  var targetSheet = spreadsheet.getSheetByName('Results1');

  // First three columns of every populated row.
  var rowCount = sourceSheet.getDataRange().getNumRows();
  var rows = sourceSheet.getRange(1, 1, rowCount, 3).getValues();

  var pattern = generateRegExp("the", "account");
  // Only the first column of each row is tested.
  var matchingRows = rows.filter(function (row) {
    return pattern.exec(row[0]) !== null;
  });
  if (matchingRows.length === 0) {
    return;
  }

  // Write at row 1 when the data range reports exactly one row, otherwise at the
  // row after the last reported one.
  var reportedRows = targetSheet.getDataRange().getNumRows();
  var firstTargetRow = reportedRows === 1 ? reportedRows : reportedRows + 1;
  targetSheet.getRange(firstTargetRow, 1, matchingRows.length, 3).setValues(matchingRows);
}
/**
 * Generate the regular expression with the provided words.
 *
 * The resulting case-insensitive pattern matches word1 followed by word2 (or
 * word2 followed by word1) with at most 10 words in between. Both words are
 * matched literally: regex metacharacters they contain (".", "+", "$", ...) are
 * escaped, so a word such as "c++" no longer produces an invalid pattern and
 * "a.b" no longer matches "axb".
 *
 * @param {String} word1
 * @param {String} word2
 * @returns {RegExp}
 */
function generateRegExp(word1, word2) {
  // Backslash-escape every character that has a special meaning in a pattern.
  var escapeForRegExp = function (word) {
    return String(word).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  };
  var first = escapeForRegExp(word1);
  var second = escapeForRegExp(word2);
  // Concatenate instead of substituting "word1"/"word2" placeholders: a chained
  // .replace() would also rewrite a placeholder name occurring inside an inserted
  // word, and would interpret "$&"-style sequences in the words.
  var regexpString =
    '\\W*(' + first + ')\\W*\\s+(\\w+\\s+){0,10}(' + second + ')' +
    '|(' + second + ')\\s+(\\w+\\s+){0,10}(' + first + ')';
  return new RegExp(regexpString, 'i');
}

Apache Cordova : Office 365 API SharePointClient | How To download one drive files?

I am able to list all the files that i have for my one drive account but need to know how I could download them to my Android or Windows mobile phone.
I tried using the weburl that I receive from the item but looks like it will need the token.
Current JavaScript code :
// Discovers the user's Office 365 services, and for the 'MyFiles' capability lists
// the files in a popup and tries to download each of them with the Cordova
// FileTransfer plugin.
// (The snippet is cut short: the .then( opened on the services(...) call is never
// closed within it.)
var AuthenticationContext = new O365Auth.Context();
var discoveryContext = new O365Discovery.Context(); // new DiscoveryServices.Context(new AuthenticationContext(authUrl), appId, redirectUrl);
discoveryContext.services(AuthenticationContext.getAccessTokenFn('Microsoft.SharePoint')).then(function (capabilities) {
capabilities.forEach(function (v) {
if (v.capability === 'MyFiles') {
var msg = "";
// Client for the files API of this capability, with its own access-token function.
var sharePointnew = new Microsoft.CoreServices.SharePointClient(v.resourceId + '/_api/v1.0/me/',
AuthenticationContext.getAccessTokenFn(v.resourceId));
// The next line is a bare expression; nothing is called or assigned.
Microsoft.FileServices.FileFetcher
var elementInfo = document.getElementById('popup_body');
document.getElementById('popup_header').innerHTML = "My One Drive Files";
/* This should open a popup */
document.getElementById('popup_div').style.display = "block";
document.getElementById('popup_overlay').style.display = "block";
elementInfo.innerHTML += 'One Drive Files:';
// Download target: one fixed file name inside the app's data directory.
var fileName = 'demo.txt';
var store = cordova.file.dataDirectory;
var fileTransfer = new FileTransfer();
console.log("About to start transfer");
sharePointnew.files.getItems().fetch().then(function (result) {
msg = '';
result.currentPage.forEach(function (item) {
elementInfo.innerHTML += "<br />" + item.name + "<br />";
// item.webUrl is requested without any extra options (no headers are passed to
// download()), and every item is written to the same path, store + fileName.
// NOTE(review): the access token would presumably have to be sent along with the
// request for a protected URL - confirm against the FileTransfer/Office 365 docs.
fileTransfer.download(item.webUrl, store + fileName,
function (entry) {
console.log("Success!");
},
function (err) {
console.log("Error");
console.dir(err);
});
msg += item._odataType + ' "' + item.name + '"\n';
// s is never used.
var s = "";
});
console.log('All file system items: \n' + msg);
}, function (error) {
console.error(error);
});
}
});

extending EmberDefaultResolver with Ember-App-Kit

I'm making a custom resolver based on the pattern below from Robin Ward [ video / 15sec]
which is a trick to have a mobile device look for "mob_template.hbs" first before loading "template.hbs"
// Resolver that prefers a "mob_"-prefixed template while App.mobileActive is set
// and falls back to the regular template otherwise.
App.Resolver = EmberDefaultResolver.extend({
  /**
   * @param parsedName - Template name as handed to the default resolver.
   * @returns The "mob_" template when mobile mode is active and such a template
   *   resolves, otherwise the result of the default lookup.
   */
  resolveTemplate: function(parsedName){
    // Default lookup first, so there is always a fallback.
    var t = this._super(parsedName);
    // Fixed: the condition was missing its opening parenthesis ("if App.mobileActive){"),
    // which is a syntax error.
    if (App.mobileActive){
      // NOTE(review): the concatenation assumes parsedName is a string here - confirm
      // for the resolver version in use.
      return this._super('mob_' + parsedName) || t;
    }
    return t;
  }
});
However I'm using Ember App Kit, which uses a special version of the resolver:
I can't really tell what's going on in there or what I would need to do to produce similar functionality. Anyone have any idea?
I've tried something like this but it results in nothing being resolved:
// Attempt at a mobile-aware resolver (reported as resolving nothing): overrides
// resolve() and appends '_mobile' before delegating to the type-specific method.
var App = Ember.Application.extend({
//...
Resolver: Ember.DefaultResolver.extend({
resolve: function(fullName) {
// parsedName is whatever parseName() returns; its .name, .type and
// .resolveMethodName properties are read below, so it is an object, not a string.
var parsedName = this.parseName(fullName),
resolveMethodName = parsedName.resolveMethodName;
if (!(parsedName.name && parsedName.type)) {
throw new TypeError("Invalid fullName: `" + fullName + "`, must be of the form `type:name` ");
}
if (this[resolveMethodName]) {
if (window.screen_type == 'mobile'){
// parsedName + '_mobile' string-concatenates the parsed-name object itself, so
// the resolve method receives a string instead of the parsed-name object.
var resolved = this[resolveMethodName](parsedName + '_mobile');
} else{
var resolved = this[resolveMethodName](parsedName);
}
if (resolved) { return resolved; }
}
// Fallback when no type-specific method exists or it resolved nothing.
return this.resolveOther(parsedName);
},
})
});
Apparently parsedName is not a string of the template name in the EAK resolver, it has some props representing the template name though, parsedName.fullNameWithoutType being the one to target:
// Resolver that, while window.special_prop is 'foo' or 'bar', first looks for a
// template whose name carries the suffix "_foo" / "_bar" and falls back to the
// regular template when no such variant exists.
var CustomResolver = Resolver.extend({
  /**
   * @param {Object} parsedName - Parsed-name object of the resolver; its name,
   *   fullName and fullNameWithoutType properties are used.
   * @returns The suffixed template if one resolves, otherwise the regular one.
   */
  resolveTemplate: function(parsedName){
    var resolve = this._super(parsedName);
    if (['foo','bar'].indexOf(window.special_prop) > -1){
      var suffix = '_' + window.special_prop;
      // parsedName belongs to the caller: remember the original values so the
      // temporary rename does not leak out of this method. (The previous version
      // saved the original name but never restored it.)
      var originalName = parsedName.name;
      var originalFullName = parsedName.fullName;
      var originalFullNameWithoutType = parsedName.fullNameWithoutType;
      parsedName.name = originalName + suffix;
      parsedName.fullName = originalFullName + suffix;
      parsedName.fullNameWithoutType = originalFullNameWithoutType + suffix;
      try {
        resolve = this._super(parsedName) || resolve;
      } finally {
        parsedName.name = originalName;
        parsedName.fullName = originalFullName;
        parsedName.fullNameWithoutType = originalFullNameWithoutType;
      }
    }
    return resolve;
  }
});
// Written on separate lines: on a single line the "//..." comment swallowed the
// Resolver property and the closing "});".
var App = Ember.Application.extend({
  //...
  Resolver: CustomResolver
});

the difference the regexp result

Why is ghi's result different from the results of abc and def?
abc's result is abc: a-b-c-d-e-f
def's result is def: a-b-c-d-e-f
ghi's result is ghi: a-{1}-c-{3}-e-{5}
What is the reason?
// Fills the {n} placeholders of a template one at a time, using a pattern WITHOUT
// the global flag, and logs the result.
function abc() {
  var args = ["a","b","c","d","e","f"];
  var lang = "{0}-{1}-{2}-{3}-{4}-{5}";
  // No /g flag: every exec() searches from the start of the string.
  var placeholder = /\{(\d+)\}/;
  for (var found = placeholder.exec(lang); found; found = placeholder.exec(lang)) {
    var position = parseInt(found[1], 10);
    lang = lang.replace(found[0], args[position]);
  }
  console.log('abc: ' + lang);
}
abc();
// Same as abc(), but with a global (/g) pattern that is re-created on every pass,
// so each search is done by a fresh RegExp object whose lastIndex is 0.
function def() {
  var args = ["a","b","c","d","e","f"];
  var lang = "{0}-{1}-{2}-{3}-{4}-{5}";
  var exp = /\{(\d+)\}/g;
  var found;
  while ((found = exp.exec(lang))) {
    // Fresh object for the next search: its lastIndex starts at 0 again.
    exp = /\{(\d+)\}/g;
    var position = parseInt(found[1], 10);
    lang = lang.replace(found[0], args[position]);
  }
  console.log('def: ' + lang);
}
def();
// Same as abc(), but ONE global (/g) pattern object is reused for every search.
// The object keeps its lastIndex between exec() calls while the string shrinks
// underneath it, so some placeholders are skipped.
function ghi() {
  var args = ["a","b","c","d","e","f"];
  var lang = "{0}-{1}-{2}-{3}-{4}-{5}";
  var exp = /\{(\d+)\}/g;
  for (var found = exp.exec(lang); found; found = exp.exec(lang)) {
    // exp.lastIndex still points behind the previous match in the OLD string.
    var position = parseInt(found[1], 10);
    lang = lang.replace(found[0], args[position]);
  }
  console.log('ghi: ' + lang);
}
ghi();
Function ghi() needs to reset RegExp object's lastIndex inside the loop:
// Fills the {n} placeholders of a template one at a time with a reused global
// (/g) pattern, resetting its search position after every replacement.
function ghi(){
  var lang = "{0}-{1}-{2}-{3}-{4}-{5}";
  var args = ["a","b","c","d","e","f"];
  var exp = /\{(\d+)\}/g;
  // Declared locally: assigning to an undeclared name creates an implicit global
  // and throws a ReferenceError in strict mode / ES modules.
  var matches;
  while ((matches = exp.exec(lang)) !== null) {
    var index = parseInt(matches[1], 10);
    lang = lang.replace(matches[0], args[index]);
    // The string just got shorter; restart the next search at the beginning
    // instead of behind the previous match.
    exp.lastIndex = 0;
  }
  console.log('ghi: ' + lang);
}
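Because of the exp.lastIndex = 0; call, the output is now the same as for abc and def.
JavaScript's RegExp object is stateful. When the global flag is used and you call a method such as exec() on the same RegExp object again, the search starts at the index following the end of the last match. In ghi() the string gets shorter after each replacement while lastIndex keeps its old value, so every other placeholder is skipped.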