How to search and replace multiline string in files using linux commands - regex

i have many files which contain similar string which is multiline string for example :
<script> var i = 100
var j = 200
var x = 1000 </script>
and it can be look like this:
<script> var i = 100
var j = 200
var x = 1000 </script>
or
<script> var i = 100
var j = 200
var x = 1000 </script>
and i want to replace it with
<script> var i = 100
var j = 200
var x = xxxx </script>
Notice that the line can be also none spaced and sometimes it can be tabs
The case i have problem is the multiline , if it was simple one line it easir ,

Multi line replacements are easy in perl:
perl -0 -pe 's/<script>\s*var\s+i\s+=\s+100\s+var\s+j\s+=\s+200\s+var\s+x\s+=\s+1000\s+<\/script>/<script> var i =100\n var j =100\n var x = xxxx <\/script>/g' input-file
Or (slightly more readable):
perl -0 -pe 's/<script>\s*
var\s+ i\s+ =\s+ 100\s+
var\s+ j\s+ =\s+ 200\s+
var\s+ x\s+ =\s+1000\s+
<\/script>/<script> var i =100\n var j =100\n var x = xxxx <\/script>/gx' input-file

Related

Find and change cyrillic word with boundary in google scripts

The problem is that \b doesn't work with Russian and Ukrainian letters.
Here I try to find all matches of a word 'февраля' it the text, change them to tempword, then make it a link and change it back to 'февраля'.
function addLinks(word, siteurl) {
var id = 'doc\'s ID';
var doc = DocumentApp.openById(id);
var body = doc.getBody();
var tempword = 'ASDFDSGDDKDSL2';
var searchText = "\\b"+word+"\\b";
var element = body.findText(searchText);
console.log(element);
while (element) {
var start = element.getStartOffset();
var text = element.getElement().asText();
text.replaceText(searchText, tempword);
text.setLinkUrl(start, start + tempword.length - 1, siteurl);
element = body.findText(searchText);
}
body.replaceText(tempword, word);
}
addLinks('февраля', 'example.com');
It works as it should, if I change Russian word 'февраля' to English 'february'.
addLinks('february', 'example.com');
I need regular expression, because if I just look for 'февраля' script will apply it to other words like 'февралям', 'февралями' etc.
So, it is a question, how to make it work.
Mistake "Exception: Invalid regular expression pattern" occurs with this code:
var searchText = "(?<=[\\s,.:;\"']|^)"+word+"(?=[\\s,.:;\"']|$)";
or this:
var searchText = "(^|\s)"+word+"(?=\s|$)";
and some other.
Here is my solution:
function main() {
addLinks('февраля', 'example.com');
}
function addLinks(word, url) {
var doc = DocumentApp.getActiveDocument();
var pgfs = doc.getParagraphs();
var bound = '[^А-яЁё]'; // any letter except Russian one
var patterns = [
{regex: bound + word + bound, start: 1, end: 1}, // word inside of line
{regex: '^' + word + bound, start: 0, end: 1}, // word at the start
{regex: bound + word + '$', start: 1, end: 0}, // word at the end
{regex: '^' + word + '$', start: 0, end: 0} // word = line
];
for (var pgf of pgfs) for (var pattern of patterns) {
var location = pgf.findText(pattern.regex);
while (location) {
var start = location.getStartOffset() + pattern.start;
var end = location.getEndOffsetInclusive() - pattern.end;
pgf.editAsText().setLinkUrl(start, end, url);
location = pgf.findText(pattern.regex, location);
}
}
}
Test output:
It handles well the word placed at the start or at the end of the line (or both). And it gives no the weird error message.

Regular expression to extract Words inside nested parentheses

im looking for the regexp that make able to do this tasks
message Body Input: Test1 (Test2) (test3) (ti,ab(text(text here(possible text)text(possible text(more text))))) end (text)
the result that i want Result: (text(text here(possible text)text(possible text(more text))))
I want to collect everything that is inside ti,ab(................)
var messageBody = message.getPlainBody()
var ssFile = DriveApp.getFileById(id);
DriveApp.getFolderById(folder.getId()).addFile(ssFile);
var ss = SpreadsheetApp.open(ssFile);
var sheet = ss.getSheets()[0];
sheet.insertColumnAfter(sheet.getLastColumn());
SpreadsheetApp.flush();
var sheet = ss.getSheets()[0];
var range = sheet.getRange(1, 1, sheet.getLastRow(), sheet.getLastColumn() + 1)
var values = range.getValues();
values[0][sheet.getLastColumn()] = "Search Strategy";
for (var i = 1; i < values.length; i++) {
//here my Regexp
var y = messageBody.match(/\((ti,ab.*)\)/ig);
if (y);
values[i][values[i].length - 1] = y.toString();
range.setValues(values);
The only solution you may use here is to extract all substrings inside parentheses and then filter them to get all those that start with ti,ab:
var a = [], r = [], result;
var txt = "Test1 (Test2) (test3) (ti,ab(text(text here(possible text)text(possible text(more text))))) end (text)";
for(var i=0; i < txt.length; i++){
if(txt.charAt(i) == '(') {
a.push(i);
}
if(txt.charAt(i) == ')') {
r.push(txt.substring(a.pop()+1,i));
}
}
result = r.filter(function(x) { return /^ti,ab\(/.test(x); })
.map(function(y) {return y.substring(6,y.length-1);})
console.log(result);
The nested parentheses function is borrowed from Nested parentheses get string one by one. The /^ti,ab\(/ regex matches ti,ab( at the start of the string.
The above solution allows extracting nested parentheses inside nested parentheses. If you do not need it, use
var txt = "Test1 (Test2) ((ti,ab(text(text here))) AND ab(test3) Near Ti(test4) NOT ti,ab,su(test5) NOT su(Test6))";
var start=0, r = [], level=0;
for (var j = 0; j < txt.length; j++) {
if (txt.charAt(j) == '(') {
if (level === 0) start=j;
++level;
}
if (txt.charAt(j) == ')') {
if (level > 0) {
--level;
}
if (level === 0) {
r.push(txt.substring(start, j+1));
}
}
}
console.log("r: ", r);
var rx = "\\b(?:ti|ab|su)(?:,(ti|ab|su))*\\(";
var result = r.filter(function(y) { return new RegExp(rx, "i").test(y); })
.map(function(x) {
return x.replace(new RegExp(rx, "ig"), '(')
});
console.log("Result:",result);
The pattern used to filter and remove the unnecessary words
\b(?:ti|ab|su)(?:,(ti|ab|su))*\(
Details
\b - a word boundary
(?:ti|ab|su) - 1 of the alternatives,
(?:,(ti|ab|su))* - 0 or more repetitions of , followed with 1 of the 3 alternatives
\( - a (.
The match is replaced with ( to restore it in the match.

Replacement matching regex with anchor tag?

I have a problem when using Regex. I have a html document which create an anchor link when it matches condition.
An example html:
Căn cứ Luật Tổ chức HĐND và UBND ngày 26/11/2003;
Căn cứ Nghị định số 63/2010/NĐ-CP ngày 08/6/2010 của Chính phủ về
kiểm soát thủ tục hành chính;
Căn cứ Quyết định số 165/2011/QĐ-UBND ngày 06/5/2011 của UBND tỉnh
ban hành Quy định kiểm soát thủ tục hành chính trên địa bàn tỉnh;
Căn cứ Quyết định số 278/2011/QĐ-UBND ngày 02/8/2011 của UBND tỉnh
ban hành Quy chế phối hợp thực hiện thống kê, công bố, công khai thủ
tục hành chính và tiếp nhận, xử lý phản ánh, kiến nghị của cá nhân, tổ
chức về quy định hành chính trên địa bàn tỉnh;
Xét đề nghị của Giám đốc Sở Công Thương tại Tờ trình số
304/TTr-SCT ngày 29 tháng 5 năm 2013
I want to match these bold texts and make anchor links from these. If it has, try ignore. Link example 63/2010/NĐ-CP
var matchLegals = new Regex(#"(?:[\d]+\/?)\d+\/[a-z\dA-Z_ÀÁÂÃÈÉÊÌÍÒÓÔÕÙÚĂĐĨŨƠàáâãèéêìíòóôõùúăđĩũơƯĂẠẢẤẦẨẪẬẮẰẲẴẶẸẺẼỀỀỂưăạảấầẩẫậắằẳẵặẹẻẽềềểỄỆỈỊỌỎỐỒỔỖỘỚỜỞỠỢỤỦỨỪễệỉịọỏốồổỗộớờởỡợụủứừỬỮỰỲỴÝỶỸửữựỳỵỷỹ\-]+", RegexOptions.Compiled);
var doc = new HtmlDocument();
doc.LoadHtml(htmlString);
var allElements = doc.DocumentNode.SelectSingleNode("//div[#class='main-content']").Descendants();
foreach (var node in allElements)
{
var matches = matchLegals.Matches(node.InnerHtml);
foreach (Match m in matches)
{
var k = m.Value;
//dont know what to do
}
}
What can i do this
Many thanks.
I assume your regex pattern is OK and works. Another assumption is that node.InnerHtml doesn't contain any <a> tags already encompassing any of the potential matches.
In this case, it's as simple as doing something like this:
node.InnerHtml = Regex.Replace(node.InnerHtml, "[your pattern here]", "<a href='query=$&'>$&</a>");
...
doc.Save("output.html");
Note, that you may need to work on the href component - I'm unsure how your link should be built.
you match text and replace:
<script>
var s = '...';
var matchs = s.match(/\d{2,3}\/\d{4}\/[a-zA-Z\-áàảãạăâắằấầặẵẫậéèẻẽẹêếềểễệóòỏõọôốồổỗộơớờởỡợíìỉĩịđùúủũụưứửữựÀÁÂÃÈÉÊÌÍÒÓÔÕÙÚĂĐĨŨƠƯĂẠẢẤẦẨẪẬẮẰẲẴẶẸẺẼÊỀỂỄỆỈỊỌỎỐỒỔỖỘỚỜỞỠỢỤỨỪỬỮỰỲỴÝỶỸửữựỵỷỹ]+/gi);
if (matchs != null) {
for(var i=0; i<matchs.length;i++){
var val = matchs[i];
s = s.replace(val, '<a href="?key=' + val + '"/>' + val + '</a>');
}
}
document.write(s);
</script>
#Shaamaan thank for your advice. After few hours of coding, it works now
var content = doc.DocumentNode.SelectSingleNode("//div[#class='main-content']");
var items = content.SelectNodes(".//text()[normalize-space(.) != '']");
foreach (HtmlNode node in items)
{
if (!matchLegals.IsMatch(node.InnerText) || node.ParentNode.Name == "a")
{
continue;
}
var texts = node.InnerHtml.Trim();
node.InnerHtml = matchLegals.Replace(texts, a => string.Format("<a href='/search?q={0}'>{0}</a>",a.Value));
}

Passing the lat, lng information to the google map script

I've have an index.html file that includes a gogglemap.js file to display a map of users location. Currently I am attempting to add the proper code to the index.html to pass the lat, lng info to the js file.
Here is filler content for the index file to show what I am attempting to do:
<h3><display user city></h3> <---- this needs to display users city and has filler text to show what I am trying to accomplish.
<div id="map"></div>
<script>var lat=12.356;var lng=-19.31;var country="User Country";var city="User City";</script> <----- seems like it is getting the lat/lng somehow before the index page loads and inserting this script into the index file?
Here is the js file code:
var styles = [{yourcustomstyle}]
var myLatlng = { lat: lat, lng: lng };
function initialize() {
var mapOptions = {
zoom: 12,
center: myLatlng,
styles: styles,
panControl: false,
zoomControl: false,
mapTypeControl: false,
scaleControl: false,
streetViewControl: false,
overviewMapControl: false
};
var map = new google.maps.Map( document.getElementById('map'), mapOptions );
for (var a = 0; a < 6; a++) {
c = Math.random();
c = c * (0 == 1E6 * c % 2 ? 1 : -1);
d = Math.random()
d = d * (0 == 1E6 * d % 2 ? 1 : -1);
c = new google.maps.LatLng( lat + 0.08 * c + 0.052, lng + 0.2 * d + 0.08),
marker = new google.maps.Marker({
map: map,
position: c,
icon: 'marker.png'
});
}
}
function loadScript() {
var script = document.createElement('script');
script.type = 'text/javascript';
script.src = 'https://maps.googleapis.com/maps/api/js?v=3.exp&callback=initialize';
document.body.appendChild( script );
}
window.onload = loadScript;

powershell and regex

any ideas??
I have this code in hundreds of pages I need to remove it from all these pages but because of the " var s " where everypage is a different 4 digit number i cant just do a find and replace replace with nothing
.
How can I create a powershell command using regex on those four digits.
<script language="javascript" type="text/javascript">
var d = '<%=joindomain%>';
var s = '9244';
var a = '<%=Request.QueryString("aff") %>';
var c = '<%=Request.QueryString("camp") %>';
var r = '<%=referer %>';
</script>
Thank you
Rico
Not sure how general you can be here... Code below may do more than you ask for, if that is the case - just add more things to disambiguate it. Anyway, it did the job for me... ;)
#'
foo
<script language="javascript" type="text/javascript">
var d = '<%=joindomain%>';
var s = '9244';
var a = '<%=Request.QueryString("aff") %>';
var c = '<%=Request.QueryString("camp") %>';
var r = '<%=referer %>';
</script>
bar
'# -replace '<script language[\s\S]*?var s = ''\d{4}''[\s\S]*?</script>'
it will remove any piece of text between "" that contains string "var s = '####'" (4 digits). Notice that I would have to escape things for regex and single quotes for it to work. At some point you may consider [Regex]::Escape method handy, if you want to be very specific...