Matching exactly two consecutive spaces in Google Apps Scripts - regex

I'm trying to match exactly two consecutive spaces using DocumentApp.getActiveDocument().getBody().replaceText() and replace them with a single space.
Unfortunately, it only supports some of regex (https://github.com/google/re2/wiki/Syntax).
I've tried DocumentApp.getActiveDocument().getBody().replaceText("[^ ] {2}[^ ]", " ") but that matches the characters surrounding the spaces as well.
I've tried DocumentApp.getActiveDocument().getBody().replaceText("([^ ]) {2}([^ ])", "$1 $2") but this outputs "$1 $2" rather than "character character".
I've tried DocumentApp.getActiveDocument().getBody().replaceText(" {2}", " ") but that also matches two spaces within a greater group of spaces.

It was difficult (for me) to write a single regular expression for the required replacements, because the surrounding (non-space) characters were also replaced each time. Moreover, in the general case we must handle the special cases where the spaces are at the very beginning or the very end of the string.
As a result I suggest 2 functions for all kinds of replacements below:
/**
 * Collapses every run of exactly two spaces in the active document into a
 * single space, handling each positional case with its own pattern and
 * logging how many replacements each case produced.
 *
 * Each pattern is handed to replaceWithPattern(), which removes the character
 * located one position after the start of the match; in all four patterns
 * that character is one of the two spaces.
 */
function replaceDoubleSpace() {
  var body = DocumentApp.getActiveDocument().getBody();
  // ' {2}' spells the two spaces out explicitly so the patterns cannot be
  // silently turned into single-space patterns by whitespace-collapsing tools.
  var count = replaceWithPattern('^ {2}$', body);
  Logger.log(count + ' replacement(s) done for the entire string');
  // Requiring a non-space on both sides excludes longer runs of spaces.
  count = replaceWithPattern('[^ ] {2}[^ ]', body);
  Logger.log(count + ' replacement(s) done inside the string');
  count = replaceWithPattern('^ {2}[^ ]', body);
  Logger.log(count + ' replacement(s) done at the beginning of the string');
  count = replaceWithPattern('[^ ] {2}$', body);
  Logger.log(count + ' replacement(s) done at the end of the string');
}
/**
 * Repeatedly searches `body` for `pat` and, for each hit, deletes the single
 * character located one position after the start of the match. Stops when the
 * pattern no longer matches.
 *
 * The pattern must stop matching once that character is removed, otherwise
 * the loop never terminates.
 *
 * @param {string} pat Search pattern passed to body.findText().
 * @param {Object} body Document body (or any element exposing findText()).
 * @return {number} Number of characters removed.
 */
function replaceWithPattern(pat, body) {
  var count = 0;
  while (true) {
    var range = body.findText(pat);
    if (range == null) break;
    // Offset of the character to drop: the one right after the match start.
    var pos = range.getStartOffset() + 1;
    // Delete just that character in place instead of replacing the element's
    // entire contents with setText().
    range.getElement().asText().deleteText(pos, pos);
    count++;
  }
  return count;
}
Of course, the first function may be simplified, but it becomes less readable in this case:
/**
 * Collapses every run of exactly two spaces in the active document into a
 * single space using one combined pattern, then logs the total number of
 * replacements.
 *
 * The alternatives cover, in order: a string that is only two spaces, two
 * spaces between non-space characters, two leading spaces, and two trailing
 * spaces. replaceWithPattern() removes the character one position after the
 * match start, which is a space in every alternative.
 */
function replaceDoubleSpace() {
  var body = DocumentApp.getActiveDocument().getBody();
  // ' {2}' makes the two-space requirement explicit in each alternative.
  var count = replaceWithPattern('^ {2}$|[^ ] {2}[^ ]|^ {2}[^ ]|[^ ] {2}$', body);
  Logger.log(count + ' replacement(s) done');
}

Related

Regexp help - is this possible at all with regexp?

I'm still struggling with regexp, wondering if this is at all possible.
I need to parse variable names from expression, but I need to skip ones within string literals and ones after "dot".
so for expression like:
'test' + (n + text.length)
I would like to get only n and text.
I'm using /([a-z_][a-z0-9_]*)/gi
but it gives me test,n,text,length
Thanks for help:)
If your input is not too complicated, here is a possible regex option:
// Quoted string literals are matched first (and consumed without capturing),
// so identifiers inside them are skipped. Only the last alternative captures
// a word into group 1, and only when it is not directly preceded by a dot.
var re = /'[^'\\]*(?:\\.[^'\\]*)*'|"[^"\\]*(?:\\.[^"\\]*)*"|(?:^|[^.])\b(\w+)/g;
var str = '\'test\\\' this\' + "Missing \\\"here\\\"" + (n + text.length)';
document.body.innerHTML = "Testing string: <b>" + str + "</b><br/>";
var res = [];
// Declared explicitly: assigning to an undeclared `m` would create an
// implicit global and throws a ReferenceError in strict mode.
var m;
while ((m = re.exec(str)) !== null) {
  // Group 1 is only set when the identifier alternative matched.
  if (m[1]) { res.push(m[1]); }
}
document.body.innerHTML += JSON.stringify(res, null, 4);
The regex details:
'[^'\\]*(?:\\.[^'\\]*)*' - single quoted string literals (supporting escaped sequences)
| - or
"[^"\\]*(?:\\.[^"\\]*)*" - double quoted string literals (supporting escaped sequences)
| - or
(?:^|[^.])\b(\w+) - 1+ word characters that are either right at the string start or after a non-dot and preceded with a word boundary (placed inside Group 1)
See the regex demo.

regex equals to something exactly but does not equal to something else

my regex query below does an exact match of a word say Bob or Bill for example
var regExp = new RegExp("^" + inputVal + "$", 'i');
what i want it to do is match anything exactly (Bob or Bill Etc) but not match Fred
so match anything exactly except for Fred, does that make sense?
anyone help me out as to how i do that?
Thanks
EDIT 2:
I thought I'd show my actual script instead. What I'm doing is searching a table, and on page load I want to hide rows that contain a string, so if the length of exclude is greater than 0, hide that row...
/**
 * Shows or hides each non-header row of a table depending on whether any of
 * its cells matches a regular expression, then re-runs pagination.
 *
 * The expression is chosen as follows:
 *   - fixedsearch == 1: whole-cell, case-insensitive match of inputVal;
 *   - otherwise, non-empty exclude: negative lookahead anchored at the start
 *     of the cell text;
 *   - otherwise: case-insensitive search for inputVal anywhere in the cell.
 *
 * inputVal and exclude are inserted into the expression unescaped, so any
 * regex metacharacters they contain are interpreted as such.
 *
 * Rows with a matching cell are shown and lose the 'exclude' class; other
 * rows that have cells are hidden and gain it. Rows without <td> cells are
 * left untouched.
 *
 * @param {string} inputVal Text (regex source) to search for.
 * @param {string} tablename jQuery selector of the table.
 * @param {number|string} fixedsearch 1 to require a whole-cell match.
 * @param {string=} exclude Optional text (regex source) for the lookahead.
 */
function searchPagingTable(inputVal, tablename, fixedsearch, exclude) {
  // The expression does not depend on the row or cell, so build it once.
  var regExp;
  // Loose comparison kept on purpose so that both 1 and "1" are accepted.
  if (fixedsearch == 1) {
    regExp = new RegExp("^" + inputVal + "$", 'i');
  } else if (exclude != null && exclude.length > 0) {
    // The null check lets callers omit `exclude` without a TypeError.
    regExp = new RegExp("^(?!" + exclude + ")", "i");
  } else {
    regExp = new RegExp(inputVal, 'i');
  }

  var table = $(tablename);
  table.find('tr:not(.header)').each(function (index, row) {
    var allCells = $(row).find('td');
    if (allCells.length === 0) {
      return;
    }
    var found = false;
    allCells.each(function (cellIndex, td) {
      if (regExp.test($(td).text())) {
        found = true;
        // Returning false stops the jQuery .each() iteration early.
        return false;
      }
    });
    if (found) {
      $(row).show().removeClass('exclude');
    } else {
      $(row).hide().addClass('exclude');
    }
  });
  paginate();
}
That would be
var exclude = "Fred"
var regExp = new RegExp("^(?!.*" + exclude + ")", "i");
This regex matches any string except those that contain Fred. It doesn't actually match any characters in the string, but that's sufficient if you're just looking for a true/false result.
This will also find strings that contain Alfred or Fredo, so if you don't want that, you need to tell the regex only to look for entire words using word boundaries:
var regExp = new RegExp("^(?!.*\\b" + exclude + "\\b)", "i");
You need to make sure that your exclude string only contains ASCII letters/digits (or underscores) for this to work correctly.
You could populate a list of names you wish to match against:
var validNames = ['bob', 'bill'];
Then lowercase each input and match against the list:
if (validNames.indexOf(inputVal.toLowerCase()) != -1) {
// it's a good name
}
For older browsers you have to shim Array.indexOf()
// Case-insensitive pattern for a value that is exactly "Fred", allowing
// optional whitespace before and after it.
var re = new RegExp('^\\s*Fred\\s*$','i');
if (inputVal.match(re)) {
// Fred has been found
} else {
// Anything other than Fred has been found
}

Regex exclude exact digit from digits

Hello my fellow dream builders.
I am parsing time from twitter and I am using this regex:
{
// First check: does the time string start with the digit 1?
// NOTE(review): /^[1]/ only looks at the first character, so values such as
// "12" also take this branch and are reported as "1 hour ago" - confirm.
match = /^[1]/.exec(obj.tweetTime);
if(match != null){
time = "1 hour ago";
}
else
{
// Otherwise take the one or two leading digits as the hour count.
match = /^[0-9]{1,2}/.exec(obj.tweetTime);
// `match` is the exec() result here; the concatenation relies on its
// implicit string conversion.
time = match + " hours ago";
}
}
My question is, if there is simpler way to do this? As you can see, I have 2 digits for time. I just want to format my print right. Hour/Hours as you can see.
Is it possible to write only 1 regex and use only 1 conditional bracket?
PS: I am beginner at regex, and I know /^[0-9]{1,2}/ allow numbers from 0 to 99 practically, but as I said it works for my needs, just asking if it is possible to do this properly, since I lack knowledge.
Thank you, much love <3
I would do it like this:
// Accept the value only when it consists solely of digits.
var match = obj.tweetTime.match(/^\d+$/);
if (match) {
  // Build the phrase step by step; the plural "s" is added unless the count is 1.
  var time = match[0] + ' hour';
  if (match[0] != 1) {
    time += 's';
  }
  time += ' ago';
}
EDIT Turns out the string is formatted! In which case:
// Expect digits followed by a single unit letter: s, m, h or d.
var match = obj.tweetTime.match(/^(\d+)([smhd])$/);
if (match) {
  // Maps the unit letter to its singular English name.
  var units = { s: 'second', m: 'minute', h: 'hour', d: 'day' };
  var time = match[1] + ' ' + units[match[2]];
  // Pluralise unless the count is 1.
  if (match[1] != 1) {
    time += 's';
  }
  time += ' ago';
}
To explain the regex:
^ Anchor matches to the beginning of the string
(\d+) Capture one or more digits in first group
([smhd]) Capture s, m, h or d in second group
$ Anchor to end of string

Regex for the string with '#'

I'm wondering what the regex should be for a string containing '#'
e.g.
abc#def#ghj#ijk
I wanna get
#def
#ghj
#ijk
I tried #[\S]+ but it selects the whole #def#ghj#ijk Any ideas ?
Edit
The code below selects only #Me instead of #MessageBox. Why ?
// NOTE: inside a quoted string literal, \s is not kept as a regex escape - it
// becomes a plain "s", so the character class excludes the letter s instead
// of whitespace. The pattern has to be written with a doubled backslash
// ('\\s') for RegExp to receive \s.
var m = new RegExp('#[^\s#]+').exec('http://localhost/Lorem/10#MessageBox');
if (m != null) {
// Join every entry of the match array (the full match comes first), one per line.
var s = '';
for (i = 0; i < m.length; i++) {
s = s + m[i] + "\n";
}
}
Edit 2
the double backslash solved that problem. '#[^\\s#]+'
Try #[^\s#]+ to match # followed by a sequence of one or more characters which are neither # nor whitespace.
Match all characters that are not #:
#[^#]+

Regular expression to match word pairs joined with colons

I don't know regular expression at all. Can anybody help me with one very simple regular expression which is,
extracting 'word:word' from a sentence. e.g "Java Tutorial Format:Pdf With Location:Tokyo Javascript"?
Little modification:
the first 'word' is from a list but second is anything. "word1 in [ABC, FGR, HTY]"
guys situation demands a little more
modification.
The matching form can be "word11:word12 word13 .. " till the next "word21: ... " .
things are becoming complex with sec.....i have to learn reg ex :(
thanks in advance.
You can use the regex:
\w+:\w+
Explanation:
\w - single char which is either a letter(uppercase or lowercase), digit or a _.
\w+ - one or more of above char..basically a word
so \w+:\w+
would match a pair of words separated by a colon.
Try \b(\S+?):(\S+?)\b. Group 1 will capture "Format" and group 2, "Pdf".
A working example:
<html>
<head>
<script type="text/javascript">
/**
 * Demo: finds every "word:word" pair in a sample sentence and shows the two
 * halves of each pair in an alert.
 */
function test() {
  // The 'g' flag lets exec() advance through all matches; without it only
  // the first pair would be found.
  var pairPattern = /\b(\S+?):(\S+?)\b/g;
  var sentence = "Java Tutorial Format:Pdf With Location:Tokyo Javascript";
  for (var hit = pairPattern.exec(sentence); hit != null; hit = pairPattern.exec(sentence)) {
    alert(hit[1] + " -- " + hit[2]);
  }
}
</script>
</head>
<body onload="test();">
</body>
</html>
A good reference for regexes is https://developer.mozilla.org/en/Core_JavaScript_1.5_Reference/Global_Objects/RegExp
Use this snippet :
// Sample input containing two colon-joined word pairs.
$str=" this is pavun:kumar hello world bk:systesm" ;
// preg_match_all() returns the number of full matches, so a non-zero result
// means at least one word:word pair was found; the matches are stored in $val.
if ( preg_match_all ( '/(\w+\:\w+)/',$str ,$val ) )
{
print_r ( $val ) ;
}
else
{
print "Not matched \n";
}
Continuing Jaú's function with your additional requirement:
/**
 * Demo: finds "key:value" pairs whose key is one of a fixed list of words and
 * shows each key with its value in an alert.
 */
function test() {
  var allowedKeys = ['Format', 'Location', 'Size'];
  var sentence = "Java Tutorial Format:Pdf With Location:Tokyo Language:Javascript";
  // Build "(Format|Location|Size):(\w+)" from the key list.
  var pairPattern = new RegExp( '(' + allowedKeys.join('|') + '):(\\w+)', 'g');
  for (var hit = pairPattern.exec(sentence); hit != null; hit = pairPattern.exec(sentence)) {
    alert(hit[1] + " = " + hit[2]);
  }
}
I am currently solving that problem in my nodejs app and found that this is, what I guess, suitable for colon-paired wordings:
([\w]+:)("(([^"])*)"|'(([^'])*)'|(([^\s])*))
It also matches quoted value. like a:"b" c:'d e' f:g
Example coding in es6:
// A key ending in ':' followed by a double-quoted, single-quoted or
// unquoted (non-whitespace) value.
const regex = /([\w]+:)("(([^"])*)"|'(([^'])*)'|(([^\s])*))/g;
const str = `category:"live casino" gsp:S1aik-UBnl aa:"b" c:'d e' f:g`;
let m = regex.exec(str);
while (m !== null) {
  // Guard against an infinite loop on zero-width matches by stepping forward manually.
  if (m.index === regex.lastIndex) {
    regex.lastIndex += 1;
  }
  // Report the full match (group 0) and every capture group.
  for (const [groupIndex, match] of m.entries()) {
    console.log(`Found match, group ${groupIndex}: ${match}`);
  }
  m = regex.exec(str);
}
Example coding in PHP
// A key ending in ':' followed by a double-quoted, single-quoted or
// unquoted (non-whitespace) value.
$re = '/([\w]+:)("(([^"])*)"|\'(([^\'])*)\'|(([^\s])*))/';
$str = 'category:"live casino" gsp:S1aik-UBnl aa:"b" c:\'d e\' f:g';
// PREG_SET_ORDER makes $matches hold one entry per match (each with its groups).
preg_match_all($re, $str, $matches, PREG_SET_ORDER, 0);
// Print the entire match result
var_dump($matches);
You can check/test your regex expressions using this online tool: https://regex101.com
Btw, if not deleted by regex101.com, you can browse that example coding here
here's the non regex way, in your favourite language, split on white spaces, go through the element, check for ":" , print them if found. Eg Python
>>> s="Java Tutorial Format:Pdf With Location:Tokyo Javascript"
>>> for i in s.split():
... if ":" in i:
... print i
...
Format:Pdf
Location:Tokyo
You can do further checks to make sure its really "someword:someword" by splitting again on ":" and checking if there are 2 elements in the splitted list. eg
>>> for i in s.split():
... if ":" in i:
... a=i.split(":")
... if len(a) == 2:
... print i
...
Format:Pdf
Location:Tokyo
([^:]+):(.+)
Meaning: (everything except ":", one or more times), ":", (any character, one or more times)
You'll find good manuals on the net... Maybe it's time for you to learn...