I would like to match with RegExp a number between X and Y. Is that possible?
([0-9]+) will match any number, how could I do to match a number between, for instance, 110 and 2234?
According to Generate a Regular Expression to Match an Arbitrary Numeric Range, and after generating such a regex for your example at Regex_For_Range:
\b0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])\b
would do the trick.
The process would be (still following that Regex generator):
First, break into equal length ranges:
110 - 999
1000 - 2234
Second, break into ranges that yield simple regexes:
110 - 199
200 - 999
1000 - 1999
2000 - 2199
2200 - 2229
2230 - 2234
Turn each range into a regex:
1[1-9][0-9]
[2-9][0-9]{2}
1[0-9]{3}
2[01][0-9]{2}
22[0-2][0-9]
223[0-4]
Collapse adjacent powers of 10:
1[1-9][0-9]
[2-9][0-9]{2}
1[0-9]{3}
2[01][0-9]{2}
22[0-2][0-9]
223[0-4]
Combining the regexes above yields:
0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])
Next we'll try factoring out common prefixes using a tree:
Parse into tree based on regex prefixes:
. 1 [1-9] [0-9]
+ [0-9]{3}
+ [2-9] [0-9]{2}
+ 2 [01] [0-9]{2}
+ 2 [0-2] [0-9]
+ 3 [0-4]
Turning the parse tree into a regex yields:
0*(1([1-9][0-9]|[0-9]{3})|[2-9][0-9]{2}|2([01][0-9]{2}|2([0-2][0-9]|3[0-4])))
We choose the shorter one as our result.
\b0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])\b
This is not the sort of thing regexes excel at. You will probably find it easier to ensure that you have the right number of digits /^([0-9]{3,4})$/ and then do further checks against the capture.
• Numeric range regex generator
Since online number range regex generator services often become unavailable after some period of time (this one is still alive at the time of writing the post), I think it would be nice to have it here.
How-to:
Scroll to the bottom of this answer
Click Run code snippet
Make sure you type the minimum and maximum threshold values in the text input fields, mark all the options you need below and hit Generate button:
// Bundled, minified build that defines `toRegexRange`.
// The outer wrapper hands the factory result to module.exports, to an AMD define(), or to a
// `toRegexRange` property on window/global/self/this, whichever is available.
// Module 1 is the range-to-pattern function; it stores computed results in its `cache` object
// and exposes `clearCache` to reset it. Module 2 is the "is-number" check that module 1 requires.
// The code below is left exactly as shipped.
!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{("undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this).toRegexRange=e()}}(function(){return function(){return function e(t,n,r){function i(u,a){if(!n[u]){if(!t[u]){var s="function"==typeof require&&require;if(!a&&s)return s(u,!0);if(o)return o(u,!0);var c=new Error("Cannot find module '"+u+"'");throw c.code="MODULE_NOT_FOUND",c}var f=n[u]={exports:{}};t[u][0].call(f.exports,function(e){return i(t[u][1][e]||e)},f,f.exports,e,t,n,r)}return n[u].exports}for(var o="function"==typeof require&&require,u=0;u<r.length;u++)i(r[u]);return i}}()({1:[function(e,t,n){"use strict";const r=e("is-number"),i=(e,t,n)=>{if(!1===r(e))throw new TypeError("toRegexRange: expected the first argument to be a number");if(void 0===t||e===t)return String(e);if(!1===r(t))throw new TypeError("toRegexRange: expected the second argument to be a number.");let o={relaxZeros:!0,...n};"boolean"==typeof o.strictZeros&&(o.relaxZeros=!1===o.strictZeros);let s=e+":"+t+"="+String(o.relaxZeros)+String(o.shorthand)+String(o.capture)+String(o.wrap);if(i.cache.hasOwnProperty(s))return i.cache[s].result;let c=Math.min(e,t),f=Math.max(e,t);if(1===Math.abs(c-f)){let n=e+"|"+t;return o.capture?`(${n})`:!1===o.wrap?n:`(?:${n})`}let l=h(e)||h(t),d={min:e,max:t,a:c,b:f},p=[],g=[];if(l&&(d.isPadded=l,d.maxLen=String(d.max).length),c<0){g=u(f<0?Math.abs(f):1,Math.abs(c),d,o),c=d.a=0}return f>=0&&(p=u(c,f,d,o)),d.negatives=g,d.positives=p,d.result=function(e,t,n){let r=a(e,t,"-",!1,n)||[],i=a(t,e,"",!1,n)||[],o=a(e,t,"-?",!0,n)||[];return r.concat(o).concat(i).join("|")}(g,p,o),!0===o.capture?d.result=`(${d.result})`:!1!==o.wrap&&p.length+g.length>1&&(d.result=`(?:${d.result})`),i.cache[s]=d,d.result};function o(e,t,n){if(e===t)return{pattern:e,count:[],digits:0};let r=function(e,t){let n=[];for(let 
r=0;r<e.length;r++)n.push([e[r],t[r]]);return n}(e,t),i=r.length,o="",u=0;for(let e=0;e<i;e++){let[t,i]=r[e];t===i?o+=t:"0"!==t||"9"!==i?o+=p(t,i,n):u++}return u&&(o+=!0===n.shorthand?"\\d":"[0-9]"),{pattern:o,count:[u],digits:i}}function u(e,t,n,r){let i,u=function(e,t){let n=1,r=1,i=f(e,n),o=new Set([t]);for(;e<=i&&i<=t;)o.add(i),i=f(e,n+=1);for(i=l(t+1,r)-1;e<i&&i<=t;)o.add(i),i=l(t+1,r+=1)-1;return(o=[...o]).sort(s),o}(e,t),a=[],c=e;for(let e=0;e<u.length;e++){let t=u[e],s=o(String(c),String(t),r),f="";n.isPadded||!i||i.pattern!==s.pattern?(n.isPadded&&(f=g(t,n,r)),s.string=f+s.pattern+d(s.count),a.push(s),c=t+1,i=s):(i.count.length>1&&i.count.pop(),i.count.push(s.count[0]),i.string=i.pattern+d(i.count),c=t+1)}return a}function a(e,t,n,r,i){let o=[];for(let i of e){let{string:e}=i;r||c(t,"string",e)||o.push(n+e),r&&c(t,"string",e)&&o.push(n+e)}return o}function s(e,t){return e>t?1:t>e?-1:0}function c(e,t,n){return e.some(e=>e[t]===n)}function f(e,t){return Number(String(e).slice(0,-t)+"9".repeat(t))}function l(e,t){return e-e%Math.pow(10,t)}function d(e){let[t=0,n=""]=e;return n||t>1?`{${t+(n?","+n:"")}}`:""}function p(e,t,n){return`[${e}${t-e==1?"":"-"}${t}]`}function h(e){return/^-?(0+)\d/.test(e)}function g(e,t,n){if(!t.isPadded)return e;let r=Math.abs(t.maxLen-String(e).length),i=!1!==n.relaxZeros;switch(r){case 0:return"";case 1:return i?"0?":"0";case 2:return i?"0{0,2}":"00";default:return i?`0{0,${r}}`:`0{${r}}`}}i.cache={},i.clearCache=(()=>i.cache={}),t.exports=i},{"is-number":2}],2:[function(e,t,n){"use strict";t.exports=function(e){return"number"==typeof e?e-e==0:"string"==typeof e&&""!==e.trim()&&(Number.isFinite?Number.isFinite(+e):isFinite(+e))}},{}]},{},[1])(1)});
// Wires up the range-regex generator UI once the DOM is ready:
//  - typing in either boundary field clears the previously shown output,
//  - the four "boundary" checkboxes behave as a mutually exclusive group,
//  - the Generate button builds the pattern and prints it as literal text.
$( document ).ready( function() {
  // Ids of the checkboxes that choose how the pattern is anchored; at most one stays active.
  const boundaryIds = [ 'wholestring', 'wb', 'dgtb', 'whtb' ];

  $( "#rangeLeft, #rangeRight" ).keydown( function() {
    clearDisplay();
  } );

  boundaryIds.forEach( function( clickedId ) {
    $( '#' + clickedId ).click( function() {
      // The clicked box keeps the state the click itself gave it; only its siblings are
      // cleared. .prop() is used because .is(':checked') below reads the live property.
      boundaryIds.forEach( function( otherId ) {
        if ( otherId !== clickedId ) {
          $( '#' + otherId ).prop( 'checked', false );
        }
      } );
    } );
  } );

  $( "#run" ).click( function() {
    clearDisplay();
    const rangeLeft = $( "#rangeLeft" ).val();
    const rangeRight = $( "#rangeRight" ).val();
    if ( ! checkRanges( rangeLeft, rangeRight ) ) return;

    let source = toRegexRange( rangeLeft, rangeRight );

    // Optional pieces are applied innermost first: fraction suffix, zero prefix, sign prefix.
    if ( $( '#frac' ).is( ':checked' ) ) {
      source = source + '(?:\\.\\d+)?';
    }
    if ( $( '#allowzero' ).is( ':checked' ) ) {
      source = "0*" + source;
    }
    if ( $( '#neg' ).is( ':checked' ) ) {
      source = "-?" + source;
    }

    // Boundary assertions, checked in the same priority order as before.
    if ( $( '#wholestring' ).is( ':checked' ) ) {
      source = '^' + source + '$';
    } else if ( $( '#wb' ).is( ':checked' ) ) {
      source = '\\b' + source + '\\b';
    } else if ( $( '#whtb' ).is( ':checked' ) ) {
      source = '(?<!\\S)' + source + '(?!\\S)';
    } else if ( $( '#dgtb' ).is( ':checked' ) ) {
      source = '(?<!\\d)' + source + '(?!\\d)';
    }

    // Insert the pattern as text, not markup: lookbehinds such as (?<!\d) contain "<" and
    // would otherwise be parsed as HTML and vanish from the displayed result.
    $( "#result" ).append( $( "<B>" ).text( source ), "<BR/>" );
  } );
} );
/**
 * Validates the two boundary strings typed by the user and reports every
 * problem found by appending a message to the #result element.
 *
 * @param {string} rangeLeft  - text of the lower boundary field
 * @param {string} rangeRight - text of the upper boundary field
 * @returns {boolean} true when both values are non-negative integers with left <= right
 */
function checkRanges( rangeLeft, rangeRight ) {
  const containsNonDigit = ( text ) => /\D/.test( text );

  // Anything other than plain digits is rejected before any parsing happens.
  if ( containsNonDigit( rangeLeft ) || containsNonDigit( rangeRight ) ) {
    $( "#result" ).append( "Type two numbers<BR/>" );
    return false;
  }

  const low = parseInt( rangeLeft, 10 );
  const high = parseInt( rangeRight, 10 );

  // Each entry pairs a failed condition with the message shown for it; all of them are
  // evaluated so the user sees every problem at once.
  const findings = [
    [ isNaN( low ) || isNaN( high ), "Range boundaries are not specified<BR/>" ],
    [ low < 0, "Left boundary is less than 0<BR/>" ],
    [ high < 0, "Right boundary is less than 0<BR/>" ],
    [ low > high, "Left boundary is greater than the right boundary<BR/>" ],
  ];

  let valid = true;
  for ( const [ failed, message ] of findings ) {
    if ( failed ) {
      $( "#result" ).append( message );
      valid = false;
    }
  }
  return valid;
}
/**
 * Resets the output area: empties the #result element and hides #test.
 */
function clearDisplay() {
  const resultPane = $( "#result" );
  resultPane.html( "" );

  const testPane = $( "#test" );
  testPane.hide();
}
<!-- Markup for the range-regex generator: two boundary inputs, a Generate button,
     an output area (#result) and the option checkboxes read by the script. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<p>Type in minimum and maximum values and click <i>Generate</i>.</p>
<input id="rangeLeft" value="1" /> - <input id="rangeRight" value="365" />
<br/>
<button id="run">Generate</button>
<!-- <code> is not a void element, so it needs an explicit closing tag. -->
<pre><code id="result"></code></pre>
<div>
<!-- Boundary options: the script keeps at most one of these four checked. -->
<label><input type="checkbox" id="wholestring"/>Match whole string</label> <br/>
<label><input type="checkbox" id="wb"/>Match within word boundaries</label><br/>
<label><input type="checkbox" id="dgtb"/>Match when not enclosed with digits</label><br/>
<label><input type="checkbox" id="whtb"/>Match when enclosed with whitespaces or start/end of string</label><br/>
<!-- Independent modifiers. -->
<label><input type="checkbox" id="allowzero"/>Allow leading zeros</label><br/>
<label><input type="checkbox" id="neg"/>Optionally match negative numbers</label><br/>
<label><input type="checkbox" id="frac"/>Optionally match fractional digits (floats)</label><br/>
</div>
Most of the JavaScript code here is borrowed from Алгоритм для преобразования диапазона номеров в регулярное выражение and to-regex-range npm library.
While you could do it with some absurd looking regex (as VonC answered), regex really isn't supposed to do this.. Why not defer the number checking to the redirected-to-script?
If numbers 110-2234 go to script1, and 1-109 go to script2, it would be much simpler to direct all numbers at a router script, and have it redirect to the correct location (via HTTP redirects)..
In .htaccess:
RewriteRule ^view/([0-9]+)/?$ router.php?page=$1 [L]
..then in router.php, something like:
<?php
// Router sketch: the rewrite rule passes the numeric page id as ?page=N.
// Pages 110..2234 (inclusive) are redirected to /script1; anything else gets a 404.

// (int) is PHP's integer cast; a missing parameter is treated as 0, which is out of range.
$page = isset($_GET['page']) ? (int) $_GET['page'] : 0;

if ($page >= 110 && $page <= 2234) {
    // One header per header() call; the third argument sets the response status code.
    header("Location: /script1", true, 301);
    exit;
} else {
    http_response_code(404);
}
?>
You can put the regexes for the following ranges together:
1[1-9]\d = 110-199
[2-9]\d\d = 200-999
1\d\d\d = 1000-1999
2[0-1]\d\d= 2000-2199
22[0-2]\d = 2200-2229
223[0-4] = 2230-2234
to form:
(1[1-9]\d|[2-9]\d\d|1\d\d\d|2[0-1]\d\d|22[0-2]\d|223[0-4])
\d means [0-9], but is three characters shorter
It's possible, albeit not pretty.
\b(?:1[1-9]\d|[2-9]\d\d|1\d{3}|2[01]\d\d|22[0-2]\d|223[0-4])\b
I emailed Philip Hazel, the author of PCRE, in 2006 to ask what he thought of math in regex:
Perhaps this lies out of the scope of the project in your view: The ability to treat numbers as being numbers and not text, this would definitely be a worthwhile feature.
Allowing you to do some basic math checks on matched digits, like: is the 2nd matched digit higher or lower, is the third digit a multiple of the 1st, and many more complicated cases I won't elaborate on just to get my point across.
Do you feel this exceeds the realm of textmatching?
to which I got the following reply:
Yes, I think I do, and also, it is not
something that is available in Perl
regular expressions. I know that PCRE
does have some extensions from Perl,
but nothing as major as that (you
could perhaps hack something up using
callouts, but that would be a bit ad
hoc, and no doubt exceedingly messy!).
Philip
and I couldn't agree more now in '09. Just match all numbers and do the number validation in whatever language you're doing the matching with.
Also if you want to locate or find if your path name contains a year, and take it out as a string, you can try something like this:
import re

# Example path that contains a year folder written as 'yYYYY'.
path1 = r'X:\S\Something_2019\y2019\AB19778_description\subfolder1\subfolder2'

# 'y' followed by four digits whose first is 1 or 2 and whose second is 0 or 9.
# The character class is [09]: a comma inside the brackets would be matched literally.
# The leading greedy '.*' makes findall report only the last such folder in the path.
pattern = r'.*(y[1-2][09][0-9]{2})'

find = re.findall(pattern, path1)
# findall returns an empty list when nothing matches, so guard the index.
mystring = find[0] if find else None
print(mystring)
This checks whether 'path1' contains a year string in the format 'yYYYY', i.e. with the letter 'y' as a prefix (relevant for my use case).
This will return string 'y2019'.
Related
I would like to match with RegExp a number between X and Y. Is that possible?
([0-9]+) will match any number, how could I do to match a number between, for instance, 110 and 2234?
According to Generate a Regular Expression to Match an Arbitrary Numeric Range, and after generating such a regex for your example at Regex_For_Range:
\b0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])\b
would do the trick.
The process would be (still following that Regex generator):
First, break into equal length ranges:
110 - 999
1000 - 2234
Second, break into ranges that yield simple regexes:
110 - 199
200 - 999
1000 - 1999
2000 - 2199
2200 - 2229
2230 - 2234
Turn each range into a regex:
1[1-9][0-9]
[2-9][0-9]{2}
1[0-9]{3}
2[01][0-9]{2}
22[0-2][0-9]
223[0-4]
Collapse adjacent powers of 10:
1[1-9][0-9]
[2-9][0-9]{2}
1[0-9]{3}
2[01][0-9]{2}
22[0-2][0-9]
223[0-4]
Combining the regexes above yields:
0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])
Next we'll try factoring out common prefixes using a tree:
Parse into tree based on regex prefixes:
. 1 [1-9] [0-9]
+ [0-9]{3}
+ [2-9] [0-9]{2}
+ 2 [01] [0-9]{2}
+ 2 [0-2] [0-9]
+ 3 [0-4]
Turning the parse tree into a regex yields:
0*(1([1-9][0-9]|[0-9]{3})|[2-9][0-9]{2}|2([01][0-9]{2}|2([0-2][0-9]|3[0-4])))
We choose the shorter one as our result.
\b0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])\b
This is not the sort of thing regexes excel at. You will probably find it easier to ensure that you have the right number of digits /^([0-9]{3,4})$/ and then do further checks against the capture.
• Numeric range regex generator
Since online number range regex generator services often become unavailable after some period of time (this one is still alive at the time of writing the post), I think it would be nice to have it here.
How-to:
Scroll to the bottom of this answer
Click Run code snippet
Make sure you type the minimum and maximum threshold values in the text input fields, mark all the options you need below and hit Generate button:
// Bundled, minified build that defines `toRegexRange`.
// The outer wrapper hands the factory result to module.exports, to an AMD define(), or to a
// `toRegexRange` property on window/global/self/this, whichever is available.
// Module 1 is the range-to-pattern function; it stores computed results in its `cache` object
// and exposes `clearCache` to reset it. Module 2 is the "is-number" check that module 1 requires.
// The code below is left exactly as shipped.
!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{("undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this).toRegexRange=e()}}(function(){return function(){return function e(t,n,r){function i(u,a){if(!n[u]){if(!t[u]){var s="function"==typeof require&&require;if(!a&&s)return s(u,!0);if(o)return o(u,!0);var c=new Error("Cannot find module '"+u+"'");throw c.code="MODULE_NOT_FOUND",c}var f=n[u]={exports:{}};t[u][0].call(f.exports,function(e){return i(t[u][1][e]||e)},f,f.exports,e,t,n,r)}return n[u].exports}for(var o="function"==typeof require&&require,u=0;u<r.length;u++)i(r[u]);return i}}()({1:[function(e,t,n){"use strict";const r=e("is-number"),i=(e,t,n)=>{if(!1===r(e))throw new TypeError("toRegexRange: expected the first argument to be a number");if(void 0===t||e===t)return String(e);if(!1===r(t))throw new TypeError("toRegexRange: expected the second argument to be a number.");let o={relaxZeros:!0,...n};"boolean"==typeof o.strictZeros&&(o.relaxZeros=!1===o.strictZeros);let s=e+":"+t+"="+String(o.relaxZeros)+String(o.shorthand)+String(o.capture)+String(o.wrap);if(i.cache.hasOwnProperty(s))return i.cache[s].result;let c=Math.min(e,t),f=Math.max(e,t);if(1===Math.abs(c-f)){let n=e+"|"+t;return o.capture?`(${n})`:!1===o.wrap?n:`(?:${n})`}let l=h(e)||h(t),d={min:e,max:t,a:c,b:f},p=[],g=[];if(l&&(d.isPadded=l,d.maxLen=String(d.max).length),c<0){g=u(f<0?Math.abs(f):1,Math.abs(c),d,o),c=d.a=0}return f>=0&&(p=u(c,f,d,o)),d.negatives=g,d.positives=p,d.result=function(e,t,n){let r=a(e,t,"-",!1,n)||[],i=a(t,e,"",!1,n)||[],o=a(e,t,"-?",!0,n)||[];return r.concat(o).concat(i).join("|")}(g,p,o),!0===o.capture?d.result=`(${d.result})`:!1!==o.wrap&&p.length+g.length>1&&(d.result=`(?:${d.result})`),i.cache[s]=d,d.result};function o(e,t,n){if(e===t)return{pattern:e,count:[],digits:0};let r=function(e,t){let n=[];for(let 
r=0;r<e.length;r++)n.push([e[r],t[r]]);return n}(e,t),i=r.length,o="",u=0;for(let e=0;e<i;e++){let[t,i]=r[e];t===i?o+=t:"0"!==t||"9"!==i?o+=p(t,i,n):u++}return u&&(o+=!0===n.shorthand?"\\d":"[0-9]"),{pattern:o,count:[u],digits:i}}function u(e,t,n,r){let i,u=function(e,t){let n=1,r=1,i=f(e,n),o=new Set([t]);for(;e<=i&&i<=t;)o.add(i),i=f(e,n+=1);for(i=l(t+1,r)-1;e<i&&i<=t;)o.add(i),i=l(t+1,r+=1)-1;return(o=[...o]).sort(s),o}(e,t),a=[],c=e;for(let e=0;e<u.length;e++){let t=u[e],s=o(String(c),String(t),r),f="";n.isPadded||!i||i.pattern!==s.pattern?(n.isPadded&&(f=g(t,n,r)),s.string=f+s.pattern+d(s.count),a.push(s),c=t+1,i=s):(i.count.length>1&&i.count.pop(),i.count.push(s.count[0]),i.string=i.pattern+d(i.count),c=t+1)}return a}function a(e,t,n,r,i){let o=[];for(let i of e){let{string:e}=i;r||c(t,"string",e)||o.push(n+e),r&&c(t,"string",e)&&o.push(n+e)}return o}function s(e,t){return e>t?1:t>e?-1:0}function c(e,t,n){return e.some(e=>e[t]===n)}function f(e,t){return Number(String(e).slice(0,-t)+"9".repeat(t))}function l(e,t){return e-e%Math.pow(10,t)}function d(e){let[t=0,n=""]=e;return n||t>1?`{${t+(n?","+n:"")}}`:""}function p(e,t,n){return`[${e}${t-e==1?"":"-"}${t}]`}function h(e){return/^-?(0+)\d/.test(e)}function g(e,t,n){if(!t.isPadded)return e;let r=Math.abs(t.maxLen-String(e).length),i=!1!==n.relaxZeros;switch(r){case 0:return"";case 1:return i?"0?":"0";case 2:return i?"0{0,2}":"00";default:return i?`0{0,${r}}`:`0{${r}}`}}i.cache={},i.clearCache=(()=>i.cache={}),t.exports=i},{"is-number":2}],2:[function(e,t,n){"use strict";t.exports=function(e){return"number"==typeof e?e-e==0:"string"==typeof e&&""!==e.trim()&&(Number.isFinite?Number.isFinite(+e):isFinite(+e))}},{}]},{},[1])(1)});
// Wires up the range-regex generator UI once the DOM is ready:
//  - typing in either boundary field clears the previously shown output,
//  - the four "boundary" checkboxes behave as a mutually exclusive group,
//  - the Generate button builds the pattern and prints it as literal text.
$( document ).ready( function() {
  // Ids of the checkboxes that choose how the pattern is anchored; at most one stays active.
  const boundaryIds = [ 'wholestring', 'wb', 'dgtb', 'whtb' ];

  $( "#rangeLeft, #rangeRight" ).keydown( function() {
    clearDisplay();
  } );

  boundaryIds.forEach( function( clickedId ) {
    $( '#' + clickedId ).click( function() {
      // The clicked box keeps the state the click itself gave it; only its siblings are
      // cleared. .prop() is used because .is(':checked') below reads the live property.
      boundaryIds.forEach( function( otherId ) {
        if ( otherId !== clickedId ) {
          $( '#' + otherId ).prop( 'checked', false );
        }
      } );
    } );
  } );

  $( "#run" ).click( function() {
    clearDisplay();
    const rangeLeft = $( "#rangeLeft" ).val();
    const rangeRight = $( "#rangeRight" ).val();
    if ( ! checkRanges( rangeLeft, rangeRight ) ) return;

    let source = toRegexRange( rangeLeft, rangeRight );

    // Optional pieces are applied innermost first: fraction suffix, zero prefix, sign prefix.
    if ( $( '#frac' ).is( ':checked' ) ) {
      source = source + '(?:\\.\\d+)?';
    }
    if ( $( '#allowzero' ).is( ':checked' ) ) {
      source = "0*" + source;
    }
    if ( $( '#neg' ).is( ':checked' ) ) {
      source = "-?" + source;
    }

    // Boundary assertions, checked in the same priority order as before.
    if ( $( '#wholestring' ).is( ':checked' ) ) {
      source = '^' + source + '$';
    } else if ( $( '#wb' ).is( ':checked' ) ) {
      source = '\\b' + source + '\\b';
    } else if ( $( '#whtb' ).is( ':checked' ) ) {
      source = '(?<!\\S)' + source + '(?!\\S)';
    } else if ( $( '#dgtb' ).is( ':checked' ) ) {
      source = '(?<!\\d)' + source + '(?!\\d)';
    }

    // Insert the pattern as text, not markup: lookbehinds such as (?<!\d) contain "<" and
    // would otherwise be parsed as HTML and vanish from the displayed result.
    $( "#result" ).append( $( "<B>" ).text( source ), "<BR/>" );
  } );
} );
/**
 * Validates the two boundary strings typed by the user and reports every
 * problem found by appending a message to the #result element.
 *
 * @param {string} rangeLeft  - text of the lower boundary field
 * @param {string} rangeRight - text of the upper boundary field
 * @returns {boolean} true when both values are non-negative integers with left <= right
 */
function checkRanges( rangeLeft, rangeRight ) {
  const containsNonDigit = ( text ) => /\D/.test( text );

  // Anything other than plain digits is rejected before any parsing happens.
  if ( containsNonDigit( rangeLeft ) || containsNonDigit( rangeRight ) ) {
    $( "#result" ).append( "Type two numbers<BR/>" );
    return false;
  }

  const low = parseInt( rangeLeft, 10 );
  const high = parseInt( rangeRight, 10 );

  // Each entry pairs a failed condition with the message shown for it; all of them are
  // evaluated so the user sees every problem at once.
  const findings = [
    [ isNaN( low ) || isNaN( high ), "Range boundaries are not specified<BR/>" ],
    [ low < 0, "Left boundary is less than 0<BR/>" ],
    [ high < 0, "Right boundary is less than 0<BR/>" ],
    [ low > high, "Left boundary is greater than the right boundary<BR/>" ],
  ];

  let valid = true;
  for ( const [ failed, message ] of findings ) {
    if ( failed ) {
      $( "#result" ).append( message );
      valid = false;
    }
  }
  return valid;
}
/**
 * Resets the output area: empties the #result element and hides #test.
 */
function clearDisplay() {
  const resultPane = $( "#result" );
  resultPane.html( "" );

  const testPane = $( "#test" );
  testPane.hide();
}
<!-- Markup for the range-regex generator: two boundary inputs, a Generate button,
     an output area (#result) and the option checkboxes read by the script. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<p>Type in minimum and maximum values and click <i>Generate</i>.</p>
<input id="rangeLeft" value="1" /> - <input id="rangeRight" value="365" />
<br/>
<button id="run">Generate</button>
<!-- <code> is not a void element, so it needs an explicit closing tag. -->
<pre><code id="result"></code></pre>
<div>
<!-- Boundary options: the script keeps at most one of these four checked. -->
<label><input type="checkbox" id="wholestring"/>Match whole string</label> <br/>
<label><input type="checkbox" id="wb"/>Match within word boundaries</label><br/>
<label><input type="checkbox" id="dgtb"/>Match when not enclosed with digits</label><br/>
<label><input type="checkbox" id="whtb"/>Match when enclosed with whitespaces or start/end of string</label><br/>
<!-- Independent modifiers. -->
<label><input type="checkbox" id="allowzero"/>Allow leading zeros</label><br/>
<label><input type="checkbox" id="neg"/>Optionally match negative numbers</label><br/>
<label><input type="checkbox" id="frac"/>Optionally match fractional digits (floats)</label><br/>
</div>
Most of the JavaScript code here is borrowed from Алгоритм для преобразования диапазона номеров в регулярное выражение and to-regex-range npm library.
While you could do it with some absurd looking regex (as VonC answered), regex really isn't supposed to do this.. Why not defer the number checking to the redirected-to-script?
If numbers 110-2234 go to script1, and 1-109 go to script2, it would be much simpler to direct all numbers at a router script, and have it redirect to the correct location (via HTTP redirects)..
In .htaccess:
RewriteRule ^view/([0-9]+)/?$ router.php?page=$1 [L]
..then in router.php, something like:
<?php
// Router sketch: the rewrite rule passes the numeric page id as ?page=N.
// Pages 110..2234 (inclusive) are redirected to /script1; anything else gets a 404.

// (int) is PHP's integer cast; a missing parameter is treated as 0, which is out of range.
$page = isset($_GET['page']) ? (int) $_GET['page'] : 0;

if ($page >= 110 && $page <= 2234) {
    // One header per header() call; the third argument sets the response status code.
    header("Location: /script1", true, 301);
    exit;
} else {
    http_response_code(404);
}
?>
You can put the regexes for the following ranges together:
1[1-9]\d = 110-199
[2-9]\d\d = 200-999
1\d\d\d = 1000-1999
2[0-1]\d\d= 2000-2199
22[0-2]\d = 2200-2229
223[0-4] = 2230-2234
to form:
(1[1-9]\d|[2-9]\d\d|1\d\d\d|2[0-1]\d\d|22[0-2]\d|223[0-4])
\d means [0-9], but is three characters shorter
It's possible, albeit not pretty.
\b(?:1[1-9]\d|[2-9]\d\d|1\d{3}|2[01]\d\d|22[0-2]\d|223[0-4])\b
I emailed Philip Hazel, the author of PCRE, in 2006 to ask what he thought of math in regex:
Perhaps this lies out of the scope of the project in your view: The ability to treat numbers as being numbers and not text, this would definitely be a worthwhile feature.
Allowing you to do some basic math checks on matched digits, like: is the 2nd matched digit higher or lower, is the third digit a multiple of the 1st, and many more complicated cases I won't elaborate on just to get my point across.
Do you feel this exceeds the realm of textmatching?
to which I got the following reply:
Yes, I think I do, and also, it is not
something that is available in Perl
regular expressions. I know that PCRE
does have some extensions from Perl,
but nothing as major as that (you
could perhaps hack something up using
callouts, but that would be a bit ad
hoc, and no doubt exceedingly messy!).
Philip
and I couldn't agree more now in '09. Just match all numbers and do the number validation in whatever language you're doing the matching with.
Also if you want to locate or find if your path name contains a year, and take it out as a string, you can try something like this:
import re

# Example path that contains a year folder written as 'yYYYY'.
path1 = r'X:\S\Something_2019\y2019\AB19778_description\subfolder1\subfolder2'

# 'y' followed by four digits whose first is 1 or 2 and whose second is 0 or 9.
# The character class is [09]: a comma inside the brackets would be matched literally.
# The leading greedy '.*' makes findall report only the last such folder in the path.
pattern = r'.*(y[1-2][09][0-9]{2})'

find = re.findall(pattern, path1)
# findall returns an empty list when nothing matches, so guard the index.
mystring = find[0] if find else None
print(mystring)
This checks whether 'path1' contains a year string in the format 'yYYYY', i.e. with the letter 'y' as a prefix (relevant for my use case).
This will return string 'y2019'.
I would like to match with RegExp a number between X and Y. Is that possible?
([0-9]+) will match any number, how could I do to match a number between, for instance, 110 and 2234?
According to Generate a Regular Expression to Match an Arbitrary Numeric Range, and after generating such a regex for your example at Regex_For_Range:
\b0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])\b
would do the trick.
The process would be (still following that Regex generator):
First, break into equal length ranges:
110 - 999
1000 - 2234
Second, break into ranges that yield simple regexes:
110 - 199
200 - 999
1000 - 1999
2000 - 2199
2200 - 2229
2230 - 2234
Turn each range into a regex:
1[1-9][0-9]
[2-9][0-9]{2}
1[0-9]{3}
2[01][0-9]{2}
22[0-2][0-9]
223[0-4]
Collapse adjacent powers of 10:
1[1-9][0-9]
[2-9][0-9]{2}
1[0-9]{3}
2[01][0-9]{2}
22[0-2][0-9]
223[0-4]
Combining the regexes above yields:
0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])
Next we'll try factoring out common prefixes using a tree:
Parse into tree based on regex prefixes:
. 1 [1-9] [0-9]
+ [0-9]{3}
+ [2-9] [0-9]{2}
+ 2 [01] [0-9]{2}
+ 2 [0-2] [0-9]
+ 3 [0-4]
Turning the parse tree into a regex yields:
0*(1([1-9][0-9]|[0-9]{3})|[2-9][0-9]{2}|2([01][0-9]{2}|2([0-2][0-9]|3[0-4])))
We choose the shorter one as our result.
\b0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])\b
This is not the sort of thing regexes excel at. You will probably find it easier to ensure that you have the right number of digits /^([0-9]{3,4})$/ and then do further checks against the capture.
• Numeric range regex generator
Since online number range regex generator services often become unavailable after some period of time (this one is still alive at the time of writing the post), I think it would be nice to have it here.
How-to:
Scroll to the bottom of this answer
Click Run code snippet
Make sure you type the minimum and maximum threshold values in the text input fields, mark all the options you need below and hit Generate button:
// Bundled, minified build that defines `toRegexRange`.
// The outer wrapper hands the factory result to module.exports, to an AMD define(), or to a
// `toRegexRange` property on window/global/self/this, whichever is available.
// Module 1 is the range-to-pattern function; it stores computed results in its `cache` object
// and exposes `clearCache` to reset it. Module 2 is the "is-number" check that module 1 requires.
// The code below is left exactly as shipped.
!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{("undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this).toRegexRange=e()}}(function(){return function(){return function e(t,n,r){function i(u,a){if(!n[u]){if(!t[u]){var s="function"==typeof require&&require;if(!a&&s)return s(u,!0);if(o)return o(u,!0);var c=new Error("Cannot find module '"+u+"'");throw c.code="MODULE_NOT_FOUND",c}var f=n[u]={exports:{}};t[u][0].call(f.exports,function(e){return i(t[u][1][e]||e)},f,f.exports,e,t,n,r)}return n[u].exports}for(var o="function"==typeof require&&require,u=0;u<r.length;u++)i(r[u]);return i}}()({1:[function(e,t,n){"use strict";const r=e("is-number"),i=(e,t,n)=>{if(!1===r(e))throw new TypeError("toRegexRange: expected the first argument to be a number");if(void 0===t||e===t)return String(e);if(!1===r(t))throw new TypeError("toRegexRange: expected the second argument to be a number.");let o={relaxZeros:!0,...n};"boolean"==typeof o.strictZeros&&(o.relaxZeros=!1===o.strictZeros);let s=e+":"+t+"="+String(o.relaxZeros)+String(o.shorthand)+String(o.capture)+String(o.wrap);if(i.cache.hasOwnProperty(s))return i.cache[s].result;let c=Math.min(e,t),f=Math.max(e,t);if(1===Math.abs(c-f)){let n=e+"|"+t;return o.capture?`(${n})`:!1===o.wrap?n:`(?:${n})`}let l=h(e)||h(t),d={min:e,max:t,a:c,b:f},p=[],g=[];if(l&&(d.isPadded=l,d.maxLen=String(d.max).length),c<0){g=u(f<0?Math.abs(f):1,Math.abs(c),d,o),c=d.a=0}return f>=0&&(p=u(c,f,d,o)),d.negatives=g,d.positives=p,d.result=function(e,t,n){let r=a(e,t,"-",!1,n)||[],i=a(t,e,"",!1,n)||[],o=a(e,t,"-?",!0,n)||[];return r.concat(o).concat(i).join("|")}(g,p,o),!0===o.capture?d.result=`(${d.result})`:!1!==o.wrap&&p.length+g.length>1&&(d.result=`(?:${d.result})`),i.cache[s]=d,d.result};function o(e,t,n){if(e===t)return{pattern:e,count:[],digits:0};let r=function(e,t){let n=[];for(let 
r=0;r<e.length;r++)n.push([e[r],t[r]]);return n}(e,t),i=r.length,o="",u=0;for(let e=0;e<i;e++){let[t,i]=r[e];t===i?o+=t:"0"!==t||"9"!==i?o+=p(t,i,n):u++}return u&&(o+=!0===n.shorthand?"\\d":"[0-9]"),{pattern:o,count:[u],digits:i}}function u(e,t,n,r){let i,u=function(e,t){let n=1,r=1,i=f(e,n),o=new Set([t]);for(;e<=i&&i<=t;)o.add(i),i=f(e,n+=1);for(i=l(t+1,r)-1;e<i&&i<=t;)o.add(i),i=l(t+1,r+=1)-1;return(o=[...o]).sort(s),o}(e,t),a=[],c=e;for(let e=0;e<u.length;e++){let t=u[e],s=o(String(c),String(t),r),f="";n.isPadded||!i||i.pattern!==s.pattern?(n.isPadded&&(f=g(t,n,r)),s.string=f+s.pattern+d(s.count),a.push(s),c=t+1,i=s):(i.count.length>1&&i.count.pop(),i.count.push(s.count[0]),i.string=i.pattern+d(i.count),c=t+1)}return a}function a(e,t,n,r,i){let o=[];for(let i of e){let{string:e}=i;r||c(t,"string",e)||o.push(n+e),r&&c(t,"string",e)&&o.push(n+e)}return o}function s(e,t){return e>t?1:t>e?-1:0}function c(e,t,n){return e.some(e=>e[t]===n)}function f(e,t){return Number(String(e).slice(0,-t)+"9".repeat(t))}function l(e,t){return e-e%Math.pow(10,t)}function d(e){let[t=0,n=""]=e;return n||t>1?`{${t+(n?","+n:"")}}`:""}function p(e,t,n){return`[${e}${t-e==1?"":"-"}${t}]`}function h(e){return/^-?(0+)\d/.test(e)}function g(e,t,n){if(!t.isPadded)return e;let r=Math.abs(t.maxLen-String(e).length),i=!1!==n.relaxZeros;switch(r){case 0:return"";case 1:return i?"0?":"0";case 2:return i?"0{0,2}":"00";default:return i?`0{0,${r}}`:`0{${r}}`}}i.cache={},i.clearCache=(()=>i.cache={}),t.exports=i},{"is-number":2}],2:[function(e,t,n){"use strict";t.exports=function(e){return"number"==typeof e?e-e==0:"string"==typeof e&&""!==e.trim()&&(Number.isFinite?Number.isFinite(+e):isFinite(+e))}},{}]},{},[1])(1)});
// Wires up the range-regex generator UI once the DOM is ready:
//  - typing in either boundary field clears the previously shown output,
//  - the four "boundary" checkboxes behave as a mutually exclusive group,
//  - the Generate button builds the pattern and prints it as literal text.
$( document ).ready( function() {
  // Ids of the checkboxes that choose how the pattern is anchored; at most one stays active.
  const boundaryIds = [ 'wholestring', 'wb', 'dgtb', 'whtb' ];

  $( "#rangeLeft, #rangeRight" ).keydown( function() {
    clearDisplay();
  } );

  boundaryIds.forEach( function( clickedId ) {
    $( '#' + clickedId ).click( function() {
      // The clicked box keeps the state the click itself gave it; only its siblings are
      // cleared. .prop() is used because .is(':checked') below reads the live property.
      boundaryIds.forEach( function( otherId ) {
        if ( otherId !== clickedId ) {
          $( '#' + otherId ).prop( 'checked', false );
        }
      } );
    } );
  } );

  $( "#run" ).click( function() {
    clearDisplay();
    const rangeLeft = $( "#rangeLeft" ).val();
    const rangeRight = $( "#rangeRight" ).val();
    if ( ! checkRanges( rangeLeft, rangeRight ) ) return;

    let source = toRegexRange( rangeLeft, rangeRight );

    // Optional pieces are applied innermost first: fraction suffix, zero prefix, sign prefix.
    if ( $( '#frac' ).is( ':checked' ) ) {
      source = source + '(?:\\.\\d+)?';
    }
    if ( $( '#allowzero' ).is( ':checked' ) ) {
      source = "0*" + source;
    }
    if ( $( '#neg' ).is( ':checked' ) ) {
      source = "-?" + source;
    }

    // Boundary assertions, checked in the same priority order as before.
    if ( $( '#wholestring' ).is( ':checked' ) ) {
      source = '^' + source + '$';
    } else if ( $( '#wb' ).is( ':checked' ) ) {
      source = '\\b' + source + '\\b';
    } else if ( $( '#whtb' ).is( ':checked' ) ) {
      source = '(?<!\\S)' + source + '(?!\\S)';
    } else if ( $( '#dgtb' ).is( ':checked' ) ) {
      source = '(?<!\\d)' + source + '(?!\\d)';
    }

    // Insert the pattern as text, not markup: lookbehinds such as (?<!\d) contain "<" and
    // would otherwise be parsed as HTML and vanish from the displayed result.
    $( "#result" ).append( $( "<B>" ).text( source ), "<BR/>" );
  } );
} );
/**
 * Validates the two boundary strings typed by the user and reports every
 * problem found by appending a message to the #result element.
 *
 * @param {string} rangeLeft  - text of the lower boundary field
 * @param {string} rangeRight - text of the upper boundary field
 * @returns {boolean} true when both values are non-negative integers with left <= right
 */
function checkRanges( rangeLeft, rangeRight ) {
  const containsNonDigit = ( text ) => /\D/.test( text );

  // Anything other than plain digits is rejected before any parsing happens.
  if ( containsNonDigit( rangeLeft ) || containsNonDigit( rangeRight ) ) {
    $( "#result" ).append( "Type two numbers<BR/>" );
    return false;
  }

  const low = parseInt( rangeLeft, 10 );
  const high = parseInt( rangeRight, 10 );

  // Each entry pairs a failed condition with the message shown for it; all of them are
  // evaluated so the user sees every problem at once.
  const findings = [
    [ isNaN( low ) || isNaN( high ), "Range boundaries are not specified<BR/>" ],
    [ low < 0, "Left boundary is less than 0<BR/>" ],
    [ high < 0, "Right boundary is less than 0<BR/>" ],
    [ low > high, "Left boundary is greater than the right boundary<BR/>" ],
  ];

  let valid = true;
  for ( const [ failed, message ] of findings ) {
    if ( failed ) {
      $( "#result" ).append( message );
      valid = false;
    }
  }
  return valid;
}
/**
 * Resets the output area: empties the #result element and hides the #test
 * element.
 */
function clearDisplay() {
  const resultPane = $( '#result' );
  const testPane = $( '#test' );
  resultPane.html( '' );
  testPane.hide();
}
<!-- jQuery 2.1.1 from cdnjs; the generator script relies on the global $. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<p>Type in minimum and maximum values and click <i>Generate</i>.</p>
<!-- Lower and upper boundary of the numeric range. -->
<input id="rangeLeft" value="1" /> - <input id="rangeRight" value="365" />
<br/>
<button id="run">Generate</button>
<!-- Output area. <code> is not a void element, so it is closed explicitly
     instead of relying on the parser to recover from a self-closing tag. -->
<pre><code id="result"></code></pre>
<!-- Options: the first four choose how the pattern is anchored, the rest add
     optional prefixes/suffixes to the generated pattern. -->
<div>
<label><input type="checkbox" id="wholestring"/>Match whole string</label> <br/>
<label><input type="checkbox" id="wb"/>Match within word boundaries</label><br/>
<label><input type="checkbox" id="dgtb"/>Match when not enclosed with digits</label><br/>
<label><input type="checkbox" id="whtb"/>Match when enclosed with whitespaces or start/end of string</label><br/>
<label><input type="checkbox" id="allowzero"/>Allow leading zeros</label><br/>
<label><input type="checkbox" id="neg"/>Optionally match negative numbers</label><br/>
<label><input type="checkbox" id="frac"/>Optionally match fractional digits (floats)</label><br/>
</div>
Most of the JavaScript code here is borrowed from Алгоритм для преобразования диапазона номеров в регулярное выражение and to-regex-range npm library.
While you could do it with some absurd looking regex (as VonC answered), regex really isn't supposed to do this.. Why not defer the number checking to the redirected-to-script?
If numbers 110-2234 go to script1, and 1-109 go to script2, it would be much simpler to direct all numbers at a router script, and have it redirect to the correct location (via HTTP redirects)..
In .htaccess:
RewriteRule ^view/([0-9]+)/?$ router.php?page=$1 [L]
..then in router.php, something like:
<?PHP
// Router: page numbers in the inclusive range 110-2234 are redirected to
// /script1; everything else gets a 404.
// PHP has no int() function, so the value is converted with an (int) cast.
$page = isset($_GET['page']) ? (int) $_GET['page'] : 0;
if(
    $page >= 110 &&
    $page <= 2234
){
    // header() refuses values containing a newline, so the status code is
    // passed as the third argument instead of a second "Status:" line.
    header("Location: /script1", true, 301);
}else{
    // The third argument sets the response code even where the "Status:"
    // pseudo-header is not interpreted by the server API.
    header("Status: 404 Not Found", true, 404);
}
?>
You can put the regexes for the following ranges together:
1[1-9]\d = 110-199
[2-9]\d\d = 200-999
1\d\d\d = 1000-1999
2[0-1]\d\d= 2000-2199
22[0-2]\d = 2200-2229
223[0-4] = 2230-2234
to form:
(1[1-9]\d|[2-9]\d\d|1\d\d\d|2[0-1]\d\d|22[0-2]\d|223[0-4])
\d means [0-9], but in three less characters
It's possible, albeit not pretty.
\b(?:[1][1][0-9]|1\d{3}|223[0-4]|2[0-1]\d\d|2[0-2][0-3][0-4])\b
I emailed Philip Hazel, the author of PCRE, in 2006 to ask what he thought of math in regex:
Perhaps this lies out of the scope of the project in your view: The ability to treat numbers as being numbers and not text, this would definitely be a worthwhile feature.
Allowing you to do some basic math checks on matched digits, like: is the 2nd matched digit higher or lower, is the third digit a multiple of the 1st, and many more complicated cases I won't elaborate on just to get my point across.
Do you feel this exceeds the realm of textmatching?
to which I got the following reply:
Yes, I think I do, and also, it is not
something that is available in Perl
regular expressions. I know that PCRE
does have some extensions from Perl,
but nothing as major as that (you
could perhaps hack something up using
callouts, but that would be a bit ad
hoc, and no doubt exceedingly messy!).
Philip
and I couldn't agree more now in `09. Just match all numbers and do number validation in whatever language you're doing the matching with.
Also if you want to locate or find if your path name contains a year, and take it out as a string, you can try something like this:
import re

# Sample path that contains a year folder written as 'y' followed by four digits.
path1 = r'X:\S\Something_2019\y2019\AB19778_description\subfolder1\subfolder2'
# Second digit class is [09] (0 or 9); a comma inside the brackets would be
# matched literally and let strings such as 'y1,99' through.
# The leading greedy '.*' makes findall report only the last year in the string.
find = re.findall(r'.*(y[1-2][09][0-9]{2})', path1)
mystring = find[0]
print(mystring)
Checks in 'path1' if there is year string with format 'yYYYY'. So with letter 'y' as prefix (relevant for my study case).
This will return string 'y2019'.
I would like to match with RegExp a number between X and Y. Is that possible?
([0-9]+) will match any number, how could I do to match a number between, for instance, 110 and 2234?
According to Generate a Regular Expression to Match an Arbitrary Numeric Range, and after generating such a regex for your example at Regex_For_Range:
\b0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])\b
would do the trick.
The process would be (still following that Regex generator):
First, break into equal length ranges:
110 - 999
1000 - 2234
Second, break into ranges that yield simple regexes:
110 - 199
200 - 999
1000 - 1999
2000 - 2199
2200 - 2229
2230 - 2234
Turn each range into a regex:
1[1-9][0-9]
[2-9][0-9]{2}
1[0-9]{3}
2[01][0-9]{2}
22[0-2][0-9]
223[0-4]
Collapse adjacent powers of 10:
1[1-9][0-9]
[2-9][0-9]{2}
1[0-9]{3}
2[01][0-9]{2}
22[0-2][0-9]
223[0-4]
Combining the regexes above yields:
0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])
Next we'll try factoring out common prefixes using a tree:
Parse into tree based on regex prefixes:
. 1 [1-9] [0-9]
+ [0-9]{3}
+ [2-9] [0-9]{2}
+ 2 [01] [0-9]{2}
+ 2 [0-2] [0-9]
+ 3 [0-4]
Turning the parse tree into a regex yields:
0*(1([1-9][0-9]|[0-9]{3})|[2-9][0-9]{2}|2([01][0-9]{2}|2([0-2][0-9]|3[0-4])))
We choose the shorter one as our result.
\b0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])\b
This is not the sort of thing regexes excel at. You will probably find it easier to ensure that you have the right number of digits /^([0-9]{3,4})$/ and then do further checks against the capture.
• Numeric range regex generator
Since online number range regex generator services often become unavailable after some period of time (this one is still alive at the time of writing the post), I think it would be nice to have it here.
How-to:
Scroll to the bottom of this answer
Click Run code snippet
Make sure you type the minimum and maximum threshold values in the text input fields, mark all the options you need below and hit Generate button:
/*
 * Minified UMD bundle that exposes toRegexRange(min, max, options).
 *
 * Loader: exports through module.exports (CommonJS) or define() (AMD) when
 * available; otherwise assigns the function to the global object as
 * `toRegexRange`.
 *
 * Module 1 (main function):
 *   - throws TypeError when either argument fails the "is-number" check;
 *   - returns String(min) when max is undefined or identical to min;
 *   - returns "min|max" (wrapped in a group unless options.wrap === false,
 *     capturing when options.capture is set) when the bounds differ by 1;
 *   - otherwise splits the range into sub-ranges and joins their patterns
 *     with "|"; options read: relaxZeros / strictZeros, shorthand, capture, wrap;
 *   - results are memoised in toRegexRange.cache (reset with clearCache()).
 * Module 2 ("is-number"): accepts finite numbers and non-blank strings that
 * convert to a finite number.
 *
 * Do not reformat: the statement is split across two physical lines.
 */
!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{("undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this).toRegexRange=e()}}(function(){return function(){return function e(t,n,r){function i(u,a){if(!n[u]){if(!t[u]){var s="function"==typeof require&&require;if(!a&&s)return s(u,!0);if(o)return o(u,!0);var c=new Error("Cannot find module '"+u+"'");throw c.code="MODULE_NOT_FOUND",c}var f=n[u]={exports:{}};t[u][0].call(f.exports,function(e){return i(t[u][1][e]||e)},f,f.exports,e,t,n,r)}return n[u].exports}for(var o="function"==typeof require&&require,u=0;u<r.length;u++)i(r[u]);return i}}()({1:[function(e,t,n){"use strict";const r=e("is-number"),i=(e,t,n)=>{if(!1===r(e))throw new TypeError("toRegexRange: expected the first argument to be a number");if(void 0===t||e===t)return String(e);if(!1===r(t))throw new TypeError("toRegexRange: expected the second argument to be a number.");let o={relaxZeros:!0,...n};"boolean"==typeof o.strictZeros&&(o.relaxZeros=!1===o.strictZeros);let s=e+":"+t+"="+String(o.relaxZeros)+String(o.shorthand)+String(o.capture)+String(o.wrap);if(i.cache.hasOwnProperty(s))return i.cache[s].result;let c=Math.min(e,t),f=Math.max(e,t);if(1===Math.abs(c-f)){let n=e+"|"+t;return o.capture?`(${n})`:!1===o.wrap?n:`(?:${n})`}let l=h(e)||h(t),d={min:e,max:t,a:c,b:f},p=[],g=[];if(l&&(d.isPadded=l,d.maxLen=String(d.max).length),c<0){g=u(f<0?Math.abs(f):1,Math.abs(c),d,o),c=d.a=0}return f>=0&&(p=u(c,f,d,o)),d.negatives=g,d.positives=p,d.result=function(e,t,n){let r=a(e,t,"-",!1,n)||[],i=a(t,e,"",!1,n)||[],o=a(e,t,"-?",!0,n)||[];return r.concat(o).concat(i).join("|")}(g,p,o),!0===o.capture?d.result=`(${d.result})`:!1!==o.wrap&&p.length+g.length>1&&(d.result=`(?:${d.result})`),i.cache[s]=d,d.result};function o(e,t,n){if(e===t)return{pattern:e,count:[],digits:0};let r=function(e,t){let n=[];for(let 
r=0;r<e.length;r++)n.push([e[r],t[r]]);return n}(e,t),i=r.length,o="",u=0;for(let e=0;e<i;e++){let[t,i]=r[e];t===i?o+=t:"0"!==t||"9"!==i?o+=p(t,i,n):u++}return u&&(o+=!0===n.shorthand?"\\d":"[0-9]"),{pattern:o,count:[u],digits:i}}function u(e,t,n,r){let i,u=function(e,t){let n=1,r=1,i=f(e,n),o=new Set([t]);for(;e<=i&&i<=t;)o.add(i),i=f(e,n+=1);for(i=l(t+1,r)-1;e<i&&i<=t;)o.add(i),i=l(t+1,r+=1)-1;return(o=[...o]).sort(s),o}(e,t),a=[],c=e;for(let e=0;e<u.length;e++){let t=u[e],s=o(String(c),String(t),r),f="";n.isPadded||!i||i.pattern!==s.pattern?(n.isPadded&&(f=g(t,n,r)),s.string=f+s.pattern+d(s.count),a.push(s),c=t+1,i=s):(i.count.length>1&&i.count.pop(),i.count.push(s.count[0]),i.string=i.pattern+d(i.count),c=t+1)}return a}function a(e,t,n,r,i){let o=[];for(let i of e){let{string:e}=i;r||c(t,"string",e)||o.push(n+e),r&&c(t,"string",e)&&o.push(n+e)}return o}function s(e,t){return e>t?1:t>e?-1:0}function c(e,t,n){return e.some(e=>e[t]===n)}function f(e,t){return Number(String(e).slice(0,-t)+"9".repeat(t))}function l(e,t){return e-e%Math.pow(10,t)}function d(e){let[t=0,n=""]=e;return n||t>1?`{${t+(n?","+n:"")}}`:""}function p(e,t,n){return`[${e}${t-e==1?"":"-"}${t}]`}function h(e){return/^-?(0+)\d/.test(e)}function g(e,t,n){if(!t.isPadded)return e;let r=Math.abs(t.maxLen-String(e).length),i=!1!==n.relaxZeros;switch(r){case 0:return"";case 1:return i?"0?":"0";case 2:return i?"0{0,2}":"00";default:return i?`0{0,${r}}`:`0{${r}}`}}i.cache={},i.clearCache=(()=>i.cache={}),t.exports=i},{"is-number":2}],2:[function(e,t,n){"use strict";t.exports=function(e){return"number"==typeof e?e-e==0:"string"==typeof e&&""!==e.trim()&&(Number.isFinite?Number.isFinite(+e):isFinite(+e))}},{}]},{},[1])(1)});
// Wires up the generator UI once the DOM is ready: clears stale output while a
// boundary is being edited, keeps the four boundary-mode checkboxes mutually
// exclusive, and builds the pattern when "Generate" is clicked.
$( document ).ready( function() {
  // Checkboxes that choose how the pattern is anchored; ticking one unticks the rest.
  const boundaryModes = [ '#wholestring', '#wb', '#dgtb', '#whtb' ];

  // Escapes &, < and > so the pattern is displayed literally. Without this a
  // lookbehind such as (?<!\S) is parsed as markup and the output is truncated.
  function escapeHtml( text ) {
    return text.replace( /&/g, '&amp;' ).replace( /</g, '&lt;' ).replace( />/g, '&gt;' );
  }

  $( "#rangeLeft, #rangeRight" ).keydown( function() {
    clearDisplay();
  } );

  boundaryModes.forEach( function( selected ) {
    $( selected ).click( function() {
      $( selected ).attr( 'checked', 'checked' );
      boundaryModes.forEach( function( other ) {
        if ( other !== selected ) $( other ).attr( 'checked', false );
      } );
    } );
  } );

  $( "#run" ).click( function() {
    clearDisplay();
    const rangeLeft = $( "#rangeLeft" ).val();
    const rangeRight = $( "#rangeRight" ).val();
    if ( ! checkRanges( rangeLeft, rangeRight ) ) return;

    // The boundaries are passed on exactly as typed (as strings).
    let source = toRegexRange( rangeLeft, rangeRight );
    if ( $( '#frac' ).is( ':checked' ) ) {
      source = source + '(?:\\.\\d+)?';
    }
    if ( $( '#allowzero' ).is( ':checked' ) ) {
      source = "0*" + source;
    }
    if ( $( '#neg' ).is( ':checked' ) ) {
      source = "-?" + source;
    }

    // Only the first ticked boundary mode is applied.
    if ( $( '#wholestring' ).is( ':checked' ) ) {
      source = '^' + source + '$';
    } else if ( $( '#wb' ).is( ':checked' ) ) {
      source = '\\b' + source + '\\b';
    } else if ( $( '#whtb' ).is( ':checked' ) ) {
      source = '(?<!\\S)' + source + '(?!\\S)';
    } else if ( $( '#dgtb' ).is( ':checked' ) ) {
      source = '(?<!\\d)' + source + '(?!\\d)';
    }

    $( "#result" ).append( "<B>" + escapeHtml( source ) + "</B><BR/>" );
  } );
} );
/**
 * Validates the two range boundaries typed by the user, appending one message
 * per problem to the #result element.
 *
 * @param {string} rangeLeft  Text of the lower boundary.
 * @param {string} rangeRight Text of the upper boundary.
 * @returns {boolean} true only when both boundaries are present, non-negative
 *   and the left one is not greater than the right one.
 */
function checkRanges( rangeLeft, rangeRight ) {
  // Accepts an empty field, plain digits, or a negative non-zero integer.
  // Letting negatives through keeps the "less than 0" messages below
  // reachable; a digits-only check would reject the minus sign before they
  // could ever fire.
  const integerOrEmpty = /^(?:\d+|-0*[1-9]\d*)?$/;
  if ( ! integerOrEmpty.test( rangeLeft ) || ! integerOrEmpty.test( rangeRight ) ) {
    $( "#result" ).append( "Type two numbers<BR/>" );
    return false;
  }

  const left = Number.parseInt( rangeLeft, 10 );
  const right = Number.parseInt( rangeRight, 10 );
  // An empty field parses to NaN.
  const unspecified = Number.isNaN( left ) || Number.isNaN( right );

  // Every applicable problem is reported, not just the first one.
  if ( unspecified ) $( "#result" ).append( "Range boundaries are not specified<BR/>" );
  if ( left < 0 ) $( "#result" ).append( "Left boundary is less than 0<BR/>" );
  if ( right < 0 ) $( "#result" ).append( "Right boundary is less than 0<BR/>" );
  if ( left > right ) $( "#result" ).append( "Left boundary is greater than the right boundary<BR/>" );

  return ! ( unspecified || left < 0 || right < 0 || left > right );
}
/**
 * Resets the output area: empties the #result element and hides the #test
 * element.
 */
function clearDisplay() {
  const resultPane = $( '#result' );
  const testPane = $( '#test' );
  resultPane.html( '' );
  testPane.hide();
}
<!-- jQuery 2.1.1 from cdnjs; the generator script relies on the global $. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<p>Type in minimum and maximum values and click <i>Generate</i>.</p>
<!-- Lower and upper boundary of the numeric range. -->
<input id="rangeLeft" value="1" /> - <input id="rangeRight" value="365" />
<br/>
<button id="run">Generate</button>
<!-- Output area. <code> is not a void element, so it is closed explicitly
     instead of relying on the parser to recover from a self-closing tag. -->
<pre><code id="result"></code></pre>
<!-- Options: the first four choose how the pattern is anchored, the rest add
     optional prefixes/suffixes to the generated pattern. -->
<div>
<label><input type="checkbox" id="wholestring"/>Match whole string</label> <br/>
<label><input type="checkbox" id="wb"/>Match within word boundaries</label><br/>
<label><input type="checkbox" id="dgtb"/>Match when not enclosed with digits</label><br/>
<label><input type="checkbox" id="whtb"/>Match when enclosed with whitespaces or start/end of string</label><br/>
<label><input type="checkbox" id="allowzero"/>Allow leading zeros</label><br/>
<label><input type="checkbox" id="neg"/>Optionally match negative numbers</label><br/>
<label><input type="checkbox" id="frac"/>Optionally match fractional digits (floats)</label><br/>
</div>
Most of the JavaScript code here is borrowed from Алгоритм для преобразования диапазона номеров в регулярное выражение and to-regex-range npm library.
While you could do it with some absurd looking regex (as VonC answered), regex really isn't supposed to do this.. Why not defer the number checking to the redirected-to-script?
If numbers 110-2234 go to script1, and 1-109 go to script2, it would be much simpler to direct all numbers at a router script, and have it redirect to the correct location (via HTTP redirects)..
In .htaccess:
RewriteRule ^view/([0-9]+)/?$ router.php?page=$1 [L]
..then in router.php, something like:
<?PHP
// Router: page numbers in the inclusive range 110-2234 are redirected to
// /script1; everything else gets a 404.
// PHP has no int() function, so the value is converted with an (int) cast.
$page = isset($_GET['page']) ? (int) $_GET['page'] : 0;
if(
    $page >= 110 &&
    $page <= 2234
){
    // header() refuses values containing a newline, so the status code is
    // passed as the third argument instead of a second "Status:" line.
    header("Location: /script1", true, 301);
}else{
    // The third argument sets the response code even where the "Status:"
    // pseudo-header is not interpreted by the server API.
    header("Status: 404 Not Found", true, 404);
}
?>
You can put the regexes for the following ranges together:
1[1-9]\d = 110-199
[2-9]\d\d = 200-999
1\d\d\d = 1000-1999
2[0-1]\d\d= 2000-2199
22[0-2]\d = 2200-2229
223[0-4] = 2230-2234
to form:
(1[1-9]\d|[2-9]\d\d|1\d\d\d|2[0-1]\d\d|22[0-2]\d|223[0-4])
\d means [0-9], but in three less characters
It's possible, albeit not pretty.
\b(?:[1][1][0-9]|1\d{3}|223[0-4]|2[0-1]\d\d|2[0-2][0-3][0-4])\b
I emailed Philip Hazel, the author of PCRE, in 2006 to ask what he thought of math in regex:
Perhaps this lies out of the scope of the project in your view: The ability to treat numbers as being numbers and not text, this would definitely be a worthwhile feature.
Allowing you to do some basic math checks on matched digits, like: is the 2nd matched digit higher or lower, is the third digit a multiple of the 1st, and many more complicated cases I won't elaborate on just to get my point across.
Do you feel this exceeds the realm of textmatching?
to which I got the following reply:
Yes, I think I do, and also, it is not
something that is available in Perl
regular expressions. I know that PCRE
does have some extensions from Perl,
but nothing as major as that (you
could perhaps hack something up using
callouts, but that would be a bit ad
hoc, and no doubt exceedingly messy!).
Philip
and I couldn't agree more now in `09. Just match all numbers and do number validation in whatever language you're doing the matching with.
Also if you want to locate or find if your path name contains a year, and take it out as a string, you can try something like this:
import re

# Sample path that contains a year folder written as 'y' followed by four digits.
path1 = r'X:\S\Something_2019\y2019\AB19778_description\subfolder1\subfolder2'
# Second digit class is [09] (0 or 9); a comma inside the brackets would be
# matched literally and let strings such as 'y1,99' through.
# The leading greedy '.*' makes findall report only the last year in the string.
find = re.findall(r'.*(y[1-2][09][0-9]{2})', path1)
mystring = find[0]
print(mystring)
Checks in 'path1' if there is year string with format 'yYYYY'. So with letter 'y' as prefix (relevant for my study case).
This will return string 'y2019'.
I would like to match with RegExp a number between X and Y. Is that possible?
([0-9]+) will match any number, how could I do to match a number between, for instance, 110 and 2234?
According to Generate a Regular Expression to Match an Arbitrary Numeric Range, and after generating such a regex for your example at Regex_For_Range:
\b0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])\b
would do the trick.
The process would be (still following that Regex generator):
First, break into equal length ranges:
110 - 999
1000 - 2234
Second, break into ranges that yield simple regexes:
110 - 199
200 - 999
1000 - 1999
2000 - 2199
2200 - 2229
2230 - 2234
Turn each range into a regex:
1[1-9][0-9]
[2-9][0-9]{2}
1[0-9]{3}
2[01][0-9]{2}
22[0-2][0-9]
223[0-4]
Collapse adjacent powers of 10:
1[1-9][0-9]
[2-9][0-9]{2}
1[0-9]{3}
2[01][0-9]{2}
22[0-2][0-9]
223[0-4]
Combining the regexes above yields:
0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])
Next we'll try factoring out common prefixes using a tree:
Parse into tree based on regex prefixes:
. 1 [1-9] [0-9]
+ [0-9]{3}
+ [2-9] [0-9]{2}
+ 2 [01] [0-9]{2}
+ 2 [0-2] [0-9]
+ 3 [0-4]
Turning the parse tree into a regex yields:
0*(1([1-9][0-9]|[0-9]{3})|[2-9][0-9]{2}|2([01][0-9]{2}|2([0-2][0-9]|3[0-4])))
We choose the shorter one as our result.
\b0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])\b
This is not the sort of thing regexes excel at. You will probably find it easier to ensure that you have the right number of digits /^([0-9]{3,4})$/ and then do further checks against the capture.
• Numeric range regex generator
Since online number range regex generator services often become unavailable after some period of time (this one is still alive at the time of writing the post), I think it would be nice to have it here.
How-to:
Scroll to the bottom of this answer
Click Run code snippet
Make sure you type the minimum and maximum threshold values in the text input fields, mark all the options you need below and hit Generate button:
/*
 * Minified UMD bundle that exposes toRegexRange(min, max, options).
 *
 * Loader: exports through module.exports (CommonJS) or define() (AMD) when
 * available; otherwise assigns the function to the global object as
 * `toRegexRange`.
 *
 * Module 1 (main function):
 *   - throws TypeError when either argument fails the "is-number" check;
 *   - returns String(min) when max is undefined or identical to min;
 *   - returns "min|max" (wrapped in a group unless options.wrap === false,
 *     capturing when options.capture is set) when the bounds differ by 1;
 *   - otherwise splits the range into sub-ranges and joins their patterns
 *     with "|"; options read: relaxZeros / strictZeros, shorthand, capture, wrap;
 *   - results are memoised in toRegexRange.cache (reset with clearCache()).
 * Module 2 ("is-number"): accepts finite numbers and non-blank strings that
 * convert to a finite number.
 *
 * Do not reformat: the statement is split across two physical lines.
 */
!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{("undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this).toRegexRange=e()}}(function(){return function(){return function e(t,n,r){function i(u,a){if(!n[u]){if(!t[u]){var s="function"==typeof require&&require;if(!a&&s)return s(u,!0);if(o)return o(u,!0);var c=new Error("Cannot find module '"+u+"'");throw c.code="MODULE_NOT_FOUND",c}var f=n[u]={exports:{}};t[u][0].call(f.exports,function(e){return i(t[u][1][e]||e)},f,f.exports,e,t,n,r)}return n[u].exports}for(var o="function"==typeof require&&require,u=0;u<r.length;u++)i(r[u]);return i}}()({1:[function(e,t,n){"use strict";const r=e("is-number"),i=(e,t,n)=>{if(!1===r(e))throw new TypeError("toRegexRange: expected the first argument to be a number");if(void 0===t||e===t)return String(e);if(!1===r(t))throw new TypeError("toRegexRange: expected the second argument to be a number.");let o={relaxZeros:!0,...n};"boolean"==typeof o.strictZeros&&(o.relaxZeros=!1===o.strictZeros);let s=e+":"+t+"="+String(o.relaxZeros)+String(o.shorthand)+String(o.capture)+String(o.wrap);if(i.cache.hasOwnProperty(s))return i.cache[s].result;let c=Math.min(e,t),f=Math.max(e,t);if(1===Math.abs(c-f)){let n=e+"|"+t;return o.capture?`(${n})`:!1===o.wrap?n:`(?:${n})`}let l=h(e)||h(t),d={min:e,max:t,a:c,b:f},p=[],g=[];if(l&&(d.isPadded=l,d.maxLen=String(d.max).length),c<0){g=u(f<0?Math.abs(f):1,Math.abs(c),d,o),c=d.a=0}return f>=0&&(p=u(c,f,d,o)),d.negatives=g,d.positives=p,d.result=function(e,t,n){let r=a(e,t,"-",!1,n)||[],i=a(t,e,"",!1,n)||[],o=a(e,t,"-?",!0,n)||[];return r.concat(o).concat(i).join("|")}(g,p,o),!0===o.capture?d.result=`(${d.result})`:!1!==o.wrap&&p.length+g.length>1&&(d.result=`(?:${d.result})`),i.cache[s]=d,d.result};function o(e,t,n){if(e===t)return{pattern:e,count:[],digits:0};let r=function(e,t){let n=[];for(let 
r=0;r<e.length;r++)n.push([e[r],t[r]]);return n}(e,t),i=r.length,o="",u=0;for(let e=0;e<i;e++){let[t,i]=r[e];t===i?o+=t:"0"!==t||"9"!==i?o+=p(t,i,n):u++}return u&&(o+=!0===n.shorthand?"\\d":"[0-9]"),{pattern:o,count:[u],digits:i}}function u(e,t,n,r){let i,u=function(e,t){let n=1,r=1,i=f(e,n),o=new Set([t]);for(;e<=i&&i<=t;)o.add(i),i=f(e,n+=1);for(i=l(t+1,r)-1;e<i&&i<=t;)o.add(i),i=l(t+1,r+=1)-1;return(o=[...o]).sort(s),o}(e,t),a=[],c=e;for(let e=0;e<u.length;e++){let t=u[e],s=o(String(c),String(t),r),f="";n.isPadded||!i||i.pattern!==s.pattern?(n.isPadded&&(f=g(t,n,r)),s.string=f+s.pattern+d(s.count),a.push(s),c=t+1,i=s):(i.count.length>1&&i.count.pop(),i.count.push(s.count[0]),i.string=i.pattern+d(i.count),c=t+1)}return a}function a(e,t,n,r,i){let o=[];for(let i of e){let{string:e}=i;r||c(t,"string",e)||o.push(n+e),r&&c(t,"string",e)&&o.push(n+e)}return o}function s(e,t){return e>t?1:t>e?-1:0}function c(e,t,n){return e.some(e=>e[t]===n)}function f(e,t){return Number(String(e).slice(0,-t)+"9".repeat(t))}function l(e,t){return e-e%Math.pow(10,t)}function d(e){let[t=0,n=""]=e;return n||t>1?`{${t+(n?","+n:"")}}`:""}function p(e,t,n){return`[${e}${t-e==1?"":"-"}${t}]`}function h(e){return/^-?(0+)\d/.test(e)}function g(e,t,n){if(!t.isPadded)return e;let r=Math.abs(t.maxLen-String(e).length),i=!1!==n.relaxZeros;switch(r){case 0:return"";case 1:return i?"0?":"0";case 2:return i?"0{0,2}":"00";default:return i?`0{0,${r}}`:`0{${r}}`}}i.cache={},i.clearCache=(()=>i.cache={}),t.exports=i},{"is-number":2}],2:[function(e,t,n){"use strict";t.exports=function(e){return"number"==typeof e?e-e==0:"string"==typeof e&&""!==e.trim()&&(Number.isFinite?Number.isFinite(+e):isFinite(+e))}},{}]},{},[1])(1)});
// Wires up the generator UI once the DOM is ready: clears stale output while a
// boundary is being edited, keeps the four boundary-mode checkboxes mutually
// exclusive, and builds the pattern when "Generate" is clicked.
$( document ).ready( function() {
  // Checkboxes that choose how the pattern is anchored; ticking one unticks the rest.
  const boundaryModes = [ '#wholestring', '#wb', '#dgtb', '#whtb' ];

  // Escapes &, < and > so the pattern is displayed literally. Without this a
  // lookbehind such as (?<!\S) is parsed as markup and the output is truncated.
  function escapeHtml( text ) {
    return text.replace( /&/g, '&amp;' ).replace( /</g, '&lt;' ).replace( />/g, '&gt;' );
  }

  $( "#rangeLeft, #rangeRight" ).keydown( function() {
    clearDisplay();
  } );

  boundaryModes.forEach( function( selected ) {
    $( selected ).click( function() {
      $( selected ).attr( 'checked', 'checked' );
      boundaryModes.forEach( function( other ) {
        if ( other !== selected ) $( other ).attr( 'checked', false );
      } );
    } );
  } );

  $( "#run" ).click( function() {
    clearDisplay();
    const rangeLeft = $( "#rangeLeft" ).val();
    const rangeRight = $( "#rangeRight" ).val();
    if ( ! checkRanges( rangeLeft, rangeRight ) ) return;

    // The boundaries are passed on exactly as typed (as strings).
    let source = toRegexRange( rangeLeft, rangeRight );
    if ( $( '#frac' ).is( ':checked' ) ) {
      source = source + '(?:\\.\\d+)?';
    }
    if ( $( '#allowzero' ).is( ':checked' ) ) {
      source = "0*" + source;
    }
    if ( $( '#neg' ).is( ':checked' ) ) {
      source = "-?" + source;
    }

    // Only the first ticked boundary mode is applied.
    if ( $( '#wholestring' ).is( ':checked' ) ) {
      source = '^' + source + '$';
    } else if ( $( '#wb' ).is( ':checked' ) ) {
      source = '\\b' + source + '\\b';
    } else if ( $( '#whtb' ).is( ':checked' ) ) {
      source = '(?<!\\S)' + source + '(?!\\S)';
    } else if ( $( '#dgtb' ).is( ':checked' ) ) {
      source = '(?<!\\d)' + source + '(?!\\d)';
    }

    $( "#result" ).append( "<B>" + escapeHtml( source ) + "</B><BR/>" );
  } );
} );
/**
 * Validates the two range boundaries typed by the user, appending one message
 * per problem to the #result element.
 *
 * @param {string} rangeLeft  Text of the lower boundary.
 * @param {string} rangeRight Text of the upper boundary.
 * @returns {boolean} true only when both boundaries are present, non-negative
 *   and the left one is not greater than the right one.
 */
function checkRanges( rangeLeft, rangeRight ) {
  // Accepts an empty field, plain digits, or a negative non-zero integer.
  // Letting negatives through keeps the "less than 0" messages below
  // reachable; a digits-only check would reject the minus sign before they
  // could ever fire.
  const integerOrEmpty = /^(?:\d+|-0*[1-9]\d*)?$/;
  if ( ! integerOrEmpty.test( rangeLeft ) || ! integerOrEmpty.test( rangeRight ) ) {
    $( "#result" ).append( "Type two numbers<BR/>" );
    return false;
  }

  const left = Number.parseInt( rangeLeft, 10 );
  const right = Number.parseInt( rangeRight, 10 );
  // An empty field parses to NaN.
  const unspecified = Number.isNaN( left ) || Number.isNaN( right );

  // Every applicable problem is reported, not just the first one.
  if ( unspecified ) $( "#result" ).append( "Range boundaries are not specified<BR/>" );
  if ( left < 0 ) $( "#result" ).append( "Left boundary is less than 0<BR/>" );
  if ( right < 0 ) $( "#result" ).append( "Right boundary is less than 0<BR/>" );
  if ( left > right ) $( "#result" ).append( "Left boundary is greater than the right boundary<BR/>" );

  return ! ( unspecified || left < 0 || right < 0 || left > right );
}
/**
 * Resets the output area: empties the #result element and hides the #test
 * element.
 */
function clearDisplay() {
  const resultPane = $( '#result' );
  const testPane = $( '#test' );
  resultPane.html( '' );
  testPane.hide();
}
<!-- jQuery 2.1.1 from cdnjs; the generator script relies on the global $. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<p>Type in minimum and maximum values and click <i>Generate</i>.</p>
<!-- Lower and upper boundary of the numeric range. -->
<input id="rangeLeft" value="1" /> - <input id="rangeRight" value="365" />
<br/>
<button id="run">Generate</button>
<!-- Output area. <code> is not a void element, so it is closed explicitly
     instead of relying on the parser to recover from a self-closing tag. -->
<pre><code id="result"></code></pre>
<!-- Options: the first four choose how the pattern is anchored, the rest add
     optional prefixes/suffixes to the generated pattern. -->
<div>
<label><input type="checkbox" id="wholestring"/>Match whole string</label> <br/>
<label><input type="checkbox" id="wb"/>Match within word boundaries</label><br/>
<label><input type="checkbox" id="dgtb"/>Match when not enclosed with digits</label><br/>
<label><input type="checkbox" id="whtb"/>Match when enclosed with whitespaces or start/end of string</label><br/>
<label><input type="checkbox" id="allowzero"/>Allow leading zeros</label><br/>
<label><input type="checkbox" id="neg"/>Optionally match negative numbers</label><br/>
<label><input type="checkbox" id="frac"/>Optionally match fractional digits (floats)</label><br/>
</div>
Most of the JavaScript code here is borrowed from Алгоритм для преобразования диапазона номеров в регулярное выражение and to-regex-range npm library.
While you could do it with some absurd looking regex (as VonC answered), regex really isn't supposed to do this.. Why not defer the number checking to the redirected-to-script?
If numbers 110-2234 go to script1, and 1-109 go to script2, it would be much simpler to direct all numbers at a router script, and have it redirect to the correct location (via HTTP redirects)..
In .htaccess:
RewriteRule ^view/([0-9]+)/?$ router.php?page=$1 [L]
..then in router.php, something like:
<?PHP
// Router: page numbers in the inclusive range 110-2234 are redirected to
// /script1; everything else gets a 404.
// PHP has no int() function, so the value is converted with an (int) cast.
$page = isset($_GET['page']) ? (int) $_GET['page'] : 0;
if(
    $page >= 110 &&
    $page <= 2234
){
    // header() refuses values containing a newline, so the status code is
    // passed as the third argument instead of a second "Status:" line.
    header("Location: /script1", true, 301);
}else{
    // The third argument sets the response code even where the "Status:"
    // pseudo-header is not interpreted by the server API.
    header("Status: 404 Not Found", true, 404);
}
?>
You can put the regexes for the following ranges together:
1[1-9]\d = 110-199
[2-9]\d\d = 200-999
1\d\d\d = 1000-1999
2[0-1]\d\d= 2000-2199
22[0-2]\d = 2200-2229
223[0-4] = 2230-2234
to form:
(1[1-9]\d|[2-9]\d\d|1\d\d\d|2[0-1]\d\d|22[0-2]\d|223[0-4])
\d means [0-9], but in three less characters
It's possible, albeit not pretty.
\b(?:[1][1][0-9]|1\d{3}|223[0-4]|2[0-1]\d\d|2[0-2][0-3][0-4])\b
I emailed Philip Hazel, the author of PCRE, in 2006 to ask what he thought of math in regex:
Perhaps this lies out of the scope of the project in your view: The ability to treat numbers as being numbers and not text, this would definitely be a worthwhile feature.
Allowing you to do some basic math checks on matched digits, like: is the 2nd matched digit higher or lower, is the third digit a multiple of the 1st, and many more complicated cases I won't elaborate on just to get my point across.
Do you feel this exceeds the realm of textmatching?
to which I got the following reply:
Yes, I think I do, and also, it is not
something that is available in Perl
regular expressions. I know that PCRE
does have some extensions from Perl,
but nothing as major as that (you
could perhaps hack something up using
callouts, but that would be a bit ad
hoc, and no doubt exceedingly messy!).
Philip
and I couldn't agree more now in `09. Just match all numbers and do number validation in whatever language you're doing the matching with.
Also if you want to locate or find if your path name contains a year, and take it out as a string, you can try something like this:
import re

# Sample path that contains a year folder written as 'y' followed by four digits.
path1 = r'X:\S\Something_2019\y2019\AB19778_description\subfolder1\subfolder2'
# Second digit class is [09] (0 or 9); a comma inside the brackets would be
# matched literally and let strings such as 'y1,99' through.
# The leading greedy '.*' makes findall report only the last year in the string.
find = re.findall(r'.*(y[1-2][09][0-9]{2})', path1)
mystring = find[0]
print(mystring)
Checks in 'path1' if there is year string with format 'yYYYY'. So with letter 'y' as prefix (relevant for my study case).
This will return string 'y2019'.
I would like to match with RegExp a number between X and Y. Is that possible?
([0-9]+) will match any number, how could I do to match a number between, for instance, 110 and 2234?
According to Generate a Regular Expression to Match an Arbitrary Numeric Range, and after generating such a regex for your example at Regex_For_Range:
\b0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])\b
would do the trick.
The process would be (still following that Regex generator):
First, break into equal length ranges:
110 - 999
1000 - 2234
Second, break into ranges that yield simple regexes:
110 - 199
200 - 999
1000 - 1999
2000 - 2199
2200 - 2229
2230 - 2234
Turn each range into a regex:
1[1-9][0-9]
[2-9][0-9]{2}
1[0-9]{3}
2[01][0-9]{2}
22[0-2][0-9]
223[0-4]
Collapse adjacent powers of 10:
1[1-9][0-9]
[2-9][0-9]{2}
1[0-9]{3}
2[01][0-9]{2}
22[0-2][0-9]
223[0-4]
Combining the regexes above yields:
0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])
Next we'll try factoring out common prefixes using a tree:
Parse into tree based on regex prefixes:
. 1 [1-9] [0-9]
+ [0-9]{3}
+ [2-9] [0-9]{2}
+ 2 [01] [0-9]{2}
+ 2 [0-2] [0-9]
+ 3 [0-4]
Turning the parse tree into a regex yields:
0*(1([1-9][0-9]|[0-9]{3})|[2-9][0-9]{2}|2([01][0-9]{2}|2([0-2][0-9]|3[0-4])))
We choose the shorter one as our result.
\b0*(1[1-9][0-9]|[2-9][0-9]{2}|1[0-9]{3}|2[01][0-9]{2}|22[0-2][0-9]|223[0-4])\b
This is not the sort of thing regexes excel at. You will probably find it easier to ensure that you have the right number of digits /^([0-9]{3,4})$/ and then do further checks against the capture.
• Numeric range regex generator
Since online number range regex generator services often become unavailable after some period of time (this one is still alive at the time of writing the post), I think it would be nice to have it here.
How-to:
Scroll to the bottom of this answer
Click Run code snippet
Make sure you type the minimum and maximum threshold values in the text input fields, mark all the options you need below and hit Generate button:
/*
 * Minified UMD bundle that exposes toRegexRange(min, max, options).
 *
 * Loader: exports through module.exports (CommonJS) or define() (AMD) when
 * available; otherwise assigns the function to the global object as
 * `toRegexRange`.
 *
 * Module 1 (main function):
 *   - throws TypeError when either argument fails the "is-number" check;
 *   - returns String(min) when max is undefined or identical to min;
 *   - returns "min|max" (wrapped in a group unless options.wrap === false,
 *     capturing when options.capture is set) when the bounds differ by 1;
 *   - otherwise splits the range into sub-ranges and joins their patterns
 *     with "|"; options read: relaxZeros / strictZeros, shorthand, capture, wrap;
 *   - results are memoised in toRegexRange.cache (reset with clearCache()).
 * Module 2 ("is-number"): accepts finite numbers and non-blank strings that
 * convert to a finite number.
 *
 * Do not reformat: the statement is split across two physical lines.
 */
!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{("undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this).toRegexRange=e()}}(function(){return function(){return function e(t,n,r){function i(u,a){if(!n[u]){if(!t[u]){var s="function"==typeof require&&require;if(!a&&s)return s(u,!0);if(o)return o(u,!0);var c=new Error("Cannot find module '"+u+"'");throw c.code="MODULE_NOT_FOUND",c}var f=n[u]={exports:{}};t[u][0].call(f.exports,function(e){return i(t[u][1][e]||e)},f,f.exports,e,t,n,r)}return n[u].exports}for(var o="function"==typeof require&&require,u=0;u<r.length;u++)i(r[u]);return i}}()({1:[function(e,t,n){"use strict";const r=e("is-number"),i=(e,t,n)=>{if(!1===r(e))throw new TypeError("toRegexRange: expected the first argument to be a number");if(void 0===t||e===t)return String(e);if(!1===r(t))throw new TypeError("toRegexRange: expected the second argument to be a number.");let o={relaxZeros:!0,...n};"boolean"==typeof o.strictZeros&&(o.relaxZeros=!1===o.strictZeros);let s=e+":"+t+"="+String(o.relaxZeros)+String(o.shorthand)+String(o.capture)+String(o.wrap);if(i.cache.hasOwnProperty(s))return i.cache[s].result;let c=Math.min(e,t),f=Math.max(e,t);if(1===Math.abs(c-f)){let n=e+"|"+t;return o.capture?`(${n})`:!1===o.wrap?n:`(?:${n})`}let l=h(e)||h(t),d={min:e,max:t,a:c,b:f},p=[],g=[];if(l&&(d.isPadded=l,d.maxLen=String(d.max).length),c<0){g=u(f<0?Math.abs(f):1,Math.abs(c),d,o),c=d.a=0}return f>=0&&(p=u(c,f,d,o)),d.negatives=g,d.positives=p,d.result=function(e,t,n){let r=a(e,t,"-",!1,n)||[],i=a(t,e,"",!1,n)||[],o=a(e,t,"-?",!0,n)||[];return r.concat(o).concat(i).join("|")}(g,p,o),!0===o.capture?d.result=`(${d.result})`:!1!==o.wrap&&p.length+g.length>1&&(d.result=`(?:${d.result})`),i.cache[s]=d,d.result};function o(e,t,n){if(e===t)return{pattern:e,count:[],digits:0};let r=function(e,t){let n=[];for(let 
r=0;r<e.length;r++)n.push([e[r],t[r]]);return n}(e,t),i=r.length,o="",u=0;for(let e=0;e<i;e++){let[t,i]=r[e];t===i?o+=t:"0"!==t||"9"!==i?o+=p(t,i,n):u++}return u&&(o+=!0===n.shorthand?"\\d":"[0-9]"),{pattern:o,count:[u],digits:i}}function u(e,t,n,r){let i,u=function(e,t){let n=1,r=1,i=f(e,n),o=new Set([t]);for(;e<=i&&i<=t;)o.add(i),i=f(e,n+=1);for(i=l(t+1,r)-1;e<i&&i<=t;)o.add(i),i=l(t+1,r+=1)-1;return(o=[...o]).sort(s),o}(e,t),a=[],c=e;for(let e=0;e<u.length;e++){let t=u[e],s=o(String(c),String(t),r),f="";n.isPadded||!i||i.pattern!==s.pattern?(n.isPadded&&(f=g(t,n,r)),s.string=f+s.pattern+d(s.count),a.push(s),c=t+1,i=s):(i.count.length>1&&i.count.pop(),i.count.push(s.count[0]),i.string=i.pattern+d(i.count),c=t+1)}return a}function a(e,t,n,r,i){let o=[];for(let i of e){let{string:e}=i;r||c(t,"string",e)||o.push(n+e),r&&c(t,"string",e)&&o.push(n+e)}return o}function s(e,t){return e>t?1:t>e?-1:0}function c(e,t,n){return e.some(e=>e[t]===n)}function f(e,t){return Number(String(e).slice(0,-t)+"9".repeat(t))}function l(e,t){return e-e%Math.pow(10,t)}function d(e){let[t=0,n=""]=e;return n||t>1?`{${t+(n?","+n:"")}}`:""}function p(e,t,n){return`[${e}${t-e==1?"":"-"}${t}]`}function h(e){return/^-?(0+)\d/.test(e)}function g(e,t,n){if(!t.isPadded)return e;let r=Math.abs(t.maxLen-String(e).length),i=!1!==n.relaxZeros;switch(r){case 0:return"";case 1:return i?"0?":"0";case 2:return i?"0{0,2}":"00";default:return i?`0{0,${r}}`:`0{${r}}`}}i.cache={},i.clearCache=(()=>i.cache={}),t.exports=i},{"is-number":2}],2:[function(e,t,n){"use strict";t.exports=function(e){return"number"==typeof e?e-e==0:"string"==typeof e&&""!==e.trim()&&(Number.isFinite?Number.isFinite(+e):isFinite(+e))}},{}]},{},[1])(1)});
// Wires up the generator UI once the DOM is ready: clears stale output while a
// boundary is being edited, keeps the four boundary-mode checkboxes mutually
// exclusive, and builds the pattern when "Generate" is clicked.
$( document ).ready( function() {
  // Checkboxes that choose how the pattern is anchored; ticking one unticks the rest.
  const boundaryModes = [ '#wholestring', '#wb', '#dgtb', '#whtb' ];

  // Escapes &, < and > so the pattern is displayed literally. Without this a
  // lookbehind such as (?<!\S) is parsed as markup and the output is truncated.
  function escapeHtml( text ) {
    return text.replace( /&/g, '&amp;' ).replace( /</g, '&lt;' ).replace( />/g, '&gt;' );
  }

  $( "#rangeLeft, #rangeRight" ).keydown( function() {
    clearDisplay();
  } );

  boundaryModes.forEach( function( selected ) {
    $( selected ).click( function() {
      $( selected ).attr( 'checked', 'checked' );
      boundaryModes.forEach( function( other ) {
        if ( other !== selected ) $( other ).attr( 'checked', false );
      } );
    } );
  } );

  $( "#run" ).click( function() {
    clearDisplay();
    const rangeLeft = $( "#rangeLeft" ).val();
    const rangeRight = $( "#rangeRight" ).val();
    if ( ! checkRanges( rangeLeft, rangeRight ) ) return;

    // The boundaries are passed on exactly as typed (as strings).
    let source = toRegexRange( rangeLeft, rangeRight );
    if ( $( '#frac' ).is( ':checked' ) ) {
      source = source + '(?:\\.\\d+)?';
    }
    if ( $( '#allowzero' ).is( ':checked' ) ) {
      source = "0*" + source;
    }
    if ( $( '#neg' ).is( ':checked' ) ) {
      source = "-?" + source;
    }

    // Only the first ticked boundary mode is applied.
    if ( $( '#wholestring' ).is( ':checked' ) ) {
      source = '^' + source + '$';
    } else if ( $( '#wb' ).is( ':checked' ) ) {
      source = '\\b' + source + '\\b';
    } else if ( $( '#whtb' ).is( ':checked' ) ) {
      source = '(?<!\\S)' + source + '(?!\\S)';
    } else if ( $( '#dgtb' ).is( ':checked' ) ) {
      source = '(?<!\\d)' + source + '(?!\\d)';
    }

    $( "#result" ).append( "<B>" + escapeHtml( source ) + "</B><BR/>" );
  } );
} );
/**
 * Validates the two range boundaries and reports every problem found by
 * appending a message to the #result element.
 *
 * @param {string} rangeLeft - Text of the lower boundary (the caller passes an input's value).
 * @param {string} rangeRight - Text of the upper boundary.
 * @returns {boolean} true when both boundaries are non-negative integers and
 *   the left one does not exceed the right one; false otherwise.
 */
function checkRanges( rangeLeft, rangeRight ) {
  // Anything other than plain digits (sign, dot, letters, spaces) is rejected up front.
  if ( /\D/.test( rangeLeft ) || /\D/.test( rangeRight ) ) {
    $( "#result" ).append( "Type two numbers<BR/>" );
    return false;
  }

  // Explicit radix: the boundaries are always decimal.
  const left = Number.parseInt( rangeLeft, 10 );
  const right = Number.parseInt( rangeRight, 10 );

  // Collect the problems once so the messages and the return value cannot drift apart.
  const problems = [];
  // An empty input passes the digit check above but parses to NaN.
  if ( Number.isNaN( left ) || Number.isNaN( right ) ) {
    problems.push( "Range boundaries are not specified" );
  }
  if ( left < 0 ) {
    problems.push( "Left boundary is less than 0" );
  }
  if ( right < 0 ) {
    problems.push( "Right boundary is less than 0" );
  }
  if ( left > right ) {
    problems.push( "Left boundary is greater than the right boundary" );
  }

  problems.forEach( function( message ) {
    $( "#result" ).append( message + "<BR/>" );
  } );
  return problems.length === 0;
}
/**
 * Resets the output area: empties the #result element and hides the #test element.
 */
function clearDisplay() {
  const resultBox = $( "#result" );
  resultBox.html( "" );

  const testPanel = $( "#test" );
  testPanel.hide();
}
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<p>Type in minimum and maximum values and click <i>Generate</i>.</p>
<INPUT id="rangeLeft" value="1" /> - <INPUT id="rangeRight" value="365" />
<BR/>
<BUTTON id="run">Generate</BUTTON>
<!-- The generated pattern is appended here by the script. -->
<pre><code id="result"></code></pre>
<div>
<!-- Boundary options: the script unchecks the other three whenever one of these is clicked. -->
<label><input type="checkbox" id="wholestring"/>Match whole string</label> <br/>
<label><input type="checkbox" id="wb"/>Match within word boundaries</label><br/>
<label><input type="checkbox" id="dgtb"/>Match when not enclosed with digits</label><br/>
<label><input type="checkbox" id="whtb"/>Match when enclosed with whitespaces or start/end of string</label><br/>
<!-- Independent options: any combination may be selected. -->
<label><input type="checkbox" id="allowzero"/>Allow leading zeros</label><br/>
<label><input type="checkbox" id="neg"/>Optionally match negative numbers</label><br/>
<label><input type="checkbox" id="frac"/>Optionally match fractional digits (floats)</label><br/>
</div>
Most of the JavaScript code here is borrowed from Алгоритм для преобразования диапазона номеров в регулярное выражение ("Algorithm for converting a range of numbers into a regular expression") and the to-regex-range npm library.
While you could do it with some absurd-looking regex (as VonC answered), regex really isn't meant for this. Why not defer the number checking to the script you are redirecting to?
If numbers 110-2234 go to script1, and 1-109 go to script2, it would be much simpler to direct all numbers at a router script, and have it redirect to the correct location (via HTTP redirects).
In .htaccess:
RewriteRule ^view/([0-9]+)/?$ router.php?page=$1 [L]
..then in router.php, something like:
<?PHP
// Router: page numbers 110-2234 (inclusive) are redirected to /script1,
// every other value is answered with a 404.
// The (int) cast replaces int(), which is not a PHP function; a missing
// parameter is treated as 0 and therefore falls through to the 404 branch.
$page = isset($_GET['page']) ? (int) $_GET['page'] : 0;
if(
$page >= 110 &&
$page <= 2234
){
// One header per header() call (newlines inside a header are rejected);
// the third argument sets the HTTP status code of the redirect.
header("Location: /script1", true, 301);
exit;
}else{
header("HTTP/1.1 404 Not Found");
}
?>
You can put the regexes for the following ranges together:
1[1-9]\d = 110-199
[2-9]\d\d = 200-999
1\d\d\d = 1000-1999
2[0-1]\d\d= 2000-2199
22[0-2]\d = 2200-2229
223[0-4] = 2230-2234
to form:
(1[1-9]\d|[2-9]\d\d|1\d\d\d|2[0-1]\d\d|22[0-2]\d|223[0-4])
\d means [0-9], but in three fewer characters.
It's possible, albeit not pretty.
\b(?:1[1-9]\d|[2-9]\d\d|1\d{3}|2[01]\d\d|22[0-2]\d|223[0-4])\b
I emailed Philip Hazel, the author of PCRE, in 2006 to ask what he thought of math in regex:
Perhaps this lies out of the scope of the project in your view: The ability to treat numbers as being numbers and not text, this would definitely be a worthwhile feature.
Allowing you to do some basic math checks on matched digits, like: is the 2nd matched digit higher or lower, is the third digit a multiple of the 1st, and many more complicated cases I won't elaborate on just to get my point across.
Do you feel this exceeds the realm of textmatching?
to which I got the following reply:
Yes, I think I do, and also, it is not
something that is available in Perl
regular expressions. I know that PCRE
does have some extensions from Perl,
but nothing as major as that (you
could perhaps hack something up using
callouts, but that would be a bit ad
hoc, and no doubt exceedingly messy!).
Philip
and I couldn't agree more now in '09. Just match all numbers and do the number validation in whatever language you're doing the matching with.
Also if you want to locate or find if your path name contains a year, and take it out as a string, you can try something like this:
import re

# Example path containing a year tag of the form 'yYYYY'.
path1 = r'X:\S\Something_2019\y2019\AB19778_description\subfolder1\subfolder2'
# The second digit class is [09]: a comma inside a character class is a literal,
# so the former [0,9] also accepted strings such as 'y2,19'.
# The greedy '.*' prefix makes findall capture the last such tag in the path.
find = re.findall(r'.*(y[1-2][09][0-9]{2})', path1)
mystring = find[0]
print(mystring)
This checks whether 'path1' contains a year string in the format 'yYYYY', i.e. with the letter 'y' as a prefix (relevant for my use case).
This will return string 'y2019'.