Get the second eval (regex with wildcards)? - regex

I am reading an HTML document. So far I have been using HTML::TreeBuilder with HTML::Element and look_down, but now I am stuck with the content of a script <script>...</script>
<script language="JavaScript">
eval(function(p,a,c,k,e,r){e=function(c){return(c<a?'':e(parseInt(c/a)))+((c=c%a)>35?String.fromCharCode(c+29):c.toString(36))};if(!''.replace(/^/,String)){while(c--)r[e(c)]=k[c]||e(c);k=[function(e){return r[e]}];e=function(){return'\\w+'};c=1};while(c--)if(k[c])p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c]);return p}('7 F={f:"P+/=",Q:z(5){7 8="";7 s,k,l,v,t,h,j;7 i=0;5=F.I(5);G(i<5.B){s=5.m(i++);k=5.m(i++);l=5.m(i++);v=s>>2;t=((s&3)<<4)|(k>>4);h=((k&H)<<2)|(l>>6);j=l&u;o(J(k)){h=j=C}w o(J(l)){j=C}8=8+p.f.q(v)+p.f.q(t)+p.f.q(h)+p.f.q(j)}D 8},R:z(5){7 8="";7 s,k,l;7 v,t,h,j;7 i=0;5=5.K(/[^A-S-T-9\\+\\/\\=]/g,"");G(i<5.B){v=p.f.E(5.q(i++));t=p.f.E(5.q(i++));h=p.f.E(5.q(i++));j=p.f.E(5.q(i++));s=(v<<2)|(t>>4);k=((t&H)<<4)|(h>>2);l=((h&3)<<6)|j;8=8+b.d(s);o(h!=C){8=8+b.d(k)}o(j!=C){8=8+b.d(l)}}8=F.L(8);D 8},I:z(e){e=e.K(/\\r\\n/g,"\\n");7 a="";U(7 n=0;n<e.B;n++){7 c=e.m(n);o(c<x){a+=b.d(c)}w o((c>V)&&(c<W)){a+=b.d((c>>6)|X);a+=b.d((c&u)|x)}w{a+=b.d((c>>M)|N);a+=b.d(((c>>6)&u)|x);a+=b.d((c&u)|x)}}D a},L:z(a){7 e="";7 i=0;7 c=Y=y=0;G(i<a.B){c=a.m(i);o(c<x){e+=b.d(c);i++}w o((c>Z)&&(c<N)){y=a.m(i+1);e+=b.d(((c&10)<<6)|(y&u));i+=2}w{y=a.m(i+1);O=a.m(i+2);e+=b.d(((c&H)<<M)|((y&u)<<6)|(O&u));i+=3}}D e}}',62,63,'|||||input||var|output||utftext|String||fromCharCode|string|_keyStr||enc3||enc4|chr2|chr3|charCodeAt||if|this|charAt||chr1|enc2|63|enc1|else|128|c2|function||length|64|return|indexOf|Base64|while|15|_utf8_encode|isNaN|replace|_utf8_decode|12|224|c3|ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789|encode|decode|Za|z0|for|127|2048|192|c1|191|31'.split('|'),0,{}));
eval(function(p,a,c,k,e,d){e=function(c){return(c<a?'':e(c/a))+String.fromCharCode(c%a+161)};if(!''.replace(/^/,String)){while(c--){d[e(c)]=k[c]||e(c)}k=[function(e){return d[e]}];e=function(){return'\[\xa1-\xff]+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp(e(c),'g'),k[c])}}return p}('¦ £=\'¥+¢+¤+¢+\';¡.«();¡.§(©.¨(£));¡.ª();',11,11,'document|PC9pZnJhbWU|ba2se|PGlmcmFtZSB3aWR0aCA9ICIxMDAlIiBoZWlnaHQgPSAiMTAwJSIgc2Nyb2xsaW5nID0gImF1dG8iIGZyYW1lYm9yZGVyID0gIjAiIHNyYz0iJiMxMDQ7JiMxMTY7JiMxMTY7JiMxMTI7JiM1ODsmIzQ3OyYjNDc7JiMxMTc7JiMxMDg7JiM0NjsmIzExNjsmIzExMTsmIzQ3OyYjNTY7JiMxMTQ7JiM5ODsmIzEyMTsmIzExNzsmIzU3OyYjMTEzOyYjMTAwOyI|PGlmcmFtZSB3aWR0aCA9ICIwIiBoZWlnaHQgPSAiMCIgc2Nyb2xsaW5nID0gImF1dG8iIGZyYW1lYm9yZGVyID0gIjAiIHNyYz0iaHR0cDovL2dvb2dsZS5kZSI|var|write|decode|Base64|close|open'.split('|'),0,{}))
</script>
I want to get the text of the second eval(......) call, up to the closing </script> tag.
I tried
my ($var) = $response->decoded_content =~ /^eval(.*?)\/script/
but I get both evals, which is obvious.
EDIT : Added raw source

This program shows how you might go about it. /eval/ finds the first occurrence of eval, while /.*eval/ finds the last occurrence.
I have used an HTML document that is empty apart from a single <script> element in the <head> section.
The call to look_down will find all <script> elements with a language attribute equal to JavaScript and put them in the array @script. In this case there is only one, so I use $script[0]. Depending on your HTML you may need to select one of several elements.
A call to as_text ignores <script> and <style> elements, so I have to use content_list to get the text inside the <script> element. This text is put into $content, and everything from the last occurrence of eval onwards is copied to $eval.
I hope this helps.
use strict;
use warnings;
use HTML::TreeBuilder;
# Parse the HTML document stored after the __DATA__ marker.
my $tree = HTML::TreeBuilder->new_from_file(\*DATA);

# Collect every <script language="JavaScript"> element in the document.
my @script = $tree->look_down(_tag => 'script', language => 'JavaScript');
die "No <script language=\"JavaScript\"> element found\n" unless @script;

# as_text ignores <script> content, so read the element's raw text child
# through content_list instead. Only the first matching element is used.
my ($content) = $script[0]->content_list;
die "The <script> element has no content\n" unless defined $content;

# The greedy leading .* pushes the capture to the LAST "eval" in the text;
# /s lets . cross newlines and the trailing \S drops trailing whitespace.
my ($eval) = $content =~ /.*(eval.+\S)/s;
die "No eval(...) call found in the script\n" unless defined $eval;

print $eval;
__DATA__
<html>
<head>
<script language="JavaScript">
eval(function(p,a,c,k,e,r){e=function(c){return(c<a?'':e(parseInt(c/a)))+((c=c%a)>35?String.fromCharCode(c+29):c.toString(36))};if(!''.replace(/^/,String)){while(c--)r[e(c)]=k[c]||e(c);k=[function(e){return r[e]}];e=function(){return'\\w+'};c=1};while(c--)if(k[c])p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c]);return p}('7 F={f:"P+/=",Q:z(5){7 8="";7 s,k,l,v,t,h,j;7 i=0;5=F.I(5);G(i<5.B){s=5.m(i++);k=5.m(i++);l=5.m(i++);v=s>>2;t=((s&3)<<4)|(k>>4);h=((k&H)<<2)|(l>>6);j=l&u;o(J(k)){h=j=C}w o(J(l)){j=C}8=8+p.f.q(v)+p.f.q(t)+p.f.q(h)+p.f.q(j)}D 8},R:z(5){7 8="";7 s,k,l;7 v,t,h,j;7 i=0;5=5.K(/[^A-S-T-9\\+\\/\\=]/g,"");G(i<5.B){v=p.f.E(5.q(i++));t=p.f.E(5.q(i++));h=p.f.E(5.q(i++));j=p.f.E(5.q(i++));s=(v<<2)|(t>>4);k=((t&H)<<4)|(h>>2);l=((h&3)<<6)|j;8=8+b.d(s);o(h!=C){8=8+b.d(k)}o(j!=C){8=8+b.d(l)}}8=F.L(8);D 8},I:z(e){e=e.K(/\\r\\n/g,"\\n");7 a="";U(7 n=0;n<e.B;n++){7 c=e.m(n);o(c<x){a+=b.d(c)}w o((c>V)&&(c<W)){a+=b.d((c>>6)|X);a+=b.d((c&u)|x)}w{a+=b.d((c>>M)|N);a+=b.d(((c>>6)&u)|x);a+=b.d((c&u)|x)}}D a},L:z(a){7 e="";7 i=0;7 c=Y=y=0;G(i<a.B){c=a.m(i);o(c<x){e+=b.d(c);i++}w o((c>Z)&&(c<N)){y=a.m(i+1);e+=b.d(((c&10)<<6)|(y&u));i+=2}w{y=a.m(i+1);O=a.m(i+2);e+=b.d(((c&H)<<M)|((y&u)<<6)|(O&u));i+=3}}D e}}',62,63,'|||||input||var|output||utftext|String||fromCharCode|string|_keyStr||enc3||enc4|chr2|chr3|charCodeAt||if|this|charAt||chr1|enc2|63|enc1|else|128|c2|function||length|64|return|indexOf|Base64|while|15|_utf8_encode|isNaN|replace|_utf8_decode|12|224|c3|ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789|encode|decode|Za|z0|for|127|2048|192|c1|191|31'.split('|'),0,{}));
eval(function(p,a,c,k,e,d){e=function(c){return(c<a?'':e(c/a))+String.fromCharCode(c%a+161)};if(!''.replace(/^/,String)){while(c--){d[e(c)]=k[c]||e(c)}k=[function(e){return d[e]}];e=function(){return'\[\xa1-\xff]+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp(e(c),'g'),k[c])}}return p}('¦ £=\'¥+¢+¤+¢+\';¡.«();¡.§(©.¨(£));¡.ª();',11,11,'document|PC9pZnJhbWU|ba2se|PGlmcmFtZSB3aWR0aCA9ICIxMDAlIiBoZWlnaHQgPSAiMTAwJSIgc2Nyb2xsaW5nID0gImF1dG8iIGZyYW1lYm9yZGVyID0gIjAiIHNyYz0iJiMxMDQ7JiMxMTY7JiMxMTY7JiMxMTI7JiM1ODsmIzQ3OyYjNDc7JiMxMTc7JiMxMDg7JiM0NjsmIzExNjsmIzExMTsmIzQ3OyYjNTY7JiMxMTQ7JiM5ODsmIzEyMTsmIzExNzsmIzU3OyYjMTEzOyYjMTAwOyI|PGlmcmFtZSB3aWR0aCA9ICIwIiBoZWlnaHQgPSAiMCIgc2Nyb2xsaW5nID0gImF1dG8iIGZyYW1lYm9yZGVyID0gIjAiIHNyYz0iaHR0cDovL2dvb2dsZS5kZSI|var|write|decode|Base64|close|open'.split('|'),0,{}))
</script>
</head>
<body> </body>
</html>
output
eval(function(p,a,c,k,e,d){e=function(c){return(c<a?'':e(c/a))+String.fromCharCode(c%a+161)};if(!''.replace(/^/,String)){while(c--){d[e(c)]=k[c]||e(c)}k=[function(e){return d[e]}];e=function(){return'\[\xa1-\xff]+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp(e(c),'g'),k[c])}}return p}('¦ £=\'¥+¢+¤+¢+\';¡.«();¡.§(©.¨(£));¡.ª();',11,11,'document|PC9pZnJhbWU|ba2se|PGlmcmFtZSB3aWR0aCA9ICIxMDAlIiBoZWlnaHQgPSAiMTAwJSIgc2Nyb2xsaW5nID0gImF1dG8iIGZyYW1lYm9yZGVyID0gIjAiIHNyYz0iJiMxMDQ7JiMxMTY7JiMxMTY7JiMxMTI7JiM1ODsmIzQ3OyYjNDc7JiMxMTc7JiMxMDg7JiM0NjsmIzExNjsmIzExMTsmIzQ3OyYjNTY7JiMxMTQ7JiM5ODsmIzEyMTsmIzExNzsmIzU3OyYjMTEzOyYjMTAwOyI|PGlmcmFtZSB3aWR0aCA9ICIwIiBoZWlnaHQgPSAiMCIgc2Nyb2xsaW5nID0gImF1dG8iIGZyYW1lYm9yZGVyID0gIjAiIHNyYz0iaHR0cDovL2dvb2dsZS5kZSI|var|write|decode|Base64|close|open'.split('|'),0,{}))

Use regex pattern
\beval\(.*\S(?!.*eval)(?=\s*<\/script>)
or
\beval\(.*\K\beval\(.*\S(?=\s*<\/script>)

Just match it twice:
/^.*?eval\([^)]+\).*?(eval\([^)]+\))/
DEMO

For now this one works for me
/eval\(function\(p,a,c,k,e,d\)\{.*\}\)\)/gmsi
Thank you all for your help. I made a mistake by not including the whole script content at the beginning.

Related

How can I remove inner text between all <script>..</script/> by a regular expression?

I am trying to use regular expressions, to remove all the content between two strings ...
Suppose this is my content:
<h2>Misrepresentation of the Facts</h2>
</script>
<!-- Articles - Leaderboard 728x90 -->
</iframe></ins></ins></ins>
<script>
(adsbygoogle = window.adsbygoogle || []).push({});
</script>
<h2>Who Can Commit the Crime</h2>
I want to remove all content between </script>
<!-- Articles - Leaderboard 728x90 -->
</iframe></ins></ins></ins>
<script>
(adsbygoogle = window.adsbygoogle || []).push({});
</script>
Any help would be most appreciated.
<\/script>(?:[^<]*(?!.)+<\/script>
<\/script>(?:[^<]*(?!.)+<\/script>
I'm just guessing that these expressions being replaced by an empty string might work:
<\/script>[\s\S]*?<\/script>
<\/script>[\d\D]*?<\/script>
<\/script>[\w\W]*?<\/script>
Please see the demo here.
Escaping is just for demoing, and can be removed.

grep regex query

I ran an inline sed on a big directory of php files. The intention was to convert the http:// prefix of urls to protocol-relative urls // so that they could even work with https links. Inadvertently I ended up doing this to several hundred curl queries, and unfortunately those don't like those urls.
So I tried to find these with grep for pattern //. The problem is that comments also begin with //. I tried chaining grep in a pipe so that I can exclude comments.
I was trying to exclude 1 or more spaces at the beginning of the line, since most of my comments seemed indented. But it's not working.
grep --color=always -inr '//' *php | grep -v '^\s+//'
My reasoning is that the first grep matches comments with two slashes, then the second one excludes those lines where the line begins with one or more spaces. However, it doesn't seem to work like that. Here's a sample I got:
tvsearch.php:3:// Resource for iteration of nested php arrays
tvsearch.php:4:// //stackoverflow.com/a/3684584/1305947
tvsearch.php:14:// define('__ROOT__', dirname(dirname(__FILE__)));
tvsearch.php:15:// require_once(__ROOT__.'/htdocs/config.php');
tvsearch.php:16:// require_once(__ROOT__.'/htdocs/sqlfunctions.php');
tvsearch.php:17:// GetCredentialsDB();
tvsearch.php:19: // <link href="/css/bootstrap.min.css" rel="stylesheet">
tvsearch.php:20: // <link href="https://code.jquery.com/ui/1.11.3/themes/smoothness/jquery-ui.css" rel="Stylesheet"></link>
tvsearch.php:21: // <script src="/js/bootstrap.js"></script>
tvsearch.php:22: // <script src="https://code.jquery.com/ui/1.11.4/jquery-ui.js"></script>
tvsearch.php:25:// <link href="/css/grid.css" rel="stylesheet">
tvsearch.php:26:// <link href="/css/cover.css?v=1" rel="stylesheet">
tvsearch.php:44: <!-- link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-alpha.5/css/bootstrap.min.css" integrity="sha384-AysaV+vQoT3kOAXZkl02PThvDr8HYKPZhNT5h/CXfBThSRXQ6jW5DO2ekP5ViFdi" crossorigin="anonymous"> -->
tvsearch.php:52: <!-- <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.1.1/jquery.min.js" integrity="sha384-3ceskX3iaEnIogmQchP8opvBy3Mi7Ce34nWjpBIwVTHfGYWQS9jwHDVRnpKKHJg7" crossorigin="anonymous"></script> -->
tvsearch.php:53: <!-- <script src="https://cdnjs.cloudflare.com/ajax/libs/tether/1.3.7/js/tether.min.js" integrity="sha384-XTs3FgkjiBgo8qjEjBk0tGmf3wPrWtA6coPfQDfFEY8AnYJwjalXCiosYRBIBZX8" crossorigin="anonymous"></script> -->
tvsearch.php:56: <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-alpha.5/js/bootstrap.min.js" integrity="sha384-BLiI7JTZm+JWlgKa0M0kGRpJbF2J8q+qreVrKBC47e3K6BW78kGLrCkeRX6I9RoK" crossorigin="anonymous"></script>
tvsearch.php:100: <li>TV.com</li>';
tvshowcarousel.php:54: <li>TV.com</li>
tvtest.php:2:// Resource for iteration of nested php arrays
tvtest.php:3:// //stackoverflow.com/a/3684584/1305947
tvtest.php:11:// require_once "/tvmaze/TVMazeIncludes.php";
tvtest.php:18: //Return all tv shows relating to the given input
tvtest.php:19: // $showinfo = $Client->TVMaze->search("Arrow");
tvtest.php:21: //Return the most relevant tv show to the given input
tvtest.php:23: // Array [0] contains general info about the show
tvtest.php:29: // print_r($showinfo);
tvtest.php:30: // Array [1] contains all episode information
tvtest.php:32: //
tvtest.php:33: // // print_r($showinfo[1]);
tvtest.php:37: // print_r($innerArray);
tvtest.php:40: // echo "<p>Key:$key</p>";
tvtest.php:41: // echo "<p>Value:$value</p>";
tvtest.php:47: // print "<p>Season:".$season." Episode:".$episode."</p>";
tvtest.php:57: // print $showinfo[1]['season']
tvtest.php:58: // $tmpArray = $showinfo[1];
tvtest.php:59: // foreach ($tmpArray as $innerArray) {
tvtest.php:60: // print $innerArray['season'];
tvtest.php:61: // }
tvtest.php:65: // print_r($showinfo[0]->[summary]);
How am I going the wrong way about this? What I need is to match only lines like these:
tvshowcarousel.php:54: <li>TV.com</li>
torcontrol.php: curl_setopt($ch,CURLOPT_URL,"//".$this->host."/gui/?action=forcestart".$hashes);
To summarize, the problem: devise a grep query to find // within the line, but not beginning the line (excluding any spaces before them).
Maybe you can focus on the pattern you are trying to catch instead of the pattern you are not trying to catch.
For example, if all // are between quotes you can try something like this:
grep --color=always -inr '"//.*"' *php

RegExp replace all but selected

So I'm trying to erase everything except the matched case in this 1900 line document with Notepad++ RegExp Find/Replace, so that I only have the file names, which shorten it to under about 1000 lines at minimum. I know the code that selects the text ((?<=/images/item/)(.*)(?=" a) but the problem is I don't know how to make it erase anything that doesn't match that case. Here's a portion of the document.
using notepad++, it would find and select abyssal-scepter.gif, aegis-of-the-legion.gif, etc
<img src="/images/item/abyssal-scepter.gif" alt="LoL Item: Abyssal Scepter"><br> <div id="id_77" class="tier-wrapper drag-items health magic-resist health-regen champ-box float-left ajax-tooltip {t:'Item',i:'77'} classic-and-dominion filter-is-dominion filter-is-classic filter-tier-advanced filter-bonus-aura filter-category-health filter-category-magic-resist filter-category-health-regen ui-draggable ui-draggable-handle">
<img src="/images/item/aegis-of-the-legion.gif" alt="LoL Item: Aegis of the Legion"><br> <div id="id_235" class="tier-wrapper drag-items ability-power movement champ-box float-left ajax-tooltip {t:'Item',i:'235'} filter-tier-advanced filter-bonus-unique-passive filter-category-ability-power filter-category-movement ui-draggable ui-draggable-handle">
<img src="/images/item/aether-wisp.gif" alt="LoL Item: Aether Wisp"><br>
<div class="info">
<div class="champ-name">Aether Wisp</div>
<div class="champ-sub">
<img src="/images/gold.png" alt="Item Cost" style="width:16px; vertical-align:middle;"> 850 / 415
</div>
</div>
</div>
<div id="id_21" class="tier-wrapper drag-items ability-power champ-box float-left ajax-tooltip {t:'Item',i:'21'} classic-and-dominion filter-is-dominion filter-is-classic filter-tier-basic filter-category-ability-power ui-draggable ui-draggable-handle">
<img src="/images/item/amplifying-tome.gif" alt="LoL Item: Amplifying Tome"><br>
<div class="info">
<div class="champ-name">Amplifying Tome</div>
<div class="champ-sub">
I'm not familiar with RegExp, so to summarize, I need it to look like this at the end of it.
abyssal-scepter.gif
aegis-of-thelegion.gif
aether-wisp.gif
amplifying-tome.gif
Thank you for your time
A Notepad++ solution:
Find what : .*?/images/item/(.*?)"|.*
Replace with : $1\n
Search mode : Regular expression (with ". matches newline" checked)
The result will have an extra linefeed at the end.
But that shouldn't pose a problem I suppose.
Maybe this can help, or maybe not, since you dropped the JavaScript tag from your original post.
<script type="text/javascript">
var thestring = "<img src=\"/images/item/aegis-of-the-legion.gif\" alt=\"LoL Item: Aegis of the Legion\"><br>";
var thestring2 = "<img src=\"/images/otherstuff/aegis-of-the-legion.gif\" alt=\"LoL Item: Aegis of the Legion\"><br>";
/**
 * Extract the file name from markup whose quoted path starts with
 * "/images/item/.
 *
 * @param {string} incomingstring Markup to search, e.g. an <img> tag.
 * @returns {string} The text between "/images/item/ and the closing quote,
 *     or "" when the markup contains no such path followed by a space.
 */
function ParseIt(incomingstring) {
    // [^"]* stops at the attribute's own closing quote. A greedy .* would run
    // on to the last `" ` in the string and swallow any following attributes.
    var pattern = /"\/images\/item\/([^"]*)" /;
    // A single exec() both tests for a match and yields the capture group.
    var match = pattern.exec(incomingstring);
    return match ? match[1] : "";
}
</script>
Calling ParseIt(thestring) returns "aegis-of-the-legion.gif"
Calling ParseIt(thestring2) returns ""
Since you are doing this in NP++, this works for me. In cases like this where speed and results are more important than specific technique, I'll usually run several regexes. First, I'll get each tag on its own line by doing a search for > and replacing it with >\n. This gets each tag on its own line for simpler processing. Then a replace of ^>*<.*?".*?/?([\w\d\-_]+\.\w{2,4})?".*>.*$ with $1 will extract all the filenames from the tags, removing the unneeded text. Then, finally, to clear all the tags that didn't have a filename in them, just replace <.*> with an empty string. Finally, use Edit>Line Operations>Remove empty lines, and you'll have the result you're looking for. It's not a 100% regex solution, but this is a one time action that you just need a simple result from.

Regex to parse script tags excluding script tags inside js

Need regular expression to correctly parse script tags. I tried to use
/<script([^>]*)>(.*)<\/script>/iUsg, but it matches script tags inside strings inside script tag. Please help!
Sample code i need to match:
<html>
<script>
var = '<script></script><div>'
</script>
<div>423434</div>
<script type="text/javascript">
var b = "<script type='javascript.1.1'></script>";
</script>
</html>

I'm trying to write a shell script to replace a few lines of text in a file with a new value

The rules would be:
Delete all lines except the last line which contains: link and href=
Replace the contents of whatever is after: href= and before: .css with: hello-world
Must maintain no quotes, single quotes or double quotes around the file name
A few examples:
This is a source file with quotes:
<link rel="stylesheet" href="css/reset.css">
<link rel="stylesheet" href="css/master.css">
This is the new source file:
<link rel="stylesheet" href="hello-world.css">
This is a source file without quotes:
<link rel=stylesheet href=css/reset.css>
<link rel=stylesheet href=css/master.css>
This is the new source file:
<link rel=stylesheet href=hello-world.css>
It does not need to maintain the path of the file name. It however cannot use <> brackets or spaces to determine what needs to be edited because the template language which is writing that line might not use brackets or spaces. The only thing that would remain consistent is href=[filename].css.
My bash/sed/regex skills are awful but those tools seem like they will probably get the job done in a decent way? How would I go about doing this?
EDIT
To clarify, the end result would leave everything above and below the lines that contain link and href= alone. Imagine that the source file was an html file or any other template file like so:
<html>
<head>
<title>Hello</title>
<link rel="stylesheet" href="css/reset.css">
<link rel="stylesheet" href="css/master.css">
</head>
<body><p>...</p></body>
</html>
It would be changed to:
<html>
<head>
<title>Hello</title>
<link rel="stylesheet" href="css/hello-world.css">
</head>
<body><p>...</p></body>
</html>
The path of the CSS files might be anything too.
../foo/bar.css
http://www.hello.com/static/css/hi.css
/yep.css
ok.css
The new file's path would be supplied as an argument of the bash script so the regex should remove the path.
Following a discussion in chat, one solution using PHP as a command line script would look like this -
#! /usr/bin/php
<?php
/**
 * Collapse all "link ... href=<name>.css" lines of a file into one line.
 *
 * Every line matching the pattern is deleted except the last one, and in that
 * last line the text between href= (plus an optional quote) and .css is
 * replaced by the -r argument. The result is written to output.html.
 *
 * Options:
 *   -f  path of the input file to parse
 *   -r  replacement string for the css file name (without the .css suffix)
 */

$options = getopt("f:r:");
if (!is_array($options)
    || !isset($options['f'], $options['r'])
    || !is_string($options['f'])
    || !is_string($options['r'])
) {
    fwrite(STDERR, "Usage: php thisScript.php -f <input file> -r <replacement>\n");
    exit(1);
}
$inputFile = $options['f'];
$replacement = $options['r'];

// read entire contents of input file
$inputFileContents = file_get_contents($inputFile);
if ($inputFileContents === false) {
    fwrite(STDERR, "Unable to read input file: $inputFile\n");
    exit(1);
}

// Match whole lines containing "link" followed by href=<name>.css, where the
// name may be wrapped in single or double quotes. Group 1 is the name,
// including any path, up to the last ".css" on the line.
// PREG_OFFSET_CAPTURE records byte offsets so that only the matched spans are
// edited, never identical text elsewhere in the file.
$pattern = '/.*link.*href=["\']?(.*)\.css["\']?.*/';
if (preg_match_all($pattern, $inputFileContents, $matches, PREG_OFFSET_CAPTURE) === false) {
    fwrite(STDERR, "Regular expression search failed\n");
    exit(1);
}

// Walk the matches from last to first so that earlier offsets stay valid
// while later parts of the string are being rewritten.
$lastIndex = count($matches[0]) - 1;
for ($i = $lastIndex; $i >= 0; $i--) {
    if ($i === $lastIndex) {
        // last occurrence: keep the line, swap in the new file name
        list($matchedFileName, $offset) = $matches[1][$i];
        $inputFileContents = substr_replace(
            $inputFileContents,
            $replacement,
            $offset,
            strlen($matchedFileName)
        );
        continue;
    }

    // earlier occurrence: delete the line together with its line break so
    // that no blank line is left behind
    list($matchedLine, $offset) = $matches[0][$i];
    $length = strlen($matchedLine);
    if (substr($inputFileContents, $offset + $length, 1) === "\n") {
        $length++;
    }
    $inputFileContents = substr_replace($inputFileContents, '', $offset, $length);
}

// Save to a new file for debugging. To overwrite the original file instead,
// write to $inputFile here.
$outputFileName = "output.html";
if (file_put_contents($outputFileName, $inputFileContents) === false) {
    fwrite(STDERR, "Unable to write output file: $outputFileName\n");
    exit(1);
}

exit(0);
?>
You would execute this script from the command line like so -
$ php thisScript.php -f "input.html" -r "hello-world"
-f is the input file that we are parsing.
-r is the replacement string for the css file name (in this example "hello-world").
Answer specifically, for this case:
If you include same css file twice, it does not create any harm as far as seen by the user.
So you may just replace both css/reset.css AND css/master.css by css/hello-world.css.
There may be better ways, but I found this a quick way. It will work specifically for this case & not if you want to replace <script> or other tags.
Try including the first part of the file before the css and then include the rest of the file below the css, and in the middle, echo the correct css lines