Perl Replacing a complex regular expression with simpler method - regex

I have the following code:
#!/usr/bin/perl
use strict;
use warnings;
#use diagnostics;
use URI qw( );
my #insert_words = qw(HELLO GOODBYE);
while (<DATA>) {
chomp;
my $url = URI->new($_);
my $query = $url->query;
foreach (#insert_words) {
# Use package vars to communicate with /(?{})/ blocks.
local our $insert_word = $_;
local our #queries;
if (defined $query) {
$query =~ m{
^(.*[/=])([^/=&]*)((?:[/=&].*)?)\z
(?{
if (length $2) {
push #queries, "$1$insert_word$2$3";
push #queries, "$1$insert_word$3";
push #queries, "$1$2$insert_word$3";
}
})
(?!)
}x;
}
if (#queries) {
for (#queries) {
$url->query($_);
print $url, "\n";
}
}
else {
print $url, "\n";
}
}
}
__DATA__
http://www.example.com/index.php?route=9&other=7
The above piece of code works correctly and produces the following output:
http://www.example.com/index.php?route=9&other=HELLO7 <-- precedes the query parameter
http://www.example.com/index.php?route=9&other=HELLO <-- replaces the query parameter
http://www.example.com/index.php?route=9&other=7HELLO <-- succeeds the query parameter and so on for the rest of them....
http://www.example.com/index.php?route=HELLO9&other=7
http://www.example.com/index.php?route=HELLO&other=7
http://www.example.com/index.php?route=9HELLO&other=7
http://www.example.com/index.php?route=9&other=GOODBYE7
http://www.example.com/index.php?route=9&other=GOODBYE
http://www.example.com/index.php?route=9&other=7GOODBYE
http://www.example.com/index.php?route=GOODBYE9&other=7
http://www.example.com/index.php?route=GOODBYE&other=7
http://www.example.com/index.php?route=9GOODBYE&other=7
What I am trying to do
I am trying to get exactly the same output as shown above (so foreach #insert_words precede, replace and succeed each query parameter in the url), but I would like to replace the complicated regular expression method with a simpler, more easily understandable method, but I don't know the best way of going about it.
Your help with this will be much appreciated, many thanks

It is described in the documentation for URI how to handle queries. The URI::QueryParam module supplies the query_param subroutine that allows interaction with the queries.
use strict;
use warnings;
use URI;
use URI::QueryParam;
my #words = qw(HELLO GOODBYE);
my $URL = <DATA>;
my $uri = URI->new($URL);
for my $key ($uri->query_param) { # the keys of the query
my $org = $uri->query_param($key); # keep original value
for my $word (#words) {
for ("$org$word", $word, "$word$org") {
$uri->query_param($key, $_); # set new value
print $uri->as_string, $/; # print new uri
}
}
$uri->query_param($key, $org); # restore original value
}
__DATA__
http://www.example.com/index.php?route=9&other=7
Output:
http://www.example.com/index.php?route=9HELLO&other=7
http://www.example.com/index.php?route=HELLO&other=7
http://www.example.com/index.php?route=HELLO9&other=7
http://www.example.com/index.php?route=9GOODBYE&other=7
http://www.example.com/index.php?route=GOODBYE&other=7
http://www.example.com/index.php?route=GOODBYE9&other=7
http://www.example.com/index.php?route=9&other=7HELLO
http://www.example.com/index.php?route=9&other=HELLO
http://www.example.com/index.php?route=9&other=HELLO7
http://www.example.com/index.php?route=9&other=7GOODBYE
http://www.example.com/index.php?route=9&other=GOODBYE
http://www.example.com/index.php?route=9&other=GOODBYE7

Related

Searching Perl array with regex and return single capturing group only

I have a Perl script in which I perform web service calls in a loop. The server returns a multivalued HTTP header that I need to parse after each call with information that I will need to make the next call (if it doesn't return the header, I want to exit the loop).
I only care about one of the values in the header, and I need to get the information out of it with a regular expression. Let's say the header is like this, and I only care about the "foo" value:
X-Header: test-abc12345; blah=foo
X-Header: test-fgasjhgakg; blah=bar
I can get the header values like this: #values = $response->header( 'X-Header' );. But how do I quickly check if
There is a foo value, and
Parse and save the foo value for the next iteration?
Ideally, I'd like to do something like this:
my $value = 'default';
do {
# (do HTTP request; use $value)
#values = $response->header( 'X-Header' );
} while( $value = first { /(?:test-)([^;]+)(?:; blah=foo)/ } #values );
But grep, first (from List::Util), etc. all return the entire match and not just the single capturing group I want. I want to avoid cluttering up my code by looping over the array and matching/parsing inside the loop body.
Is what I want possible? What would be the most compact way to write it? So far, all I can come up with is using lookarounds and \K to discard the stuff I don't care about, but this isn't super readable and makes the regex engine perform a lot of unnecessary steps.
So it seems that you want to catch the first element with a certain pattern, but acquire only the pattern. And you want it done nicely. Indeed, first and grep only pass the element itself.
However, List::MoreUtils::first_result does support processing of its match
use List::MoreUtils 0.406 qw(first_result);
my #w = qw(a bit c dIT); # get first "it" case-insensitive
my $res = first_result { ( /(it)/i )[0] } #w;
say $res // 'undef'; #--> it
That ( ... )[0] is needed to put the regex in the list context so that it returns the actual capture. Another way would be firstres { my ($r) = /(it)/i; $r }. Pick your choice
For the data in the question
use warnings;
use strict;
use feature 'say';
use List::MoreUtils 0.406 qw(firstres);
my #data = (
'X-Header: test-abc12345; blah=foo',
'X-Header: test-fgasjhgakg; blah=bar'
);
if (my $r = firstres { ( /test-([^;]+);\s+blah=foo/ )[0] } #data) {
say $r
}
Prints abc12345, clarified in a comment to be the sought result.
Module versions prior to 0.406 (of 2015-03-03) didn't have firstres (alias first_result)
first { ... } #values returns one the values (or undef).
You could use either of these:
my ($value) = map { /...(...).../ } #values;
my $value = ( map { /...(...).../ } #values ) ? $1 : undef;
my $value = ( map { /...(...).../ } #values )[0];
Using first, it would look like the following, which is rather silly:
my $value = first { 1 } map { /...(...).../ } #values;
However, assuming the capture can't be an empty string or the string 0, List::MoreUtils's first_result could be used to avoid the unnecessary matches:
my $value = first_result { /...(...).../ ? $1 : undef } #values;
my $value = first_result { ( /...(...).../ )[0] } #values;
If the returned value can be false (e.g. an empty string or a 0) you can use something like
my $value = first_result { /...(...).../ ? \$1 : undef } #values;
$value = $$value if $value;
The first_result approach isn't necessarily faster in practice.
Following code snippet is looking for foo stored in a variable $find, the found values is stored in variable $found.
my $find = 'foo';
my $found;
while( $response->header( 'X-Header' ) ) {
if( /X-Header: .*?blah=($find)/ ) {
$found = $1;
last;
}
}
say $found if $found;
Sample demo code
use strict;
use warnings;
use feature 'say';
use Data::Dumper;
my $find = 'foo';
my $found;
my #header = <DATA>;
chomp(#header);
for ( #header ) {
$found = $1 if /X-Header: .*?blah=($find)/;
last if $found;
}
say Dumper(\#header);
say "Found: $found" if $found;
__DATA__
X-Header: test-abc12345; blah=foo
X-Header: test-fgasjhgakg; blah=bar
Output
$VAR1 = [
'X-Header: test-abc12345; blah=foo',
'X-Header: test-fgasjhgakg; blah=bar'
];
Found: foo

Dynamically capture regular expression match in Perl

I'm trying to dynamically catch regex matching in Perl. I've known that eval will help me do this but I may be doing something wrong.
Code:
use strict;
use warnings;
my %testHash = (
'(\d+)\/(\d+)\/(\d+)' => '$1$2$3'
);
my $str = '1/12/2016';
foreach my $pattern (keys (%testHash)) {
my $value = $testHash{$pattern};
my $result;
eval {
local $_ = $str;
/$pattern/;
print "\$1 - $1\n";
print "\$2 - $2\n";
print "\$3 - $3\n";
eval { print "$value\n"; }
}
}
Is it also possible to store captured regex patterns in an array?
I believe what you really want is a dynamic version of the following:
say $str =~ s/(\d+)\/(\d+)\/(\d+)/$1$2$3/gr;
String::Substitution provides what we need to achieve that.
use String::Substitution qw( gsub_copy );
for my $pattern (keys(%testHash)) {
my $replacement = $testHash{$pattern};
say gsub_copy($str, $pattern, $replacement);
}
Note that $replacement can also be a callback. This permits far more complicated substitutions. For example, if you wanted to convert 1/12/2016 into 2016-01-12, you could use the following:
'(\d+)/(\d+)/(\d+)' => sub { sprintf "%d-%02d-%02d", #_[3,1,2] },
To answer your actual question:
use String::Substitution qw( interpolate_match_vars last_match_vars );
for my $pattern (keys(%testHash)) {
my $template = $testHash{$pattern};
$str =~ $pattern # Or /$pattern/ if you prefer
or die("No match!\n");
say interpolate_match_vars($template, last_match_vars());
}
I am not completely sure what you want to do here, but I don't think your program does what you think it does.
You are useing eval with a BLOCK of code. That's like a try block. If it dies inside of that eval block, it will catch that error. It will not run your string like it was code. You need a string eval for that.
Instead of explaining that, here's an alternative.
This program uses sprintf and numbers the parameters. The %1$s syntax in the pattern says _take the first argument (1$) and format it as a string (%s). You don't need to localize or assign to $_ to do a match. The =~ operator does that on other variables for you. I also use qr{} to create a quoted regular expression (essentially a variable containing a precompiled pattern) that I can use directly. Because of the {} as delimiter, I don't need to escape the slashes.
use strict;
use warnings;
use feature 'say'; # like print ..., "\n"
my %testHash = (
qr{(\d+)/(\d+)/(\d+)} => '%1$s.%2$s.%3$s',
qr{(\d+)/(\d+)/(\d+) nomatch} => '%1$s.%2$s.%3$s',
qr{(\d+)/(\d+)/(\d\d\d\d)} => '%3$4d-%2$02d-%1$02d',
qr{\d} => '%s', # no capture group
);
my $str = '1/12/2016';
foreach my $pattern ( keys %testHash ) {
my #captures = ( $str =~ $pattern );
say "pattern: $pattern";
if ($#+ == 0) {
say " no capture groups";
next;
}
unless (#captures) {
say " no match";
next;
}
# debug-output
for my $i ( 1 .. $#- ) {
say sprintf " \$%d - %s", $i, $captures[ $i - 1 ];
}
say sprintf $testHash{$pattern}, #captures;
}
I included four examples:
The first pattern is the one you had. It uses %1$s and so on as explained above.
The second one does not match. We check the number of elements in #captured by looking at it in scalar context.
The third one shows that you can also reorder the result, or even use the sprintf formatting.
The last one has no capture group. We check by looking at the index of the last element ($# as the sigil for arrays that usually have an # sigil) in #+, which holds the offsets of the ends of the last successful submatches in the currently active dynamic scope. The first element is the end of the overall match, so if this only has one element, we don't have capture groups.
The output for me is this:
pattern: (?^:(\d+)/(\d+)/(\d\d\d\d))
$1 - 1
$2 - 12
$3 - 2016
2016-12-01
pattern: (?^:(\d+)/(\d+)/(\d+) nomatch)
no match
pattern: (?^:\d)
no capture groups
pattern: (?^:(\d+)/(\d+)/(\d+))
$1 - 1
$2 - 12
$3 - 2016
1.12.2016
Note that the order in the output is mixed up. That's because hashes are not ordered in Perl, and if you iterate over the keys in a hash without sort the order is random.
Apologies! I realized both my question and sample code were both vague. But after reading your suggestions I came of with the following code.
I haven't optimized this code yet and there is a limit to the replacement.
foreach my $key (keys %testHash) {
if ( $str =~ $key ) {
my #matchArr = ($str =~ $key); # Capture all matches
# Search and replace (limited from $1 to $9)
for ( my $i = 0; $i < #matchArr; $i++ ) {
my $num = $i+1;
$testHash{$key} =~ s/\$$num/$matchArr[$i]/;
}
$result = $testHash{$key};
last;
}
}
print "$result\n";
Evaluing the regexp in list context returns the matches. so in your example:
use Data::Dumper; # so we can see the result
foreach my $pattern (keys (%testHash)) {
my #a = ($str =~/$pattern/);
print Dumper(\#a);
}
would do the job.
HTH
Georg
Is it also possible to store captured regex patterns in an array?
Of course it is possible to store captured substrings in an array:
#!/usr/bin/env perl
use strict;
use warnings;
my #patterns = map qr{$_}, qw{
(\d+)/(\d+)/(\d+)
};
my $str = '1/12/2016';
foreach my $pattern ( #patterns ) {
my #captured = ($str =~ $pattern)
or next;
print "'$_'\n" for #captured;
}
Output:
'1'
'12'
'2016'
I do not quite understand what you are trying to do with combinations of local, eval EXPR and eval BLOCK in your code and the purpose of the following hash:
my %testHash = (
'(\d+)\/(\d+)\/(\d+)' => '$1$2$3'
);
If you are trying to codify that this pattern should result in three captures, you can do that like this:
my #tests = (
{
pattern => qr{(\d+)/(\d+)/(\d+)},
ncaptures => 3,
}
);
my $str = '1/12/2016';
foreach my $test ( #tests ) {
my #captured = ($str =~ $test->{pattern})
or next;
unless (#captured == $test->{ncaptures}) {
# handle failure
}
}
See this answer to find out how you can automate counting the number of capture groups in a pattern. Using the technique in that answer:
#!/usr/bin/env perl
use strict;
use warnings;
use Test::More;
my #tests = map +{ pattern => qr{$_}, ncaptures => number_of_capturing_groups($_) }, qw(
(\d+)/(\d+)/(\d+)
);
my $str = '1/12/2016';
foreach my $test ( #tests ) {
my #captured = ($str =~ $test->{pattern});
ok #captured == $test->{ncaptures};
}
done_testing;
sub number_of_capturing_groups {
"" =~ /|$_[0]/;
return $#+;
}
Output:
ok 1
1..1

Perl parse second instance with regex

I have code which get a rate of exchange:
#!/usr/bin/perl
use warnings;
use strict;
use LWP::Simple;
use POSIX qw(strftime);
use Math::Round;
use CGI qw(header start_html end_html);
use DBI;
sub isfloat {
my $val = shift;
return $val =~ m/^\d+.\d+$/;
}
.....
my $content = get('URL PAGE');
$content =~ /\s+(\d,\d{4})/gi;
my $dolar = $1;
$dolar =~ s/\,/./g;
if (!isfloat($dolar)) {
error("Error USD!");
}
How can I grab second instance /\s+(\d,\d{4})/gi ??
I tried solution from Perl Cookbook like this:
$content =~ /(?:\s+(\d,\d{4})) {2} \s+(\d,\d{4})/i;
but I have errors:
Use of uninitialized value $val in pattern match (m//)
Use of uninitialized value $dolar in substitution (s///)
Assign the pattern match operator result to an array. The array will contain all capture groups from all matches:
my $content = "abc 1,2345 def 0,9876 5,6789";
my #dollars = $content =~ /\s+(\d,\d{4})/g;
# Now, use the captures in #dollars this way:
foreach my $dollar (#dollars[0,1]) {
# process the $dollar items in a loop
}
# ... or this way:
my $dollar1 = shift #dollars;
# process the $dollar1
my $dollar2 = shift #dollars;
# process the $dollar2

Perl Inserting string into a url at specific places

I have the following piece of code:
#!/usr/bin/perl
use strict;
use warnings;
#use diagnostics;
use URI qw( );
my #insert_words = qw(HELLO GOODBYE);
while (<DATA>) {
chomp;
my $url = URI->new($_);
my $query = $url->query;
foreach (#insert_words) {
# Use package vars to communicate with /(?{})/ blocks.
local our $insert_word = $_;
local our #queries;
if (defined $query) {
$query =~ m{
^(.*[/=])([^/=&]*)((?:[/=&].*)?)\z
(?{
if (length $2) {
push #queries, "$1$insert_word$2$3";
push #queries, "$1$insert_word$3";
push #queries, "$1$2$insert_word$3";
}
})
(?!)
}x;
}
if (#queries) {
for (#queries) {
$url->query($_);
print $url, "\n";
}
}
else {
print $url, "\n";
}
}
}
__DATA__
http://www.example.com/index.php?route=9&other=7
The above piece of code works correctly and produces the following output:
http://www.example.com/index.php?route=9&other=HELLO7
http://www.example.com/index.php?route=9&other=HELLO
http://www.example.com/index.php?route=9&other=7HELLO
http://www.example.com/index.php?route=HELLO9&other=7
http://www.example.com/index.php?route=HELLO&other=7
http://www.example.com/index.php?route=9HELLO&other=7
http://www.example.com/index.php?route=9&other=GOODBYE7
http://www.example.com/index.php?route=9&other=GOODBYE
http://www.example.com/index.php?route=9&other=7GOODBYE
http://www.example.com/index.php?route=GOODBYE9&other=7
http://www.example.com/index.php?route=GOODBYE&other=7
http://www.example.com/index.php?route=9GOODBYE&other=7
As you can see it inserts the words in the array at specific places in the url.
What I am now having problems with:
I would now like to add the functionality to do all the possible combinations of HELLO and GOODBYE (or whatever is in the #insert_words) as well, for example it should also add the following url's to the output I already get:
http://www.example.com/index.php?route=HELLO&other=GOODBYE
http://www.example.com/index.php?route=HELLO&other=HELLO
http://www.example.com/index.php?route=GOODBYE&other=HELLO
http://www.example.com/index.php?route=GOODBYE&other=GOODBYE
But I do not know how to go about this in the best way?
Your help with this will be much appreciated, many thanks
Please don't use fancy regexes like that - they are an experimental feature of Perl and are far from simple to comprehend.
If I understand you then you need to do this recursively.
I think you want all variations of the URL with each query parameter as it is, or preceded, succeeded, or replaced by every value in #insert_words.
This seems to do what you ask. It uses URI::QueryParam to split up the query portion of the URL properly instead of using your nasty regex. It does produce substantially more combinations than you show in your question but I can see no other way of interpreting your requirement.
The number of possible variations is 49. Each parameter can have its original value, or be preceded, succeeded or replaced by either of two values. That is seven possible values for each parameter and so 7² or 49 different variations for two parameters.
use strict;
use warnings;
use URI;
use URI::QueryParam;
my #insert_words = qw/ HELLO GOODBYE /;
my #urls;
sub mod_param {
my ($url, $paridx, #insertions) = #_;
my #params = $url->query_param;
return if $paridx > $#params;
my $key = $params[$paridx];
my $oldval = $url->query_param($key);
my #variations = ($oldval);
push #variations, ($oldval.$_, $_.$oldval, $_) for #insertions;
for my $val (#variations) {
$url->query_param($key, $val);
if ($paridx == $#params) {
push #urls, "$url";
}
else {
mod_param($url, $paridx + 1, #insertions);
}
}
$url->query_param($key, $oldval);
}
while (<DATA>) {
chomp;
my $url = URI->new($_);
#urls = ();
mod_param($url, 0, #insert_words);
print $_, "\n" for #urls;
}
__DATA__
http://www.example.com/index.php?route=9&other=7
output
http://www.example.com/index.php?route=9&other=7
http://www.example.com/index.php?route=9&other=7HELLO
http://www.example.com/index.php?route=9&other=HELLO7
http://www.example.com/index.php?route=9&other=HELLO
http://www.example.com/index.php?route=9&other=7GOODBYE
http://www.example.com/index.php?route=9&other=GOODBYE7
http://www.example.com/index.php?route=9&other=GOODBYE
http://www.example.com/index.php?route=9HELLO&other=7
http://www.example.com/index.php?route=9HELLO&other=7HELLO
http://www.example.com/index.php?route=9HELLO&other=HELLO7
http://www.example.com/index.php?route=9HELLO&other=HELLO
http://www.example.com/index.php?route=9HELLO&other=7GOODBYE
http://www.example.com/index.php?route=9HELLO&other=GOODBYE7
http://www.example.com/index.php?route=9HELLO&other=GOODBYE
http://www.example.com/index.php?route=HELLO9&other=7
http://www.example.com/index.php?route=HELLO9&other=7HELLO
http://www.example.com/index.php?route=HELLO9&other=HELLO7
http://www.example.com/index.php?route=HELLO9&other=HELLO
http://www.example.com/index.php?route=HELLO9&other=7GOODBYE
http://www.example.com/index.php?route=HELLO9&other=GOODBYE7
http://www.example.com/index.php?route=HELLO9&other=GOODBYE
http://www.example.com/index.php?route=HELLO&other=7
http://www.example.com/index.php?route=HELLO&other=7HELLO
http://www.example.com/index.php?route=HELLO&other=HELLO7
http://www.example.com/index.php?route=HELLO&other=HELLO
http://www.example.com/index.php?route=HELLO&other=7GOODBYE
http://www.example.com/index.php?route=HELLO&other=GOODBYE7
http://www.example.com/index.php?route=HELLO&other=GOODBYE
http://www.example.com/index.php?route=9GOODBYE&other=7
http://www.example.com/index.php?route=9GOODBYE&other=7HELLO
http://www.example.com/index.php?route=9GOODBYE&other=HELLO7
http://www.example.com/index.php?route=9GOODBYE&other=HELLO
http://www.example.com/index.php?route=9GOODBYE&other=7GOODBYE
http://www.example.com/index.php?route=9GOODBYE&other=GOODBYE7
http://www.example.com/index.php?route=9GOODBYE&other=GOODBYE
http://www.example.com/index.php?route=GOODBYE9&other=7
http://www.example.com/index.php?route=GOODBYE9&other=7HELLO
http://www.example.com/index.php?route=GOODBYE9&other=HELLO7
http://www.example.com/index.php?route=GOODBYE9&other=HELLO
http://www.example.com/index.php?route=GOODBYE9&other=7GOODBYE
http://www.example.com/index.php?route=GOODBYE9&other=GOODBYE7
http://www.example.com/index.php?route=GOODBYE9&other=GOODBYE
http://www.example.com/index.php?route=GOODBYE&other=7
http://www.example.com/index.php?route=GOODBYE&other=7HELLO
http://www.example.com/index.php?route=GOODBYE&other=HELLO7
http://www.example.com/index.php?route=GOODBYE&other=HELLO
http://www.example.com/index.php?route=GOODBYE&other=7GOODBYE
http://www.example.com/index.php?route=GOODBYE&other=GOODBYE7
http://www.example.com/index.php?route=GOODBYE&other=GOODBYE

Perl Regular Expression to insert/substitute in a string at specific places

Given a url the following regular expression is able insert/substitute in words at certain points in the urls.
Code:
#!/usr/bin/perl
use strict;
use warnings;
#use diagnostics;
my #insert_words = qw/HELLO GOODBYE/;
my $word = 0;
my $match;
while (<DATA>) {
chomp;
foreach my $word (#insert_words)
{
my $repeat = 1;
while ((my $match=$_) =~ s|(?<![/])(?:[/](?![/])[^/]*){$repeat}[^/]*\K|$word|)
{
print "$match\n";
$repeat++;
}
print "\n";
}
}
__DATA__
http://www.stackoverflow.com/dog/cat/rabbit/
http://www.superuser.co.uk/dog/cat/rabbit/hamster/
10.15.16.17/dog/cat/rabbit/
The output given (for the first example url in __DATA__ with the HELLO word):
http://www.stackoverflow.com/dogHELLO/cat/rabbit/
http://www.stackoverflow.com/dog/catHELLO/rabbit/
http://www.stackoverflow.com/dog/cat/rabbitHELLO/
http://www.stackoverflow.com/dog/cat/rabbit/HELLO
Where I am now stuck:
I would now like to alter the regular expression so that the output will look like what is shown below:
http://www.stackoverflow.com/dogHELLO/cat/rabbit/
http://www.stackoverflow.com/dog/catHELLO/rabbit/
http://www.stackoverflow.com/dog/cat/rabbitHELLO/
http://www.stackoverflow.com/dog/cat/rabbit/HELLO
#above is what it already does at the moment
#below is what i also want it to be able to do as well
http://www.stackoverflow.com/HELLOdog/cat/rabbit/ #<-puts the word at the start of the string
http://www.stackoverflow.com/dog/HELLOcat/rabbit/
http://www.stackoverflow.com/dog/cat/HELLOrabbit/
http://www.stackoverflow.com/dog/cat/rabbit/HELLO
http://www.stackoverflow.com/HELLO/cat/rabbit/ #<- now also replaces the string with the word
http://www.stackoverflow.com/dog/HELLO/rabbit/
http://www.stackoverflow.com/dog/cat/HELLO/
http://www.stackoverflow.com/dog/cat/rabbit/HELLO
But I am having trouble getting it to automatically do this within the one regular expression.
Any help with this matter would be highly appreciated, many thanks
One solution:
use strict;
use warnings;
use URI qw( );
my #insert_words = qw( HELLO );
while (<DATA>) {
chomp;
my $url = URI->new($_);
my $path = $url->path();
for (#insert_words) {
# Use package vars to communicate with /(?{})/ blocks.
local our $insert_word = $_;
local our #paths;
$path =~ m{
^(.*/)([^/]*)((?:/.*)?)\z
(?{
push #paths, "$1$insert_word$2$3";
if (length($2)) {
push #paths, "$1$insert_word$3";
push #paths, "$1$2$insert_word$3";
}
})
(?!)
}x;
for (#paths) {
$url->path($_);
print "$url\n";
}
}
}
__DATA__
http://www.stackoverflow.com/dog/cat/rabbit/
http://www.superuser.co.uk/dog/cat/rabbit/hamster/
http://10.15.16.17/dog/cat/rabbit/
Without crazy regexes:
use strict;
use warnings;
use URI qw( );
my #insert_words = qw( HELLO );
while (<DATA>) {
chomp;
my $url = URI->new($_);
my $path = $url->path();
for my $insert_word (#insert_words) {
my #parts = $path =~ m{/([^/]*)}g;
my #paths;
for my $part_idx (0..$#parts) {
my $orig_part = $parts[$part_idx];
local $parts[$part_idx];
{
$parts[$part_idx] = $insert_word . $orig_part;
push #paths, join '', map "/$_", #parts;
}
if (length($orig_part)) {
{
$parts[$part_idx] = $insert_word;
push #paths, join '', map "/$_", #parts;
}
{
$parts[$part_idx] = $orig_part . $insert_word;
push #paths, join '', map "/$_", #parts;
}
}
}
for (#paths) {
$url->path($_);
print "$url\n";
}
}
}
__DATA__
http://www.stackoverflow.com/dog/cat/rabbit/
http://www.superuser.co.uk/dog/cat/rabbit/hamster/
http://10.15.16.17/dog/cat/rabbit/
one more solution:
#!/usr/bin/perl
use strict;
use warnings;
my #insert_words = qw/HELLO GOODBYE/;
while (<DATA>) {
chomp;
/(?<![\/])(?:[\/](?![\/])[^\/]*)/p;
my $begin_part = ${^PREMATCH};
my $tail = ${^MATCH} . ${^POSTMATCH};
my #tail_chunks = split /\//, $tail;
foreach my $word (#insert_words) {
for my $index (1..$#tail_chunks) {
my #new_tail = #tail_chunks;
$new_tail[$index] = $word . $tail_chunks[$index];
my $str = $begin_part . join "/", #new_tail;
print $str, "\n";
$new_tail[$index] = $tail_chunks[$index] . $word;
$str = $begin_part . join "/", #new_tail;
print $str, "\n";
}
print "\n";
}
}
__DATA__
http://www.stackoverflow.com/dog/cat/rabbit/
http://www.superuser.co.uk/dog/cat/rabbit/hamster/
10.15.16.17/dog/cat/rabbit/