Masking a string in perl using a mask string - regex

I have a string such as 'xxox-x' that I want to mask each line in a file against as such:
x's are ignored (or just set to a known value)
o's remain unchanged
the - is a variable length field that will keep everything else unchanged
therefore mask 'xxox-x' against 'deadbeef' would yield 'xxaxbeex'
the same mask 'xxox-x' against 'deadabbabeef' would yield 'xxaxabbabeex'
How can I do this succinctly preferrably using s operator?

$mask =~ s/-/'o' x (length $str - length $mask)/e;
$str =~ s/(.)/substr($mask, pos $str, 1) eq 'o' ? $1 : 'x'/eg;

$ perl -pe 's/^..(.).(.+).$/xx$1x$2x/;'
deadbeef
xxaxbeex
deadabbabeef
xxaxabbabeex

Compile your pattern into a Perl sub:
sub compile {
use feature 'switch';
my($pattern) = #_;
die "illegal pattern" unless $pattern =~ /^[-xo]+$/;
my($search,$replace);
my $i = 0;
for (split //, $pattern) {
given ($_) {
when ("x") {
$search .= "."; $replace .= "x";
}
when ("o") {
$search .= "(?<sub$i>.)";
$replace .= "\$+{sub$i}";
++$i;
}
when ("-") {
$search .= "(?<sub$i>.*)";
$replace .= "\$+{sub$i}";
++$i;
}
}
}
my $code = q{
sub {
local($_) = #_;
s/^SEARCH$/REPLACE/s;
$_;
}
};
$code =~ s/SEARCH/$search/;
$code =~ s/REPLACE/$replace/;
#print $code;
local $#;
my $sub = eval $code;
die $# if $#;
$sub;
}
To be more concise, you could write
sub _patref { '$+{sub' . $_[0]++ . '}' }
sub compile {
my($pattern) = #_;
die "illegal pattern" unless $pattern =~ /^[-xo]+$/;
my %gen = (
'x' => sub { $_[1] .= '.'; $_[2] .= 'x' },
'o' => sub { $_[1] .= "(?<sub$_[0]>.)"; $_[2] .= &_patref },
'-' => sub { $_[1] .= "(?<sub$_[0]>.*)"; $_[2] .= &_patref },
);
my($i,$search,$replace) = (0,"","");
$gen{$1}->($i,$search,$replace)
while $pattern =~ /(.)/g;
eval "sub { local(\$_) = \#_; s/\\A$search\\z/$replace/; \$_ }"
or die $#;
}
Testing it:
use v5.10;
my $replace = compile "xxox-x";
my #tests = (
[ deadbeef => "xxaxbeex" ],
[ deadabbabeef => "xxaxabbabeex" ],
);
for (#tests) {
my($input,$expect) = #$_;
my $got = $replace->($input);
print "$input => $got : ", ($got eq $expect ? "PASS" : "FAIL"), "\n";
}
Output:
deadbeef => xxaxbeex : PASS
deadabbabeef => xxaxabbabeex : PASS
Note that you'll need Perl 5.10.x for given ... when.

x can be translated to . and o to (.) whereas - becomes (.+?):
#!/usr/bin/perl
use strict; use warnings;
my %s = qw( deadbeef xxaxbeex deadabbabeef xxaxabbabeex);
for my $k ( keys %s ) {
(my $x = $k) =~ s/^..(.).(.+?).\z/xx$1x$2x/;
print +($x eq $s{$k} ? 'good' : 'bad'), "\n";
}

heres a quick stab at a regex generator.. maybe somebody can refactor something pretty from it?
#!/usr/bin/perl
use strict;
use Test::Most qw( no_plan );
my $mask = 'xxox-x';
is( mask( $mask, 'deadbeef' ), 'xxaxbeex' );
is( mask( $mask, 'deadabbabeef' ), 'xxaxabbabeex' );
sub mask {
my ($mask, $string) = #_;
my $regex = $mask;
my $capture_index = 1;
my $mask_rules = {
'x' => '.',
'o' => '(.)',
'-' => '(.+)',
};
$regex =~ s/$_/$mask_rules->{$_}/g for keys %$mask_rules;
$mask =~ s/$_/$mask_rules->{$_}/g for keys %$mask_rules;
$mask =~ s/\./x/g;
$mask =~ s/\([^)]+\)/'$' . $capture_index++/eg;
eval " \$string =~ s/^$regex\$/$mask/ ";
$string;
}

Here's a character by character solution using substr rather that split. It should be efficient for long strings since it skips processing the middle part of the string (when there is a dash).
sub apply_mask {
my $mask = shift;
my $string = shift;
my ($head, $tail) = split /-/, $mask;
for( 0 .. length($head) - 1 ) {
my $m = substr $head, $_, 1;
next if $m eq 'o';
die "Bad char $m\n" if $m ne 'x';
substr($string, $_, 1) = 'x';
}
return $string unless defined $tail;
$tail = reverse $tail;
my $last_char = length($string) - 1;
for( 0 .. length($tail) - 1 ) {
my $m = substr $tail, $_, 1;
next if $m eq 'o';
die "Bad char $m\n" if $m ne 'x';
substr($string, $last_char - $_, 1) = 'x';
}
return $string;
}

sub mask {
local $_ = $_[0];
my $mask = $_[1];
$mask =~ s/-/'o' x (length($_)-(length($mask)-1))/e;
s/(.)/substr($mask, pos, 1) eq 'o' && $1/eg;
return $_;
}
Used tidbits from a couple answers ... this is what I ended up with.
EDIT: update from comments

Related

Dynamic replacing and removing C code (using Perl)

I come to you with my question: what is the best way to replace/remove pieces of C code dynamically? I already did something in Perl using regular expressions and reading what to replace/remove from a configuration file but I can't make it dynamic.
Code:
#autoflush output so it will not interfere with calling application
local $| = 1;
##
#libraries
use warnings;
use strict;
use Switch;
use Cwd;
use File::Find;
##
#global variables
my #config;
my $file;
my $directory;
my $result;
##
if(#ARGV < 1)
{
$directory = cwd();
}
else
{
$directory = $ARGV[0];
}
$result = $directory . "\\result";
if(! -d $result)
{
mkdir ($result);
}
open LOG, ">", $result . "\\log.log";
sub start
{
my $configFile = $_[0];
open CONFIG, $configFile;
local $/;
my $conf = <CONFIG>;
close CONFIG;
foreach my $line (split(/\n\*/, $conf))
{
if(index($line, "*") == 0)
{
$line = substr($line, 1);
}
setConfig($line);
}
processFiles();
}
sub setConfig
{
my $line = $_[0];
my $count = () = $line =~ /\s*==>\s*/;
switch($count)
{
case 0
{
remove($line);
}
case 1
{
replace($line);
}
}
}
sub addSlashes
{
$_[0] =~ s/([\.\\\/\+\*\?\[\^\]\(\)\{\}\=\!\<\>\|\:\-])/\\$1/xg;
if($_[1] == 1)
{
$_[0] =~ s/([\$])/\\$1/xg;
}
return;
}
sub remove
{
my $line = $_[0];
addSlashes($line, 1);
$line =~ s/(\\\$){3}/\.\+/g;
$config[#config][0] = qr/$line/;
$config[#config - 1][1] = q("");
$line = "\\(" . $line . "\\(";
$config[#config - 1][2] = qr/$line/;
}
sub replace
{
my $line = $_[0];
my #split = split(/\s*==>\s*/, $line);
my $original = $split[0];
my $replace = $split[1];
my $regex;
addSlashes($original, 1);
addSlashes($replace, 0);
my $counter = 1;
while($original =~ /\\\$([\d]{1,3})\\\$/g)
{
if($1 <= $counter && $1 > 0)
{
$counter++;
}
else
{
print "Invalid format\n";
return;
}
}
if($counter == 1)
{
$config[#config][0] = qr/$original/;
$config[#config - 1][1] = q(") . $replace . q(");
$original = "\\(" . $original . "\\(";
$config[#config - 1][2] = qr/$original/;
return;
}
while($replace =~ /\$([\d]{1,3})\$/g)
{
if($1 <= 0 && $1 >= $counter)
{
print "Invalid format\n";
return;
}
}
$original =~ s/\\\$\d{1,3}\\\$/\(\.\+\?\)/xg;
$original =~ s/\?\)$/\)/xg;
$replace =~ s/\$(\d{1,3})\$/\$$1/xg;
$config[#config][0] = qr/$original/;
$config[#config - 1][1] = q(") . $replace . q(");
$original = "\\(" . $original . "\\(";
$config[#config - 1][2] = qr/$original/;
}
sub processFiles
{
my #files = grep { ! -d } glob "$directory\\*";
foreach my $file (#files)
{
if($file =~ /\.(h|c)$/)
{
process($file);
}
}
}
sub process
{
my $file = $_[0];
open READ, $file;
local $/;
my $text = <READ>;
close READ;
print LOG "\n--> $file <--\n";
for(my $i = 0; $i < #config; $i++)
{
my $original = $config[$i][0];
my $replace = $config[$i][1];
my $log = $config[$i][2];
while($text =~ /$log/g)
{
print LOG $log . " ----> " . $1 . "\n";
}
$text =~ s/$original/$replace/eeg;
print LOG "\n";
}
$file = $result . substr($file, rindex($file, "\\"));
open WRITE, ">", $file;
print WRITE $text;
close WRITE;
}
start("qm2asil.cfg");
close LOG;
Configuration file content:
*static
*GET_$1$() ==> $1$
*GET_$1$($2$) ==> $1$[$2$]
*SET_$1$($2$,$3$); ==> $1$[$2$] = $3$;
*SET_$1$($2$); ==> $1$ = $2$;
The idea is that there are already a few rules to replace/remove and they work but can exist more complex rules that I couldn't manage.
Example:
SET_VAR1((i),(u8)(((s32)(((s32)GET_VAR2 ((i))) != 0)) && ((s32)((u8)(((s32) (((s32)VAR3[i]) != 0)) ^ ((s32)(((s32) VAR4[i]) != 0)))))));
I want to remove SET function and make it an assignment to the variable (VAR1[i] = ...). This is one of many variations of things that need to be removed/replaced.
What do you advise me to do? Can I make it work using Perl and regex or I should reorientate to another method and/or programming language?
EDIT: I already create regexes based on the configuration file but I have problem matching unknown expressions (currently I use .+). The main idea is that I want to keep the configuration as simple I can.
regexes

Print out preceding lines after regex match

I am writing a sort of quiz program. I am using a .txt file as a test bank, but cant figure out how to (using regex's) match each question and print out the possible answers on different lines.I originally was just going to do true false so I didnt need to match anything else and just matching "1" worked fine. Basically I just need the question on one line and the answers on others. Here is an example of a question
1.) some text
a.) solution
b.) solution
c.) solution
code i had before:
while (<$test>) {
foreach my $line (split /\n/) {
my $match1 = "1";
if ($line =~ /$match1/) {
$question1 = $line;
print "$question1\n";
print "Answer: ";
$answer1 = <>;
chomp ($answer1);
if ( $answer1 =~ /(^a$)/i) {
$score1 = 20;
push #score, $score1;
}
}
I really couldn't get what you were getting at, so I wrote this sample program.
use 5.016;
use strict;
use warnings;
my ( #lines, #questions, $current_question );
sub prompt {
my ( $prompt ) = #_;
print $prompt, ' ';
STDOUT->flush;
my $val = <>;
return $val;
}
QUESTION:
while ( <DATA> ) {
if ( my ( $ans ) = m/^=(\w+)/ ) {
INPUT: {
say #lines;
last unless defined( my $answer = prompt( 'Your answer:' ));
say '';
my ( $response ) = $answer =~ /([a-z])\s*$/;
if ( not $response ) {
$answer =~ s/\s*$//; #/
say "Invalid response. '$answer' is not an answer!\n";
redo INPUT;
}
if ( $response eq $ans ) {
say 'You are right!';
}
elsif ( my $ansln = $current_question->{$response} ) {
if ( $response eq 'q' ) {
say 'Quitting...';
last QUESTION;
}
say <<"END_SAY";
You chose:\n$current_question->{$response}
The correct answer was:\n$current_question->{$ans}
END_SAY
}
else {
say "Invalid response. '$response' is not an answer!\n";
redo INPUT;
}
};
#lines = ();
prompt( 'Press enter to continue.' );
say '';
}
else {
if ( my ( $qn, $q ) = m/^\s*(\d+)\.\)\s+(.*\S)\s*$/ ) {
push #questions, $current_question = { question => $q };
}
else {
my ( $l, $a ) = m/^\s+([a-z])/;
$current_question->{$l} = ( m/(.*)/ )[0];
}
push #lines, $_;
}
}
__DATA__
1.) Perl is
a.) essential
b.) fun
c.) useful
=c
2.) This question is
a.) Number two
b.) A test to see how this format is parsed.
c.) Unneeded
=b
This is probably over simplified.
It just reads in the test data and creates a structure.
You could use it to grade test takers answers.
use strict;
use warnings;
use Data::Dumper;
$/ = undef;
my $testdata = <DATA>;
my %HashTest = ();
my $hchoices;
my $hqeustion;
my $is_question = 0;
while ( $testdata =~ /(^.*)\n/mg )
{
my $line = $1;
$line =~ s/^\s+|\s+$//g;
next if ( length( $line ) == 0);
if ( $line =~ /^(\d+)\s*\.\s*\)\s*(.*)/ )
{
$is_question = 1;
$HashTest{ $1 }{'question'} = $2;
$HashTest{ $1 }{'choices'} = {};
$HashTest{ $1 }{'answer'} = 'unknown';
$hqeustion = $HashTest{ $1 };
$hchoices = $HashTest{ $1 }{'choices'};
}
elsif ( $is_question && $line =~ /^\s*(answer)\s*:\s*([a-z])/ )
{
$hqeustion->{'answer'} = $2;
}
elsif ( $is_question && $line =~ /^\s*([a-z])\s*\.\s*\)\s*(.*)/ )
{
$hchoices->{ $1 } = $2;
}
}
print "\nQ & A summary\n-------------------------\n";
for my $qnum ( keys %HashTest )
{
print "Question $qnum: $HashTest{$qnum}{'question'}'\n";
my $ans_code = $HashTest{$qnum}{'answer'};
print "Answer: ($ans_code) $HashTest{$qnum}{'choices'}{$ans_code}\n\n";
}
print "---------------------------\n";
print Dumper(\%HashTest);
__DATA__
1.) What is the diameter of the earth?
a.) Half the distance to the sun
b.) Same as the moon
c.) 6,000 miles
answer: c
2.) Who is buried in Grants Tomb?
a.) Thomas Edison
b.) Grant, who else
c.) Jimi Hendrix
answer: b
Output:
Q & A summary
-------------------------
Question 1: What is the diameter of the earth?'
Answer: (c) 6,000 miles
Question 2: Who is buried in Grants Tomb?'
Answer: (b) Grant, who else
---------------------------
$VAR1 = {
'1' => {
'question' => 'What is the diameter of the earth?',
'answer' => 'c',
'choices' => {
'c' => '6,000 miles',
'a' => 'Half the distance to the sun',
'b' => 'Same as the moon'
}
},
'2' => {
'question' => 'Who is buried in Grants Tomb?',
'answer' => 'b',
'choices' => {
'c' => 'Jimi Hendrix',
'a' => 'Thomas Edison',
'b' => 'Grant, who else'
}
}
};

Find text enclosed by # and replace the inside

The problem:
Find pieces of text in a file enclosed by # and replace the inside
Input:
#abc# abc #ABC#
cba #cba CBA#
Deisred output:
абц abc АБЦ
cba цба ЦБА
I have the following:
#!/usr/bin/perl
use strict;
use warnings;
use Encode;
my $output;
open FILE,"<", 'test.txt';
while (<FILE>) {
chomp(my #chars = split(//, $_));
for (#chars) {
my #char;
$_ =~ s/a/chr(0x430)/eg;
$_ =~ s/b/chr(0x431)/eg;
$_ =~ s/c/chr(0x446)/eg;
$_ =~ s/d/chr(0x434)/eg;
$_ =~ s/e/chr(0x435)/eg;
$_ =~ s/A/chr(0x410)/eg;
$_ =~ s/B/chr(0x411)/eg;
$_ =~ s/C/chr(0x426)/eg;
push #char, $_;
$output = join "", #char;
print encode("utf-8",$output);}
print "\n";
}
close FILE;
But I'm stuck on how to process further
Thanks for help in advance!
Kluther
Here my solution. (you will fixed it, yes. It is prototype)
for (my $data = <DATA>){
$data=~s/[#]([\s\w]+)[#]/func($1)/ge;
print $data;
# while($data=~m/[#]([\s\w]+)[#]/g){
# print "marked: ",$1,"\n";
# print "position:", pos();
# }
# print "not marked: ";
}
sub func{
#do your magic here ;)
return "<< #_ >>";
}
__DATA__
#abc# abc #ABC# cba #cba CBA#
What happens here?
First, I read data. You can do it yourself.
for (my $data = <DATA>){...}
Next, I need to search your pattern and replace it.
What should I do?
Use substition operator: s/pattern/replace/
But in interesting form:
s/pattern/func($1)/ge
Key g mean Global Search
Key e mean Evaluate
So, I think, that you need to write your own func function ;)
Maybe better to use transliteration operator: tr/listOfSymbolsToBeReplaced/listOfSymbolsThatBePlacedInstead/
With minimal changes to your algorithm you need to keep track of whether you are inside the #marks or not. so add something like this
my $bConvert = 0;
chomp(my #chars = split(//, $_));
for (#chars) {
my $char = $_;
if (/#/) {
$bConvert = ($bConvert + 1) % 2;
next;
}
elsif ($bConvert) {
$char =~ s/a/chr(0x430)/eg;
$char =~ s/b/chr(0x431)/eg;
$char =~ s/c/chr(0x446)/eg;
$char =~ s/d/chr(0x434)/eg;
$char =~ s/e/chr(0x435)/eg;
$char =~ s/A/chr(0x410)/eg;
$char =~ s/B/chr(0x411)/eg;
$char =~ s/C/chr(0x426)/eg;
}
print encode("utf-8",$char);
}
Try this after $output is processed.
$output =~ s/\#//g;
my #split_output = split(//, $output);
$output = "";
my $len = scalar(#split_output) ;
while ($len--) {
$output .= shift(#split_output);
}
print $output;
It can be done with a single regex and no splitting of the string:
use strict;
use warnings;
use Encode;
my %chars = (
a => chr(0x430),
b => chr(0x431),
c => chr(0x446),
d => chr(0x434),
e => chr(0x435),
A => chr(0x410),
B => chr(0x411),
C => chr(0x426),
);
my $regex = '(' . join ('|', keys %chars) . ')';
while (<DATA>) {
1 while ($_ =~ s|\#(?!\s)[^#]*?\K$regex(?=[^#]*(?!\s)\#)|$chars{$1}|eg);
print encode("utf-8",$_);
}
It does require repeated runs of the regex due to the overlapping nature of the matches.

Perl - Parsing Arguments/Options with REGEX

I'm creating a perl script to convert a list of commands in a template file () and output them to another file in a different format in an output file ().
The commands in the template file will look as follows:
command1 --max-size=2M --type="some value"
I'm having some problems extracting the options and values from this string. So far i have:
m/(\s--\w*=)/ig
Which will return:
" --max-size="
" --type="
However I have no idea how to return both the option and value as a separate variable or how to accommodate for the use of quotes.
Could anyone steer me in the right direction?
side note: I'm aware that Getops does an awesome job at doing this from the command-line but unfortunately these commands are passed as strings :(
Getopt::Std or Getopt::Long?
Have you looked at this option or this one?
Seems like there's no reason to reinvent the wheel.
The code below produces
#args = ('command1', '--max-size=2M', '--type=some value');
That is suitable to pass to GetOptions as follows:
local #ARGV = #args;
GetOptions(...) or die;
Finally, the code:
for ($cmd) {
my #args;
while (1) {
last if /\G \s* \z /xgc;
/\G \s* /xgc;
my $arg;
while (1) {
if (/\G ([^\\"'\s]) /xgc) {
$arg .= $1;
}
elsif (/\G \\ /xgc) {
/\G (.) /sxgc
or die "Incomplete escape";
$arg .= $1;
}
elsif (/\G (?=") /xgc) {
/\G " ( (?:[^"\\]|\\.)* ) " /sxgc
or die "Incomplete double-quoted arging";
my $quoted = $1;
$quoted =~ s/\\(.)/$1/sg;
$arg .= $quoted;
}
elsif (/\G (?=') /xgc) {
/\G ' ( [^']* ) ' /xgc
or die "Incomplete single-quoted arging";
$arg .= $1;
}
else {
last;
}
}
push #args, $arg;
}
#args
or die "Blank command";
...
}
use Data::Dumper;
$_ = 'command1 --max-size=2M a=ignore =ignore --switch --type="some value" --x= --z=1';
my %args;
while (/((?<=\s--)[a-z\d-]+)(?:="?|(?=\s))((?<![="])|(?<=")[^"]*(?=")|(?<==)(?!")\S*(?!"))"?(?=\s|$)/ig) {
$args->{$1} = $2;
}
print Dumper($args);
---
$VAR1 = {
'switch' => '',
'x' => '',
'type' => 'some value',
'z' => '1',
'max-size' => '2M'
};
(test this demo here)

Perl regex syntax generation

This is a follow up to the question posted here: Perl Regex syntax
The results from that discussion yielded this script:
#!/usr/bin/env perl
use strict;
use warnings;
my #lines = <DATA>;
my $current_label = '';
my #ordered_labels;
my %data;
for my $line (#lines) {
if ( $line =~ /^\/(.*)$/ ) { # starts with slash
$current_label = $1;
push #ordered_labels, $current_label;
next;
}
if ( length $current_label ) {
if ( $line =~ /^(\d) "(.*)"$/ ) {
$data{$current_label}{$1} = $2;
next;
}
}
}
for my $label ( #ordered_labels ) {
print "$label <- as.factor($label\n";
print " , levels= c(";
print join(',',map { $_ } sort keys %{$data{$label}} );
print ")\n";
print " , labels= c(";
print join(',',
map { '"' . $data{$label}{$_} . '"' }
sort keys %{$data{$label}} );
print ")\n";
print " )\n";
}
__DATA__
...A bunch of nonsense I do not care about...
...
Value Labels
/gender
1 "M"
2 "F"
/purpose
1 "business"
2 "vacation"
3 "tiddlywinks"
execute .
Essentially, I need to build the Perl to accommodate a syntax shorthand found in the SPSS file. For adjacent columns, SPSS allows one to type something like:
VALUE LABELS
/agree1 to agree5
1 "Strongly disagree"
2 "Disagree"
3 "Neutral"
4 "Agree"
5 "Strongly agree"
As the script currently exists, it will generate this:
agree1 to agree5 <- factor(agree1 to agree5
, levels= c(1,2,3,4,5,6)
, labels= c("Strongly disagree","Disagree","Neutral","Agree","Strongly agree","N/A")
)
and I need it to produce something like this:
agree1 <- factor(agree1
, levels= c(1,2,3,4,5,6)
, labels= c("Strongly disagree","Disagree","Neutral","Agree","Strongly agree","N/A")
)
agree2 <- factor(agree2
, levels= c(1,2,3,4,5,6)
, labels= c("Strongly disagree","Disagree","Neutral","Agree","Strongly agree","N/A")
)
…
use strict;
use warnings;
main();
sub main {
my #lines = <DATA>;
my $vlabels = get_value_labels(#lines);
write_output_delim($vlabels);
}
# Extract the value label information from SPSS syntax.
sub get_value_labels {
my (#vlabels, $i, $j);
for my $line (#_){
if ( $line =~ /^\/(.+)/ ){
my #vars = parse_var_range($1);
$i = #vlabels;
$j = $i + #vars - 1;
push #vlabels, { var => $_, codes => [] } for #vars;
}
elsif ( $line =~ /^\s* (\d) \s+ "(.*)"$/x ){
push #{$vlabels[$_]{codes}}, [$1, $2] for $i .. $j;
}
}
return \#vlabels;
}
# A helper function to handle variable ranges: "agree1 to agree3".
sub parse_var_range {
my $vr = shift;
my #vars = split /\s+ to \s+/x, $vr;
return $vr unless #vars > 1;
my ($stem) = $vars[0] =~ /(.+?)\d+$/;
my #n = map { /(\d+)$/ } #vars;
return map { "$stem" . $_ } $n[0] .. $n[1];
}
sub write_output_delim {
my $vlabels = shift;
for my $vlab (#$vlabels){
print $vlab->{var}, "\n";
print join("\t", '', #$_), "\n" for #{$vlab->{codes}}
}
}
sub write_output_factors {
# You get the idea...
}
__DATA__
/gender
1 "M"
2 "F"
/purpose
1 "business"
2 "vacation"
3 "tiddlywinks"
/agree1 to agree3
1 "Disagree"
2 "Neutral"
3 "Agree"