Perl Regex Filename Not Matching - regex

I'm running a Perl script that someone else wrote for me, and I've noticed that the following regex doesn't seem to match my files which are named "Film CD 1.avi", "Film2 CD 1.avi" etc:
$name = (split (/[ ]*CD[ ]*1/i, $in_avi))[0];
Anyone know what I need to do to correct it?
/Edit: The full code is this:
#!/usr/bin/perl
use strict;
########################################################################
# Configuration Details #
########################################################################
# Array of movies to convert
my #MOVIES_TO_CONVERT; # = ('The Figher', 'The Fifth Element');
# Directory where the split AVI files are stored
my $MOVIE_IN_PATH = 'D:\split';
# Directory where the combined AVI files will be stored
my $MOVIE_OUT_PATH = 'D:\combined';
# Full path to the avidemux executable
my $AVIDEMUX = 'C:\Program Files (x86)\Avidemux\avidemux.exe';
########################################################################
# Functions #
########################################################################
# Return an array of all the AVI files in the specified directory.
sub get_avis_in_directory
{
my $dh; # Directory handle
my $dir; # Current directory
my #avis; # Array of file names to return
opendir ($dh, $dir = shift) or die "Failed to open directory $dir: $!\n";
while (readdir $dh)
{
next if (/^\.{1,2}/);
$_ = $dir . "\\" . $_;
push (#avis, $_) if (-f $_ and /.*\.avi$/i);
}
closedir $dh;
return (#avis);
}
########################################################################
# Entry Point #
########################################################################
die "Input directory $MOVIE_IN_PATH does not exist!\n" unless (-d $MOVIE_IN_PATH);
die "Output directory $MOVIE_OUT_PATH does not exist!\n" unless (-d $MOVIE_OUT_PATH);
# This variable represents the actual names and paths of movies to be converted.
# It will either be built from the files specified in #MOVIES_TO_CONVERT manually, or
# built dynamically based on the files in the source and destination paths.
my #movies_formatted; # Array of hashes of movies to convert
if ($#MOVIES_TO_CONVERT == -1)
{
my #in_avis; # Array of AVI files in the input directory
my #out_avis; # Array of AVI files in the ouput directory
#in_avis = get_avis_in_directory ($MOVIE_IN_PATH);
#out_avis = get_avis_in_directory ($MOVIE_OUT_PATH);
for my $in_avi (#in_avis)
{
if ($in_avi =~ /.*[ ]*CD[ ]*1\.avi$/i)
{
my $rec; # Temporary hash variable
my $name; # Name of the move we are processing
$name = (split (/[ ]*CD[ ]*1/i, $in_avi))[0];
$name = (split (/$MOVIE_IN_PATH[\\\/]{1}/i, $name))[1];
for my $in_avi_2 (#in_avis)
{
if ($in_avi_2 =~ /^$MOVIE_IN_PATH\\$name[ ]*CD[ ]*2\.avi$/i)
{
$rec->{'part2'} = $in_avi_2;
last;
}
}
if (defined $rec->{'part2'})
{
for my $out_avi (#out_avis)
{
if ($out_avi =~ /$name\.avi$/i)
{
$rec->{'output'} = $out_avi;
last;
}
}
unless (defined $rec->{'output'})
{
$rec->{'part1'} = $in_avi;
$rec->{'output'} = "$MOVIE_OUT_PATH\\$name.avi";
push (#movies_formatted, $rec);
}
}
}
}
}
else
{
my $rec; # Temporary hash variable
for my $name (#MOVIES_TO_CONVERT)
{
$rec = {};
$rec->{'part1'} = "$MOVIE_IN_PATH\\$name CD 1.avi";
$rec->{'part2'} = "$MOVIE_IN_PATH\\$name CD 2.avi";
$rec->{'output'} = "$MOVIE_OUT_PATH\\$name.avi";
push (#movies_formatted, $rec);
}
}
for my $movie (#movies_formatted)
{
my $convert_cmd = "\"$AVIDEMUX\" --load \"" . $movie->{'part1'} . "\" --append \"" . $movie->{'part2'} . "\" --force-smart --save \"" . $movie->{'output'} . "\" --quit";
print "$convert_cmd\n";
die "Unable to convert $movie->{'output'}!\n" if (system "$convert_cmd");
}
Script from http://www.neowin.net/forum/topic/1128384-batch-file-to-run-command-on-joining-video-files by xorangekiller.

The backslash in $MOVIE_IN_PATH is being removed by interpolating it into the regular expression. Change both the split commands to
$name = (split(/\Q$MOVIE_IN_PATH\E[\\\/]{1}/i, $name))[1];
i.e. add the \Q ... \E to escape any regex metacharacters.
Your friend doesn't appear to be a particularly good Perl programmer. You may want to try this code instead.
#!/usr/bin/perl
use strict;
use warnings;
use File::Basename;
use File::Spec;
# Configuration Details
#######################
# Array of movies to convert
my #MOVIES_TO_CONVERT; # = ('The Fighter', 'The Fifth Element');
# Directory where the split AVI files are stored
my $MOVIE_IN_PATH = 'D:\split';
# Directory where the combined AVI files will be stored
my $MOVIE_OUT_PATH = 'D:\combined';
# Full path to the avidemux executable
my $AVIDEMUX = 'C:\Program Files (x86)\Avidemux\avidemux.exe';
sub get_avis_in_directory {
opendir my $dh, $MOVIE_IN_PATH or die $!;
return grep {
/\.avi$/ and -f File::Spec->catfile($MOVIE_IN_PATH, $_)
} readdir $dh;
}
-d $MOVIE_IN_PATH or die qq(Input directory "$MOVIE_IN_PATH" does not exist!\n);
-d $MOVIE_OUT_PATH or die qq(Output directory "$MOVIE_OUT_PATH" does not exist!\n);
my #movies_formatted; # Array of hashes of movies to convert
if (#MOVIES_TO_CONVERT) {
for my $name (#MOVIES_TO_CONVERT) {
my $rec = {};
$rec->{part1} = File::Spec->catfile($MOVIE_IN_PATH, "$name CD 1.avi");
$rec->{part2} = File::Spec->catfile($MOVIE_IN_PATH, "$name CD 2.avi");
$rec->{output} = File::Spec->catfile($MOVIE_OUT_PATH, "$name.avi");
push #movies_formatted, $rec;
}
}
else {
my #in_avis = get_avis_in_directory($MOVIE_IN_PATH);
for my $in_avi (#in_avis) {
next unless $in_avi =~ /^(.+?)\s*CD\s*1\.avi$/i;
my $name = $1;
my $rec = {};
$rec->{part1} = $in_avi;
($rec->{part2}) = grep /^\Q$name\E\s*CD\s*2\.avi$/i, #in_avis;
if ($rec->{part2}) {
$rec->{$_} = File::Spec->catfile($MOVIE_IN_PATH, $rec->{$_}) for qw/ part1 part2 /;
$rec->{output} = File::Spec->catfile($MOVIE_OUT_PATH, "$name.avi");
push #movies_formatted, $rec;
}
}
}
for my $movie (#movies_formatted) {
my $convert_cmd = sprintf '"%s" --load "%s" --append "%s" --force-smart --save "%s" --quit',
$AVIDEMUX, #{$movie}{qw/ part1 part2 output /};
print "$convert_cmd\n";
die "Unable to convert $movie->{'output'}!\n" if (system $convert_cmd);
}

In what way doesn't it work?
use warnings;
use strict;
while (<DATA>) {
my $in_avi = $_;
my $name = (split (/[ ]*CD[ ]*1/i, $in_avi))[0];
print "$name\n";
}
__DATA__
Film CD 1.avi
Film2 CD 1.avi
Prints:
Film
Film2

Related

How can I filter the latest occurring filename from a certain directory that matches a filetype?

I wrote this to check if there's a file created at a certain directory and give me the newest file. How do I modify my regex to only pick up the latest file of a certain file type (say only .txt)?
my $DIR="C:\\Dropbox\\AnalysesLogs";
opendir(my $DH, $DIR) or die "Error opening $DIR: $!";
my #files = map { [ stat "$DIR/$_", $_ ] } grep(! /^\.\.?$/, readdir($DH));
closedir($DH);
sub rev_by_date { $b->[9] <=> $a->[9] }
my #sorted_files = sort rev_by_date #files;
my #newest = #{$sorted_files[0]};
my $newestfile = pop(#newest);
Currently, $newestfile would be the latest file, but it is an undesired type. I only wish to see the very latest .txt type.
Use grep again:
my #files = map { [ stat "$DIR/$_", $_ ] } grep { /\.txt$/ } grep(! /^\.\.?$/, readdir($DH));
This assumes you have no subdirectories with a .txt suffix.
This can be further simplified since it'll filter out the 2 dot directories:
my #files = map { [ stat "$DIR/$_", $_ ] } grep { /\.txt$/ } readdir($DH);
You could also do, instead of readdir:
my #files = map { [ stat $_, $_ ] } <$DIR/*.txt>;
See glob operator.

Perl: Splitting a variable containing a path to directory

So I'm having a problem trying to split a variable that has the complete path to a directory. I want to fetch the files under 'logs' directory of this path:
'/nfs/fm/disks/fm_mydiskhere_00000/users/me/repotest/myrepo/tools/toolsdir/logs/*';
and in my Perl script, I have a variable that contains this path: '$log_file'. When I print '$log_file', it contains the entire path; I want to go to the last directory 'logs' and fetch the files under it. By the way, this complete path is in a separate configuration file that is being read by my Perl script this way:
sub read_file {
my ($log_file) = shift(#_);
info ("Using file : $log_file");
my $fh = new FileHandle ("$log_file");
printError ("Could not open this file : '$log_file' - $!") unless defined $fh;
my $contents;
{
local $/ = undef;
$contents = <$fh>;
}
$fh->close();
eval $contents;
if ($#) {
chomp $#;
my $msg = "BAD (perl) syntax in file:\n\n$#\n";
if ( $msg =~ /requires explicit package name/ ) {
$msg .= "\n -> A 'requires explicit package name' message means".
" a NON-VALID variable name was found\n";
}
die "Error: $msg\n\n";
}
return 1;
}
And I'm using variable $log_file in another subroutine this way:
my $fh = read_file ($log_file);
if ($log_file eq "abc.txt"){
while (my $line = <$fh>) {
#do something
}
}
Can anybody help me here, please? Am I missing something or using $log_file in the wrong way?
Thanks in advance!
Try:
my #files = glob($log_file);
use Data::Dumper;
print Dumper(\#files);

Write to file a multiple line string variable

I am trying to extract the content of a text file between two tags and store it to another file.
I manage to convert the input file to a multiple line string variable then use regexp successfully to get what I want in the variable.
But I fail writing my variable to a file, I assume this is because of the type of string with multiple \n inside.
I would appreciate any help. (This my first Perl Script…)
For the test, I use a index.html file but can be any text file.
Edit : solved, see correction in comments
Here below my documented code :
# Extract string between two tags
use strict;
use warnings;
my $inputfile = "";
my $outputfile = "";
# Parse Macro Arguments arguments
if(#ARGV < 2)
{
print "Usage: perl Macro_name.pl <inputfile.HTML> <outfile.HTML>\n";
exit(1);
}
$inputfile = $ARGV[0];
$outputfile = $ARGV[1];
my $body="";
# Convert input file to multiple line string #
$body = File_to_Var_Multi_Line($inputfile);
# First tag & Second tag match
if ( $body =~ /(.*)<body(.*?)>(.*)<\/body>/s )
{ # error :
my $body = $3; # $body is local here
# correction :
#Print to check if extract ok # declare another variable outside if
print $body, "\n";
}
# Write to file my match multiple line string #
open(my $fh_body, '>:encoding(UTF-8)', $outputfile)
or die "Could not open file '$outputfile' $!";
print $fh_body "$body\n";
close $fh_body;
# sub #
sub File_to_Var_Multi_Line
{
if(#_ < 1)
{
print "Usage: line=File_to_Var_Multi_Line<file>\n";
exit(1);
}
my $inputfile_2 = "";
$inputfile_2 = $_[0];
open(my $fl_in, '<:encoding(UTF-8)', $inputfile_2)
or die "Could not open file '$inputfile_2' $!";
my $line = "";
my $row_2 = "";
while (my $row_2 = <$fl_in>)
{
$line .= $row_2;
}
return $line
}
And the input test file :
<html>
<body>
page 1<br>
page 2<br>
page 3<br>
page 4<br>
page 5<br>
</body>
</html>
Notwithstanding RegEx match open tags except XHTML self-contained tags
You may find useful the 'range operator' for iterating through a file.
For example:
while ( <$fl_in> ) {
if ( m,<BODY>,i .. m,</BODY>,i ) {
print;
}
}
The condition will be true, if you're within the 'body' tags. (Although it's line oriented, so trailing stuff will be 'caught').

How to match a regex pattern for multiple files under a directory in perl?

I wrote a script to match a pattern and return a statement for a file
#!/usr/bin/perl
use strict;
use warnings;
my $file = '/home/Sidtest/sid.txt';
open my $info , $file or die " Couldn't open the $file:$!";
while( my $line = <$info>) {
if ($line =~ m/^#LoadModule ssl_module/) {
print "FileName =",$file," Status = Failed \n";
}
elsif ($line =~ m/^LoadModule ssl_module/) {
print "FileName =",$file," Status = Passed \n";
}
}
close $info;
So now I am trying to modify this script to work for multiple files under the same directory. I haven't been able to do that successfully. Can anyone please help in how I can make it work for any number of files in a directory.
This will read every file in ./directory and foreach file, print out each line.
The print statement can be altered to print if /match/, or whatever you want:
my #dir = <directory/*>;
foreach my $file (#dir){
open my $input, '<', $file;
while (<$input>){
print "PASS: $_\n" if m/^#LoadModule ssl_module/;
[...]
}
}
The variable #ARGV contains a list of arguments sent to the script when started. Loop through #ARGV and call the script with the files you want to process:
#!/usr/bin/perl
use strict;
use warnings;
foreach my $file (#ARGV) {
open my $info , $file or die " Couldn't open the $file:$!";
while( my $line = <$info>) {
if ($line =~ m/^#LoadModule ssl_module/) {
print "FileName =",$file," Status = Failed \n";
}
elsif ($line =~ m/^LoadModule ssl_module/) {
print "FileName =",$file," Status = Passed \n";
}
}
close $info;
}
# process all files *.txt in your dir: ./myscript.pl /home/Sidtest/*.txt
Check perldoc perlrun, and look at the -p and -n parameters. Essentially, they treat your script as if it were the contents of a loop over stdin, where stdin is generated by iterating through the files supplied on the command line. The name of the file currently-being-processed can be accessed using the $ARGV variable.
So, you might go for an approach where your whole script looks more like this, using the -n param, where $_ contains the current line.:
if ( m/^#LoadModule ssl_module/) {
print "FileName =",$ARGV" Status = Failed \n";
} elsif (m/^LoadModule ssl_module/) {
print "FileName =",$ARGV," Status = Passed \n";
}

Finding the last "}" of a subroutine

Supposed I have a file with Perl-code: does somebody know, if there is a module which could find the closing "}" of a certain subroutine in that file.
For example:
#!/usr/bin/env perl
use warnings;
use 5.012;
routine_one( '{°^°}' );
routine_two();
sub routine_one {
my $arg = shift;
if ( $arg =~ /}\z/ ) {
say "Hello my }";
}
}
sub routine_two {
say '...' for 0 .. 10
}
The module should be able to remove the whole routine_one or it should can tell me the line-number of the closing "}" from that routine.
You want to use PPI if you are going to be parsing Perl code.
#!/usr/bin/env perl
use warnings;
use 5.012;
use PPI;
my $file = 'Example.pm';
my $doc = PPI::Document->new( $file );
$doc->prune( 'PPI::Token::Pod' );
$doc->prune( 'PPI::Token::Comment' );
my $subs = $doc->find( sub { $_[1]->isa('PPI::Statement::Sub') and $_[1]->name eq 'layout' } );
die if #$subs != 1;
my $new = PPI::Document->new( \qq(sub layout {\n say "my new layout_code";\n}) );
my $subs_new = $new->find( sub { $_[1]->isa('PPI::Statement::Sub') and $_[1]->name eq 'layout' } );
$subs->[0]->block->insert_before( $subs_new->[0]->block ) or die $!;
$subs->[0]->block->remove or die $!;
# $subs->[0]->replace( $subs_new->[0] );
# The ->replace method has not yet been implemented at /usr/local/lib/perl5/site_perl/5.12.2/PPI/Element.pm line 743.
$doc->save( $file ) or die $!;
The following will work in case your subroutines don't contain any blank lines, like the one in your example:
#!/usr/bin/perl -w
use strict;
$^I = ".bkp"; # to create a backup file
{
local $/ = ""; # one paragraph constitutes one record
while (<>) {
unless (/^sub routine_one \{.+\}\s+$/s) { # 's' => '.' will also match "\n"
print;
}
}
}