I am running the following code:
use strict;
use warnings;
use Data::Dumper;
use File::HomeDir;
use File::Temp ();
use File::Spec;
# Reproduction script from the question: reads colorsummarizer output lines
# from /tmp/cs.txt, extracts the fields of each line with one regex using
# named captures, builds one hash per line and dumps it with Data::Dumper.
# NOTE(review): the return value of open is not checked.
open my $output, '<', '/tmp/cs.txt';
# NOTE(review): as written, everything from '#' to the end of the next line is
# a Perl comment; this looks like a garbled 'my @color_clusters;' -- confirm.
my #color_clusters;
# Counter of distinct image names met so far; incremented in the loop below.
my $image_number = 0;
my $image_name = undef;
# Image name seen on the previous line, used to detect a change of image.
my $last_image_name = '';
my $line = undef;
# <$output> is evaluated in list context here, so all lines of the file are
# read before the first iteration runs.
for $line (<$output>) {
chomp($line);
# Echo the raw line between '***' markers.
print "***${line}***\n";
# image (file) name -> ^\S+
# cluster number -> cluster \d,
# HEX -> hex #([0-9A-Z])6,
# Cluster Color -> cmyk \d+ \d+ \d+ \d+ []
# Color Category -> (empty at the moment)
# Pixels -> f 0.\d+
# R, G, B -> rgb \d+ \d+ \d+
# H, S, V -> hsv \d+ \d+ \d+
# Extract all fields with named captures (read back through %+ below).
# The success of this match is not checked.
$line =~ m/
^(?<IMAGE_NAME>.+) # image file name
\ cluster\ (?<CLUST_NUM>\d+)\ n\ [0-9]+ # cluster number
\ f\ (?<PIXELS>[-]?[0-9]+[,.]?[0-9]*) # percent of pixels belonging to this cluster
\ rgb\ (?<RED>\d+)\ (?<GREEN>\d+)\ (?<BLUE>\d+)
\ hex\ \#(?<HEX>[0-9A-F]+) # Hexadecimal notation used in HTML
\ hsv\ (?<HUE>\d+)\ (?<SATURATION>\d+)\ (?<VALUE>\d+)
\ .+\ (?<CLUSTER_COLOR>\w+)\[
/x;
$image_name = $+{IMAGE_NAME};
# Bump the image counter whenever the file name differs from the previous line's.
if ($last_image_name ne $image_name) {
$last_image_name = $image_name;
$image_number++;
}
my $cluster_number = int($+{CLUST_NUM}) + 1; # convert to 1 based
# Format the pixel fraction as a truncated whole percentage.
# NOTE(review): $pixels is not used afterwards; the hash below recomputes the
# value from $+{PIXELS} instead.
my $pixels = $+{PIXELS};
if ($pixels) {
$pixels = ''. int((0 + $pixels) * 100). '%'
}
my $cluster_color = $+{CLUSTER_COLOR};
# NOTE(review): when the m/_/ below succeeds it is itself a new successful
# match, so %+ no longer holds the named captures of the big regex above and
# every $+{...} read further down yields undef. This is consistent with the
# undef fields in the dumps for colour names containing '_'. tr/// is not a
# regex and does not affect the captures.
if ($cluster_color =~ m/_/) {
$cluster_color =~ tr/_/\ /; # replace '_' with space (' ')
}
# Record for this line; several values are read from %+ at this point, i.e.
# after the m/_/ test above.
my %color_cluster = (
image_num => $image_number,
image_name => $image_name,
cluster_number => $cluster_number,
hex_code => $+{HEX},
cluster_color => $cluster_color,
color_category => '', # currently empty, will be calculated from HSV values
pixels => ''. int($+{PIXELS} * 100). '%', # percent of pixels within this cluster
r => $+{RED}, g => $+{GREEN}, b => $+{BLUE},
h => $+{HUE}, s => $+{SATURATION}, v => $+{VALUE}
);
# NOTE(review): '#color_clusters, %color_cluster;' is a comment as written;
# this looks like a garbled 'push @color_clusters, ...' -- confirm.
push #color_clusters, %color_cluster;
print Dumper \%color_cluster;
# NOTE(review): the result of this match is discarded; its purpose is unclear.
$line =~ m/^.+$/;
} # end of for loop
on input that looks like this:
IMG_0069_result.JPG cluster 0 n 69 f 0.0627272727272727 rgb 248 249 240 hex #F8F9F0 hsv 67 3 98 lab 98 -2 4 lch 98 4 114 xyz 0.88 0.94 0.96 cmyk 0 0 3 2 bianca[1402][252,251,243](1.0):eighth_pearl_lusta[3414][249,248,240](1.1):quarter_bianca[6922][249,248,240](1.1):filmpro_white[3624][249,246,237](1.4):orchid_white[6246][255,253,243](1.8):quarter_pearl_lusta[6978][255,253,244](1.8):twilight_blue[8616][244,246,236](1.8):glistening_white[3874][244,244,236](1.9):quarter_rice_cake[6986][246,244,237](1.9):half_bianca[4292][246,243,233](2.0) 10 bianca:cake:eighth:filmpro:glistening:half:lusta:orchid:pearl:quarter:rice:twilight:blue:white
IMG_0069_result.JPG cluster 1 n 67 f 0.0609090909090909 rgb 251 252 247 hex #FBFCF7 hsv 66 2 99 lab 99 -1 2 lch 99 3 114 xyz 0.92 0.97 1.02 cmyk 0 0 2 1 baby_powder[1248][254,254,250](1.3):ceramic[2174][252,255,249](1.6):hint_of_grey[4499][252,255,249](1.6):sea_fog[7554][252,255,249](1.6):wan_white[8990][252,255,249](1.6):snow_drift[7811][247,250,247](1.7):bianca[1402][252,251,243](1.9):black_white[1483][255,254,246](2.1):romance[7283][255,254,253](2.1):quarter_alabaster[6916][247,246,242](2.2) 10 alabaster:baby:bianca:ceramic:drift:fog:hint:of:powder:quarter:romance:sea:snow:wan:black:grey:white
IMG_0069_result.JPG cluster 2 n 66 f 0.06 rgb 250 250 244 hex #FAFAF4 hsv 65 3 98 lab 98 -1 3 lch 98 3 113 xyz 0.9 0.95 0.99 cmyk 0 0 3 2 bianca[1402][252,251,243](1.1):spring_wood[7933][248,246,241](1.5):eighth_pearl_lusta[3414][249,248,240](1.6):quarter_bianca[6922][249,248,240](1.6):quarter_alabaster[6916][247,246,242](1.8):bridal_heath[1713][255,250,244](2.0):baby_powder[1248][254,254,250](2.1):snow_drift[7811][247,250,247](2.1):ceramic[2174][252,255,249](2.1):hint_of_grey[4499][252,255,249](2.1) 10 alabaster:baby:bianca:bridal:ceramic:drift:eighth:heath:hint:lusta:of:pearl:powder:quarter:snow:spring:wood:grey
IMG_0069_result.JPG cluster 3 n 65 f 0.0590909090909091 rgb 245 247 236 hex #F5F7EC hsv 66 4 97 lab 97 -2 5 lch 97 6 114 xyz 0.86 0.92 0.92 cmyk 0 0 4 3 twilight_blue[8616][244,246,236](1.0):filmpro_white[3624][249,246,237](1.6):half_bianca[4292][246,243,233](1.8):half_orchid_white[4363][247,244,234](1.8):eighth_pearl_lusta[3414][249,248,240](1.9):quarter_bianca[6922][249,248,240](1.9):glistening_white[3874][244,244,236](2.1):quarter_rice_cake[6986][246,244,237](2.1):ecru_white[3358][245,243,229](2.2):joanna[4771][245,243,229](2.2) 10 bianca:cake:ecru:eighth:filmpro:glistening:half:joanna:lusta:orchid:pearl:quarter:rice:twilight:blue:white
IMG_0069_result.JPG cluster 4 n 61 f 0.0554545454545455 rgb 248 249 240 hex #F8F9F0 hsv 65 4 97 lab 98 -2 4 lch 98 5 113 xyz 0.88 0.94 0.96 cmyk 0 0 4 3 bianca[1402][252,251,243](1.0):eighth_pearl_lusta[3414][249,248,240](1.1):quarter_bianca[6922][249,248,240](1.1):filmpro_white[3624][249,246,237](1.4):orchid_white[6246][255,253,243](1.8):quarter_pearl_lusta[6978][255,253,244](1.8):twilight_blue[8616][244,246,236](1.8):glistening_white[3874][244,244,236](1.9):quarter_rice_cake[6986][246,244,237](1.9):half_bianca[4292][246,243,233](2.0) 10 bianca:cake:eighth:filmpro:glistening:half:lusta:orchid:pearl:quarter:rice:twilight:blue:white
IMG_0069_result.JPG cluster 5 n 60 f 0.0545454545454545 rgb 249 249 240 hex #F9F9F0 hsv 63 4 98 lab 98 -2 4 lch 98 5 111 xyz 0.89 0.94 0.96 cmyk 0 0 4 2 bianca[1402][252,251,243](0.7):eighth_pearl_lusta[3414][249,248,240](0.9):quarter_bianca[6922][249,248,240](0.9):filmpro_white[3624][249,246,237](1.1):orchid_white[6246][255,253,243](1.6):quarter_pearl_lusta[6978][255,253,244](1.6):floral_white[3694][255,250,240](1.7):glistening_white[3874][244,244,236](1.8):quarter_rice_cake[6986][246,244,237](1.8):twilight_blue[8616][244,246,236](1.9) 10 bianca:cake:eighth:filmpro:floral:glistening:lusta:orchid:pearl:quarter:rice:twilight:blue:white
IMG_0069_result.JPG cluster 6 n 60 f 0.0545454545454545 rgb 249 250 243 hex #F9FAF3 hsv 67 3 98 lab 98 -2 3 lch 98 4 114 xyz 0.9 0.95 0.98 cmyk 0 0 3 2 bianca[1402][252,251,243](0.9):eighth_pearl_lusta[3414][249,248,240](1.4):quarter_bianca[6922][249,248,240](1.4):spring_wood[7933][248,246,241](1.9):ceramic[2174][252,255,249](2.0):hint_of_grey[4499][252,255,249](2.0):sea_fog[7554][252,255,249](2.0):wan_white[8990][252,255,249](2.0):orchid_white[6246][255,253,243](2.1):quarter_pearl_lusta[6978][255,253,244](2.1) 10 bianca:ceramic:eighth:fog:hint:lusta:of:orchid:pearl:quarter:sea:spring:wan:wood:grey:white
IMG_0069_result.JPG cluster 7 n 58 f 0.0527272727272727 rgb 250 251 246 hex #FAFBF6 hsv 69 2 98 lab 98 -1 2 lch 98 2 116 xyz 0.9 0.96 1.01 cmyk 0 0 2 2 snow_drift[7811][247,250,247](1.6):baby_powder[1248][254,254,250](1.6):bianca[1402][252,251,243](1.8):quarter_alabaster[6916][247,246,242](1.9):ceramic[2174][252,255,249](1.9):hint_of_grey[4499][252,255,249](1.9):sea_fog[7554][252,255,249](1.9):wan_white[8990][252,255,249](1.9):spring_wood[7933][248,246,241](2.0):eighth_pearl_lusta[3414][249,248,240](2.2) 10 alabaster:baby:bianca:ceramic:drift:eighth:fog:hint:lusta:of:pearl:powder:quarter:sea:snow:spring:wan:wood:grey:white
The input is the output of colorsummarizer, a program written in Perl that summarizes the colors of images (http://mkweb.bcgsc.ca/color-summarizer/).
Since I am using Perl, I could call the libraries directly rather than run the command line from Perl, but I decided to run the command line since it is easier, or at least was supposed to be easier...
When running the code above, although all of the lines look very similar to each other regarding their structure, some of the lines are being parsed correctly, while others are not.
Here is part of the output I am getting (STDOUT and STDERR interleaved):
Use of uninitialized value $+{"PIXELS"} in multiplication (*) at /tmp/1.pl line 59, <$output> line 8.
Use of uninitialized value $+{"PIXELS"} in multiplication (*) at /tmp/1.pl line 59, <$output> line 8.
Use of uninitialized value $+{"PIXELS"} in multiplication (*) at /tmp/1.pl line 59, <$output> line 8.
***IMG_0069_result.JPG cluster 0 n 69 f 0.0627272727272727 rgb 248 249 240 hex #F8F9F0 hsv 67 3 98 lab 98
-2 4 lch 98 4 114 xyz 0.88 0.94 0.96 cmyk 0 0 3 2 bianca[1402][252,251,243](1.0):eighth_pearl_lusta[3414][
249,248,240](1.1):quarter_bianca[6922][249,248,240](1.1):filmpro_white[3624][249,246,237](1.4):orchid_whit
e[6246][255,253,243](1.8):quarter_pearl_lusta[6978][255,253,244](1.8):twilight_blue[8616][244,246,236](1.8
):glistening_white[3874][244,244,236](1.9):quarter_rice_cake[6986][246,244,237](1.9):half_bianca[4292][246
,243,233](2.0) 10 bianca:cake:eighth:filmpro:glistening:half:lusta:orchid:pearl:quarter:rice:twilight:blue
:white***
$VAR1 = {
'pixels' => '6%',
'b' => '240',
's' => '3',
'image_name' => 'IMG_0069_result.JPG',
'image_num' => 1,
'h' => '67',
'cluster_number' => 1,
'color_category' => '',
'r' => '248',
'v' => '98',
'g' => '249',
'cluster_color' => 'bianca',
'hex_code' => 'F8F9F0'
};
***IMG_0069_result.JPG cluster 1 n 67 f 0.0609090909090909 rgb 251 252 247 hex #FBFCF7 hsv 66 2 99 lab 99
-1 2 lch 99 3 114 xyz 0.92 0.97 1.02 cmyk 0 0 2 1 baby_powder[1248][254,254,250](1.3):ceramic[2174][252,25
5,249](1.6):hint_of_grey[4499][252,255,249](1.6):sea_fog[7554][252,255,249](1.6):wan_white[8990][252,255,2
49](1.6):snow_drift[7811][247,250,247](1.7):bianca[1402][252,251,243](1.9):black_white[1483][255,254,246](
2.1):romance[7283][255,254,253](2.1):quarter_alabaster[6916][247,246,242](2.2) 10 alabaster:baby:bianca:ce
ramic:drift:fog:hint:of:powder:quarter:romance:sea:snow:wan:black:grey:white***
$VAR1 = {
'cluster_number' => 2,
'h' => undef,
'image_num' => 1,
'image_name' => 'IMG_0069_result.JPG',
'b' => undef,
'pixels' => '0%',
's' => undef,
'g' => undef,
'r' => undef,
'color_category' => '',
'v' => undef,
'hex_code' => undef,
'cluster_color' => 'baby powder'
};
***IMG_0069_result.JPG cluster 2 n 66 f 0.06 rgb 250 250 244 hex #FAFAF4 hsv 65 3 98 lab 98 -1 3 lch 98 3
113 xyz 0.9 0.95 0.99 cmyk 0 0 3 2 bianca[1402][252,251,243](1.1):spring_wood[7933][248,246,241](1.5):eigh
th_pearl_lusta[3414][249,248,240](1.6):quarter_bianca[6922][249,248,240](1.6):quarter_alabaster[6916][247,
246,242](1.8):bridal_heath[1713][255,250,244](2.0):baby_powder[1248][254,254,250](2.1):snow_drift[7811][247,250,247](2.1):ceramic[2174][252,255,249](2.1):hint_of_grey[4499][252,255,249](2.1) 10 alabaster:baby:bianca:bridal:ceramic:drift:eighth:heath:hint:lusta:of:pearl:powder:quarter:snow:spring:wood:grey***
$VAR1 = {
'image_name' => 'IMG_0069_result.JPG',
'image_num' => 1,
'h' => '65',
'cluster_number' => 3,
'pixels' => '6%',
'b' => '244',
's' => '3',
'hex_code' => 'FAFAF4',
'cluster_color' => 'bianca',
'g' => '250',
'color_category' => '',
'r' => '250',
'v' => '98'
};
***IMG_0069_result.JPG cluster 3 n 65 f 0.0590909090909091 rgb 245 247 236 hex #F5F7EC hsv 66 4 97 lab 97 -2 5 lch 97 6 114 xyz 0.86 0.92 0.92 cmyk 0 0 4 3 twilight_blue[8616][244,246,236](1.0):filmpro_white[3624][249,246,237](1.6):half_bianca[4292][246,243,233](1.8):half_orchid_white[4363][247,244,234](1.8):eighth_pearl_lusta[3414][249,248,240](1.9):quarter_bianca[6922][249,248,240](1.9):glistening_white[3874][244,244,236](2.1):quarter_rice_cake[6986][246,244,237](2.1):ecru_white[3358][245,243,229](2.2):joanna[4771][245,243,229](2.2) 10 bianca:cake:ecru:eighth:filmpro:glistening:half:joanna:lusta:orchid:pearl:quarter:rice:twilight:blue:white***
$VAR1 = {
'cluster_number' => 4,
'h' => undef,
'image_name' => 'IMG_0069_result.JPG',
'image_num' => 1,
'b' => undef,
'pixels' => '0%',
's' => undef,
'g' => undef,
'r' => undef,
'color_category' => '',
'v' => undef,
'hex_code' => undef,
'cluster_color' => 'twilight blue'
};
***IMG_0069_result.JPG cluster 4 n 61 f 0.0554545454545455 rgb 248 249 240 hex #F8F9F0 hsv 65 4 97 lab 98 -2 4 lch 98 5 113 xyz 0.88 0.94 0.96 cmyk 0 0 4 3 bianca[1402][252,251,243](1.0):eighth_pearl_lusta[3414][249,248,240](1.1):quarter_bianca[6922][249,248,240](1.1):filmpro_white[3624][249,246,237](1.4):orchid_white[6246][255,253,243](1.8):quarter_pearl_lusta[6978][255,253,244](1.8):twilight_blue[8616][244,246,236](1.8):glistening_white[3874][244,244,236](1.9):quarter_rice_cake[6986][246,244,237](1.9):half_bianca[4292][246,243,233](2.0) 10 bianca:cake:eighth:filmpro:glistening:half:lusta:orchid:pearl:quarter:rice:twilight:blue:white***
$VAR1 = {
'b' => '240',
'pixels' => '5%',
's' => '4',
'h' => '65',
'cluster_number' => 5,
'image_num' => 1,
'image_name' => 'IMG_0069_result.JPG',
'r' => '248',
'color_category' => '',
'v' => '97',
'g' => '249',
'hex_code' => 'F8F9F0',
'cluster_color' => 'bianca'
};
***IMG_0069_result.JPG cluster 5 n 60 f 0.0545454545454545 rgb 249 249 240 hex #F9F9F0 hsv 63 4 98 lab 98 -2 4 lch 98 5 111 xyz 0.89 0.94 0.96 cmyk 0 0 4 2 bianca[1402][252,251,243](0.7):eighth_pearl_lusta[3414][249,248,240](0.9):quarter_bianca[6922][249,248,240](0.9):filmpro_white[3624][249,246,237](1.1):orchid_white[6246][255,253,243](1.6):quarter_pearl_lusta[6978][255,253,244](1.6):floral_white[3694][255,250,240](1.7):glistening_white[3874][244,244,236](1.8):quarter_rice_cake[6986][246,244,237](1.8):twilight_blue[8616][244,246,236](1.9) 10 bianca:cake:eighth:filmpro:floral:glistening:lusta:orchid:pearl:quarter:rice:twilight:blue:white***
$VAR1 = {
's' => '4',
'pixels' => '5%',
'b' => '240',
'image_num' => 1,
'image_name' => 'IMG_0069_result.JPG',
'h' => '63',
'cluster_number' => 6,
'v' => '98',
'color_category' => '',
'r' => '249',
'g' => '249',
'hex_code' => 'F9F9F0',
'cluster_color' => 'bianca'
};
...
I found the cause of the problem.
The problem is in these lines:
if ($cluster_color =~ m/_/) {
$cluster_color =~ tr/_/\ /; # replace '_' with space (' ')
}
On input lines where there are underscores in $cluster_color,
the match in the first line (m/_/) succeeds, and a successful match replaces the capture buffer (%+), hence resetting the other capture groups. The tr/_/\ / in the second line is not a regex and does not touch the captures.
The solution was to assign each capture group to its own variable, and add the 3 lines above after all the capture groups have been assigned to their respective variables.
So, assigning directly to the hash was a bad idea :-)
I also improved my regex to be more precise, although this was not the cause of the problem; the cause was simply the successful ... =~ m/_/ match that replaced the capture buffer.
Below is my working code:
use strict;
use warnings;
use Data::Dumper;
use File::HomeDir;
use File::Temp ();
use File::Spec;
# Parse colorsummarizer output (one cluster per line) into a list of records.
#
# Each input line is expected to look like:
#   <image> cluster <i> n <count> f <fraction> rgb R G B hex #RRGGBB
#   hsv H S V ... <color_name>[...
# For every matching line one hash is built, a reference to it is appended to
# @color_clusters, and the hash is dumped to STDOUT. Lines that do not match
# are reported on STDERR and skipped.
my $input_path = '/tmp/cs8.txt';
open my $output, '<', $input_path
    or die "Cannot open '$input_path' for reading: $!";

my @color_clusters;          # one hash reference per parsed cluster line
my $image_number    = 0;     # counts changes of image name, starting at 1
my $last_image_name = '';    # image name of the previously parsed line

# Compiled once outside the loop; /x permits the layout and comments below.
my $cluster_line_re = qr/
    ^(?<IMAGE_NAME>.+) # image file name
    \ cluster\ (?<CLUST_NUM>\d+)\ n\ [0-9]+ # cluster number
    \ f\ (?<PIXELS>[0-9]+\.?[0-9]*) # fraction of pixels belonging to this cluster
    \ rgb\ (?<RED>[0-9]{1,3})\ (?<GREEN>[0-9]{1,3})\ (?<BLUE>[0-9]{1,3})
    \ hex\ \#(?<HEX>[0-9A-F]{6}) # Hexadecimal notation used in HTML
    \ hsv\ (?<HUE>[0-9]{1,3})\ (?<SATURATION>[0-9]{1,3})\ (?<VALUE>[0-9]{1,3})
    \ .+\ (?<CLUSTER_COLOR>\w+)\[
/x;

# Read line by line rather than slurping the whole file into a list.
while (my $line = <$output>) {
    chomp($line);
    print "***${line}***\n";

    # Copy all named captures out of %+ in the same expression as the match.
    # %+ only reflects the last *successful* match: a failed match would leave
    # the previous line's values in place, and any later successful match
    # (even a trivial one) would replace them.
    my %field = ($line =~ $cluster_line_re) ? %+ : ();
    if (!%field) {
        warn "Skipping line that does not match the expected format: $line";
        next;
    }

    # Count a new image whenever the file name differs from the previous one.
    my $image_name = $field{IMAGE_NAME};
    if ($last_image_name ne $image_name) {
        $last_image_name = $image_name;
        $image_number++;
    }

    # tr/// is a plain character mapping (not a regex), so no guard is needed:
    # names without '_' are simply left unchanged.
    my $cluster_color = $field{CLUSTER_COLOR};
    $cluster_color =~ tr/_/ /;

    my %color_cluster = (
        image_num      => $image_number,
        image_name     => $image_name,
        cluster_number => $field{CLUST_NUM} + 1,    # convert to 1 based
        hex_code       => $field{HEX},
        cluster_color  => $cluster_color,
        color_category => '', # currently empty, will be calculated from HSV values
        pixels         => int($field{PIXELS} * 100) . '%', # truncated percentage
        r => $field{RED}, g => $field{GREEN},      b => $field{BLUE},
        h => $field{HUE}, s => $field{SATURATION}, v => $field{VALUE},
    );

    # Store a reference so each record stays one element; pushing the hash
    # itself would flatten it into a list of keys and values. %color_cluster
    # is a fresh lexical on every iteration, so the references are distinct.
    push @color_clusters, \%color_cluster;
    print Dumper \%color_cluster;
} # end of while loop

close $output
    or warn "Error while closing '$input_path': $!";
Many thanks @Yunnosch for your comments, they gave me hints regarding the right direction.
Cheers,
Asaf