I am running the following code:
use strict;
use warnings;
use Data::Dumper;
use File::HomeDir;
use File::Temp ();
use File::Spec;
# Read colorsummarizer output from /tmp/cs.txt, parse every line with one
# named-capture regex and dump one record (hash) per cluster line.
# NOTE(review): the result of open() is not checked.
open my $output, '<', '/tmp/cs.txt';
# NOTE(review): '#color_clusters' looks like a garbled '@color_clusters';
# as written, everything after '#' on that line is a comment.
my #color_clusters;
my $image_number = 0;
my $image_name = undef;
my $last_image_name = '';
my $line = undef;
# for (<$output>) reads the whole file into a list before iterating.
for $line (<$output>) {
chomp($line);
print "***${line}***\n";
# image (file) name -> ^\S+
# cluster number -> cluster \d,
# HEX -> hex #([0-9A-Z])6,
# Cluster Color -> cmyk \d+ \d+ \d+ \d+ []
# Color Category -> (empty at the moment)
# Pixels -> f 0.\d+
# R, G, B -> rgb \d+ \d+ \d+
# H, S, V -> hsv \d+ \d+ \d+
# NOTE(review): the success of this match is never tested; every $+{...}
# below assumes the line matched.
$line =~ m/
^(?<IMAGE_NAME>.+) # image file name
\ cluster\ (?<CLUST_NUM>\d+)\ n\ [0-9]+ # cluster number
\ f\ (?<PIXELS>[-]?[0-9]+[,.]?[0-9]*) # percent of pixels belonging to this cluster
\ rgb\ (?<RED>\d+)\ (?<GREEN>\d+)\ (?<BLUE>\d+)
\ hex\ \#(?<HEX>[0-9A-F]+) # Hexadecimal notation used in HTML
\ hsv\ (?<HUE>\d+)\ (?<SATURATION>\d+)\ (?<VALUE>\d+)
\ .+\ (?<CLUSTER_COLOR>\w+)\[
/x;
# Count images: bump the counter whenever the file name changes.
$image_name = $+{IMAGE_NAME};
if ($last_image_name ne $image_name) {
$last_image_name = $image_name;
$image_number++;
}
my $cluster_number = int($+{CLUST_NUM}) + 1; # convert to 1 based
# Fraction -> truncated percentage string, e.g. 0.0627 -> '6%'.
# NOTE(review): $pixels is not used afterwards; the hash below recomputes
# the value from $+{PIXELS}.
my $pixels = $+{PIXELS};
if ($pixels) {
$pixels = ''. int((0 + $pixels) * 100). '%'
}
my $cluster_color = $+{CLUSTER_COLOR};
# A successful m/_/ is itself a pattern match, so it resets %+; the
# $+{...} reads in the hash below then find no named captures. This is
# consistent with the undef fields dumped for names containing '_'.
if ($cluster_color =~ m/_/) {
$cluster_color =~ tr/_/\ /; # replace '_' with space (' ')
}
my %color_cluster = (
image_num => $image_number,
image_name => $image_name,
cluster_number => $cluster_number,
hex_code => $+{HEX},
cluster_color => $cluster_color,
color_category => '', # currently empty, will be calculated from HSV values
pixels => ''. int($+{PIXELS} * 100). '%', # percent of pixels within this cluster
r => $+{RED}, g => $+{GREEN}, b => $+{BLUE},
h => $+{HUE}, s => $+{SATURATION}, v => $+{VALUE}
);
# NOTE(review): '#color_clusters' again looks like a garbled '@color_clusters'.
push #color_clusters, %color_cluster;
print Dumper \%color_cluster;
# NOTE(review): the result of this match is discarded; its purpose is not
# evident from the code.
$line =~ m/^.+$/;
} # end of for loop
on input that looks like this:
IMG_0069_result.JPG cluster 0 n 69 f 0.0627272727272727 rgb 248 249 240 hex #F8F9F0 hsv 67 3 98 lab 98 -2 4 lch 98 4 114 xyz 0.88 0.94 0.96 cmyk 0 0 3 2 bianca[1402][252,251,243](1.0):eighth_pearl_lusta[3414][249,248,240](1.1):quarter_bianca[6922][249,248,240](1.1):filmpro_white[3624][249,246,237](1.4):orchid_white[6246][255,253,243](1.8):quarter_pearl_lusta[6978][255,253,244](1.8):twilight_blue[8616][244,246,236](1.8):glistening_white[3874][244,244,236](1.9):quarter_rice_cake[6986][246,244,237](1.9):half_bianca[4292][246,243,233](2.0) 10 bianca:cake:eighth:filmpro:glistening:half:lusta:orchid:pearl:quarter:rice:twilight:blue:white
IMG_0069_result.JPG cluster 1 n 67 f 0.0609090909090909 rgb 251 252 247 hex #FBFCF7 hsv 66 2 99 lab 99 -1 2 lch 99 3 114 xyz 0.92 0.97 1.02 cmyk 0 0 2 1 baby_powder[1248][254,254,250](1.3):ceramic[2174][252,255,249](1.6):hint_of_grey[4499][252,255,249](1.6):sea_fog[7554][252,255,249](1.6):wan_white[8990][252,255,249](1.6):snow_drift[7811][247,250,247](1.7):bianca[1402][252,251,243](1.9):black_white[1483][255,254,246](2.1):romance[7283][255,254,253](2.1):quarter_alabaster[6916][247,246,242](2.2) 10 alabaster:baby:bianca:ceramic:drift:fog:hint:of:powder:quarter:romance:sea:snow:wan:black:grey:white
IMG_0069_result.JPG cluster 2 n 66 f 0.06 rgb 250 250 244 hex #FAFAF4 hsv 65 3 98 lab 98 -1 3 lch 98 3 113 xyz 0.9 0.95 0.99 cmyk 0 0 3 2 bianca[1402][252,251,243](1.1):spring_wood[7933][248,246,241](1.5):eighth_pearl_lusta[3414][249,248,240](1.6):quarter_bianca[6922][249,248,240](1.6):quarter_alabaster[6916][247,246,242](1.8):bridal_heath[1713][255,250,244](2.0):baby_powder[1248][254,254,250](2.1):snow_drift[7811][247,250,247](2.1):ceramic[2174][252,255,249](2.1):hint_of_grey[4499][252,255,249](2.1) 10 alabaster:baby:bianca:bridal:ceramic:drift:eighth:heath:hint:lusta:of:pearl:powder:quarter:snow:spring:wood:grey
IMG_0069_result.JPG cluster 3 n 65 f 0.0590909090909091 rgb 245 247 236 hex #F5F7EC hsv 66 4 97 lab 97 -2 5 lch 97 6 114 xyz 0.86 0.92 0.92 cmyk 0 0 4 3 twilight_blue[8616][244,246,236](1.0):filmpro_white[3624][249,246,237](1.6):half_bianca[4292][246,243,233](1.8):half_orchid_white[4363][247,244,234](1.8):eighth_pearl_lusta[3414][249,248,240](1.9):quarter_bianca[6922][249,248,240](1.9):glistening_white[3874][244,244,236](2.1):quarter_rice_cake[6986][246,244,237](2.1):ecru_white[3358][245,243,229](2.2):joanna[4771][245,243,229](2.2) 10 bianca:cake:ecru:eighth:filmpro:glistening:half:joanna:lusta:orchid:pearl:quarter:rice:twilight:blue:white
IMG_0069_result.JPG cluster 4 n 61 f 0.0554545454545455 rgb 248 249 240 hex #F8F9F0 hsv 65 4 97 lab 98 -2 4 lch 98 5 113 xyz 0.88 0.94 0.96 cmyk 0 0 4 3 bianca[1402][252,251,243](1.0):eighth_pearl_lusta[3414][249,248,240](1.1):quarter_bianca[6922][249,248,240](1.1):filmpro_white[3624][249,246,237](1.4):orchid_white[6246][255,253,243](1.8):quarter_pearl_lusta[6978][255,253,244](1.8):twilight_blue[8616][244,246,236](1.8):glistening_white[3874][244,244,236](1.9):quarter_rice_cake[6986][246,244,237](1.9):half_bianca[4292][246,243,233](2.0) 10 bianca:cake:eighth:filmpro:glistening:half:lusta:orchid:pearl:quarter:rice:twilight:blue:white
IMG_0069_result.JPG cluster 5 n 60 f 0.0545454545454545 rgb 249 249 240 hex #F9F9F0 hsv 63 4 98 lab 98 -2 4 lch 98 5 111 xyz 0.89 0.94 0.96 cmyk 0 0 4 2 bianca[1402][252,251,243](0.7):eighth_pearl_lusta[3414][249,248,240](0.9):quarter_bianca[6922][249,248,240](0.9):filmpro_white[3624][249,246,237](1.1):orchid_white[6246][255,253,243](1.6):quarter_pearl_lusta[6978][255,253,244](1.6):floral_white[3694][255,250,240](1.7):glistening_white[3874][244,244,236](1.8):quarter_rice_cake[6986][246,244,237](1.8):twilight_blue[8616][244,246,236](1.9) 10 bianca:cake:eighth:filmpro:floral:glistening:lusta:orchid:pearl:quarter:rice:twilight:blue:white
IMG_0069_result.JPG cluster 6 n 60 f 0.0545454545454545 rgb 249 250 243 hex #F9FAF3 hsv 67 3 98 lab 98 -2 3 lch 98 4 114 xyz 0.9 0.95 0.98 cmyk 0 0 3 2 bianca[1402][252,251,243](0.9):eighth_pearl_lusta[3414][249,248,240](1.4):quarter_bianca[6922][249,248,240](1.4):spring_wood[7933][248,246,241](1.9):ceramic[2174][252,255,249](2.0):hint_of_grey[4499][252,255,249](2.0):sea_fog[7554][252,255,249](2.0):wan_white[8990][252,255,249](2.0):orchid_white[6246][255,253,243](2.1):quarter_pearl_lusta[6978][255,253,244](2.1) 10 bianca:ceramic:eighth:fog:hint:lusta:of:orchid:pearl:quarter:sea:spring:wan:wood:grey:white
IMG_0069_result.JPG cluster 7 n 58 f 0.0527272727272727 rgb 250 251 246 hex #FAFBF6 hsv 69 2 98 lab 98 -1 2 lch 98 2 116 xyz 0.9 0.96 1.01 cmyk 0 0 2 2 snow_drift[7811][247,250,247](1.6):baby_powder[1248][254,254,250](1.6):bianca[1402][252,251,243](1.8):quarter_alabaster[6916][247,246,242](1.9):ceramic[2174][252,255,249](1.9):hint_of_grey[4499][252,255,249](1.9):sea_fog[7554][252,255,249](1.9):wan_white[8990][252,255,249](1.9):spring_wood[7933][248,246,241](2.0):eighth_pearl_lusta[3414][249,248,240](2.2) 10 alabaster:baby:bianca:ceramic:drift:eighth:fog:hint:lusta:of:pearl:powder:quarter:sea:snow:spring:wan:wood:grey:white
The input is the output of colorsummarizer, a program written in Perl that summarizes the colors of images (http://mkweb.bcgsc.ca/color-summarizer/).
Since I am using Perl, I could call the libraries directly rather than run the command line from Perl, but I decided to run the command line since it is easier, or at least was supposed to be easier...
When running the code above, although all of the lines look very similar to each other regarding their structure, some of the lines are being parsed correctly, while others are not.
Here is part of the output I am getting (STDOUT and STDERR interleaved):
Use of uninitialized value $+{"PIXELS"} in multiplication (*) at /tmp/1.pl line 59, <$output> line 8.
Use of uninitialized value $+{"PIXELS"} in multiplication (*) at /tmp/1.pl line 59, <$output> line 8.
Use of uninitialized value $+{"PIXELS"} in multiplication (*) at /tmp/1.pl line 59, <$output> line 8.
***IMG_0069_result.JPG cluster 0 n 69 f 0.0627272727272727 rgb 248 249 240 hex #F8F9F0 hsv 67 3 98 lab 98
-2 4 lch 98 4 114 xyz 0.88 0.94 0.96 cmyk 0 0 3 2 bianca[1402][252,251,243](1.0):eighth_pearl_lusta[3414][
249,248,240](1.1):quarter_bianca[6922][249,248,240](1.1):filmpro_white[3624][249,246,237](1.4):orchid_whit
e[6246][255,253,243](1.8):quarter_pearl_lusta[6978][255,253,244](1.8):twilight_blue[8616][244,246,236](1.8
):glistening_white[3874][244,244,236](1.9):quarter_rice_cake[6986][246,244,237](1.9):half_bianca[4292][246
,243,233](2.0) 10 bianca:cake:eighth:filmpro:glistening:half:lusta:orchid:pearl:quarter:rice:twilight:blue
:white***
$VAR1 = {
'pixels' => '6%',
'b' => '240',
's' => '3',
'image_name' => 'IMG_0069_result.JPG',
'image_num' => 1,
'h' => '67',
'cluster_number' => 1,
'color_category' => '',
'r' => '248',
'v' => '98',
'g' => '249',
'cluster_color' => 'bianca',
'hex_code' => 'F8F9F0'
};
***IMG_0069_result.JPG cluster 1 n 67 f 0.0609090909090909 rgb 251 252 247 hex #FBFCF7 hsv 66 2 99 lab 99
-1 2 lch 99 3 114 xyz 0.92 0.97 1.02 cmyk 0 0 2 1 baby_powder[1248][254,254,250](1.3):ceramic[2174][252,25
5,249](1.6):hint_of_grey[4499][252,255,249](1.6):sea_fog[7554][252,255,249](1.6):wan_white[8990][252,255,2
49](1.6):snow_drift[7811][247,250,247](1.7):bianca[1402][252,251,243](1.9):black_white[1483][255,254,246](
2.1):romance[7283][255,254,253](2.1):quarter_alabaster[6916][247,246,242](2.2) 10 alabaster:baby:bianca:ce
ramic:drift:fog:hint:of:powder:quarter:romance:sea:snow:wan:black:grey:white***
$VAR1 = {
'cluster_number' => 2,
'h' => undef,
'image_num' => 1,
'image_name' => 'IMG_0069_result.JPG',
'b' => undef,
'pixels' => '0%',
's' => undef,
'g' => undef,
'r' => undef,
'color_category' => '',
'v' => undef,
'hex_code' => undef,
'cluster_color' => 'baby powder'
};
***IMG_0069_result.JPG cluster 2 n 66 f 0.06 rgb 250 250 244 hex #FAFAF4 hsv 65 3 98 lab 98 -1 3 lch 98 3
113 xyz 0.9 0.95 0.99 cmyk 0 0 3 2 bianca[1402][252,251,243](1.1):spring_wood[7933][248,246,241](1.5):eigh
th_pearl_lusta[3414][249,248,240](1.6):quarter_bianca[6922][249,248,240](1.6):quarter_alabaster[6916][247,
246,242](1.8):bridal_heath[1713][255,250,244](2.0):baby_powder[1248][254,254,250](2.1):snow_drift[7811][247,250,247](2.1):ceramic[2174][252,255,249](2.1):hint_of_grey[4499][252,255,249](2.1) 10 alabaster:baby:bianca:bridal:ceramic:drift:eighth:heath:hint:lusta:of:pearl:powder:quarter:snow:spring:wood:grey***
$VAR1 = {
'image_name' => 'IMG_0069_result.JPG',
'image_num' => 1,
'h' => '65',
'cluster_number' => 3,
'pixels' => '6%',
'b' => '244',
's' => '3',
'hex_code' => 'FAFAF4',
'cluster_color' => 'bianca',
'g' => '250',
'color_category' => '',
'r' => '250',
'v' => '98'
};
***IMG_0069_result.JPG cluster 3 n 65 f 0.0590909090909091 rgb 245 247 236 hex #F5F7EC hsv 66 4 97 lab 97 -2 5 lch 97 6 114 xyz 0.86 0.92 0.92 cmyk 0 0 4 3 twilight_blue[8616][244,246,236](1.0):filmpro_white[3624][249,246,237](1.6):half_bianca[4292][246,243,233](1.8):half_orchid_white[4363][247,244,234](1.8):eighth_pearl_lusta[3414][249,248,240](1.9):quarter_bianca[6922][249,248,240](1.9):glistening_white[3874][244,244,236](2.1):quarter_rice_cake[6986][246,244,237](2.1):ecru_white[3358][245,243,229](2.2):joanna[4771][245,243,229](2.2) 10 bianca:cake:ecru:eighth:filmpro:glistening:half:joanna:lusta:orchid:pearl:quarter:rice:twilight:blue:white***
$VAR1 = {
'cluster_number' => 4,
'h' => undef,
'image_name' => 'IMG_0069_result.JPG',
'image_num' => 1,
'b' => undef,
'pixels' => '0%',
's' => undef,
'g' => undef,
'r' => undef,
'color_category' => '',
'v' => undef,
'hex_code' => undef,
'cluster_color' => 'twilight blue'
};
***IMG_0069_result.JPG cluster 4 n 61 f 0.0554545454545455 rgb 248 249 240 hex #F8F9F0 hsv 65 4 97 lab 98 -2 4 lch 98 5 113 xyz 0.88 0.94 0.96 cmyk 0 0 4 3 bianca[1402][252,251,243](1.0):eighth_pearl_lusta[3414][249,248,240](1.1):quarter_bianca[6922][249,248,240](1.1):filmpro_white[3624][249,246,237](1.4):orchid_white[6246][255,253,243](1.8):quarter_pearl_lusta[6978][255,253,244](1.8):twilight_blue[8616][244,246,236](1.8):glistening_white[3874][244,244,236](1.9):quarter_rice_cake[6986][246,244,237](1.9):half_bianca[4292][246,243,233](2.0) 10 bianca:cake:eighth:filmpro:glistening:half:lusta:orchid:pearl:quarter:rice:twilight:blue:white***
$VAR1 = {
'b' => '240',
'pixels' => '5%',
's' => '4',
'h' => '65',
'cluster_number' => 5,
'image_num' => 1,
'image_name' => 'IMG_0069_result.JPG',
'r' => '248',
'color_category' => '',
'v' => '97',
'g' => '249',
'hex_code' => 'F8F9F0',
'cluster_color' => 'bianca'
};
***IMG_0069_result.JPG cluster 5 n 60 f 0.0545454545454545 rgb 249 249 240 hex #F9F9F0 hsv 63 4 98 lab 98 -2 4 lch 98 5 111 xyz 0.89 0.94 0.96 cmyk 0 0 4 2 bianca[1402][252,251,243](0.7):eighth_pearl_lusta[3414][249,248,240](0.9):quarter_bianca[6922][249,248,240](0.9):filmpro_white[3624][249,246,237](1.1):orchid_white[6246][255,253,243](1.6):quarter_pearl_lusta[6978][255,253,244](1.6):floral_white[3694][255,250,240](1.7):glistening_white[3874][244,244,236](1.8):quarter_rice_cake[6986][246,244,237](1.8):twilight_blue[8616][244,246,236](1.9) 10 bianca:cake:eighth:filmpro:floral:glistening:lusta:orchid:pearl:quarter:rice:twilight:blue:white***
$VAR1 = {
's' => '4',
'pixels' => '5%',
'b' => '240',
'image_num' => 1,
'image_name' => 'IMG_0069_result.JPG',
'h' => '63',
'cluster_number' => 6,
'v' => '98',
'color_category' => '',
'r' => '249',
'g' => '249',
'hex_code' => 'F9F9F0',
'cluster_color' => 'bianca'
};
...
I found the cause of the problem.
The problem is in these lines:
if ($cluster_color =~ m/_/) {
$cluster_color =~ tr/_/\ /; # replace '_' with space (' ')
}
On input lines where $cluster_color contains underscores,
the first line ($cluster_color =~ m/_/) matches successfully, and every successful match resets the capture buffer (%+), hence clearing the other capture groups. The tr/_/\ / on the second line is not a regex match and does not touch the captures by itself.
The solution was to assign each capture group to its own variable, and to place the 3 lines above after all the capture groups have been assigned to their respective variables.
So, reading the captures directly into the hash after another match was a bad idea :-)
I also improved my regex to be more precise, although this was not the cause of the problem; the cause was simply the m/_/ test guarding the ... =~ tr/_/\ /; which reset the capture buffer.
Below is my working code:
use strict;
use warnings;
use Data::Dumper;
use File::HomeDir;
use File::Temp ();
use File::Spec;
# Parse colorsummarizer output (one cluster per line) from /tmp/cs8.txt.
# Every line that matches is echoed, turned into a record (hash), dumped
# with Data::Dumper and stored as a hash reference in @color_clusters.
#
# Fields taken from each line:
#   image (file) name -> leading text before " cluster "
#   cluster number    -> cluster <n>   (stored 1-based)
#   Pixels            -> f <fraction>  (stored as a truncated percentage)
#   R, G, B           -> rgb <r> <g> <b>
#   HEX               -> hex #RRGGBB
#   H, S, V           -> hsv <h> <s> <v>
#   Cluster Color     -> last "<name>[" token that follows a space
#   Color Category    -> (empty at the moment)

# Compiled once, outside the loop, because it never changes.
my $cluster_line_re = qr/
    ^(?<IMAGE_NAME>.+)                                # image file name
    \ cluster\ (?<CLUST_NUM>\d+)\ n\ [0-9]+           # cluster number
    \ f\ (?<PIXELS>[0-9]+\.?[0-9]*)                   # percent of pixels belonging to this cluster
    \ rgb\ (?<RED>[0-9]{1,3})\ (?<GREEN>[0-9]{1,3})\ (?<BLUE>[0-9]{1,3})
    \ hex\ \#(?<HEX>[0-9A-F]{6})                      # Hexadecimal notation used in HTML
    \ hsv\ (?<HUE>[0-9]{1,3})\ (?<SATURATION>[0-9]{1,3})\ (?<VALUE>[0-9]{1,3})
    \ .+\ (?<CLUSTER_COLOR>\w+)\[
/x;

my $input_path = '/tmp/cs8.txt';
open my $output, '<', $input_path
    or die "Cannot open '$input_path' for reading: $!\n";

my @color_clusters;          # one hash reference per parsed cluster line
my $image_number    = 0;     # running count of distinct consecutive image names
my $last_image_name = '';

# Read line by line instead of slurping the whole file into a list.
while (my $line = <$output>) {
    chomp $line;
    print "***${line}***\n";

    # Never touch the captures of a line that did not match.
    unless ($line =~ $cluster_line_re) {
        warn "Skipping unparsable line $.\n";
        next;
    }

    # Copy the named captures immediately: any later successful pattern
    # match would reset %+ and silently turn these fields into undef.
    my %capture = %+;

    my $image_name = $capture{IMAGE_NAME};
    if ($last_image_name ne $image_name) {
        $last_image_name = $image_name;
        $image_number++;
    }

    # tr/// is a plain character mapping, not a regex match, so no guard
    # match is needed and the captures cannot be disturbed.
    (my $cluster_color = $capture{CLUSTER_COLOR}) =~ tr/_/ /;

    my %color_cluster = (
        image_num      => $image_number,
        image_name     => $image_name,
        cluster_number => $capture{CLUST_NUM} + 1,    # convert to 1 based
        hex_code       => $capture{HEX},
        cluster_color  => $cluster_color,
        color_category => '',    # currently empty, will be calculated from HSV values
        pixels         => int($capture{PIXELS} * 100) . '%',    # percent of pixels within this cluster
        r => $capture{RED}, g => $capture{GREEN},      b => $capture{BLUE},
        h => $capture{HUE}, s => $capture{SATURATION}, v => $capture{VALUE},
    );

    # Store a reference so each record stays one element of the array;
    # pushing the hash itself would flatten it into loose key/value pairs.
    push @color_clusters, \%color_cluster;
    print Dumper \%color_cluster;
}    # end of while loop

close $output;
Many thanks @Yunnosch for your comments, they gave me hints regarding the right direction.
Cheers,
Asaf
I have a string like "0189", for which I need to generate all subsequences, but the ordering of the individual characters must be kept, i.e., here 9 should not come before 0, 1 or 8. Ex: 0, 018, 01, 09, 0189, 18, 19, 019, etc.
Another example is "10292" for which subsequences would be: 1, 10, 02, 02, 09, 29, 92, etc. As you might have noticed '02' two times, since '2' comes twice in the given string. But again things like: 21, 01, 91 are invalid as order is to be maintained.
Any algorithm or pseudocode which could be implemented in C/C++ would be appreciated!
Try a recursive approach:
the set of subsequences can be split into the ones containing the first character and the ones not containing it
the ones containing the first character are built by prepending that character to the subsequences which don't contain it (+ the subsequence which contains only the first character itself)
I'd recommend using the natural correspondence between the power set of a sequence and the set of binary numbers from 0 to 2^n - 1, where n is the length of the sequence.
In your case, n is 4, so consider 0 = 0000 .. 15 = 1111; where there is a 1 in the binary expression include the corresponding item from the sequence. To implement this you'll need bitshift and binary operations:
// Enumerate all subsequences of `sequence` (length n) by counting through the
// bit masks 0 .. 2^n - 1: bit j set in mask i means "keep sequence[j]".
// Characters are appended in index order, so their relative order is kept.
// Mask 0 produces the empty subsequence. The shifts require n to be smaller
// than the number of value bits in int.
for (int i = 0; i < (1 << n); ++i) {
    std::string item;
    for (int j = 0; j < n; ++j) {  // j must be declared; it was missing before
        if (i & (1 << j)) {
            item += sequence[j];
        }
    }
    result.push_back(item);
}
Also consider how you'd handle sequences longer than can be covered by an int (hint: consider overflow and arithmetic carry).
In Python:
In [29]: def subseq(s): return ' '.join((' '.join(''.join(x) for x in combs(s,n)) for n in range(1, len(s)+1)))
In [30]: subseq("0189")
Out[30]: '0 1 8 9 01 08 09 18 19 89 018 019 089 189 0189'
In [31]: subseq("10292")
Out[31]: '1 0 2 9 2 10 12 19 12 02 09 02 29 22 92 102 109 102 129 122 192 029 022 092 292 1029 1022 1092 1292 0292 10292'
In [32]:
__author__ = 'Robert'
from itertools import combinations
# Demo: combinations() over indices yields tuples in increasing index order,
# so picking characters by these indices keeps their original relative order.
g = combinations(range(4), r=2)
print(list(g)) #[(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
def solve(string_):
    """Yield every non-empty subsequence of ``string_``, shortest first.

    Characters keep their original relative order. Repeated characters give
    repeated subsequences (e.g. '02' appears twice for '10292'). An empty
    input yields nothing.
    """
    n = len(string_)
    for size in range(1, n + 1):
        # Each combination is an increasing tuple of indices into string_.
        for combo in combinations(range(n), r=size):
            yield "".join(string_[i] for i in combo)
print(list(solve('0189'))) #['0', '1', '8', '9', '01', '08', '09', '18', '19', '89', '018', '019', '089', '189', '0189']
#using recursion
def solve2(string_, i=0):
    """Return all subsequences of ``string_[i:]``, including the empty one.

    Recursive formulation: the subsequences of the suffix starting at ``i``
    are the subsequences of the suffix starting at ``i + 1``, followed by the
    same subsequences with ``string_[i]`` put in front of them.

    ``i`` defaults to 0 so callers can simply write ``solve2(text)``.
    """
    if i >= len(string_):
        return [""]  # no sub_strings beyond length of string_
    head = string_[i]
    without_head = solve2(string_, i + 1)
    # Prefixing (not suffixing) head preserves the original character order.
    return without_head + [head + sub for sub in without_head]
print(solve2('0189', 0)) #['', '9', '8', '89', '1', '19', '18', '189', '0', '09', '08', '089', '01', '019', '018', '0189']