Related
I have data like this (file is called list-in.dat)
a ; b ; c ; i
d
e ; f ; a ; b
g ; h ; i
and I want a list like this (output file list-out.dat) containing all items in alphabetical order (case-insensitive), with each unique item listed only once.
a
b
c
d
e
f
g
h
i
My attempt is:
awk -F " ; " ' BEGIN { OFS="\n" ; } {for(i=0; i<=NF; i++) print $i} ' file-in.dat | uniq | sort -uf > file-out.dat
But I end up with all entries except those from lines that have only one item:
a
b
c
e
f
g
h
i
How can I get all (unique, sorted) items no matter how many items are in one line / if the field separator is missing?
Using gnu-awk:
# Print every distinct ";"-separated item of file, one per line, sorted by
# gawk itself. Requires GNU awk for PROCINFO["sorted_in"].
awk -F '[[:blank:]]*;[[:blank:]]*' '
{
    # Referencing uniq[$i] is enough to create the key; duplicates collapse.
    for (i = 1; i <= NF; i++)
        uniq[$i]
}
END {
    # Predefined gawk scan orders start with "@"; any other value is taken
    # as the name of a user-defined comparison function.
    PROCINFO["sorted_in"] = "@ind_str_asc"
    for (i in uniq)
        print i
}' file
a
b
c
d
e
f
g
h
i
For non-gnu awk use:
# Portable variant: awk collects the distinct items, sort(1) orders them.
awk 'BEGIN { FS = "[[:blank:]]*;[[:blank:]]*" }
{
    # Each field becomes an array key, so repeated items are stored once.
    for (n = 1; n <= NF; n++)
        seen[$n]
}
END {
    for (item in seen)
        print item
}' file | sort
awk -F' ; ' -v OFS='\n' '{$1=$1} 1' ip.txt | sort -fu
-F' ; ' sets space followed by ; followed by space as field separator
-v OFS='\n' sets newline as output field separator
{$1=$1} change $0 as per new OFS
1 print $0
sort -fu sort uniquely ignoring case in alphabetic order
Could you please try following, awk + sort solution, written and tested with shown samples. In case you want to use ignorecase then add IGNORECASE=1 in awk code.
awk '
BEGIN{
FS=" ; "
}
{
for(i=1;i<=NF;i++){
if(!a[$i]++){ print $i }
}
}
' Input_file | sort
Explanation: Adding detailed explanation for above.
awk ' ##Starting awk program from here.
BEGIN{ ##Starting BEGIN section of this program from here.
FS=" ; " ##Setting field separator as space semi-colon space here.
}
{
for(i=1;i<=NF;i++){ ##Starting a for loop till NF here for each line.
if(!a[$i]++){ print $i } ##Checking condition if current field is NOT present in array a then printing that field value here.
}
}
' Input_file | sort ##Mentioning Input_file name here and passing it to sort as Input to sort the data.
I have the following data:
"b":1.14105,"a":1.14106,"x":48,"t":1594771200000
"a":1.141,"b":1.14099,"x":48,"t":1594771206000
...
I am trying to display data in a given order and only for three fields. As the fields order is not guaranteed, I need to read the "tag" for each comma separated column for each line.
I have tried to solve this task using awk:
awk -F',' '
{
for(i=1; i<=$NF; i++) {
if(index($i,"\"a\":")!=0) a=$i;
if(index($i,"\"b\":")!=0) b=$i;
if(index($i,"\"t\":")!=0) t=$i;
}
printf("%s,%s,%s\n",a,b,t);
}
'
But I get:
,,
,,
...
In the above data sample, I would expect:
"a":1.14106,"b":1.14105,"t":1594771200000
"a":1.141,"b":1.14099,"t":1594771206000
...
Note: I am using the awk shipped with FreeBSD
$ cat tst.awk
# tst.awk -- print the "a", "b" and "t" tag:value pairs of each record, in
# that order, regardless of the order in which they appear in the input.
BEGIN {
    FS = "[,:]"     # tags and values alternate: $1 = tag, $2 = value, ...
    OFS = ","
}
{
    # Forget the previous record so that a tag missing from this line is
    # printed with an empty value instead of a stale one.
    delete f
    for (i = 1; i < NF; i += 2) {
        f[$i] = $(i+1)      # key keeps its double quotes, e.g. f["\"a\""]
    }
    print p("a"), p("b"), p("t")
}

# p(tag): return the quoted tag, a colon and the value stored for it.
# t is a local scratch variable, not a caller-supplied argument.
function p(tag,    t) {
    t = "\"" tag "\""
    return t ":" f[t]
}
.
$ awk -f tst.awk file
"a":1.14106,"b":1.14105,"t":1594771200000
"a":1.141,"b":1.14099,"t":1594771206000
With awk and an array:
awk -F '[:,]' '{for(i=1; i<=NF; i=i+2){a[$i]=$(i+1)}; print "\"a\":" a["\"a\""] ",\"b\":" a["\"b\""] ",\"t\":" a["\"t\""]}' file
or
awk -F '[":,]' '{for(i=2; i<=NF; i=i+4){a[$i]=$(i+2)}; print "\"a\":" a["a"] ",\"b\":" a["b"] ",\"t\":" a["t"]}' file
Output:
"a":1.14106,"b":1.14105,"t":1594771200000
"a":1.141,"b":1.14099,"t":1594771206000
similar awk where you can specify the fields and order.
$ awk -F'[:,]' -v fields='"a","b","t"' '
    # FS comes from -F (quoted so the shell cannot glob-expand the brackets).
    # f[1..n] holds the requested tags, in output order.
    BEGIN { n = split(fields, f) }
    {
        # Tags and values alternate on each line: remember each value by tag.
        for (i = 1; i < NF; i += 2)
            map[$i] = $(i+1)
        # Emit tag:value pairs, comma-separated, ending the line with ORS.
        for (i = 1; i <= n; i++)
            printf "%s", f[i] ":" map[f[i]] (i == n ? ORS : ",")
    }' file
"a":1.14106,"b":1.14105,"t":1594771200000
"a":1.141,"b":1.14099,"t":1594771206000
file.txt
fruits:banana,apple,grape,limon,orange,tomate,
fruits:apple,limon,
fruits:banana,grape,limon,
fruits:orange,tomate,grape,
fruits:banana,
fruits:apple,
fruits:banana,apple,
I need to replace everything other than "banana" with FRUIT, and get output like this:
fruits:banana,FRUIT,FRUIT,FRUIT,FRUIT,FRUIT,
fruits:FRUIT,FRUIT,
fruits:banana,FRUIT,FRUIT,
fruits:FRUIT,FRUIT,FRUIT,
fruits:banana,
fruits:FRUIT,
fruits:FRUIT,apple,
I tried using awk, but I can only replace the fields of specific strings.
Example replace all strings "apple" by fruit2, or all strings "apple" by fruit2 and all strings "tomate"or "orange" by fruit3
awk -F":" '{ gsub(/apple/,"FRUIT2",$2); print }' OFS="," file.tx
or
awk -F":" '{ gsub(/apple/,"FRUIT2",$2);;gsub(/tomate|orange/,"FRUIT3",$2); print }' OFS="," file.txt |sed "s/./:/7"
fruits:banana,FRUIT2,grape,limon,FRUIT3,FRUIT3,
fruits:FRUIT2,limon,
fruits:banana,grape,limon,
fruits:FRUIT3,FRUIT3,grape,
fruits:banana,
fruits:FRUIT2,
fruits:banana,FRUIT2
But what I really need is to replace everything else (every item not explicitly listed) with another string, e.g. FRUIT4.
How to generate output like this?
fruits:FRUIT4,FRUIT2,FRUIT4,FRUIT4,FRUIT3,FRUIT3,
fruits:FRUIT2,FRUIT4,
fruits:FRUIT4,FRUIT4,FRUIT4,
fruits:FRUIT3,FRUIT3,FRUIT4,
fruits:FRUIT4,
fruits:FRUIT2,
fruits:FRUIT4,FRUIT2
This awk should work:
# Replace every comma-separated item except "banana" with FRUIT; in the
# first field only the part after the last colon is replaced.
awk 'BEGIN { FS = OFS = "," }
{
    for (n = 1; n <= NF; n++) {
        # Keep a field that is exactly "banana" or ends in ":banana".
        if ($n ~ /(^|:)banana$/)
            continue
        sub(/[^:]+$/, "FRUIT", $n)
    }
    print
}' file
Output:
fruits:banana,FRUIT,FRUIT,FRUIT,FRUIT,FRUIT,
fruits:FRUIT,FRUIT,
fruits:banana,FRUIT,FRUIT,
fruits:FRUIT,FRUIT,FRUIT,
fruits:banana,
fruits:FRUIT,
fruits:banana,FRUIT,
To make the process automated, you can do
# Replace each distinct item after the label with FRUIT1, FRUIT2, ... in
# order of first appearance, then print the old-name/new-name map.
awk -F '[:,]' -v OFS=, '
{
    for (i = 2; i <= NF; i++) {
        # Skip only truly empty fields (e.g. after a trailing comma); a
        # plain truth test would also skip items that look like zero.
        if ($i == "")
            continue
        if (!($i in seen))
            seen[$i] = "FRUIT" (++n)
        $i = seen[$i]
    }
    # Force the record to be rebuilt with OFS even when no field changed,
    # so the substitution below always hits the separator after the label.
    $1 = $1
    sub(OFS, ":")
    print
}
END {
    print "map:"
    for (key in seen)
        print key "\t" seen[key]
}
' file
fruits:FRUIT1,FRUIT2,FRUIT3,FRUIT4,FRUIT5,FRUIT6,
fruits:FRUIT2,FRUIT4,
fruits:FRUIT1,FRUIT3,FRUIT4,
fruits:FRUIT5,FRUIT6,FRUIT3,
fruits:FRUIT1,
fruits:FRUIT2,
fruits:FRUIT1,FRUIT2,
map:
orange FRUIT5
tomate FRUIT6
apple FRUIT2
limon FRUIT4
banana FRUIT1
grape FRUIT3
If you'd like some flexibility in being able to specify your mapping of old to new names on the command line:
$ cat tst.awk
# tst.awk -- rename list items according to the pairs given in the awk
# variable "map" (old1,new1,old2,new2,...); the old name "*" acts as a
# catch-all for every item without an explicit entry.
BEGIN {
FS="[:,]"; OFS=","
# map is split with FS, so its entries may be separated by "," or ":".
split(map,t)
# t[] alternates old name / new name; m[old] = new.
for (i=1; i in t; i+=2) {
m[t[i]] = t[i+1]
}
}
{
# $1 is the label before the colon; it is printed unchanged.
printf "%s:", $1
for (i=2;i<=NF;i++) {
# NOTE(review): the empty field after a trailing comma is not in m, so it
# is also replaced when "*" is mapped -- the record then ends with the
# catch-all name instead of a comma. Confirm that this is intended.
if ($i in m ) { $i = m[$i] }
else if ("*" in m) { $i = m["*"] }
# Fields are joined with OFS; the last one is followed by ORS.
printf "%s%s", $i, (i<NF?OFS:ORS)
}
}
.
$ awk -v map='apple,FRUIT2,tomate,FRUIT3,*,FRUIT4' -f tst.awk file
fruits:FRUIT4,FRUIT2,FRUIT4,FRUIT4,FRUIT4,FRUIT3,FRUIT4
fruits:FRUIT2,FRUIT4,FRUIT4
fruits:FRUIT4,FRUIT4,FRUIT4,FRUIT4
fruits:FRUIT4,FRUIT3,FRUIT4,FRUIT4
fruits:FRUIT4,FRUIT4
fruits:FRUIT2,FRUIT4
fruits:FRUIT4,FRUIT2,FRUIT4
$ awk -v map='apple,BAZINGA,*,VEGGIE' -f tst.awk file
fruits:VEGGIE,BAZINGA,VEGGIE,VEGGIE,VEGGIE,VEGGIE,VEGGIE
fruits:BAZINGA,VEGGIE,VEGGIE
fruits:VEGGIE,VEGGIE,VEGGIE,VEGGIE
fruits:VEGGIE,VEGGIE,VEGGIE,VEGGIE
fruits:VEGGIE,VEGGIE
fruits:BAZINGA,VEGGIE
fruits:VEGGIE,BAZINGA,VEGGIE
$ awk -v map='apple,FRUIT2,tomate,FRUIT3' -f tst.awk file
fruits:banana,FRUIT2,grape,limon,orange,FRUIT3,
fruits:FRUIT2,limon,
fruits:banana,grape,limon,
fruits:orange,FRUIT3,grape,
fruits:banana,
fruits:FRUIT2,
fruits:banana,FRUIT2,
I want to extract the IP addresses from a file;
each line of the file is like:
T 218.241.107.98 167.232.255.245 7 2719 1378473670 N 0 0 0 G 0 I 218.241.107.97,0.146,1 218.241.98.45,0.239,1 192.168.1.253,0.182,1 159.226.253.77,0.210,1 159.226.253.54,0.676,1 159.226.254.254,39.287,1 203.192.137.173,39.335,1 203.192.134.69,50.128,1 61.14.157.141,42.917,1 202.147.61.193,188.165,1 38.104.84.41,201.100,1 154.54.30.193,194.939,1 154.54.41.221,194.915,1 154.54.5.65,237.396,1 154.54.2.81,251.547,1 154.54.24.153,260.946,1 154.54.26.126,256.046,1 154.54.10.14,245.145,1 193.251.240.113,241.663,1 q q q 57.69.31.22,283.784,1;57.69.31.22,284.763,1
But my awk script doesn't work
#!/usr/bin/awk -f
BEGIN {
FS = "[, \t;]"
}
{
for(i = 4; i <= NF; i++)
{
if ($1 == "#")
continue
if ($i ~ /(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}/)
printf $i"\t"
if (i == NF)
printf "\n"
}
}
Can anyone figure out what's wrong?
Any help will be really appreciated, thanks in advance.
PS: there is no output but a new line character
Try this awk
# Print, tab-separated on one line per input record, every field that
# contains an IPv4 address; lines starting with "#" are skipped.
awk -F"[, \t;]+" '!/^#/ {
    # i <= NF: the last field must be examined too, or an address that ends
    # the line is silently dropped.
    for (i = 1; i <= NF; i++)
        if ($i ~ /(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}/)
            printf "%s\t", $i
    print ""
}' file
218.241.107.98 167.232.255.245 218.241.107.97 218.241.98.45 192.168.1.253 159.226.253.77 159.226.253.54 159.226.254.254 203.192.137.173 203.192.134.69 61.14.157.141 202.147.61.193 38.104.84.41 154.54.30.193 154.54.41.221 154.54.5.65 154.54.2.81 154.54.24.153 154.54.26.126 154.54.10.14 193.251.240.113 57.69.31.22 57.69.31.22
The !/^#/ pattern makes it print only lines that do not start with #.
I have an output file that I am trying to process into a formatted csv for our audit team.
I thought I had this mastered until I stumbled across bad data within the output. As such, I want to be able to handle this using awk.
MY OUTPUT FILE EXAMPLE
Enter password ==>
o=hoster
ou=people,o=hoster
ou=components,o=hoster
ou=websphere,ou=components,o=hoster
cn=joe-bloggs,ou=appserver,ou=components,o=hoster
cn=joe
sn=bloggs
cn=S01234565
uid=bloggsj
cn=john-blain,ou=appserver,ou=components,o=hoster
cn=john
uid=blainj
sn=blain
cn=andy-peters,ou=appserver,ou=components,o=hoster
cn=andy
sn=peters
uid=petersa
cn=E09876543
THE OUTPUT I WANT AFTER PROCESSING
joe,bloggs,s01234565;uid=bloggsj,cn=joe-bloggs,ou=appserver,ou=components,o=hoster
john,blain;uid=blainj;cn=john-blain,ou=appserver,ou=components,o=hoster
andy,peters,E09876543;uid=E09876543;cn=andy-peters,ou=appserver,ou=components,o=hoster
As you can see:
we always have a cn= variable that contains o=hoster
uid can have any value
we may have multiple cn= variables without o=hoster
I have achieved the following:
cat output | awk '!/^o.*/ && !/^Enter.*/{print}' | awk '{getline a; getline b; getline c; getline d; print $0,a,b,c,d}' | awk -v srch1="cn=" -v repl1="" -v srch2="sn=" -v repl2="" '{ sub(srch1,repl1,$2); sub(srch2,repl2,$3); print $4";"$2" "$3";"$1 }'
Any pointers or guidance on using awk would be greatly appreciated. Or should I give up and just use the age-old, long-winded method of a large looping script to process the file?
You may try following awk code
$ cat file
Enter password ==>
o=hoster
ou=people,o=hoster
ou=components,o=hoster
ou=websphere,ou=components,o=hoster
cn=joe-bloggs,ou=appserver,ou=components,o=hoster
cn=joe
sn=bloggs
cn=S01234565
uid=bloggsj
cn=john-blain,ou=appserver,ou=components,o=hoster
cn=john
uid=blainj
sn=blain
cn=andy-peters,ou=appserver,ou=components,o=hoster
cn=andy
sn=peters
uid=petersa
cn=E09876543
Awk Code :
awk '
# Print one finished record as "names;uid line;DN line" (OFS is ";"),
# then reset the line counter and the name list. u and last are not reset.
function out(){
print s,u,last
i=0; s=""
}
# Range: from a line starting with "cn" through the next line without fields.
# NOTE(review): this relies on an empty line after each record -- confirm
# that the real input contains such separators.
/^cn/,!NF{
++i
# The first line of the range (the full DN) is remembered in last.
last = i == 1 ? $0 : last
# Later non-empty lines that do not contain "uid" contribute the text after
# the last "=" ($NF with FS="=") to the comma-separated list s.
s = i>1 && !/uid/ && NF ? s ? s "," $NF : $NF : s
# A line containing "uid" is kept whole in u.
u = /uid/ ? $0 : u
}
# An empty line while a record is open ends that record.
i && !NF{
out()
}
# Called unconditionally at end of input to print whatever is pending.
END{
out()
}
' FS="=" OFS=";" file
Resulting
joe,bloggs,S01234565;uid=bloggsj;cn=joe-bloggs,ou=appserver,ou=components,o=hoster
john,blain;uid=blainj;cn=john-blain,ou=appserver,ou=components,o=hoster
andy,peters,E09876543;uid=petersa;cn=andy-peters,ou=appserver,ou=components,o=hoster
If you want to try this on a Solaris/SunOS system, change awk to /usr/xpg4/bin/awk , /usr/xpg6/bin/awk , or nawk
This awk script works for your sample and produces the sample output:
# Collect the cn/sn/uid attributes of each blank-line-terminated entry and
# print them as "cn1,sn,cn2...;uid line;DN line".
BEGIN {
    OFS = ";"
    delete cn[0]        # makes cn an (empty) array before length(cn) is used
}

# Text after the first "=" of an attribute line.
function value_of(line) {
    return substr(line, index(line, "=") + 1)
}

# Emit the entry gathered so far, if any, and clear the cn list.
function print_info() {
    count = length(cn)
    if (count == 0)
        return
    joined = cn[1] "," sn
    for (k = 2; k <= count; k++)
        joined = joined "," cn[k]
    print joined, uid, dn
    delete cn
}

/^cn=/ {
    if ($0 !~ /o=hoster/) {
        # Plain cn value: append it and derive a uid line from it; a uid=
        # line seen afterwards replaces that derived value.
        cn[length(cn) + 1] = value_of($0)
        uid = $0
        sub("cn", "uid", uid)
    } else {
        # The cn line that carries o=hoster is the full DN.
        dn = $0
    }
}
/^sn=/  { sn = value_of($0) }
/^uid=/ { uid = $0 }
/^$/    { print_info() }
END     { print_info() }
This should help you get started.
awk '
# Paragraph mode (RS empty): one blank-line-separated entry per record, one
# attribute line per field. Only entries whose first line starts with "cn"
# are printed, as: values..., uid line, first line (joined with OFS).
$1 ~ /^cn/ {
    for (n = 2; n <= NF; n++) {
        if ($n !~ /^uid/) {
            # Strip the "name=" prefix and append the bare value to r.
            sub(/^[^=]*=/, x, $n)
            r = (length(r) ? r OFS $n : $n)
        } else {
            # The uid line is kept whole.
            u = $n
        }
    }
    print r, u, $1
    # x is never assigned, so this resets both accumulators to empty.
    r = u = x
}' OFS=, RS= infile
I assume that there is an error in your sample output: in the 3rd record the uid should be petersa and not E09876543.
You might want look at some of the "already been there and done that" solutions to accomplish the task.
Apache Directory Studio for example, will do the LDAP query and save the file in CSV or XLS format.
-jim