Python struct.error: unpack requires a string argument of length 2 - python-2.7

I have written some data using C++ in byte format. I am now trying to read that data again using Python, but I run into an error;
Traceback (most recent call last):
File "binary-reader.py", line 61, in <module>
interaction_types.append(struct.unpack('<H',fp.read(2))[0]);
struct.error: unpack requires a string argument of length 2
I don't really understand since it looks like I am giving a string of length 2, right? Furthermore, I do the same thing at line 32
There is another question like mine but it is without an answer is targeted for Python 3.
Here is my code
import sys
import struct
import os
print "Arguments : "
print str(sys.argv)
#N = #isects
# 2 2 3*4 2 3*4*N 4N 4N 3*4N 2N 2N
#imageX,imageY,throughput,#isects,isect_positions,primitive_ids,shape_ids,spectra,interaction_types,light_ids
file_path = str(sys.argv[1]);
byte_count = 0;
line_number = 1;
fp = open(file_path, "rb");
output = open('output.txt',"w");
file_size = os.path.getsize(file_path)
print "(input) file size = " + str(file_size);
while byte_count < file_size:
print "Line number = " + str(line_number)
print "Current byte count = " + str(byte_count)
# Do stuff with byte.
x = struct.unpack('<H', fp.read(2))[0]
y = struct.unpack('<H', fp.read(2))[0]
throughputOne = struct.unpack('<f', fp.read(4))[0]
throughputTwo = struct.unpack('<f', fp.read(4))[0]
throughputThree = struct.unpack('<f', fp.read(4))[0]
nrIsects = struct.unpack('<H',fp.read(2))[0]
# print "x = " + str(x)
# print "y = " + str(y)
# print "throughputOne = " + str(throughputOne)
# print "throughputTwo = " + str(throughputTwo)
# print "throughputThree = " + str(throughputThree)
print "nrIsects = " + str(nrIsects)
isect_positions = []
for i in range(nrIsects*3):
value = struct.unpack('<f',fp.read(4))[0]
isect_positions.append(value);
primitive_ids = []
for i in range(nrIsects):
value = struct.unpack('<I',fp.read(4))[0]
primitive_ids.append(value);
shape_ids = []
for i in range(nrIsects):
shape_ids.append(struct.unpack('<I',fp.read(4))[0]);
spectra = []
for i in range(nrIsects*3):
spectra.append(struct.unpack('<f',fp.read(4))[0]);
interaction_types = []
for i in range(nrIsects):
interaction_types.append(struct.unpack('<H',fp.read(2))[0]);
light_ids = []
for i in range(nrIsects):
light_ids.append(struct.unpack('<H',fp.read(2))[0]);
output_vars = [x,y,throughputOne,throughputTwo,throughputThree,nrIsects]
line_string = ""
for i in range(len(output_vars)):
output.write(str(output_vars[i]))
line_string += str(output_vars[i])
if i is not len(output_vars) - 1:
output.write(',')
line_string += ','
print line_string
#Update counters
byte_count += 18 + 36*nrIsects
line_number+=1
# raw_input('Press any key to continue.');
# print byte
And here is a link to a input file to use. You can run the code by passing a commandline argument specifying the path of the binary file. I have also written the code in ASCII, which reads
0,0,[0.127076,0.127076,0.127076],1,{[0.144978,-0.294863,2.991749]},{3917},{3916},{[1.375603,1.375603,1.375603]},{5},{0}
https://www.dropbox.com/s/tu1anqo5k0ygtd6/writetest.bin
EDIT: The layout of my file can be found as a comment in the code

50 bytes have already been read before the fp.read(2) that raises the error. Thus, fp.read(2) returns an empty string, and struct.unpack raises an exception:
In [83]: 2+2+4+4+4+2+12+4+4+12
Out[83]: 50
x = struct.unpack('<H', fp.read(2))[0] # 2 bytes read
y = struct.unpack('<H', fp.read(2))[0] # 2 bytes
throughputOne = struct.unpack('<f', fp.read(4))[0] # 4 bytes
throughputTwo = struct.unpack('<f', fp.read(4))[0] # 4 bytes
throughputThree = struct.unpack('<f', fp.read(4))[0] # 4 bytes
nrIsects = struct.unpack('<H',fp.read(2))[0] # 2 bytes
print "nrIsects = " + str(nrIsects)
isect_positions = []
for i in range(nrIsects*3):
value = struct.unpack('<f',fp.read(4))[0] # 12 bytes
isect_positions.append(value)
primitive_ids = []
for i in range(nrIsects):
value = struct.unpack('<I',fp.read(4))[0] # 4 bytes
primitive_ids.append(value)
shape_ids = []
for i in range(nrIsects):
shape_ids.append(struct.unpack('<I',fp.read(4))[0]) # 4 bytes
spectra = []
for i in range(nrIsects*3):
spectra.append(struct.unpack('<f',fp.read(4))[0]) # 12 bytes
interaction_types = []
for i in range(nrIsects):
interaction_types.append(struct.unpack('<H', fp.read(2))[0]) # error!

Related

Python coding questions on bitwise shift and logical operations in packet sniffer

import socket
import sys
import struct
import re
#get data
def receiveData(s):
data=''
try:
data=s.recvfrom(65565)
except timeout:
data = ''
except:
print "An error occurred"
sys.exc_info()
return data[0]
#get Type Of Service - 8bits
def getTOS(data):
precedence = {0:'Routine',1:'Priority', 2:'Immediate', 3:'Flash', 4:'Flash Override', 5:'CRITIC/ECP', 6:'Internetwork control', 7:'Network Control'}
delay = {0:'Normal Delay', 1:'Low Delay'}
throughput = {0:'Normal Throughput', 1:'High Throughput'}
reliability = {0:'Normal Reliability', 1:'High Reliability'}
monetary = {0:'Normal Cost', 1:'Minimize monetary cost'}
D = data & 0x10
D >>= 4
T = data & 0x8
T >>= 3
R = data & 0x4
R >>= 2
M = data & 0x2
M >>=1
tabs = "\n\t\t\t"
TOS = precedence [data >> 5]+ tabs + delay[D] + tabs + throughput[T] + tabs + reliability[R] + tabs + monetary[M]
return TOS
def getFlags(data):
flagR = {0:'Cleared to 0'}
flagDF = {0:'Fragment if necessary', 1:'Do not Fragment'}
flagMF = {0:'This is the last Fragment', 1:'More fragments to follow this fragment'}
R = data & 0x8000
R >>= 15
DF = data & 0x4000
DF >>= 14
MF = data & 0x2000
MF >>= 13
tabs = '\n\t\t\t'
flags = flagR[R] + tabs + flagDF[DF] + tabs + flagMF[MF]
return flags
def getProtocol(protocolNr):
protocolFile = open('/root/Desktop/protocol.txt', 'r')
protocolData = protocolFile.read()
protocol = re.findall(r'\n' + str(protocolNr) + ' (?:.)+\n', protocolData)
if protocol:
protocol = protocol[0]
protocol = protocol.replace('\n', '')
protocol = protocol.replace(str(protocolNr), '')
protocol = protocol.lstrip()
return protocol
else:
return "No such protocol found"
s=socket.socket(socket.AF_INET, socket.SOCK_RAW, socket.IPPROTO_TCP)
data = receiveData(s)
unpackedData = struct.unpack('!BBHHHBBH4s4s', data[:20])
version_IHL = unpackedData[0]
version = version_IHL >> 4
IHL = version_IHL & 0xF
TOS = unpackedData[1]
totalLength = unpackedData[2]
ID = unpackedData[3]
flags = unpackedData[4]
fragmentOffset = unpackedData[4] & 0x1FFF
TTL = unpackedData[5]
protocolNr = unpackedData[6]
checksum = unpackedData[7]
sourceAddress = socket.inet_ntoa(unpackedData[8])
destinationAddress = socket.inet_ntoa(unpackedData[9])
print " An IP packed with the size %i was captured :" %totalLength
print " Raw Data :" +data
print "\nParsed Data"
print " Version:\t\t" +str(version)
print " Header Length:\t\t" + str(IHL*4) + "bytes"
print " Type of Service:\t" + getTOS(TOS)
print " ID:\t\t\t" + str(hex(ID)) + " (" + str(ID) + ")"
print " Flags:\t\t\t" + getFlags(flags)
print " Fragment Offset:" + str(fragmentOffset)
print " TTL:\t\t\t" + str(TTL)
print " Protocol:\t\t\t" +getProtocol(protocolNr)
print " Checksum:\t\t\t" +str(checksum)
print " Source:\t\t\t" +sourceAddress
print " Destination:\t\t\t" +destinationAddress
print " Payload:\n" + data[20:]
Below I have extracted some code sections from above and kindly spend some time to answer the following.
(1)
D = data & 0x10
D >>= 4
T = data & 0x8
T >>= 3
R = data & 0x4
R >>= 2
M = data & 0x2
M >>=1
tabs = "\n\t\t\t"
TOS = precedence [data >> 5]+ ......
in the above section could you please explain on what basis D=data & 0x10 is used and right shifted 4 times and also why is precedence shifted 5 times to the right. My question is more like why 0x10 was specifically used to AND with data and then right shifted 4 times to get the Delay bit. I am seeking clarification on all other bits like T, R, M in the above section and the flag bits in the below section too.
(2)
R = data & 0x8000
R >>= 15
DF = data & 0x4000
DF >>= 14
MF = data & 0x2000
MF >>= 13
In the above segment please explain on what basis is used R = data & 0x8000 and shift it 15 times to the right
(3)
fragmentOffset = unpackedData[4] & 0x1FFF
In the above line of code why specifically 0x1FFF was used to AND with unpackedData[4]
Credits to the source code goes to Ana Balica
https://www.youtube.com/watch?v=ghokDuCDcMY
import socket
import sys
import struct
import re
def receiveData(s):
data =''
try:
data , addr =s.recvfrom(655365)
except timout:
data =''
except:
print "An error hapend"
sys.exc_info()
return data
#get Type Of Service - 8bits
def getTOS(data):
precedence = {0:'Routine',1:'Priority', 2:'Immediate', 3:'Flash', 4:'Flash Override', 5:'CRITIC/ECP', 6:'Internetwork control', 7:'Network Control'}
delay = {0:'Normal Delay', 1:'Low Delay'}
throughput = {0:'Normal Throughput', 1:'High Throughput'}
reliability = {0:'Normal Reliability', 1:'High Reliability'}
monetary = {0:'Normal Cost', 1:'Minimize monetary cost'}
D = data & 0x10
D >>= 4
T = data & 0x8
T >>= 3
R = data & 0x4
R >>= 2
M = data & 0x2
M >>=1
tabs = "\n\t\t\t"
TOS = precedence [data >> 5]+ tabs + delay[D] + tabs + throughput[T] + tabs + reliability[R] + tabs + monetary[M]
return TOS
def getFlags(data):
flagR = {0:'Cleared to 0'}
flagDF = {0:'Fragment if necessary', 1:'Do not Fragment'}
flagMF = {0:'This is the last Fragment', 1:'More fragments to follow this fragment'}
R = data & 0x8000
R >>= 15
DF = data & 0x4000
DF >>= 14
MF = data & 0x2000
MF >>= 13
tabs = '\n\t\t\t'
flags = flagR[R] + tabs + flagDF[DF] + tabs + flagMF[MF]
return flags
def getProtocol(protocolNr):
protocolFile = open('protocol.txt', 'r')
protocolData = protocolFile.read()
protocol = re.findall(r'\n' + str(protocolNr) + ' (?:.)+\n', protocolData)
if protocol:
protocol = protocol[0]
protocol = protocol.replace('\n', '')
protocol = protocol.replace(str(protocolNr), '')
protocol = protocol.lstrip()
return protocol
else:
return "No such protocol found"
def showdata(unpackedData):
version_IHL = unpackedData[0]
version = version_IHL >> 4
IHL = version_IHL & 0xF
TOS = unpackedData[1]
totalLength = unpackedData[2]
ID = unpackedData[3]
flags = unpackedData[4]
fragmentOffset = unpackedData[4] & 0x1FFF
TTL = unpackedData[5]
protocolNr = unpackedData[6]
checksum = unpackedData[7]
sourceAddress = socket.inet_ntoa(unpackedData[8])
destinationAddress = socket.inet_ntoa(unpackedData[9])
print " An IP packed with the size %i was captured :" %totalLength
print " Raw Data :" +data
print "\nParsed Data"
print " Version:\t\t" +str(version)
print " Header Length:\t\t" + str(IHL*4) + "bytes"
print " Type of Service:\t" + getTOS(TOS)
print " ID:\t\t\t" + str(hex(ID)) + " (" + str(ID) + ")"
print " Flags:\t\t\t" + getFlags(flags)
print " Fragment Offset:" + str(fragmentOffset)
print " TTL:\t\t\t" + str(TTL)
print " Protocol:\t\t\t" +getProtocol(protocolNr)
print " Checksum:\t\t\t" +str(checksum)
print " Source:\t\t\t" +sourceAddress
print " Destination:\t\t\t" +destinationAddress
print " Payload:\n" + data[20:]
# the public network interface
HOST = socket.gethostbyname(socket.gethostname())
# create a raw socket and bind it to the public interface
s = socket.socket(socket.AF_INET, socket.SOCK_RAW, socket.IPPROTO_IP)
s.bind((HOST, 0))
# Include IP headers
s.setsockopt(socket.IPPROTO_IP, socket.IP_HDRINCL, 1)
# receive all packages
s.ioctl(socket.SIO_RCVALL, socket.RCVALL_ON)
for i in range(100):
# receive a package
#print s.recvfrom(65565)
data=receiveData(s)
datastr = str(data)
if len(datastr) >19 :
unpackedData = struct.unpack('!BBHHHBBH4s4s',datastr[:20])
#print unpackedData
showdata(unpackedData)
else:
print data
# disabled promiscuous mode
s.ioctl(socket.SIO_RCVALL, socket.RCVALL_OFF)
import socket
import sys
import struct
import re
def receiveData(s):
data =''
try:
data , addr =s.recvfrom(655365)
except timout:
data =''
except:
print "An error hapend"
sys.exc_info()
return data
#get Type Of Service - 8bits
def getTOS(data):
precedence = {0:'Routine',1:'Priority', 2:'Immediate', 3:'Flash', 4:'Flash Override', 5:'CRITIC/ECP', 6:'Internetwork control', 7:'Network Control'}
delay = {0:'Normal Delay', 1:'Low Delay'}
throughput = {0:'Normal Throughput', 1:'High Throughput'}
reliability = {0:'Normal Reliability', 1:'High Reliability'}
monetary = {0:'Normal Cost', 1:'Minimize monetary cost'}
D = data & 0x10
D >>= 4
T = data & 0x8
T >>= 3
R = data & 0x4
R >>= 2
M = data & 0x2
M >>=1
tabs = "\n\t\t\t"
TOS = precedence [data >> 5]+ tabs + delay[D] + tabs + throughput[T] + tabs + reliability[R] + tabs + monetary[M]
return TOS
def getFlags(data):
flagR = {0:'Cleared to 0'}
flagDF = {0:'Fragment if necessary', 1:'Do not Fragment'}
flagMF = {0:'This is the last Fragment', 1:'More fragments to follow this fragment'}
R = data & 0x8000
R >>= 15
DF = data & 0x4000
DF >>= 14
MF = data & 0x2000
MF >>= 13
tabs = '\n\t\t\t'
flags = flagR[R] + tabs + flagDF[DF] + tabs + flagMF[MF]
return flags
def getProtocol(protocolNr):
protocolFile = open('protocol.txt', 'r')
protocolData = protocolFile.read()
protocol = re.findall(r'\n' + str(protocolNr) + ' (?:.)+\n', protocolData)
if protocol:
protocol = protocol[0]
protocol = protocol.replace('\n', '')
protocol = protocol.replace(str(protocolNr), '')
protocol = protocol.lstrip()
return protocol
else:
return "No such protocol found"
def showdata(unpackedData):
version_IHL = unpackedData[0]
version = version_IHL >> 4
IHL = version_IHL & 0xF
TOS = unpackedData[1]
totalLength = unpackedData[2]
ID = unpackedData[3]
flags = unpackedData[4]
fragmentOffset = unpackedData[4] & 0x1FFF
TTL = unpackedData[5]
protocolNr = unpackedData[6]
checksum = unpackedData[7]
sourceAddress = socket.inet_ntoa(unpackedData[8])
destinationAddress = socket.inet_ntoa(unpackedData[9])
print " An IP packed with the size %i was captured :" %totalLength
print " Raw Data :" +data
print "\nParsed Data"
print " Version:\t\t" +str(version)
print " Header Length:\t\t" + str(IHL*4) + "bytes"
print " Type of Service:\t" + getTOS(TOS)
print " ID:\t\t\t" + str(hex(ID)) + " (" + str(ID) + ")"
print " Flags:\t\t\t" + getFlags(flags)
print " Fragment Offset:" + str(fragmentOffset)
print " TTL:\t\t\t" + str(TTL)
print " Protocol:\t\t\t" +getProtocol(protocolNr)
print " Checksum:\t\t\t" +str(checksum)
print " Source:\t\t\t" +sourceAddress
print " Destination:\t\t\t" +destinationAddress
print " Payload:\n" + data[20:]
# the public network interface
HOST = socket.gethostbyname(socket.gethostname())
# create a raw socket and bind it to the public interface
s = socket.socket(socket.AF_INET, socket.SOCK_RAW, socket.IPPROTO_IP)
s.bind((HOST, 0))
# Include IP headers
s.setsockopt(socket.IPPROTO_IP, socket.IP_HDRINCL, 1)
# receive all packages
s.ioctl(socket.SIO_RCVALL, socket.RCVALL_ON)
for i in range(100):
# receive a package
#print s.recvfrom(65565)
data=receiveData(s)
datastr = str(data)
if len(datastr) >19 :
unpackedData = struct.unpack('!BBHHHBBH4s4s',datastr[:20])
#print unpackedData
showdata(unpackedData)
else:
print data
# disabled promiscuous mode
s.ioctl(socket.SIO_RCVALL, socket.RCVALL_OFF)

Python Value error

I'm getting an error when trying to split a piped output into python.
Error is need more than 3 values to unpack although I'm using 8 values
import subprocess, sys
from datetime import datetime
from time import sleep as sleep
multimon_ng = subprocess.Popen("multimon-ng -a FLEX -t wav flex.wav",
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
shell=True)
while True:
nextline = multimon_ng.stdout.readline()
flex, mdate, mtime, bitrate, other, capcode, o2, msg = nextline.split(" ", 7) # error here
if nextline is " ":
print "napping"
else:
print mdate + " " + mtime + " " + capcode + " " + msg
multimon_ng.poll()
sys.stdout.flush()
any help would be great
3 in error message indicated length of iterable in right hand side argument.
Minimal wrong examples:
a, = [] # ValueError: need more than 0 values to unpack
a, b = [1] # ValueError: need more than 1 value to unpack
a, b, c = [1, 2] # ValueError: need more than 2 values to unpack
# etc ...
Minimal correct examples:
a, = [1]
a, b = [1, 2]
a, b, c = [1, 2, 3]
Smallest change to fix and expose issue would be to wrap unpacking iterable in try-except block.
while True:
nextline = multimon_ng.stdout.readline()
if not nextline:
print "napping"
else:
try:
flex, mdate, mtime, bitrate, other, capcode, o2, msg = nextline.split(" ", 7)
except ValueError:
print "invalid line", nextline
else:
print mdate + " " + mtime + " " + capcode + " " + msg
multimon_ng.poll()
sys.stdout.flush()
As you can see, I also moved check for empty line before unpacking. If line is empty, unpacking would also fail.

Python, WindowsError: [Error 32], file being used by another process

I'm writing a small programm that's supposed to loop through a folder of msg files (i.e. MS Outlook emails) and prefix a short string to their file names. I keep running into WindowsError: [Error 32] (The process cannot access the file because it is being used by another process) on line 33 (os.rename(filenameOLD, filenameNEW)). Any idea why?
import win32com.client
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
import os
path = 'C:\Users\MyName\Desktop\SomeFile\\'
msgFiles = os.listdir(path) # returns list
count = 0
for msgFile in msgFiles:
count = count + 1
msg = outlook.OpenSharedItem(path + msgFile)
date = str(msg.SentOn)
#Extract YYYY, MM, DD, HHMMSS from .msg sent date
YYYY = str(20)+date.split("/")[2][:2]
MM = date.split("/")[1]
DD = date.split("/")[0]
HHMMSS = "".join(date.split()[1].split(":")) + "Hrs"
#Reformat date to valid file name
filenamePrefix = YYYY + DD + MM + " " + HHMMSS + " "
#generate new file name
filenameOLD = path + msgFile
filenameNEW = path + filenamePrefix + msgFile
#rename file
os.rename(filenameOLD, filenameNEW)
print count, "files renamed"
You've opened the message without closing it. Instead do:
# ...
for msgFile in msgFiles:
count = count + 1
msg = outlook.OpenSharedItem(path + msgFile)
date = str(msg.SentOn)
del msg
# ...

chunk of data into fixed lengths chunks and then add a space and again add them all as a string

I have got hex values as a85b080040010000. I want it to be as a8 5b 08 00 40 01 00 00. I have done it by using below code. But I have to work with very large data. So I want computed time to be very low.
import binascii
import re
filename = 'calc.exe'
with open(filename, 'rb') as f:
content = f.readline()
text = binascii.hexlify(content)
text1 = binascii.unhexlify(text)
length1 = 32
length2 = 16
list = re.findall('.{%d}' % length1, text)
list1 = re.findall('.{%d}' % length2, text1)
d = []
for i in range (0, len(list), 1):
temp = ""
l = re.findall('.{%d}' % length2, list[i])
s = l[0]
t = iter(s)
temp += str(' '.join(a+b for a,b in zip(t, t)))
temp += " "
s = l[1]
t = iter(s)
temp += str(' '.join(a+b for a,b in zip(t, t)))
temp += " | " + list1[i]
print temp
You can simply do
x="a85b080040010000"
print re.sub(r"(.{2})",r"\1 ",x)
or
x="a85b080040010000"
print " ".join([i for i in re.split(r"(.{2})",x) if i])

string comparision inside a file in python

I need to write a script in python for finding the output of line 7 which is "y" if my first line is true (6c00ff00 = 1). I am able capture all these values in a file (say "xyz") but I am unable to compare in the file.
>>> vi xyz
6c00ff00 = 1
6c01ff00 = BGSV 1
6c02ff00 = 08IS01191025
6c03ff00 = 192.11.13.5
7005ff00 = g430
e808ff00 = 249
6c0aff00 = y
7002ff00 = 35 .4 .0 /
7001ff00 = 0
7b00ff00 =
7100ff00 = 1
7003ff00 = 192.11.13.150
with open('xyz') as infile:
line1 = infile.readline().strip()
for _ in xrange(6):
line7 = infile.readline()
line7 = line7.strip()
print "The first line is '%s'" %line1
print "The seventh line is '%s'" %line7
if line1.split('=')[1].strip() == '1':
if line7.split('=')[1].strip() == 'y':
print "Correct"
else:
print "Wrong"