Python coding questions on bitwise shift and logical operations in packet sniffer - python-2.7

import socket
import sys
import struct
import re
#get data
def receiveData(s):
data=''
try:
data=s.recvfrom(65565)
except timeout:
data = ''
except:
print "An error occurred"
sys.exc_info()
return data[0]
#get Type Of Service - 8bits
def getTOS(data):
precedence = {0:'Routine',1:'Priority', 2:'Immediate', 3:'Flash', 4:'Flash Override', 5:'CRITIC/ECP', 6:'Internetwork control', 7:'Network Control'}
delay = {0:'Normal Delay', 1:'Low Delay'}
throughput = {0:'Normal Throughput', 1:'High Throughput'}
reliability = {0:'Normal Reliability', 1:'High Reliability'}
monetary = {0:'Normal Cost', 1:'Minimize monetary cost'}
D = data & 0x10
D >>= 4
T = data & 0x8
T >>= 3
R = data & 0x4
R >>= 2
M = data & 0x2
M >>=1
tabs = "\n\t\t\t"
TOS = precedence [data >> 5]+ tabs + delay[D] + tabs + throughput[T] + tabs + reliability[R] + tabs + monetary[M]
return TOS
def getFlags(data):
flagR = {0:'Cleared to 0'}
flagDF = {0:'Fragment if necessary', 1:'Do not Fragment'}
flagMF = {0:'This is the last Fragment', 1:'More fragments to follow this fragment'}
R = data & 0x8000
R >>= 15
DF = data & 0x4000
DF >>= 14
MF = data & 0x2000
MF >>= 13
tabs = '\n\t\t\t'
flags = flagR[R] + tabs + flagDF[DF] + tabs + flagMF[MF]
return flags
def getProtocol(protocolNr):
protocolFile = open('/root/Desktop/protocol.txt', 'r')
protocolData = protocolFile.read()
protocol = re.findall(r'\n' + str(protocolNr) + ' (?:.)+\n', protocolData)
if protocol:
protocol = protocol[0]
protocol = protocol.replace('\n', '')
protocol = protocol.replace(str(protocolNr), '')
protocol = protocol.lstrip()
return protocol
else:
return "No such protocol found"
s=socket.socket(socket.AF_INET, socket.SOCK_RAW, socket.IPPROTO_TCP)
data = receiveData(s)
unpackedData = struct.unpack('!BBHHHBBH4s4s', data[:20])
version_IHL = unpackedData[0]
version = version_IHL >> 4
IHL = version_IHL & 0xF
TOS = unpackedData[1]
totalLength = unpackedData[2]
ID = unpackedData[3]
flags = unpackedData[4]
fragmentOffset = unpackedData[4] & 0x1FFF
TTL = unpackedData[5]
protocolNr = unpackedData[6]
checksum = unpackedData[7]
sourceAddress = socket.inet_ntoa(unpackedData[8])
destinationAddress = socket.inet_ntoa(unpackedData[9])
print " An IP packed with the size %i was captured :" %totalLength
print " Raw Data :" +data
print "\nParsed Data"
print " Version:\t\t" +str(version)
print " Header Length:\t\t" + str(IHL*4) + "bytes"
print " Type of Service:\t" + getTOS(TOS)
print " ID:\t\t\t" + str(hex(ID)) + " (" + str(ID) + ")"
print " Flags:\t\t\t" + getFlags(flags)
print " Fragment Offset:" + str(fragmentOffset)
print " TTL:\t\t\t" + str(TTL)
print " Protocol:\t\t\t" +getProtocol(protocolNr)
print " Checksum:\t\t\t" +str(checksum)
print " Source:\t\t\t" +sourceAddress
print " Destination:\t\t\t" +destinationAddress
print " Payload:\n" + data[20:]
Below I have extracted some code sections from above and kindly spend some time to answer the following.
(1)
D = data & 0x10
D >>= 4
T = data & 0x8
T >>= 3
R = data & 0x4
R >>= 2
M = data & 0x2
M >>=1
tabs = "\n\t\t\t"
TOS = precedence [data >> 5]+ ......
in the above section could you please explain on what basis D=data & 0x10 is used and right shifted 4 times and also why is precedence shifted 5 times to the right. My question is more like why 0x10 was specifically used to AND with data and then right shifted 4 times to get the Delay bit. I am seeking clarification on all other bits like T, R, M in the above section and the flag bits in the below section too.
(2)
R = data & 0x8000
R >>= 15
DF = data & 0x4000
DF >>= 14
MF = data & 0x2000
MF >>= 13
In the above segment please explain on what basis is used R = data & 0x8000 and shift it 15 times to the right
(3)
fragmentOffset = unpackedData[4] & 0x1FFF
In the above line of code why specifically 0x1FFF was used to AND with unpackedData[4]
Credits to the source code goes to Ana Balica
https://www.youtube.com/watch?v=ghokDuCDcMY

import socket
import sys
import struct
import re
def receiveData(s):
data =''
try:
data , addr =s.recvfrom(655365)
except timout:
data =''
except:
print "An error hapend"
sys.exc_info()
return data
#get Type Of Service - 8bits
def getTOS(data):
precedence = {0:'Routine',1:'Priority', 2:'Immediate', 3:'Flash', 4:'Flash Override', 5:'CRITIC/ECP', 6:'Internetwork control', 7:'Network Control'}
delay = {0:'Normal Delay', 1:'Low Delay'}
throughput = {0:'Normal Throughput', 1:'High Throughput'}
reliability = {0:'Normal Reliability', 1:'High Reliability'}
monetary = {0:'Normal Cost', 1:'Minimize monetary cost'}
D = data & 0x10
D >>= 4
T = data & 0x8
T >>= 3
R = data & 0x4
R >>= 2
M = data & 0x2
M >>=1
tabs = "\n\t\t\t"
TOS = precedence [data >> 5]+ tabs + delay[D] + tabs + throughput[T] + tabs + reliability[R] + tabs + monetary[M]
return TOS
def getFlags(data):
flagR = {0:'Cleared to 0'}
flagDF = {0:'Fragment if necessary', 1:'Do not Fragment'}
flagMF = {0:'This is the last Fragment', 1:'More fragments to follow this fragment'}
R = data & 0x8000
R >>= 15
DF = data & 0x4000
DF >>= 14
MF = data & 0x2000
MF >>= 13
tabs = '\n\t\t\t'
flags = flagR[R] + tabs + flagDF[DF] + tabs + flagMF[MF]
return flags
def getProtocol(protocolNr):
protocolFile = open('protocol.txt', 'r')
protocolData = protocolFile.read()
protocol = re.findall(r'\n' + str(protocolNr) + ' (?:.)+\n', protocolData)
if protocol:
protocol = protocol[0]
protocol = protocol.replace('\n', '')
protocol = protocol.replace(str(protocolNr), '')
protocol = protocol.lstrip()
return protocol
else:
return "No such protocol found"
def showdata(unpackedData):
version_IHL = unpackedData[0]
version = version_IHL >> 4
IHL = version_IHL & 0xF
TOS = unpackedData[1]
totalLength = unpackedData[2]
ID = unpackedData[3]
flags = unpackedData[4]
fragmentOffset = unpackedData[4] & 0x1FFF
TTL = unpackedData[5]
protocolNr = unpackedData[6]
checksum = unpackedData[7]
sourceAddress = socket.inet_ntoa(unpackedData[8])
destinationAddress = socket.inet_ntoa(unpackedData[9])
print " An IP packed with the size %i was captured :" %totalLength
print " Raw Data :" +data
print "\nParsed Data"
print " Version:\t\t" +str(version)
print " Header Length:\t\t" + str(IHL*4) + "bytes"
print " Type of Service:\t" + getTOS(TOS)
print " ID:\t\t\t" + str(hex(ID)) + " (" + str(ID) + ")"
print " Flags:\t\t\t" + getFlags(flags)
print " Fragment Offset:" + str(fragmentOffset)
print " TTL:\t\t\t" + str(TTL)
print " Protocol:\t\t\t" +getProtocol(protocolNr)
print " Checksum:\t\t\t" +str(checksum)
print " Source:\t\t\t" +sourceAddress
print " Destination:\t\t\t" +destinationAddress
print " Payload:\n" + data[20:]
# the public network interface
HOST = socket.gethostbyname(socket.gethostname())
# create a raw socket and bind it to the public interface
s = socket.socket(socket.AF_INET, socket.SOCK_RAW, socket.IPPROTO_IP)
s.bind((HOST, 0))
# Include IP headers
s.setsockopt(socket.IPPROTO_IP, socket.IP_HDRINCL, 1)
# receive all packages
s.ioctl(socket.SIO_RCVALL, socket.RCVALL_ON)
for i in range(100):
# receive a package
#print s.recvfrom(65565)
data=receiveData(s)
datastr = str(data)
if len(datastr) >19 :
unpackedData = struct.unpack('!BBHHHBBH4s4s',datastr[:20])
#print unpackedData
showdata(unpackedData)
else:
print data
# disabled promiscuous mode
s.ioctl(socket.SIO_RCVALL, socket.RCVALL_OFF)
import socket
import sys
import struct
import re
def receiveData(s):
data =''
try:
data , addr =s.recvfrom(655365)
except timout:
data =''
except:
print "An error hapend"
sys.exc_info()
return data
#get Type Of Service - 8bits
def getTOS(data):
precedence = {0:'Routine',1:'Priority', 2:'Immediate', 3:'Flash', 4:'Flash Override', 5:'CRITIC/ECP', 6:'Internetwork control', 7:'Network Control'}
delay = {0:'Normal Delay', 1:'Low Delay'}
throughput = {0:'Normal Throughput', 1:'High Throughput'}
reliability = {0:'Normal Reliability', 1:'High Reliability'}
monetary = {0:'Normal Cost', 1:'Minimize monetary cost'}
D = data & 0x10
D >>= 4
T = data & 0x8
T >>= 3
R = data & 0x4
R >>= 2
M = data & 0x2
M >>=1
tabs = "\n\t\t\t"
TOS = precedence [data >> 5]+ tabs + delay[D] + tabs + throughput[T] + tabs + reliability[R] + tabs + monetary[M]
return TOS
def getFlags(data):
flagR = {0:'Cleared to 0'}
flagDF = {0:'Fragment if necessary', 1:'Do not Fragment'}
flagMF = {0:'This is the last Fragment', 1:'More fragments to follow this fragment'}
R = data & 0x8000
R >>= 15
DF = data & 0x4000
DF >>= 14
MF = data & 0x2000
MF >>= 13
tabs = '\n\t\t\t'
flags = flagR[R] + tabs + flagDF[DF] + tabs + flagMF[MF]
return flags
def getProtocol(protocolNr):
protocolFile = open('protocol.txt', 'r')
protocolData = protocolFile.read()
protocol = re.findall(r'\n' + str(protocolNr) + ' (?:.)+\n', protocolData)
if protocol:
protocol = protocol[0]
protocol = protocol.replace('\n', '')
protocol = protocol.replace(str(protocolNr), '')
protocol = protocol.lstrip()
return protocol
else:
return "No such protocol found"
def showdata(unpackedData):
version_IHL = unpackedData[0]
version = version_IHL >> 4
IHL = version_IHL & 0xF
TOS = unpackedData[1]
totalLength = unpackedData[2]
ID = unpackedData[3]
flags = unpackedData[4]
fragmentOffset = unpackedData[4] & 0x1FFF
TTL = unpackedData[5]
protocolNr = unpackedData[6]
checksum = unpackedData[7]
sourceAddress = socket.inet_ntoa(unpackedData[8])
destinationAddress = socket.inet_ntoa(unpackedData[9])
print " An IP packed with the size %i was captured :" %totalLength
print " Raw Data :" +data
print "\nParsed Data"
print " Version:\t\t" +str(version)
print " Header Length:\t\t" + str(IHL*4) + "bytes"
print " Type of Service:\t" + getTOS(TOS)
print " ID:\t\t\t" + str(hex(ID)) + " (" + str(ID) + ")"
print " Flags:\t\t\t" + getFlags(flags)
print " Fragment Offset:" + str(fragmentOffset)
print " TTL:\t\t\t" + str(TTL)
print " Protocol:\t\t\t" +getProtocol(protocolNr)
print " Checksum:\t\t\t" +str(checksum)
print " Source:\t\t\t" +sourceAddress
print " Destination:\t\t\t" +destinationAddress
print " Payload:\n" + data[20:]
# the public network interface
HOST = socket.gethostbyname(socket.gethostname())
# create a raw socket and bind it to the public interface
s = socket.socket(socket.AF_INET, socket.SOCK_RAW, socket.IPPROTO_IP)
s.bind((HOST, 0))
# Include IP headers
s.setsockopt(socket.IPPROTO_IP, socket.IP_HDRINCL, 1)
# receive all packages
s.ioctl(socket.SIO_RCVALL, socket.RCVALL_ON)
for i in range(100):
# receive a package
#print s.recvfrom(65565)
data=receiveData(s)
datastr = str(data)
if len(datastr) >19 :
unpackedData = struct.unpack('!BBHHHBBH4s4s',datastr[:20])
#print unpackedData
showdata(unpackedData)
else:
print data
# disabled promiscuous mode
s.ioctl(socket.SIO_RCVALL, socket.RCVALL_OFF)

Related

Getting the connected components in networkx in the order of which edges are added

Each sentence in doc2 is displayed as a graph. Now the edges were added in the form s-o-v from the respective subject_list, object_list and verb_list.
I have tried to display the connected components. But the order in which it displays the sentence is not in the order in which the edges were added.
# This Python file uses the following encoding: utf-8
%matplotlib notebook
import codecs
import itertools
import re
import networkx as nx
import matplotlib.pyplot as pl
from matplotlib.font_manager import FontProperties
prop = FontProperties()
graph = nx.Graph()
labels = {}
each_one = []
list_of_sentences = []
subject_list = []
object_list = []
verb_list = []
newDict = {}
with codecs.open('doc2.txt', encoding='utf-8') as f:
text = f.read()
sentences = re.split(r' *[\.\?!][\'"\)\]]* *', text)
for stuff in sentences:
list_of_sentences.append(stuff)
new_list_of_sentences = []
for d in list_of_sentences:
s = d.replace(u'वतीन', '').replace(u'आनी', '').replace(u'हिणें', '').replace(',', '')
new_list_of_sentences.append(s)
f = open('doc2_tag.txt', 'r')
for line in f:
k, v = line.strip().split('/')
newDict[k.strip().decode('utf-8')] = v.strip()
f.close()
for sentence in new_list_of_sentences:
a = b = c = ""
sentence_word_list = sentence.split()
for word in sentence_word_list:
if newDict[word] == 'N-NNP':
a += word + " "
if newDict[word] == 'N-NN':
b += word + " "
if newDict[word] == 'JJ':
b += word + " "
if newDict[word] == 'QT-QTC':
b += word + " "
if newDict[word] == 'RB':
b += word + " "
if newDict[word] == 'N-NST':
b += word + " "
if newDict[word] == 'PR-PRP':
b += word + " "
if newDict[word] == 'PSP':
b += word + " "
if newDict[word] == 'CC-CCD':
b += word + " "
if newDict[word] == 'V-VM-VF':
c += word + " "
subject_list.append(a)
object_list.append(b)
verb_list.append(c)
konkani_dict = {u'सनरायझर्साक': u'सनरायझर्स', u'सनरायझर्सान': u'सनरायझर्स', u'सनरायझर्साच्या': u'सनरायझर्स'}
for idx, sub in enumerate(subject_list):
temp_list = sub.split(" ")
for i in temp_list:
if i in konkani_dict:
new_sub = sub.replace(i, konkani_dict[i])
subject_list[idx] = new_sub
for s in subject_list:
if s is not "":
graph.add_node(s)
labels[s] = s
for o in object_list:
if o is not "":
graph.add_node(o)
labels[b] = b
for v in verb_list:
if v is not "":
graph.add_node(v)
labels[v] = v
for (s, o, v) in zip(subject_list, object_list, verb_list):
if s and o is not "":
graph.add_edge(s, o)
if o and v is not "":
graph.add_edge(o, v)
pos=nx.spring_layout(graph,k=0.15,iterations=20)
nx.draw(graph, with_labels = True, font_family = "Nirmala UI", node_size = 40, font_size = 9 ,node_color = "darkblue")
pl.show()
sentences=[]
for component in nx.connected_components(graph):
g=(
filter(
lambda x: x[0] in component and x[1] in component,
graph.edges
)
)
p=[]
p= ''.join(item for tuple_ in g for item in tuple_)
print p
sentences.append(p)
print sentences
output=[]
for i in sentences:
inputWords = i.split(" ")
inputWords=inputWords[-1::-1]
output = ' '.join(inputWords)
print output
Expected output is spmething like this:
शिखर धवनान सगळ्यांत चड ४५ धांवड्यो केल्यो ,
सनरायझर्स दीपर हुडा जैतांत पर्जळ्ळो
This is the output I get: sentences displayed
networkx doesn't store the order of created nodes/edges because this information is mostly useless. If you want to have this information, you should add it manually. In your program, for example (for edges):
edge_index = 0
for (s, o, v) in zip(subject_list, object_list, verb_list):
if s and o is not "":
graph.add_edge(s, o, index=edge_index)
edge_index += 1
if o and v is not "":
graph.add_edge(o, v, index=edge_index)
edge_index += 1
Then you should print sorted edges:
sorted( # Sorted list of edges
list(g.edges.data('index')), # With 'index' data
key=lambda x: x[2] # Sorted by 'index' data
)```

Python 2 to python 3

Having some trouble translating these two methods from python2 to python3.
Python2:
def send(self, data):
if self.debug:
print 'Send:',
print ':'.join('%02x' % ord(c) for c in data)
l0 = len(data) & 0xFF
l1 = (len(data) >> 8) & 0xFF
d = chr(l0) + chr(l1) + data
self.sock.send(d)
def recv(self):
data = self.sock.recv(2)
l0 = ord(data[0])
l1 = ord(data[1])
plen = l0 + (l1 << 8)
data = self.sock.recv(plen)
if self.debug:
print 'Recv:',
print ':'.join('%02x' % ord(c) for c in data)
return data
Python 3 This what I got so far :
def send(self, data):
if self.debug:
print('Send:', end=' ')
print(':'.join('%02x' % ord(c) for c in data))
l0 = len(data.encode('utf-8')) & 0xFF
l1 = (len(data.encode('utf-8')) >> 8) & 0xFF
d = chr(l0) + chr(l1) + data
self.sock.send(d)
def recv(self):
data = self.sock.recv(2)
l0 = ord(data[0])
l1 = ord(data[1])
plen = l0 + (l1 << 8)
data = self.sock.recv(plen)
if self.debug:
print('Recv:', end=' ')
print(':'.join('%02x' % ord(c) for c in data))
return data
I keep getting this error:
TypeError: ord() expected string of length 1, but int found
Any help would be appreciated. Thank you
There are two main types of issues you need to address to make your code work in Python 3.
The biggest issue is that strings and bytes are no longer represented by the same types in Python 3. The str type is for Unicode strings, the bytes type is for binary data. Your data argument looks like it should probably be bytes (since you're sending it directly out on a socket). If you did want to support Unicode strings, you'd need to encode() them with some encoding (e.g. "UTF-8") before sending them over the socket.
Anyway, assuming data is a bytes instance, you'll need to make a few small changes to the code to address how a few APIs work different for str and bytes:
Iterating on a bytes yields the individual byte values, but as integers, not as one-character bytestrings. This basically means you don't need the ord in the print calls, nor in the first parts of recv.
The chr function creates a str, not a bytes instance, and you can't concatenate the different types together. Creating a bytes instance from an integer is a bit awkward (bytes(some_number) doesn't do what you want), but it is possible.
The other issue you have is much simpler to understand. In Python 3, print is a function rather than a statement, so you need parentheses around its arguments. It also uses different syntax to suppress line endings.
Here's a fully fixed version of your code:
def send(self, data):
if self.debug:
print('Send:', end='') # new way to suppress the newline
print(':'.join('%02x' % c for c in data)) # add parentheses, no need for ord
l0 = len(data) & 0xFF
l1 = (len(data) >> 8) & 0xFF
d = bytes([l0, l1]) + data # build prefix bytestring differently
self.sock.send(d)
def recv(self):
l0, l1 = self.sock.recv(2) # no need for ord, unpack directly as ints
plen = l0 + (l1 << 8)
data = self.sock.recv(plen)
if self.debug:
print('Recv:', end='')
print(':'.join('%02x' % c for c in data))
return data
Note that the struct module may offer a more elegant way of encoding and decoding the length of your data to a bytestring. For instance, struct.pack("<H", len(data)) could replace several lines of the code in send (you wouldn't need l0 and l1).
insteed of print a use print (a) like:
python 2.x:
def send(self, data):
if self.debug:
print 'Send:',
print ':'.join('%02x' % ord(c) for c in data)
l0 = len(data) & 0xFF
l1 = (len(data) >> 8) & 0xFF
d = chr(l0) + chr(l1) + data
self.sock.send(d)
def recv(self):
data = self.sock.recv(2)
l0 = ord(data[0])
l1 = ord(data[1])
plen = l0 + (l1 << 8)
data = self.sock.recv(plen)
if self.debug:
print 'Recv:',
print ':'.join('%02x' % ord(c) for c in data)
return data
python 3.x:
def send(self, data):
if self.debug:
print ('Send:'),
print (':'.join('%02x' % ord(c) for c in data))
l0 = len(data) & 0xFF
l1 = (len(data) >> 8) & 0xFF
d = chr(l0) + chr(l1) + data
self.sock.send(d)
def recv(self):
data = self.sock.recv(2)
l0 = ord(data[0])
l1 = ord(data[1])
plen = l0 + (l1 << 8)
data = self.sock.recv(plen)
if self.debug:
print ('Recv:'),
print (':'.join('%02x' % ord(c) for c in data))
return data

Python: Trying to speed up a program that is running very slow

So, this program parses an e-mail address and a plain-text password from a text file. Then, it runs them through a few encryption routines and appends the encrypted text onto the end of the e-amil address:password entry in a new file.
import io
from Crypto.Cipher import AES
import base64
import struct
def str_to_a32(b):
if len(b) % 4:
b += '\0' * (4 - len(b) % 4)
return struct.unpack('>%dI' % (len(b) / 4), b)
def a32_to_str(a):
return struct.pack('>%dI' % len(a), *a)
def aes_cbc_encrypt(data, key):
encryptor = AES.new(key, AES.MODE_CBC, '\0' * 16)
return encryptor.encrypt(data)
def aes_cbc_encrypt_a32(data, key):
return str_to_a32(aes_cbc_encrypt(a32_to_str(data), a32_to_str(key)))
def base64urlencode(data):
data = base64.b64encode(data)
for search, replace in (('+', '-'), ('/', '_'), ('=', '')):
data = data.replace(search, replace)
return data
def a32_to_base64(a):
return base64urlencode(a32_to_str(a))
def stringhash(s, aeskey):
s32 = str_to_a32(s)
h32 = [0, 0, 0, 0]
for i in xrange(len(s32)):
h32[i % 4] ^= s32[i]
for _ in xrange(0x4000):
h32 = aes_cbc_encrypt_a32(h32, aeskey)
return a32_to_base64((h32[0], h32[2]))
def prepare_key(a):
pkey = [0x93C467E3, 0x7DB0C7A4, 0xD1BE3F81, 0x0152CB56]
for _ in xrange(0x10000):
for j in xrange(0, len(a), 4):
key = [0, 0, 0, 0]
for i in xrange(4):
if i + j < len(a):
key[i] = a[i + j]
pkey = aes_cbc_encrypt_a32(pkey, key)
return pkey
with io.open('user_list.txt', 'r') as file:
with io.open('user_list_enc.txt', 'a') as enc_file:
for line in file:
email_split, pass_split = line.replace('\n', '').split(":")
password_aes = prepare_key(str_to_a32(pass_split))
uh = stringhash(email_split.lower(), password_aes)
enc_file.write(email_split + ":" + pass_split + ":" + uh + "\n")
print email_split + ":" + pass_split + ":" + uh + "\n"

chunk of data into fixed lengths chunks and then add a space and again add them all as a string

I have got hex values as a85b080040010000. I want it to be as a8 5b 08 00 40 01 00 00. I have done it by using below code. But I have to work with very large data. So I want computed time to be very low.
import binascii
import re
filename = 'calc.exe'
with open(filename, 'rb') as f:
content = f.readline()
text = binascii.hexlify(content)
text1 = binascii.unhexlify(text)
length1 = 32
length2 = 16
list = re.findall('.{%d}' % length1, text)
list1 = re.findall('.{%d}' % length2, text1)
d = []
for i in range (0, len(list), 1):
temp = ""
l = re.findall('.{%d}' % length2, list[i])
s = l[0]
t = iter(s)
temp += str(' '.join(a+b for a,b in zip(t, t)))
temp += " "
s = l[1]
t = iter(s)
temp += str(' '.join(a+b for a,b in zip(t, t)))
temp += " | " + list1[i]
print temp
You can simply do
x="a85b080040010000"
print re.sub(r"(.{2})",r"\1 ",x)
or
x="a85b080040010000"
print " ".join([i for i in re.split(r"(.{2})",x) if i])

Python struct.error: unpack requires a string argument of length 2

I have written some data using C++ in byte format. I am now trying to read that data again using Python, but I run into an error;
Traceback (most recent call last):
File "binary-reader.py", line 61, in <module>
interaction_types.append(struct.unpack('<H',fp.read(2))[0]);
struct.error: unpack requires a string argument of length 2
I don't really understand since it looks like I am giving a string of length 2, right? Furthermore, I do the same thing at line 32
There is another question like mine but it is without an answer is targeted for Python 3.
Here is my code
import sys
import struct
import os
print "Arguments : "
print str(sys.argv)
#N = #isects
# 2 2 3*4 2 3*4*N 4N 4N 3*4N 2N 2N
#imageX,imageY,throughput,#isects,isect_positions,primitive_ids,shape_ids,spectra,interaction_types,light_ids
file_path = str(sys.argv[1]);
byte_count = 0;
line_number = 1;
fp = open(file_path, "rb");
output = open('output.txt',"w");
file_size = os.path.getsize(file_path)
print "(input) file size = " + str(file_size);
while byte_count < file_size:
print "Line number = " + str(line_number)
print "Current byte count = " + str(byte_count)
# Do stuff with byte.
x = struct.unpack('<H', fp.read(2))[0]
y = struct.unpack('<H', fp.read(2))[0]
throughputOne = struct.unpack('<f', fp.read(4))[0]
throughputTwo = struct.unpack('<f', fp.read(4))[0]
throughputThree = struct.unpack('<f', fp.read(4))[0]
nrIsects = struct.unpack('<H',fp.read(2))[0]
# print "x = " + str(x)
# print "y = " + str(y)
# print "throughputOne = " + str(throughputOne)
# print "throughputTwo = " + str(throughputTwo)
# print "throughputThree = " + str(throughputThree)
print "nrIsects = " + str(nrIsects)
isect_positions = []
for i in range(nrIsects*3):
value = struct.unpack('<f',fp.read(4))[0]
isect_positions.append(value);
primitive_ids = []
for i in range(nrIsects):
value = struct.unpack('<I',fp.read(4))[0]
primitive_ids.append(value);
shape_ids = []
for i in range(nrIsects):
shape_ids.append(struct.unpack('<I',fp.read(4))[0]);
spectra = []
for i in range(nrIsects*3):
spectra.append(struct.unpack('<f',fp.read(4))[0]);
interaction_types = []
for i in range(nrIsects):
interaction_types.append(struct.unpack('<H',fp.read(2))[0]);
light_ids = []
for i in range(nrIsects):
light_ids.append(struct.unpack('<H',fp.read(2))[0]);
output_vars = [x,y,throughputOne,throughputTwo,throughputThree,nrIsects]
line_string = ""
for i in range(len(output_vars)):
output.write(str(output_vars[i]))
line_string += str(output_vars[i])
if i is not len(output_vars) - 1:
output.write(',')
line_string += ','
print line_string
#Update counters
byte_count += 18 + 36*nrIsects
line_number+=1
# raw_input('Press any key to continue.');
# print byte
And here is a link to a input file to use. You can run the code by passing a commandline argument specifying the path of the binary file. I have also written the code in ASCII, which reads
0,0,[0.127076,0.127076,0.127076],1,{[0.144978,-0.294863,2.991749]},{3917},{3916},{[1.375603,1.375603,1.375603]},{5},{0}
https://www.dropbox.com/s/tu1anqo5k0ygtd6/writetest.bin
EDIT: The layout of my file can be found as a comment in the code
50 bytes have already been read before the fp.read(2) that raises the error. Thus, fp.read(2) returns an empty string, and struct.unpack raises an exception:
In [83]: 2+2+4+4+4+2+12+4+4+12
Out[83]: 50
x = struct.unpack('<H', fp.read(2))[0] # 2 bytes read
y = struct.unpack('<H', fp.read(2))[0] # 2 bytes
throughputOne = struct.unpack('<f', fp.read(4))[0] # 4 bytes
throughputTwo = struct.unpack('<f', fp.read(4))[0] # 4 bytes
throughputThree = struct.unpack('<f', fp.read(4))[0] # 4 bytes
nrIsects = struct.unpack('<H',fp.read(2))[0] # 2 bytes
print "nrIsects = " + str(nrIsects)
isect_positions = []
for i in range(nrIsects*3):
value = struct.unpack('<f',fp.read(4))[0] # 12 bytes
isect_positions.append(value)
primitive_ids = []
for i in range(nrIsects):
value = struct.unpack('<I',fp.read(4))[0] # 4 bytes
primitive_ids.append(value)
shape_ids = []
for i in range(nrIsects):
shape_ids.append(struct.unpack('<I',fp.read(4))[0]) # 4 bytes
spectra = []
for i in range(nrIsects*3):
spectra.append(struct.unpack('<f',fp.read(4))[0]) # 12 bytes
interaction_types = []
for i in range(nrIsects):
interaction_types.append(struct.unpack('<H', fp.read(2))[0]) # error!