Can Python add the " character to a string - python-2.7

I have to paste 3000 url's a day that are unformatted
Can i set up code to convert the raw paste data to a string?
(Example raw data) - 13 Michael Way Cottees NSW 2017
(Example changed data) - "13 Michael Way Cottees NSW 2017"
I have tried
RAW_URL = 13 Michael Way Cottees NSW 2017 + " "
RAW_URL = str(13 HOADLEY ST MAWSON ACT 2607)
RAW_DATA = ' " ' + (13 HOADLEY ST MAWSON ACT 2607) + ' " '
I keep getting "invalid syntax" error and not having much luck with google.
Once it's done it will be folded into the below code, to replace the single input on PASTED_CRM_DATA to a list just below
import requests
import csv
from lxml import html
import time
import sys
text2search = '''RECENTLY SOLD'''
PASTED_CRM_DATA = "13 HOADLEY ST MAWSON ACT 2607"
URL_LIST = 'https://www.realestate.com.au/property/' + str(PASTED_CRM_DATA.replace(' ', '-').lower()),
with open('REA.csv', 'wb') as csv_file:
writer = csv.writer(csv_file)
for index, url in enumerate(URL_LIST):
page = requests.get(url)
print '\r' 'Scraping URL ' + str(index+1) + ' of ' + str(len(URL_LIST))+ ' ' + url,
if text2search in page.text:
tree = html.fromstring(page.content)
(title,) = (x.text_content() for x in tree.xpath('//title'))
(price,) = (x.text_content() for x in tree.xpath('//div[#class="property-value__price"]'))
(sold,) = (x.text_content().strip() for x in tree.xpath('//p[#class="property-value__agent"]'))
writer.writerow([title, price, sold])
Any input is appreciated

First of all you should understand what strings are in python
In your examples that you have tried
RAW_URL = 13 Michael Way Cottees NSW 2017 + " "
RAW_URL = str(13 HOADLEY ST MAWSON ACT 2607)
RAW_DATA = ' " ' + (13 HOADLEY ST MAWSON ACT 2607) + ' " '
Here the characters you try to use a string are interpreted as actual code. To make your intentions clear to the interpreter use single quotes ' around them. (or double quotes)
RAW_URL = '13 Micheal Way Cottees NSW 2017'
RAW_DATA = '13 HOADLEY SY MAWSON ACT 2607'
To apply quotes use either string concatanation
RAW_URL = '"' + '13 Micheal Way Cottees NSW 2017' + '"'
Tough im not sure what you mean with raw paste data. Where is the data copied from? Is it by hand or done in the program?

Related

UTF-8 to EBCDIC using iconv in Python-script on USS

I am trying to convert utf-8 files from a directory listing on USS into ebcdic files BEFORE getting them into z/OS datasets.
Using a helper function which I found on stackoverflow (thanks for this!) I can issue shell-commands from within the python script:
def r(cmd_line):
return Popen(cmd_line.split(), stdout=PIPE).communicate()[0]
With this I can allocate and populate mainframe datasets from USS-files, using
r("tso alloc DSNAME(...) etc.") # to allocate a mainframe DS and
r("tso oget ...") # to populate the mainframe DS
However: some files need to be converted first, which in a shellscript I would simply code with
iconv -f UTF-8 -t IBM-1141 $utf8_file > $ebcdic_file
and I am totally at a loss of how to do this in python (2.7)?
Can't ask anybody in my shop since python was newly installed and I am currently the only one interested in it. Anyone an idea? Thanks a lot in advance!
Although not in the true spirit of python, you can do what you want by wrapping USS commands in a python script. Here is an example:
#!/bin/env python
from cStringIO import StringIO
import os
import sys
def r(cmd):
import subprocess
return subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0]
def allocate_dataset(dsName):
name = "'" + dsName + "'"
out = r(['/bin/tso', 'alloc', 'ds(' + name + ')', 'space(6000 2000)', 'track', 'lrecl(80)',
'dsntype(library)', 'blksize(3200)', 'recfm(f b)', 'dir(2)', 'new'])
for line in out.split():
print line
def not_allocated(dsName):
name = "'" + dsName + "'"
out = r(['/bin/tsocmd', 'listds ' + name])
for line in out.split():
if "NOT IN CATALOG" in out:
return True
return False
def ascii_to_ebcdic(from_codepage, to_codepage, fileName):
os.system('iconv -f' + from_codepage + ' -t' + to_codepage + ' <' + fileName + ' >ebcdic_' + fileName)
def copy_to_dataset(fileName, dsName, memberName):
dsn = "//'" + dsName + '(' + memberName + ")'"
os.system('cp -T ' + fileName + ' "' + dsn + '"')
def main():
dsName = "HLQ.MY.PYTHON"
if not_allocated(dsName):
print("Allocating '" + dsName + "' data set")
allocate_dataset(dsName)
ascii_to_ebcdic("UTF-8", "IBM-1047", "test.txt")
copy_to_dataset("ebcdic_test.txt", "HLQ.MY.PYTHON", "TXT")
member = "//'HLQ.MY.PYTHON(TXT)'"
os.system('cat -v "' + member + '"')
main()

How to email variable value using raspberry pi and smtplib

I am working on this project where i have 3 string variables which I want to email their values to an email address. I was able to email a plain text message; however, I couldn't include these variables values in. Below is my codes:
import serial
import smtplib
import time
serialport = serial.Serial('/dev/ttyUSB0', 115200, timeout = 0.5)
user= 'user#gmail.com'
password= 'password'
receiver= 'receiver#gmail.com
subject= 'Solar tracker status'
header = 'To: ' + email + '\n' + 'From: ' + email + '\n' + 'Subject: ' +
subject
while True:
line = serialport.readline()
result = line.find(";")
if result > 0:
str = line.split(";")
volt=str[0]
power=str[1]
temp=str[2]
body = "\n" + + "Voltage: " + volt + "\n" + "Power: " + power + "\n" + "Temp: " + temp
print header + '\n' + body
s=smtplib.SMTP('smtp.gmail.com',587)
s.ehlo()
s.starttls()
s.ehlo()
s.login(email, password)
s.sendmail(email, email, header + '\n\n' + body)
s.quit
When running this script, i got this error message :
File "testserial.py", line 23, in <module>
body = "\n" + + "Voltage: " + volt
TypeError: bad operand type for unary +: 'str'
I have tried converting the variable into string using str(volt), then got this error message:
File "testserial.py", line 23, in <module>
str(volt)
TypeError: 'list' object is not callable
I can't understand this because they are originally in string format since i was able to write them into a text file using %s without having to convert it.
I think i just don't know how to pass a variable into the body of the email.
Please help!

How to replace multiple value in python with the re module

I need to replace some text inside a file with the python re module.
Here is the input value :
<li><span class="PCap CharOverride-4">Contrôles</span> <span class="PCap CharOverride-4">Testes</span></li>
and the excepting output is this :
<li><span class="PCap CharOverride-4">C<span style="font-size:83%">ONTRôLES</span></span>
<span class="PCap CharOverride-4">T<span style="font-size:83%">ESTES</span></span></li>
but insted, I get this as result :
<li><span class="PCap CharOverride-4">C<span style="font-size:83%">ONTRôLES</span></span> <span class="PCap CharOverride-4">C<span style="font-size:83%">ONTRôLES</span></span></li>
Is there something that I missed ?
Here is what I've done so far :
for line in file_data.readlines():
#print(line)
reg = re.compile(r'(?P<b1>(<'+balise_name+' class="(([a-zA-Z0-9_\-]*?) |)'+class_value+')(| ([a-zA-Z0-9_\-]*?))">)(?P<maj>([A-ZÀÁÂÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝ]))(?P<min>([a-zàáâãäåæçèéëìíîïðòóôõöøùúûüýÿµœš]*?))(?P<b2>(<\/'+balise_name+'>))')
#print(reg)
search = reg.findall(line)
print(search)
if (search != None):
for matchObj in search:
print(matchObj)
#print(matchObj[8])
print(line)
balise1 = matchObj[0] #search.group('b1')
print(balise1)
balise2 = matchObj[10] #matchObj.group('b2')
print(balise2)
maj = matchObj[6] #matchObj.group('maj')
print(maj)
min = matchObj[8] #matchObj.group('min')
print(min)
sub_str = balise1+""+maj+"<span style=\"font-size:83%\">"+min.upper()+"</span>"+balise2
line = re.sub(reg, sub_str, line)
#ouverture du fichier pour ajour ligne
filename = file_name.split(".")
#file_result = open(filename[0]+"-OK."+filename[1], "a")
#file_result.writelines(line)
#file_data.writelines(line)
#file_result.close()
print(line)
NB : I don't know how to use the module Beautifulsoup of python so why I do it manually.
Pardon me for my poor english.
Thanks for your answer !!
So, I totally forgot about this question but here is the solution I came up with after fixing the code I wrote long time ago :
for line in file_data.readlines():
reg = re.compile(r'(?P<b1>(\<' + balise_name + ' class=\"(([a-zA-Z0-9_\-]*?) |)' + class_value +
')(| ([a-zA-Z0-9_\-]*?))\"\>)(?P<maj>([A-ZÀÁÂÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝ]))(?P<min>([a-zàáâãäåæçèéëìíîïðòóôõöøùúûüýÿµœš]*?))(?P<b2>(\<\/' + balise_name + '\>))')
print(line)
while reg.search(line):
search = reg.search(line)
if search:
print(search)
while search:
balise1 = search[0] # search.group('b1')
print('b1 : ' + str(balise1))
balise2 = search[11] # search.group('b2')
print('b2 : ' + str(balise2))
maj = search[7] # search.group('maj')
print('maj : ' + str(maj))
min = search[9] # search.group('min')
print('min : ' + str(min))
sub_str = search[1] + "" + maj + "<span style=\"font-size:83%\">" + min.upper() + \
"</span>" + balise2
print(sub_str)
line = re.sub(str(search[0]), sub_str, line)
print(line)
search = None
Here is what I changed with the code :
Fix some unescaped char inside the pattern
Iterate the result one by one
Fix group number for the sub function
Hope it will help someone who faced the same problem as me.

Why are the python element tree taking up so much memory?

I am about to find some specific information from a huge (4 GB) xml-file. To avoid using too much memory, I have used the iterparse method of the element tree library in Python. This seems to work well, and the memory usage for Python is ca. 3.5 MB for the most part. But the memory usage is increasing to several gigabytes, when it reach the end of the program, and it seems like it never will finish.
From the output csv-file, it seems like the program has been through all elements of interest, but has some problems with finishing the program. Can anybody see what is wrong with my program, and tell me why it behaves like it does?
The program is shown here:
import xml.etree.ElementTree as ET
output_file = 'output.csv'
input_file = 'raw_data/denmark-latest.xml'
parser = ET.iterparse(input_file, events=("start", "end"))
parser = iter(parser)
event, root = parser.next()
with open(output_file, 'a', 1) as f:
for event, element in parser:
if event == "start" and element.tag == "node":
for node in element.findall(".//tag/[#k='addr:housenumber']/..[#lat]"):
f = open(output_file, "a")
lat = node.get('lat')
lon = node.get('lon')
for tag in node.findall("./tag/[#k='addr:city']"):
city = tag.get('v')
for tag in node.findall("./tag/[#k='addr:postcode']"):
postcode = tag.get('v')
for tag in node.findall("./tag/[#k='addr:street']"):
street = tag.get('v')
for tag in node.findall("./tag/[#k='addr:housenumber']"):
houseno = tag.get('v')
string = str(lat) + ', ' + str(lon) + ', ' + str(postcode) + ', ' + str(city) + ', ' + str(street) + ', ' + str(houseno) +'\n'
f.write(string)
root.clear()

django make log that works for all models

I am trying to make my own log that makes a string of changed data between object (my old object and my new object) However i keep getting back empty string,
My code:
def log_fields(old_obj, new_obj):
fields = new_obj.__class__._meta.fields
changed_fields = ""
old_data = ""
new_data = ""
# get all changed data
for field in fields:
old_field_data = old_obj.__getattribute__(field.name)
new_field_data = new_obj.__getattribute__(field.name)
if old_field_data != new_field_data:
count =+ 1
# convert changed data to strings
# number + space + data + 5 spaces for next string
changed_fields.join(str(count)).join(" ").join(str(field)).join(" ")
old_data.join(str(count)).join(" ").join(str(old_field_data)).join(" ")
new_data.join(str(count)).join(" ").join(str(new_field_data)).join(" ")
print changed_fields
print old_data
print new_data
I got a feeling something with the string .join combination something is going wrong, cause trying this manually in shell seems to work up to the comparison. Not sure tho hos i should change the string
changed_fields = changed_fields + str(count) + "." + str(field.name) + " "
old_data = old_data + str(count) + "." + str(old_field_data) + " "
new_data = new_data + str(count) + "." + str(new_field_data) + " "
Seems to do the job, so for now, ill keep it at this