Download streets from different addresses in OSMnx using a list

I would like to download several streets in OSMnx using a list, but I can't find the error in my code. I tried this:
name_list = ['Kuwait, United Arab Emirates', 'Guangzhou, China']
i = 0
for file_path in name_list:
    print(i)
    CD = file_path
    filtro_vialidades = '["highway"~"trunk|motorwat|primary|secondary|tertiary|residential"]'
    GCD = ox.graph_from_address(CD, network_type="drive",
                                custom_filter=filtro_vialidades)
    ox.save_graphml(GCD, f"{name_list[i]}.graphml", gephi=False)
    i += 1
I get this error:
EmptyOverpassResponse: There are no data elements in the response JSON

Kuwait is a country, not a city (and note the typo "motorwat" in your highway filter: it should be "motorway"). You can add appropriate error handling:
import osmnx as ox
from osmnx._errors import EmptyOverpassResponse

name_list = ['Kuwait, United Arab Emirates', 'Guangzhou, China']
filtro_vialidades = '["highway"~"trunk|motorway|primary|secondary|tertiary|residential"]'

for i, file_path in enumerate(name_list):
    print(i)
    try:
        GCD = ox.graph_from_address(file_path, network_type="drive",
                                    custom_filter=filtro_vialidades)
        ox.save_graphml(GCD, f"{file_path}.graphml", gephi=False)
    except EmptyOverpassResponse:
        print(f"no results: {file_path}")

It looks like one of your queries returns nothing, most likely because the place "Kuwait, United Arab Emirates" does not exist: Kuwait and the United Arab Emirates are two separate countries.
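If you actually wanted the city, a query such as 'Kuwait City, Kuwait' should resolve. One way to catch bad queries early is to geocode each one before downloading; here is a minimal sketch using OSMnx's geocoder (the broad except is deliberate, since the exact exception raised for a failed lookup depends on the OSMnx version):
import osmnx as ox

for query in ['Kuwait City, Kuwait', 'Guangzhou, China']:
    try:
        lat, lng = ox.geocode(query)  # resolve the place name via Nominatim
        print(f"{query} -> {lat}, {lng}")
    except Exception:
        print(f"could not geocode: {query}")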

Related

PVLIB: How can I add module and inverter specifications which are not present in CEC and SAM library?

I am working on a PV system installed in Amsterdam. The PVSystem code is as follows. I am getting good results with the inverter and the modules specified in the code, which are obtained with retrieve_sam.
import pvlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS
from pandas.plotting import register_matplotlib_converters
from pvlib.modelchain import ModelChain

# Define location for the Netherlands
location = pvlib.location.Location(latitude=52.53, longitude=5.15, tz='UTC', altitude=50, name='amsterdam')

# Import the databases
module_database = pvlib.pvsystem.retrieve_sam(name='SandiaMod')
inverter_database = pvlib.pvsystem.retrieve_sam(name='cecinverter')
module = module_database.Canadian_Solar_CS5P_220M___2009_
# module = module_database.DMEGC_Solar_320_M6_120BB_  (I want to add this module)
inverter = inverter_database.ABB__PVI_3_0_OUTD_S_US__208V_
temperature_model_parameters = pvlib.temperature.TEMPERATURE_MODEL_PARAMETERS['sapm']['open_rack_glass_glass']
modules_per_string = 10
inverter_per_string = 1

# Define the PV system characteristics
surface_tilt = 12.5
surface_azimuth = 180
system = pvlib.pvsystem.PVSystem(surface_tilt=surface_tilt, surface_azimuth=surface_azimuth, albedo=0.25,
                                 module=module, module_parameters=module,
                                 temperature_model_parameters=temperature_model_parameters,
                                 modules_per_string=modules_per_string, inverter_per_string=inverter_per_string,
                                 inverter=inverter, inverter_parameters=inverter, racking_model='open_rack')

# Load the weather file
def importPSMData():
    df = pd.read_csv('/Users/laxmikantradkar/Desktop/PVLIB/solcast_data1.csv', delimiter=';')
    # Rename the columns for input to pvlib
    df.rename(columns={'Dhi': 'dhi', 'Dni': 'dni', 'Ghi': 'ghi', 'AirTemp': 'temp_air',
                       'WindSpeed10m': 'wind_speed'}, inplace=True)
    df.rename(columns={'Year': 'year', 'Month': 'month', 'Day': 'day', 'Hour': 'hour',
                       'Minute': 'minute'}, inplace=True)
    df['dt'] = pd.to_datetime(df[['year', 'month', 'day', 'hour', 'minute']])
    df.set_index(df['dt'], inplace=True)
    # df.rename(columns={'PeriodEnd': 'period_end'}, inplace=True)
    # Drop columns pvlib does not need
    df = df.drop(columns=['PeriodStart', 'Period', 'Azimuth', 'CloudOpacity',
                          'DewpointTemp', 'Ebh', 'PrecipitableWater', 'SnowDepth',
                          'SurfacePressure', 'WindDirection10m', 'Zenith'])
    return df

mc = ModelChain(system=system, location=location)
weatherData = importPSMData()
mc.run_model(weather=weatherData)
ac_energy = mc.ac
# ac_energy.to_csv('/Users/laxmikantradkar/Desktop/ac_energy_netherlands.csv')
plt.plot(ac_energy)
plt.show()
Now I want to use a module and an inverter which are not present in the library. Could anyone please tell me how to do this?
Is it possible to access the library and manually add a row/column for the inverter and module? If yes, where is the library located? Is it ../Desktop/PVLIB/venv/lib/python3.8/site-packages/pvlib/data/sam-library-sandia-modules-2015-6-30.csv?
When I try to change the module/inverter parameters via the above path, I receive the error "'DataFrame' object has no attribute 'Module name'".
I started working on PVLIB_python 2 days ago, so I am new to the language. I really appreciate your help. Feel free to correct me at any point.
> I started working on PVLIB_python 2 days ago, so I am new to the language. I really appreciate your help. Feel free to correct me at any point.
Welcome to the community! If you haven't already, I encourage you to dig through the pvlib-python documentation and continue learning Python basics by playing with the examples there. Check out the pandas tutorials and any other highly rated pandas learning material you can find to get up and running with data science in Python.
> When I try to change the module/inverter parameters via the above path, I receive the error "'DataFrame' object has no attribute 'Module name'".
This is because you're asking for a column in the DataFrame table that's not there. No worries, you can make your own module.
> Now I want to use a module and an inverter which are not present in the library. Could anyone please tell me how to do this? Is it possible to access the library and manually add a row/column for the inverter and module? If yes, where is the library located?
It isn't necessary to change the library. You can construct a module yourself, since a module is just a Series from the pandas library. Here's an example showing how to copy an existing module, change a couple of fields, and use the result as your own module.
my_new_module = module.copy() # create your own copy of the module
print("Before:", my_new_module, sep="\n") # show module before
my_new_module["Notes"] = "This is how to change a field in the module. Do this for every field in the module."
my_new_module.name = "DMEGC_Solar_320_M6_120BB_" # rename the Series appropriately
print("\nAfter:", my_new_module, sep="\n") # show module after
Then you can just insert "my_new_module" into PVSystem:
system = pvlib.pvsystem.PVSystem(
    surface_tilt=surface_tilt,
    surface_azimuth=surface_azimuth,
    albedo=0.25,
    module=my_new_module,             # HERE'S THE NEW MODULE!
    module_parameters=my_new_module,  # the parameters the model actually uses
    temperature_model_parameters=temperature_model_parameters,
    modules_per_string=modules_per_string,
    inverter_per_string=inverter_per_string,
    inverter=inverter,
    inverter_parameters=inverter,
    racking_model='open_rack')
The hard part here is having coefficients you can trust. You may have an easier time using module_database = pvlib.pvsystem.retrieve_sam(name='CECMod') and replacing those parameters, since they can be substituted more easily with data from the module spec sheet.
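For instance, here is a minimal sketch of building a CEC-style module from datasheet values. The field names follow pvlib's CEC module database; the numbers below are placeholders, not real DMEGC coefficients:
import pandas as pd

# Placeholder single-diode parameters; replace with values fit from the spec sheet.
my_cec_module = pd.Series({
    'alpha_sc': 0.004,   # short-circuit current temperature coefficient [A/C]
    'a_ref': 1.6,        # modified diode ideality factor [V]
    'I_L_ref': 9.6,      # light-generated current at reference conditions [A]
    'I_o_ref': 3e-10,    # diode reverse saturation current [A]
    'R_sh_ref': 400.0,   # shunt resistance at reference conditions [ohm]
    'R_s': 0.3,          # series resistance [ohm]
    'Adjust': 8.0,       # temperature coefficient adjustment [%]
}, name='DMEGC_Solar_320_M6_120BB_')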
This should work identically for inverters as well.
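The same copy-and-edit pattern applies; a quick sketch (Paco is the AC power rating field in the CEC inverter database, and the value here is a placeholder):
my_new_inverter = inverter.copy()            # start from an existing inverter
my_new_inverter['Paco'] = 3000.0             # placeholder AC power rating [W]
my_new_inverter.name = 'My_Custom_Inverter'  # hypothetical name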

Splitting the name when a word matches one in an array?

As part of my learning, after I successfully split the names with help, as a next step I wanted to know if I can split the names of files when a month name found in the file name matches one of the months in this list:
Months = ['January','February','March','April','May','June','July','August','September','October','November','December']
My file names look like this:
1. Non IVR Entries Transactions December_16_2016_07_49_22 PM.txt
2. Denied_Calls_SMS_Sent_December_14_2016_05_33_41 PM.txt
Please note that the file names are not all the same, which is why I need to split them into, e.g., Non IVR Entries Transactions as one part and December_16_2016_07_49_22 PM as another.
import os
import os.path
import csv

path = 'C:\\Users\\akhilpriyatam.k\\Desktop\\tes'
text_files = [os.path.splitext(f)[0] for f in os.listdir(path)]
for v in text_files:
    print (v[0:9])
    print (v[10:])

os.chdir('C:\\Users\\akhilpriyatam.k\\Desktop\\tes')
with open('file.csv', 'wb') as csvfile:
    thedatawriter = csv.writer(csvfile, delimiter=',')
    for v in text_files:
        s = (v[0:9])
        t = (v[10:])
        thedatawriter.writerow([s, t])
Assuming that you want the filename and timestamp as the two parts and the month occurs only once in the string, I hope the following code solves your problem.
import re
import calendar

fullname = 'Non IVR Entries Transactions December_16_2016_07_49_22 PM.txt'
months = list(calendar.month_name[1:])
regex = re.compile('|'.join(months))
matches = list(re.finditer(regex, fullname))
if matches:
    idx = matches[0].start()  # position of the first month name
    filename, timestamp = fullname[:idx], fullname[idx:-4]  # -4 strips '.txt'
    print filename, timestamp
else:
    print "Month not found"
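Applied to both sample names, a quick check (reusing the regex built above):
for fullname in ['Non IVR Entries Transactions December_16_2016_07_49_22 PM.txt',
                 'Denied_Calls_SMS_Sent_December_14_2016_05_33_41 PM.txt']:
    match = regex.search(fullname)  # first month-name hit, if any
    if match:
        idx = match.start()
        print fullname[:idx], '|', fullname[idx:-4]
This should print 'Non IVR Entries Transactions | December_16_2016_07_49_22 PM' and 'Denied_Calls_SMS_Sent_ | December_14_2016_05_33_41 PM'.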

os.walk set start and end point - python

I'm trying to find out how to stop an os.walk after it has walked through a particular directory.
I have a directory of log files organized by date. I'm trying to replace grep searches, allowing a user to find IP addresses stored in a date range they specify.
The program will take the following arguments:
-i ipv4 or ipv6 address with subnet
-s start date, e.g. 2013/12/20, matching the file structure
-e end date
I'm assuming that because of the topdown option there is logic that should allow me to declare an endpoint. What is the best way to do this? I'm thinking a while loop.
I apologize in advance if something is off with my question; just checked my blood sugar, it's a low 56 (type one).
Additional information:
The file structure is situated in flows/index_border as such:
2013
--01
--02
----01
----...
----29
2014
Hope this is clear: a year folder contains month folders, which contain day folders, which contain hourly files. Dates increase downwards.
The end date will need to be inclusive (I didn't focus too much on it because I can just add code to move one day up).
I have been trying to make a date range function; I was surprised I didn't see one in the datetime docs, since it seems like it would be useful.
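Such a helper only takes a few lines; for reference, a minimal sketch (not part of the original post; end date inclusive):
import datetime

def date_range(start, end):
    # Yield each date from start to end, inclusive.
    day = start
    while day <= end:
        yield day
        day += datetime.timedelta(days=1)
The script so far: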
import os, gzip, netaddr, datetime, argparse

startDir = '.'

def sdate_format(s):
    try:
        return datetime.datetime.strptime(s, '%Y/%m/%d').date()
    except ValueError:
        msg = "Bad start date. Please use yyyy/mm/dd format."
        raise argparse.ArgumentTypeError(msg)

def edate_format(e):
    try:
        return datetime.datetime.strptime(e, '%Y/%m/%d').date()
    except ValueError:
        msg = "Bad end date. Please use yyyy/mm/dd format."
        raise argparse.ArgumentTypeError(msg)

parser = argparse.ArgumentParser(description='Locate IP address in log files for a particular date or date range')
parser.add_argument('-s', '--start_date', action='store', type=sdate_format, dest='start_date', help='The first date in range of interest.')
parser.add_argument('-e', '--end_date', action='store', type=edate_format, dest='end_date', help='The last date in range of interest.')
parser.add_argument('-i', action='store', dest='net', help='IP address or address range, IPv4 or IPv6 with optional subnet accepted.', required=True)
results = parser.parse_args()
start = results.start_date
end = results.end_date
target_ip = results.net

startDir = '/flows/index_border/{0}/{1:02d}/{2:02d}'.format(start.year, start.month, start.day)
print('searching...')
for root, dirs, files in os.walk(startDir):
    for contents in files:
        if contents.endswith('.gz'):
            f = gzip.open(os.path.join(root, contents), 'r')
        else:
            f = open(os.path.join(root, contents), 'r')
        text = f.readlines()
        f.close()
        for line in text:
            for address_item in netaddr.IPNetwork(target_ip):
                if str(address_item) in line:
                    print line,
You need to describe what works or does not work. The argparse part of your code looks fine, though I haven't done any testing. The use of type is refreshingly correct. :) (Posters often misuse that parameter.)
But as for the stopping, I'm guessing you could do:
endDir = '/flows/index_border/{0}/{1:02d}/{2:02d}'.format(end.year, end.month, end.day)
for root, dirs, files in os.walk(startDir):
    for contents in files:
        ....
    if endDir in <something based on dirs and files>:
        break
I don't know enough about your file structure to be more specific, and it's been some time since I worked with os.walk. In any case, I think a conditional break is the way to stop the walk early.
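For illustration, a minimal sketch of that pattern (the walk_until name and the sorted traversal are assumptions, not part of the original answer):
import os

def walk_until(start_dir, end_dir):
    # Yield file paths under start_dir; stop once end_dir has been processed.
    for root, dirs, files in os.walk(start_dir):
        dirs.sort()  # visit the date-named directories in order
        for name in files:
            yield os.path.join(root, name)
        if root == end_dir:  # the conditional break that ends the walk early
            break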
#!/usr/bin/env python
import os, gzip, netaddr, datetime, argparse, sys

searchDir = '.'
searchItems = []

def sdate_format(s):
    try:
        return datetime.datetime.strptime(s, '%Y/%m/%d').date()
    except ValueError:
        msg = "Bad start date. Please use yyyy/mm/dd format."
        raise argparse.ArgumentTypeError(msg)

def edate_format(e):
    try:
        return datetime.datetime.strptime(e, '%Y/%m/%d').date()
    except ValueError:
        msg = "Bad end date. Please use yyyy/mm/dd format."
        raise argparse.ArgumentTypeError(msg)

parser = argparse.ArgumentParser(description='Locate IP address in log files for a particular date or date range')
parser.add_argument('-s', '--start_date', action='store', type=sdate_format, dest='start_date',
                    help='The first date in range of interest.', required=True)
parser.add_argument('-e', '--end_date', action='store', type=edate_format, dest='end_date',
                    help='The last date in range of interest.', required=True)
parser.add_argument('-i', action='store', dest='net',
                    help='IP address or address range, IPv4 or IPv6 with optional subnet accepted.', required=True)
results = parser.parse_args()
start = results.start_date
end = results.end_date + datetime.timedelta(days=1)  # make the end date inclusive
target_IP = results.net
dateRange = end - start

for addressOfInterest in netaddr.IPNetwork(target_IP):
    searchItems.append(str(addressOfInterest))

print('searching...')
for eachDay in range(dateRange.days):
    period = start + datetime.timedelta(days=eachDay)
    searchDir = '/flows/index_border/{0}/{1:02d}/{2:02d}'.format(period.year, period.month, period.day)
    for contents in os.listdir(searchDir):
        if contents.endswith('.gz'):
            f = gzip.open(os.path.join(searchDir, contents), 'rb')
        else:
            f = open(os.path.join(searchDir, contents), 'r')
        text = f.readlines()
        f.close()
        for addressOfInterest in searchItems:
            for line in text:
                if addressOfInterest in line:
                    print contents
                    print line,
I was banging my head because I thought I was printing a duplicate; it turns out the file I was given to test has duplication. I ended up removing os.walk due to the predictable nature of the file system, but @hpaulj did provide a correct solution. Much appreciated!

Checking duplicate files against a dictionary of filesizes and names

This is pretty simple code; I've just completed Charles Severance's Python for Informatics course, so if possible please help me keep it simple.
I'm trying to find duplicate documents in folders.
What I'm having trouble with is printing out the original and the duplicate so I can manually check the accuracy of what it found. Later I'll look at how to automate deleting duplicates, looking for other file types, etc.
A similarly structured piece of code worked well for iTunes, but here I'm putting originals into a dictionary, and it seems I'm not getting the info back out.
Please keep it simple, so I can learn. I know I can copy code to do the job, but I'm more interested in learning where I've gone wrong.
cheers
jeff
import os
from os.path import join
import re
import hashlib

location = '/Users/jeff/desktop/typflashdrive'
doccount = 0
dupdoc = 0
d = dict()

for (dirname, dirs, files) in os.walk(location):
    for x in files:
        size = hashlib.md5(x).hexdigest()
        item = os.path.join(dirname, x)
        #print os.path.getsize(item), item
        #size = os.path.getsize(item)
        if item.endswith('.doc'):
            doccount = doccount + 1
            if size not in d:
                original = item
                d[size] = original
            else:
                copy = item
                for key in d: print key, d[size], '\n', size, copy, '\n', '\n',
                #print item,'\n', copy,'\n','\n',
                dupdoc = dupdoc + 1

print '.doc Files:', doccount, '.', 'You have', dupdoc, 'duplicate .doc files:',
Your biggest mistake is that you're taking the hash of the filenames instead of the file content.
I have corrected that and also cleaned up the rest of the code:
import os
import hashlib

location = '/Users/jeff/desktop/typflashdrive'
doc_count = 0
dup_doc_count = 0
hash_vs_file = {}

for (dirname, dirs, files) in os.walk(location):
    for filename in files:
        file_path = os.path.join(dirname, filename)
        if filename.endswith('.doc'):
            doc_count = doc_count + 1
            # hash the file contents, not the name; open in binary mode
            file_hash = hashlib.md5(open(file_path, 'rb').read()).hexdigest()
            if file_hash not in hash_vs_file:
                hash_vs_file[file_hash] = [file_path]
            else:
                dup_doc_count += 1
                hash_vs_file[file_hash].append(file_path)

print 'doc_count = ', doc_count
print 'dup_doc_count = ', dup_doc_count
for file_hash in hash_vs_file:
    print file_hash
    for file_path in hash_vs_file[file_hash]:
        print file_path
    print "\n\n\n"
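If you only want to see actual duplicates rather than every hash, a small follow-up using the same variables as above:
for file_hash in hash_vs_file:
    paths = hash_vs_file[file_hash]
    if len(paths) > 1:  # a hash seen more than once means duplicated content
        print file_hash
        for file_path in paths:
            print file_path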

What's wrong with this for scraping the table and data needed?

I'm trying to scrape data for the Miami Heat and their opponent from a table at http://www.scoresandodds.com/grid_20111225.html. The problem I have is that the tables for the NBA, the NFL, and other sports are all marked identically, and all the data I get is from the NFL table. Another problem is that I would like to scrape data for the entire season, and the number of tables changes, as does Miami's position in the table. This is the code I've been using for different tables until now.
So why is this not getting the job done? Thanks for your patience; I'm a real beginner, and I've been trying to solve this problem for some days now, to no effect.
def tableSnO(htmlSnO):
    gameSections = soup.findAll('div', 'gameSection')
    for gameSection in gameSections:
        header = gameSection.find('div', 'header')
        if header.get('id') == 'nba':
            rows = gameSections.findAll('tr')
    def parse_string(el):
        text = ''.join(el.findAll(text=True))
        return text.strip()
    for row in rows:
        data = map(parse_string, row.findAll('td'))
        return data
Lately I decided to try a different approach: if I scrape the entire page and get the index of the data in question (this is where it stops), I could just get the next set of data from the list, since the structure of the table never changes. I could also get the opponent's team name the same way I get the htmlSnO. It feels like this is such basic stuff, and it's killing me that I can't get it right.
def tableSnO(htmlSnO):
    oddslist = soupSnO.find('table', {"width": "100%", "cellspacing": "0", "cellpadding": "0"})
    rows = oddslist.findAll('tr')
    def parse_string(el):
        text = ''.join(el.findAll(text=True))
        return text.strip()
    for row in rows:
        data = map(parse_string, row.findAll('td'))
        for teamName in data:
            if re.match("(.*)MIAMI HEAT(.*)", teamName):
                return teamName
                return data.index(teamName)
New and final answer with working code:
The section of the page you want has this:
<div class="gameSection">
<div class="header" id="nba">
This should let you get at the NBA tables:
def tableSnO(htmlSnO):
    gameSections = soup.findAll('div', 'gameSection')
    for gameSection in gameSections:
        header = gameSection.find('div', 'header')
        if header.get('id') == 'nba':
            # process this gameSection
            print gameSection.prettify()
As a complete example, here's the full code I used to test:
import sys
import urllib2
from bs4 import BeautifulSoup
f = urllib2.urlopen('http://www.scoresandodds.com/grid_20111225.html')
html = f.read()
soup = BeautifulSoup(html)
gameSections = soup.findAll('div', 'gameSection')
for gameSection in gameSections:
header = gameSection.find('div', 'header')
if header.get('id') == 'nba':
table = gameSection.find('table', 'data')
print table.prettify()
This prints the NBA data table.
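From there, pulling out just the Miami Heat rows means walking the table's rows; a sketch reusing the table variable from the loop above (the cell-text extraction is an assumption about the page's markup):
for row in table.findAll('tr'):
    cells = [td.get_text(strip=True) for td in row.findAll('td')]
    if any('MIAMI HEAT' in cell for cell in cells):
        print cells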