Python: extract all placeholders from format string - regex

I have to 'parse' a format string in order to extract the variables.
E.g.
>>> s = "%(code)s - %(description)s"
>>> get_vars(s)
'code', 'description'
I managed to do that by using regular expressions:
re.findall(r"%\((\w+)\)", s)
but I wonder whether there are built-in solutions (actually Python do parse the string in order to evaluate it!).

This seems to work great:
def get_vars(s):
d = {}
while True:
try:
s % d
except KeyError as exc:
# exc.args[0] contains the name of the key that was not found;
# 0 is used because it appears to work with all types of placeholders.
d[exc.args[0]] = 0
else:
break
return d.keys()
gives you:
>>> get_vars('%(code)s - %(description)s - %(age)d - %(weight)f')
['age', 'code', 'description', 'weight']

Related

Django postgres Json field with numeric keys

I have model with postgres json field.
class MyModel(models.Model):
data = JSONField(null=True)
then, I do:
m1 = MyModel.objects.create(data={'10':'2017-12-1'})
m2 = MyModel.objects.create(data={'10':'2018-5-1'})
I want query all the MyModel whose key '10' starts with '2017', so I want to write:
MyModel.objects.filter(data__10__startswith='2017')
The problem is that the 10 is interpreted as integer, and therefore, in the generated query it is considered as list index and not key.
Is there anyway to solve this? (except writing raw queries).
This is the generated query:
SELECT "systools_mymodel"."id", "systools_mymodel"."data" FROM "systools_mymodel" WHERE ("systools_mymodel"."data" ->> 10)::text LIKE '2017%' LIMIT 21;
And I want the 10 to be quoted (which would give me the right answer).
Thanks!
A very hackish solution (use on your own risk, tested under Django 2.0.5, voids warranty...):
# patch_jsonb.py
from django.contrib.postgres.fields.jsonb import KeyTransform
def as_sql(self, compiler, connection):
key_transforms = [self.key_name]
previous = self.lhs
while isinstance(previous, KeyTransform):
key_transforms.insert(0, previous.key_name)
previous = previous.lhs
lhs, params = compiler.compile(previous)
if len(key_transforms) > 1:
return "(%s %s %%s)" % (lhs, self.nested_operator), [
key_transforms] + params
try:
int(self.key_name)
except ValueError:
if self.key_name.startswith("K") and self.key_name[1:].isnumeric():
lookup = "'%s'" % self.key_name[1:]
else:
lookup = "'%s'" % self.key_name
else:
lookup = "%s" % self.key_name
return "(%s %s %s)" % (lhs, self.operator, lookup), params
def patch():
KeyTransform.as_sql = as_sql
Usage:
Add this to the bottom of your settings.py:
import patch_jsonb
patch_jsonb.patch()
Instead of __123__ lookups use __K123__ lookups - the uppercase K will be stripped by this patch:
MyModel.objects.filter(data__K10__startswith='2017')
And consider avoiding using numbers as jsonb object keys...

part of a string contained in another string regex python

Is there a way to check if any part of a string matches with another string in python?
For e.g.: I have URLs which look like this
url = pd.DataFrame({'urls' : ['www.amazon.com/ANASTASIA-Beverly...Brow/dp/B00GI21NZA', 'www.ulta.com/beautyservices/benefitbrowbar/']})
and I have strings which look like:
string_list = ['Benefit Cosmetics', 'Anastasia Beverly Hills']
string = '|'.join(string_list)
I would like to match string with url.
Anastasia Beverly Hills with www.amazon.com/ANASTASIA-Beverly...Brow/dp/B00GI21NZA and
www.ulta.com/beautyservices/benefitbrowbar/ with Benefit Cosmetics.
I've been trying url['urls'].str.contains('('+string+')', case = False) but this does not match.
What;s the correct way to do this?
I can't do it as a regex in one line but here is my attempt using itertools and any:
import pandas as pd
from itertools import product
url = pd.DataFrame({'urls' : ['www.amazon.com/ANASTASIA-Beverly...Brow/dp/B00GI21NZA', 'www.ulta.com/beautyservices/benefitbrowbar/']})
string_list = ['Benefit Cosmetics', 'Anastasia Beverly Hills']
"""
For each of Cartesian product (the different combinations) of
string_list and urls.
"""
for x in list(product(string_list, url['urls'])):
"""
If any of the words in the string (x[0]) are present in
the URL (x[1]) disregarding case.
"""
if any (word.lower() in x[1].lower() for word in x[0].split()):
"""
Show the match.
"""
print ("Match String: %s URL: %s" % (x[0], x[1]))
Outputs:
Match String: Benefit Cosmetics URL: www.ulta.com/beautyservices/benefitbrowbar/
Match String: Anastasia Beverly Hills URL: www.amazon.com/ANASTASIA-Beverly...Brow/dp/B00GI21NZA
Updated:
The way you were looking at it you could alternatively use:
import pandas as pd
import warnings
pd.set_option('display.width', 100)
"""
Supress the warning it will give on a match.
"""
warnings.filterwarnings("ignore", 'This pattern has match groups')
string_list = ['Benefit Cosmetics', 'Anastasia Beverly Hills']
"""
Create a pandas DataFrame.
"""
url = pd.DataFrame({'urls' : ['www.amazon.com/ANASTASIA-Beverly...Brow/dp/B00GI21NZA', 'www.ulta.com/beautyservices/benefitbrowbar/']})
"""
Using one string at a time.
"""
for string in string_list:
"""
Get the individual words in the string and concatenate them
using a pipe to create a regex pattern.
"""
s = "|".join(string.split())
"""
Update the DataFrame with True or False where the regex
matches the URL.
"""
url[string] = url['urls'].str.contains('('+s+')', case = False)
"""
Show the result
"""
print (url)
which would output:
urls Benefit Cosmetics Anastasia Beverly Hills
0 www.amazon.com/ANASTASIA-Beverly...Brow/dp/B00... False True
1 www.ulta.com/beautyservices/benefitbrowbar/ True False
Which I guess, if you want it in a DataFrame, may be better but I prefer the first way.

How to remove unwanted items from a parse file

from googlefinance import getQuotes
import json
import time as t
import re
List = ["A","AA","AAB"]
Time=t.localtime() # Sets variable Time to retrieve date/time info
Date2= ('%d-%d-%d %dh:%dm:%dsec'%(Time[0],Time[1],Time[2],Time[3],Time[4],Time[5])) #formats time stamp
while True:
for i in List:
try: #allows elements to be called and if an error does the next step
Data = json.dumps(getQuotes(i.lower()),indent=1) #retrieves Data from google finance
regex = ('"LastTradePrice": "(.+?)",') #sets parse
pattern = re.compile(regex) #compiles parse
price = re.findall(pattern,Data) #retrieves parse
print(i)
print(price)
except: #sets Error coding
Error = (i + ' Failed to load on: ' + Date2)
print (Error)
It will display the quote as: ['(number)'].
I would like it to only display the number, which means removing the brackets and quotes.
Any help would be great.
Changing:
print(price)
into:
print(price[0])
prints this:
A
42.14
AA
10.13
AAB
0.110
Try to use type() function to know the datatype, in your case type(price)
it the data type is list use print(price[0])
you will get the output (number), for brecess you need to check google data and regex.

Python Dictionary Fuzzy Match on keys

I have the following dictionary:
classes = {'MATH6371': 'Statistics 1', 'COMP7330': 'Database Management',
'MATH6471': 'Statistics 2','COMP7340': 'Creative Computation' }
And I am trying make a raw_input fuzzy match on the dictionary keys. For example, if I type in 'math', the output would be Statistics 1 and Statistics 2.
I have the following code, but it only matches keys exactly:
def print_courses (raw_input):
search = raw_input("Type a course ID here:")
if search in classes:
print classes.get(search)
else:
print "Sorry, that course doesn't exist, try again"
print_courses(raw_input)
Thanks
Here you go:
>>> search = 'math'
>>> result = [classes[key] for key in classes if search in key.lower()]
['Statistics 2', 'Statistics 1']

The queryset's `count` is wrong after `extra`

When I use extra in a certain way on a Django queryset (call it qs), the result of qs.count() is different than len(qs.all()). To reproduce:
Make an empty Django project and app, then add a trivial model:
class Baz(models.Model):
pass
Now make a few objects:
>>> Baz(id=1).save()
>>> Baz(id=2).save()
>>> Baz(id=3).save()
>>> Baz(id=4).save()
Using the extra method to select only some of them produces the expected count:
>>> Baz.objects.extra(where=['id > 2']).count()
2
>>> Baz.objects.extra(where=['-id < -2']).count()
2
But add a select clause to the extra and refer to it in the where clause, and the count is suddenly wrong, even though the result of all() is correct:
>>> Baz.objects.extra(select={'negid': '0 - id'}, where=['"negid" < -2']).all()
[<Baz: Baz object>, <Baz: Baz object>] # As expected
>>> Baz.objects.extra(select={'negid': '0 - id'}, where=['"negid" < -2']).count()
0 # Should be 2
I think the problem has to do with django.db.models.sql.query.BaseQuery.get_count(). It checks whether the BaseQuery's select or aggregate_select attributes have been set; if so, it uses a subquery. But django.db.models.sql.query.BaseQuery.add_extra adds only to the BaseQuery's extra attribute, not select or aggregate_select.
How can I fix the problem? I know I could just use len(qs.all()), but it would be nice to be able to pass the extra'ed queryset to other parts of the code, and those parts may call count() without knowing that it's broken.
Redefining get_count() and monkeypatching appears to fix the problem:
def get_count(self):
"""
Performs a COUNT() query using the current filter constraints.
"""
obj = self.clone()
if len(self.select) > 1 or self.aggregate_select or self.extra:
# If a select clause exists, then the query has already started to
# specify the columns that are to be returned.
# In this case, we need to use a subquery to evaluate the count.
from django.db.models.sql.subqueries import AggregateQuery
subquery = obj
subquery.clear_ordering(True)
subquery.clear_limits()
obj = AggregateQuery(obj.model, obj.connection)
obj.add_subquery(subquery)
obj.add_count_column()
number = obj.get_aggregation()[None]
# Apply offset and limit constraints manually, since using LIMIT/OFFSET
# in SQL (in variants that provide them) doesn't change the COUNT
# output.
number = max(0, number - self.low_mark)
if self.high_mark is not None:
number = min(number, self.high_mark - self.low_mark)
return number
django.db.models.sql.query.BaseQuery.get_count = quuux.get_count
Testing:
>>> Baz.objects.extra(select={'negid': '0 - id'}, where=['"negid" < -2']).count()
2
Updated to work with Django 1.2.1:
def basequery_get_count(self, using):
"""
Performs a COUNT() query using the current filter constraints.
"""
obj = self.clone()
if len(self.select) > 1 or self.aggregate_select or self.extra:
# If a select clause exists, then the query has already started to
# specify the columns that are to be returned.
# In this case, we need to use a subquery to evaluate the count.
from django.db.models.sql.subqueries import AggregateQuery
subquery = obj
subquery.clear_ordering(True)
subquery.clear_limits()
obj = AggregateQuery(obj.model)
obj.add_subquery(subquery, using=using)
obj.add_count_column()
number = obj.get_aggregation(using=using)[None]
# Apply offset and limit constraints manually, since using LIMIT/OFFSET
# in SQL (in variants that provide them) doesn't change the COUNT
# output.
number = max(0, number - self.low_mark)
if self.high_mark is not None:
number = min(number, self.high_mark - self.low_mark)
return number
models.sql.query.Query.get_count = basequery_get_count
I'm not sure if this fix will have other unintended consequences, however.