I am new to Python and was trying the Pandas library. Here is the code to read a CSV file without headers:
import pandas as pnd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
# Let pandas show up to 50 columns when a DataFrame is printed.
pnd.set_option('max_columns', 50)
# Default line width used by matplotlib plots.
mpl.rcParams['lines.linewidth'] = 2
# Column names to assign, since the file is read with header=None.
headers = ['OrderId', 'OrderDate', 'UserId', 'TotalCharges']
# NOTE(review): the values here are type names written as *strings*, and the
# dict is passed to `converters=` below. The reported traceback ends in
# _apply_converter with "'str' object is not callable", so each value
# apparently has to be a callable (int, str, float) rather than a string.
dtypes = {'OrderId': 'int', 'OrderDate': 'str', 'UserId': 'int', 'TotalCharges':'float'}
# Columns that read_csv should parse as dates.
parse_dates = ['OrderDate']
# Read the tab-separated, header-less file into a DataFrame.
df = pnd.read_csv('Raw_flight_data.csv', sep='\t', header=None,
names=headers,converters=dtypes,parse_dates=parse_dates)
This code gives me the following error:
runfile('C:/Users/rohan.arora/Desktop/Python/example.py', wdir='C:/Users/rohan.arora/Desktop/Python')
Traceback (most recent call last):
File "<ipython-input-47-43fc22883149>", line 1, in <module>
runfile('C:/Users/rohan.arora/Desktop/Python/example.py', wdir='C:/Users/rohan.arora/Desktop/Python')
File "C:\Users\rohan.arora\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 866, in runfile
execfile(filename, namespace)
File "C:\Users\rohan.arora\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 87, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "C:/Users/rohan.arora/Desktop/Python/example.py", line 13, in <module>
names=headers,converters=dtypes,parse_dates=parse_dates)
File "C:\Users\rohan.arora\Anaconda2\lib\site-packages\pandas\io\parsers.py", line 646, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:\Users\rohan.arora\Anaconda2\lib\site-packages\pandas\io\parsers.py", line 401, in _read
data = parser.read()
File "C:\Users\rohan.arora\Anaconda2\lib\site-packages\pandas\io\parsers.py", line 939, in read
ret = self._engine.read(nrows)
File "C:\Users\rohan.arora\Anaconda2\lib\site-packages\pandas\io\parsers.py", line 1508, in read
data = self._reader.read(nrows)
File "pandas\parser.pyx", line 848, in pandas.parser.TextReader.read (pandas\parser.c:10415)
File "pandas\parser.pyx", line 870, in pandas.parser.TextReader._read_low_memory (pandas\parser.c:10691)
File "pandas\parser.pyx", line 947, in pandas.parser.TextReader._read_rows (pandas\parser.c:11728)
File "pandas\parser.pyx", line 1044, in pandas.parser.TextReader._convert_column_data (pandas\parser.c:13129)
File "pandas\parser.pyx", line 2115, in pandas.parser._apply_converter (pandas\parser.c:28771)
TypeError: 'str' object is not callable
I am using Anaconda Spyder 3.1.2 and running Python 2.7.13.
I think you need to remove the quotes (') around the types, so that the dictionary holds the types themselves rather than their string representations. Change:
dtypes = {'OrderId': 'int', 'OrderDate': 'str', 'UserId': 'int', 'TotalCharges':'float'}
to:
dtypes = {'OrderId': int, 'OrderDate': str, 'UserId': int, 'TotalCharges': float}
Related
I try to run pytest with allure on my docker image, but it reports an "invalid syntax" error. Is there any python version requirement for allure? The python version on my docker image is 2.7.13.
Can anyone help me?
root#ubuntu:/fuego-rw/buildzone# pytest allure_title.py
Traceback (most recent call last):
File "/usr/local/bin/pytest", line 11, in <module>
sys.exit(main())
File "/usr/local/lib/python2.7/dist-packages/_pytest/config/__init__.py", line 65, in main
config = _prepareconfig(args, plugins)
File "/usr/local/lib/python2.7/dist-packages/_pytest/config/__init__.py", line 214, in _prepareconfig
pluginmanager=pluginmanager, args=args
File "/usr/local/lib/python2.7/dist-packages/pluggy/hooks.py", line 286, in __call__
return self._hookexec(self, self.get_hookimpls(), kwargs)
File "/usr/local/lib/python2.7/dist-packages/pluggy/manager.py", line 93, in _hookexec
return self._inner_hookexec(hook, methods, kwargs)
File "/usr/local/lib/python2.7/dist-packages/pluggy/manager.py", line 87, in <lambda>
firstresult=hook.spec.opts.get("firstresult") if hook.spec else False,
File "/usr/local/lib/python2.7/dist-packages/pluggy/callers.py", line 203, in _multicall
gen.send(outcome)
File "/usr/local/lib/python2.7/dist-packages/_pytest/helpconfig.py", line 94, in pytest_cmdline_parse
config = outcome.get_result()
File "/usr/local/lib/python2.7/dist-packages/pluggy/callers.py", line 81, in get_result
_reraise(*ex) # noqa
File "/usr/local/lib/python2.7/dist-packages/pluggy/callers.py", line 187, in _multicall
res = hook_impl.function(*args)
File "/usr/local/lib/python2.7/dist-packages/_pytest/config/__init__.py", line 789, in pytest_cmdline_parse
self.parse(args)
File "/usr/local/lib/python2.7/dist-packages/_pytest/config/__init__.py", line 997, in parse
self._preparse(args, addopts=addopts)
File "/usr/local/lib/python2.7/dist-packages/_pytest/config/__init__.py", line 943, in _preparse
self.pluginmanager.load_setuptools_entrypoints("pytest11")
File "/usr/local/lib/python2.7/dist-packages/pluggy/manager.py", line 299, in load_setuptools_entrypoints
plugin = ep.load()
File "/usr/local/lib/python2.7/dist-packages/importlib_metadata/__init__.py", line 105, in load
module = import_module(match.group('module'))
File "/usr/lib/python2.7/importlib/__init__.py", line 37, in import_module
__import__(name)
File "/usr/local/lib/python2.7/dist-packages/_pytest/assertion/rewrite.py", line 304, in load_module
exec(co, mod.__dict__)
File "/usr/local/lib/python2.7/dist-packages/allure_pytest/plugin.py", line 3, in <module>
import allure
File "/usr/local/lib/python2.7/dist-packages/allure.py", line 1, in <module>
from allure_commons._allure import title
File "/usr/local/lib/python2.7/dist-packages/allure_commons/__init__.py", line 3, in <module>
from allure_commons._allure import fixture # noqa: F401
File "/usr/local/lib/python2.7/dist-packages/allure_commons/_allure.py", line 165
def __call__(self, func: _TFunc) -> _TFunc:
^
SyntaxError: invalid syntax
This syntax:
func: _TFunc
is called a type hint, meaning the variable func is expected to be of type _TFunc (in simpler terms, num: int means the variable num is expected to be int).
The type hint feature was only made available from Python3.0 (PEP 3107) and Python3.5 (PEP 484), thus isn't available in the version you are using which is Python2.7.
# Minimal example of a parameter type hint: `num: int` annotates the
# parameter `num` with the type int. The function just prints its argument.
def func(num: int):
print(num)
# Call the example with an int; the thread shows this printing 1 on Python 3
# and raising SyntaxError on Python 2.
func(1)
Using Python2
File "Main.py", line 1
def func(num: int):
^
SyntaxError: invalid syntax
Using Python3
1
Either upgrade your Python version to >=3.5 or use an older version of allure-pytest. I would advise to upgrade Python as that would be more sustainable.
I used two CSV files to create a dataframe:
import matplotlib.pyplot as plt
import pandas as pd
#import data
# Raw strings keep the Windows backslashes literal; without the r-prefix
# "\U" is an invalid escape sequence on Python 3.
df1 = pd.read_csv(r"C:\Users\Meiji\Desktop\CNST 6308-python\Hourly_TTI.csv")
df2 = pd.read_csv(r"C:\Users\Meiji\Desktop\CNST 6308-python\Weather.csv")
#standardize date format
# Both frames get a common datetime column so they can be joined on it.
df1['new_date'] = pd.to_datetime(df1['Date'])
df2['new_date'] = pd.to_datetime(df2['EST'])
#merge TTI and weather dataframe
df = pd.merge(df1, df2, on=['new_date'])
#plot
# Mean TTI per hour, once for thunderstorm rows and once for all other rows.
# NOTE(review): df["Events"] raises KeyError when the merged frame has no
# column named exactly 'Events' - check df.columns (presumably the weather
# CSV header differs, e.g. by surrounding whitespace).
df[df["Events"]=='Rain-Hail-Thunderstorm'].groupby('Hour').mean()['TTI'].plot()
df[df["Events"]!='Rain-Hail-Thunderstorm'].groupby('Hour').mean()['TTI'].plot()
plt.ylabel('TTI')
plt.legend(['Rain-Hail-Thunderstorm', 'Others'])
# The pyplot module is imported as `plt`; `plot` is not a defined name.
plt.show()
Here is the error I'm getting:
Process started (PID=27504) >>>
Traceback (most recent call last):
File "C:\Users\Meiji\Desktop\CNST 6308-python\HW4\new 2.py", line 12, in <module>
df[df["Events"]=='Rain-Hail-Thunderstorm'].groupby('Hour').mean()['TTI'].plot()
File "c:\python27\lib\site-packages\pandas\core\frame.py", line 2927, in __getitem__
indexer = self.columns.get_loc(key)
File "c:\python27\lib\site-packages\pandas\core\indexes\base.py", line 2659, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1601, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1608, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Events'
<<< Process finished (PID=27504). (Exit code 1)
What am I missing? My friend has the same code, and it runs perfectly for him.
I am trying to use TfIdfVectorizer of sklearn. I am having trouble because my input is probably not matching TfIdfVectorizer needs. I have a bunch of JSONs I loaded and appended into a list, and I now want that to be the corpus for TfIdfVectorizer use.
The code:
import json
import pandas
from sklearn.feature_extraction.text import TfidfVectorizer
# Load the tab-separated training data.
train=pandas.read_csv("train.tsv", sep='\t')
# Build the corpus: for every row, parse the lower-cased JSON stored in the
# 'boilerplate' column and collect its 'body' field.
documents=[]
for i,row in train.iterrows():
data = json.loads(row['boilerplate'].lower())
# NOTE(review): data['body'] is presumably None for some rows (a JSON null
# body) - fit_transform below fails with "'NoneType' object has no attribute
# 'lower'". Confirm, then drop or replace such entries.
documents.append(data['body'])
# Fit TF-IDF on the collected documents and read back the learned IDF weights.
vectorizer=TfidfVectorizer(min_df=1)
X = vectorizer.fit_transform(documents)
idf = vectorizer.idf_
# Print a feature-name -> IDF mapping (Python 2 print statement).
print dict(zip(vectorizer.get_feature_names(), idf))
I am getting the following error:
Traceback (most recent call last):
File "<ipython-input-56-94a6b95b0745>", line 1, in <module>
runfile('C:/Users/Guinea Pig/Downloads/try.py', wdir='C:/Users/Guinea Pig/Downloads')
File "D:\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 585, in runfile
execfile(filename, namespace)
File "C:/Users/Guinea Pig/Downloads/try.py", line 19, in <module>
X = vectorizer.fit_transform(documents)
File "D:\Anaconda\lib\site-packages\sklearn\feature_extraction\text.py", line 1219, in fit_transform
X = super(TfidfVectorizer, self).fit_transform(raw_documents)
File "D:\Anaconda\lib\site-packages\sklearn\feature_extraction\text.py", line 780, in fit_transform
vocabulary, X = self._count_vocab(raw_documents, self.fixed_vocabulary)
File "D:\Anaconda\lib\site-packages\sklearn\feature_extraction\text.py", line 715, in _count_vocab
for feature in analyze(doc):
File "D:\Anaconda\lib\site-packages\sklearn\feature_extraction\text.py", line 229, in <lambda>
tokenize(preprocess(self.decode(doc))), stop_words)
File "D:\Anaconda\lib\site-packages\sklearn\feature_extraction\text.py", line 195, in <lambda>
return lambda x: strip_accents(x.lower())
AttributeError: 'NoneType' object has no attribute 'lower'
I understand that the documents list consists of Unicode objects rather than string objects, but I can't seem to solve this issue. Any ideas?
Eventually I used:
# Encode every document to UTF-8 bytes. Each element is encoded in turn;
# indexing with the stale loop variable `i` from the earlier loop would
# append the same document on every pass. A missing body (None) becomes an
# empty document so that the vectorizer never receives None.
str_docs = [(item or u'').encode('utf-8') for item in documents]
As an addition
I read a csv file in pandas by specifying the name directly. It works without trouble. But when I try to do the same with sys.argv[] it throws an error. This is what I tried.
import sys
import pandas as pd
# Take the CSV path from the last command-line argument.
# NOTE(review): when the script is started without arguments, sys.argv[-1] is
# the script's own path, so read_csv would be handed the script itself -
# confirm that a file name is actually passed on the command line.
filename = sys.argv[-1]
data = pd.read_csv(filename, delimiter=',')
print data
# Show only the rows whose 'logFC' value is at least 2.
print data[data['logFC'] >= 2]
The error it throws back is:
Traceback (most recent call last): File "/Users/filter_cutoff_genexdata.py", line 7, in <module>
data = pd.read_csv(filename, delimiter=',') File "/Library/Python/2.7/site-packages/pandas/io/parsers.py", line 498, in parser_f
return _read(filepath_or_buffer, kwds) File "/Library/Python/2.7/site-packages/pandas/io/parsers.py", line 285, in
_read
return parser.read() File "/Library/Python/2.7/site-packages/pandas/io/parsers.py", line 747, in read
ret = self._engine.read(nrows) File "/Library/Python/2.7/site-packages/pandas/io/parsers.py", line 1197, in read
data = self._reader.read(nrows) File "pandas/parser.pyx", line 766, in pandas.parser.TextReader.read (pandas/parser.c:7988) File "pandas/parser.pyx", line 788, in pandas.parser.TextReader._read_low_memory (pandas/parser.c:8244) File "pandas/parser.pyx", line 842, in pandas.parser.TextReader._read_rows (pandas/parser.c:8970) File "pandas/parser.pyx", line 829, in pandas.parser.TextReader._tokenize_rows (pandas/parser.c:8838) File "pandas/parser.pyx", line 1833, in pandas.parser.raise_parser_error (pandas/parser.c:22649) pandas.parser.CParserError: Error tokenizing data. C error: Expected 1 fields in line 7, saw 3
Can anyone please tell why it doesn't work ?
def voigt_PD(x, y0, xc, A, wG, wL):
    """Evaluate a Voigt profile by numerically integrating the convolution.

    The value at each point is
    ``y0 + A * (2*ln2 / pi**1.5) * (wL / wG**2) * I(x)``, where ``I(x)`` is
    the integral over ``t`` from -inf to inf of the integrand defined in
    ``integconvo``.

    Parameters
    ----------
    x : float or array-like
        Point(s) at which to evaluate the profile.
    y0 : float
        Constant offset added to the profile.
    xc : float
        Centre of the profile (subtracted from ``x``).
    A : float
        Amplitude factor multiplying the profile.
    wG, wL : float
        Gaussian and Lorentzian width parameters (presumably full widths at
        half maximum - confirm against the model definition in use).

    Returns
    -------
    float or numpy.ndarray
        A scalar for scalar ``x``, otherwise an array shaped like ``x``.
    """
    # Local import so the block does not depend on how the file imports numpy.
    import numpy as np

    def integconvo(t, x1, xc1, wG1, wL1):
        lorentz_term = (sqrt(log(2)) * wL1 / wG1) ** 2
        shift_term = ((sqrt(4 * log(2)) * (x1 - xc1) / wG1) - t) ** 2
        return exp(-t ** 2) / (lorentz_term + shift_term)

    def integral_at(xi):
        # quad integrates a scalar-valued function and returns
        # (value, abserr); only the value is wanted here.
        return quad(integconvo, -inf, inf, args=(xi, xc, wG, wL))[0]

    xs = np.asarray(x, dtype=float)
    if xs.ndim == 0:
        integral = integral_at(float(xs))
    else:
        # quad cannot take an array-valued integrand ("Supplied function does
        # not return a valid float"), so integrate once per data point.
        integral = np.array(
            [integral_at(xi) for xi in xs.ravel()], dtype=float
        ).reshape(xs.shape)

    return y0 + A * (2 * log(2) / pi ** 1.5) * (wL / wG ** 2) * integral
I am defining this Voigt model function to fit a curve, but I am getting an error stating "Supplied function does not return a valid float". The fitting routine is shown below, followed by the error. Can anyone help me find the mistake? Thanks in advance.
import pylab
from lmfit import Model
from readdatafile import readdatafile
from voigt_PD import voigt_PD
# Load the data to be fitted from data.dat.
X,Y = readdatafile('data.dat')
# Wrap the Voigt function in an lmfit Model.
gmod = Model(voigt_PD)
# Fit the model to Y using the given initial parameter values.
# NOTE(review): X is passed whole as the independent variable `x`, so
# voigt_PD presumably receives an array rather than a scalar - confirm that
# the model function handles array input.
result = gmod.fit(Y, x=X,y0=0,xc=0,A=0.1,wG=0.5,wL=0.5)
print(result.fit_report())
# Plot the data ('bo') together with the best fit ('r-').
pylab.plot(X, Y, 'bo')
pylab.plot(X, result.best_fit, 'r-')
pylab.show()
Then it gives
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Python27\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 685, in runfile
execfile(filename, namespace)
File "C:\Python27\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 71, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "E:/programming/python scripts/test5.py", line 29, in <module>
result = gmod.fit(Y, x=X,y0=0,xc=0,A=0.1,wG=0.5,wL=0.5)
File "build\bdist.win32\egg\lmfit\model.py", line 542, in fit
File "build\bdist.win32\egg\lmfit\model.py", line 746, in fit
File "build\bdist.win32\egg\lmfit\model.py", line 408, in eval
File "voigt_PD.py", line 13, in voigt_PD
return y0+(A*(2*log(2)/pi**1.5)*(wL/wG**2)* (quad(integconvo,-inf,inf,args=(x,xc,wG,wL)))[0])
File "C:\Python27\lib\site-packages\scipy\integrate\quadpack.py", line 311, in quad
points)
File "C:\Python27\lib\site-packages\scipy\integrate\quadpack.py", line 378, in _quad
return _quadpack._qagie(func,bound,infbounds,args,full_output,epsabs,epsrel,limit)
quadpack.error: Supplied function does not return a valid float.