I created a custom spaCy model using https://github.com/explosion/spaCy/blob/master/examples/training/train_ner.py, and when I load the model it fails with the error look-behind requires fixed-width pattern. I am not sure how to solve this. Any help would be appreciated. Thanks in advance.
import spacy

output_dir = 'NLP_entity/model'
print("Loading from", output_dir)
nlp2 = spacy.load(output_dir)
test_text = "Remove from account"
doc2 = nlp2(test_text)  # was nlp1, which is undefined
print(test_text)
if doc2.ents:
    for ent in doc2.ents:
        print("entity = {}, text = {}".format(ent.label_, ent.text))
else:
    print("No entities found")
Error:
('Loading from', 'NLP_entity/model')
error Traceback (most recent call last)
<ipython-input-1-94981b2ca322> in <module>()
2 output_dir = 'NLP_entity/model'
3 print("Loading from", output_dir)
----> 4 nlp2 = spacy.load("NLP_entity/model")
5 test_text = "Remove from account".decode("utf-8")
6 #print()
/home/ubuntu/anaconda3/envs/python2/lib/python2.7/site-packages/spacy/__init__.pyc in load(name, **overrides)
25 if depr_path not in (True, False, None):
26 deprecation_warning(Warnings.W001.format(path=depr_path))
---> 27 return util.load_model(name, **overrides)
28
29
/home/ubuntu/anaconda3/envs/python2/lib/python2.7/site-packages/spacy/util.pyc in load_model(name, **overrides)
131 return load_model_from_package(name, **overrides)
132 if Path(name).exists(): # path to model data directory
--> 133 return load_model_from_path(Path(name), **overrides)
134 elif hasattr(name, "exists"): # Path or Path-like to model data
135 return load_model_from_path(name, **overrides)
/home/ubuntu/anaconda3/envs/python2/lib/python2.7/site-packages/spacy/util.pyc in load_model_from_path(model_path, meta, **overrides)
171 component = nlp.create_pipe(name, config=config)
172 nlp.add_pipe(component, name=name)
--> 173 return nlp.from_disk(model_path)
174
175
/home/ubuntu/anaconda3/envs/python2/lib/python2.7/site-packages/spacy/language.pyc in from_disk(self, path, exclude, disable)
784 # Convert to list here in case exclude is (default) tuple
785 exclude = list(exclude) + ["vocab"]
--> 786 util.from_disk(path, deserializers, exclude)
787 self._path = path
788 return self
/home/ubuntu/anaconda3/envs/python2/lib/python2.7/site-packages/spacy/util.pyc in from_disk(path, readers, exclude)
609 # Split to support file names like meta.json
610 if key.split(".")[0] not in exclude:
--> 611 reader(path / key)
612 return path
613
/home/ubuntu/anaconda3/envs/python2/lib/python2.7/site-packages/spacy/language.pyc in <lambda>(p)
774 deserializers["meta.json"] = lambda p: self.meta.update(srsly.read_json(p))
775 deserializers["vocab"] = lambda p: self.vocab.from_disk(p) and _fix_pretrained_vectors_name(self)
--> 776 deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk(p, exclude=["vocab"])
777 for name, proc in self.pipeline:
778 if name in exclude:
tokenizer.pyx in spacy.tokenizer.Tokenizer.from_disk()
tokenizer.pyx in spacy.tokenizer.Tokenizer.from_bytes()
/home/ubuntu/anaconda3/envs/python2/lib/python2.7/re.pyc in compile(pattern, flags)
192 def compile(pattern, flags=0):
193 "Compile a regular expression pattern, returning a pattern object."
--> 194 return _compile(pattern, flags)
195
196 def purge():
/home/ubuntu/anaconda3/envs/python2/lib/python2.7/re.pyc in _compile(*key)
249 p = sre_compile.compile(pattern, flags)
250 except error, v:
--> 251 raise error, v # invalid expression
252 if not bypass_cache:
253 if len(_cache) >= _MAXCACHE:
error: look-behind requires fixed-width pattern
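This error usually means the tokenizer patterns stored in the model directory were written by a different spaCy/Python combination than the one loading them: Python 2's re module rejects the variable-width look-behinds that newer pattern sets (or the regex module) allow. A quick sanity check, assuming the model directory contains the meta.json that nlp.to_disk() writes, is to compare the training and runtime versions:

import json
import spacy

# Compare the spaCy version that trained the model with the one loading it;
# a mismatch here is the usual cause of tokenizer-pattern compile errors.
print("Runtime spaCy:", spacy.__version__)
with open("NLP_entity/model/meta.json") as f:
    meta = json.load(f)
print("Model spaCy:", meta.get("spacy_version"))

If they differ, retraining and loading with the same spaCy version (ideally on Python 3) is the most reliable fix.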
I don't know what happened with my database, but now I cannot call len() on my queryset.
I can build a queryset with plenty of objects via qs = SignalSma.objects.all().
But somehow I cannot use len(qs) on that queryset or loop over it.
I get the following error when I try:
In [9]: len(qs)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[9], line 1
----> 1 len(qs)
File ~\OneDrive\Desktop\dev-2023\signal\lib\site-packages\django\db\models\query.py:262, in QuerySet.__len__(self)
261 def __len__(self):
--> 262 self._fetch_all()
263 return len(self._result_cache)
File ~\OneDrive\Desktop\dev-2023\signal\lib\site-packages\django\db\models\query.py:1324, in QuerySet._fetch_all(self)
1322 def _fetch_all(self):
1323 if self._result_cache is None:
-> 1324 self._result_cache = list(self._iterable_class(self))
1325 if self._prefetch_related_lookups and not self._prefetch_done:
1326 self._prefetch_related_objects()
File ~\OneDrive\Desktop\dev-2023\signal\lib\site-packages\django\db\models\query.py:68, in ModelIterable.__iter__(self)
59 related_populators = get_related_populators(klass_info, select, db)
60 known_related_objects = [
61 (field, related_objs, operator.attrgetter(*[
62 field.attname
(...)
66 ])) for field, related_objs in queryset._known_related_objects.items()
67 ]
---> 68 for row in compiler.results_iter(results):
69 obj = model_cls.from_db(db, init_list, row[model_fields_start:model_fields_end])
70 for rel_populator in related_populators:
File ~\OneDrive\Desktop\dev-2023\signal\lib\site-packages\django\db\models\sql\compiler.py:1122, in SQLCompiler.apply_converters(self, rows, converters)
1120 value = row[pos]
1121 for converter in convs:
-> 1122 value = converter(value, expression, connection)
1123 row[pos] = value
1124 yield row
File ~\OneDrive\Desktop\dev-2023\signal\lib\site-packages\django\db\backends\sqlite3\operations.py:313, in DatabaseOperations.get_decimalfield_converter.<locals>.converter(value, expression, connection)
311 def converter(value, expression, connection):
312 if value is not None:
--> 313 return create_decimal(value).quantize(quantize_value, context=expression.output_field.context)
TypeError: argument must be int or float
Any idea what is happening, and how can I fix it?
QuerySet objects have their own counting method. Use it:
qs = SignalSma.objects.all()
qs.count()  # returns the number of objects in the queryset
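The count() call avoids materializing the rows, but note that the original traceback fails inside SQLite's DecimalField converter, so the underlying data may contain a non-numeric value. A diagnostic sketch, assuming the model's primary key is a plain integer field, to locate the offending row:

from yourapp.models import SignalSma  # hypothetical import path

# Fetching only primary keys skips the DecimalField converter that raises,
# so this confirms the rows themselves are readable.
pks = list(SignalSma.objects.values_list("pk", flat=True))
print(len(pks))

# Probe row by row to find which record holds the bad decimal value.
for pk in pks:
    try:
        SignalSma.objects.get(pk=pk)
    except TypeError:
        print("Bad decimal data in row", pk)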
Can anyone please help me move forward with my modeling? I have no idea where the .lower attribute is being called or how to fix it. I appreciate any help.
Here is the only part where I applied .lower:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

stops = set(stopwords.words('english'))  # assumed; the stops definition is not shown in the post
wordnet_lemmatizer = WordNetLemmatizer()

def create_tokens(df2):
    df2['low'] = df2['Movie'].str.lower()
    df2['stopwords_out'] = df2['low'].apply(lambda x: " ".join([word for word in x.split() if word not in stops]))
    df2['tokenized'] = df2.apply(lambda row: nltk.word_tokenize(row['stopwords_out']), axis=1)
    df2['eng_only'] = df2['tokenized'].apply(lambda x: [word for word in x if word.isalpha()])
    df2['lemmatized'] = df2['eng_only'].apply(lambda x: [wordnet_lemmatizer.lemmatize(word) for word in x])
Here is where I flatten the lemmatized column into a list:
import itertools
from collections import Counter

a = df2.lemmatized.to_list()
b = list(itertools.chain.from_iterable(a))
bow = Counter(b)
Here is where I try to create the TF-IDF matrix and where the error appears:
from sklearn.feature_extraction.text import CountVectorizer

cv = CountVectorizer(min_df=0, max_df=1)
tf = cv.fit_transform(df2.lemmatized)
The error:
AttributeError Traceback (most recent call last)
C:\AppData\Local\Temp/ipykernel_24552/1530549768.py in <module>
2
3 cv = CountVectorizer(min_df=0, max_df=1)
----> 4 tf = cv.fit_transform(df2.lemmatized)
5
6 print(df2.lemmatized)
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in fit_transform(self, raw_documents, y)
1200 max_features = self.max_features
1201
-> 1202 vocabulary, X = self._count_vocab(raw_documents,
1203 self.fixed_vocabulary_)
1204
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in _count_vocab(self, raw_documents, fixed_vocab)
1112 for doc in raw_documents:
1113 feature_counter = {}
-> 1114 for feature in analyze(doc):
1115 try:
1116 feature_idx = vocabulary[feature]
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in _analyze(doc, analyzer, tokenizer, ngrams, preprocessor, decoder, stop_words)
102 else:
103 if preprocessor is not None:
--> 104 doc = preprocessor(doc)
105 if tokenizer is not None:
106 doc = tokenizer(doc)
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in _preprocess(doc, accent_function, lower)
67 """
68 if lower:
---> 69 doc = doc.lower()
70 if accent_function is not None:
71 doc = accent_function(doc)
AttributeError: 'list' object has no attribute 'lower'
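The traceback shows the root cause: df2.lemmatized holds lists of tokens, but CountVectorizer's default preprocessor calls .lower() on each document, which assumes strings. Two common fixes, sketched here and untested against this exact DataFrame:

from sklearn.feature_extraction.text import CountVectorizer

# Option 1: join each token list back into a single string per document.
cv = CountVectorizer(min_df=1)
tf = cv.fit_transform(df2['lemmatized'].apply(' '.join))

# Option 2: tell the vectorizer the documents are already tokenized,
# bypassing its preprocessing and tokenization entirely.
cv = CountVectorizer(analyzer=lambda tokens: tokens)
tf = cv.fit_transform(df2['lemmatized'])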
I'm using Django 2.2.24, django-firebird 2.2a1, and fdb 2.02.
My model:
class MyModel(models.Model):
...
total = models.DecimalField(max_digits=10, decimal_places=2, null=True)
...
When I run a simple query:
ml = MyModel.objects.values('id', 'total').last()
I get this error:
AttributeError: 'float' object has no attribute 'quantize'
I guess the issue is with the converters in firebird/operations.py.
Full traceback:
~/.pyenv/versions/crm-nov21/lib/python3.7/site-packages/django/db/models/query.py in last(self)
656 def last(self):
657 """Return the last object of a query or None if no match is found."""
--> 658 for obj in (self.reverse() if self.ordered else self.order_by('-pk'))[:1]:
659 return obj
660
~/.pyenv/versions/crm-nov21/lib/python3.7/site-packages/django/db/models/query.py in __iter__(self)
272 - Responsible for turning the rows into model objects.
273 """
--> 274 self._fetch_all()
275 return iter(self._result_cache)
276
~/.pyenv/versions/crm-nov21/lib/python3.7/site-packages/django/db/models/query.py in _fetch_all(self)
1240 def _fetch_all(self):
1241 if self._result_cache is None:
-> 1242 self._result_cache = list(self._iterable_class(self))
1243 if self._prefetch_related_lookups and not self._prefetch_done:
1244 self._prefetch_related_objects()
~/.pyenv/versions/crm-nov21/lib/python3.7/site-packages/django/db/models/query.py in __iter__(self)
111 ]
112 indexes = range(len(names))
--> 113 for row in compiler.results_iter(chunked_fetch=self.chunked_fetch, chunk_size=self.chunk_size):
114 yield {names[i]: row[i] for i in indexes}
115
~/.pyenv/versions/crm-nov21/lib/python3.7/site-packages/django/db/models/sql/compiler.py in apply_converters(self, rows, converters)
1084 value = row[pos]
1085 for converter in convs:
-> 1086 value = converter(value, expression, connection)
1087 row[pos] = value
1088 yield row
~/.pyenv/versions/crm-nov21/lib/python3.7/site-packages/firebird/operations.py in convert_decimalfield_value(self, value, expression, connection, context)
304 field = expression.field
305
--> 306 val = utils.format_number(value, field.max_digits, field.decimal_places)
307 value = decimal.Decimal.from_float(float(val))
308 return value
~/.pyenv/versions/crm-nov21/lib/python3.7/site-packages/django/db/backends/utils.py in format_number(value, max_digits, decimal_places)
236 context.prec = max_digits
237 if decimal_places is not None:
--> 238 value = value.quantize(decimal.Decimal(1).scaleb(-decimal_places), context=context)
239 else:
240 context.traps[decimal.Rounded] = 1
AttributeError: 'float' object has no attribute 'quantize'
Could someone help me, please?
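Based on the traceback, fdb hands convert_decimalfield_value() a float, while Django's format_number() expects a Decimal (only Decimal has .quantize()). One possible local workaround, a sketch only and untested against django-firebird internals, is to coerce the value before formatting:

import decimal
from django.db.backends import utils

def convert_decimalfield_value(self, value, expression, connection, context=None):
    if value is None:
        return value
    if isinstance(value, float):
        # fdb returns a float here; format_number() expects a Decimal,
        # so coerce through str() to avoid binary-float artifacts.
        value = decimal.Decimal(str(value))
    field = expression.field
    val = utils.format_number(value, field.max_digits, field.decimal_places)
    return decimal.Decimal(val)

Reporting this upstream to django-firebird is probably the better long-term fix.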
I'm getting an error (below) when I try to load a couple of datasets, using:
import seaborn as sns
exercise = sns.load_dataset("exercise")
and
import seaborn as sns
titanic = sns.load_dataset("titanic")
It's weird, though, because iris = sns.load_dataset("iris") works great; what is causing the CParserError?
---------------------------------------------------------------------------
CParserError Traceback (most recent call last)
<ipython-input-4-6b85a4d6ff71> in <module>()
----> 1 exercise = sns.load_dataset("exercise")
2 #iris = sns.load_dataset("iris")
c:\python27\lib\site-packages\seaborn\utils.pyc in load_dataset(name, cache, data_home, **kws)
425 full_path = cache_path
426
--> 427 df = pd.read_csv(full_path, **kws)
428 if df.iloc[-1].isnull().all():
429 df = df.iloc[:-1]
c:\python27\lib\site-packages\pandas\io\parsers.pyc in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
560 skip_blank_lines=skip_blank_lines)
561
--> 562 return _read(filepath_or_buffer, kwds)
563
564 parser_f.__name__ = name
c:\python27\lib\site-packages\pandas\io\parsers.pyc in _read(filepath_or_buffer, kwds)
323 return parser
324
--> 325 return parser.read()
326
327 _parser_defaults = {
c:\python27\lib\site-packages\pandas\io\parsers.pyc in read(self, nrows)
813 raise ValueError('skip_footer not supported for iteration')
814
--> 815 ret = self._engine.read(nrows)
816
817 if self.options.get('as_recarray'):
c:\python27\lib\site-packages\pandas\io\parsers.pyc in read(self, nrows)
1312 def read(self, nrows=None):
1313 try:
-> 1314 data = self._reader.read(nrows)
1315 except StopIteration:
1316 if self._first_chunk:
pandas\parser.pyx in pandas.parser.TextReader.read (pandas\parser.c:8620)()
pandas\parser.pyx in pandas.parser.TextReader._read_low_memory (pandas\parser.c:8876)()
pandas\parser.pyx in pandas.parser.TextReader._read_rows (pandas\parser.c:9602)()
pandas\parser.pyx in pandas.parser.TextReader._tokenize_rows (pandas\parser.c:9470)()
pandas\parser.pyx in pandas.parser.raise_parser_error (pandas\parser.c:23295)()
CParserError: Error tokenizing data. C error: Expected 1 fields in line 24, saw 2
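One likely cause, though I can't verify it from the traceback alone, is a corrupted cached copy: load_dataset() caches each CSV on disk (by default under ~/seaborn-data), and if a download was interrupted or returned an HTML error page, pandas fails to parse the cached file afterwards. A quick check and workaround, assuming your seaborn version accepts the cache keyword shown in the traceback:

import seaborn as sns

# Bypass the on-disk cache entirely; alternatively, delete the stale
# ~/seaborn-data/exercise.csv and let seaborn re-download it.
exercise = sns.load_dataset("exercise", cache=False)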
I am trying to write to a *.xlsx file with the openpyxl module, which I installed with pip on Ubuntu 14.04.
I am loading a *.xlsx file, a Bill of Materials template previously made in Excel, that opens just fine in LibreOffice and Kingsoft Office. My intention is to load it and fill in some cells with strings.
My function looks like this:
from openpyxl import load_workbook

def writeBOM(parts, projectname):
    '''
    Take the ordered and grouped part info and
    write it to a standard BOM and save it.
    '''
    StandardBOMFILE = '/home/jesse/Digi-Parser/SampleFiles/StandardBOM.xlsx'
    wb = load_workbook(filename=StandardBOMFILE)
    sheet = wb.get_sheet_by_name('BOM')
    r = 8  # first data row in the template
    # Fill the BOM
    for i, part in enumerate(parts):
        sheet.cell(row=r + i, column=1).value = part.designator
        sheet.cell(row=r + i, column=2).value = part.evalue + ' ' + part.package
        sheet.cell(row=r + i, column=3).value = part.qty
    projectBOMname = projectname + 'BOM' + '.xlsx'
    wb.save(projectBOMname)
The values I am putting into the cells are just strings.
However, when I run this I get the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/usr/lib/python2.7/dist-packages/IPython/utils/py3compat.pyc in execfile(fname, *where)
202 else:
203 filename = fname
--> 204 __builtin__.execfile(filename, *where)
/home/jesse/Digi-Parser/SheetOrganizer.py in <module>()
232 prjpath = '/home/jesse/Digi-Parser/SampleFiles/'
233 prjname = 'Water Use Monitor'
--> 234 things = csvToBOM(prjpath, prjname)
/home/jesse/Digi-Parser/SheetOrganizer.py in csvToBOM(projectpath, projectname)
223 orderedBody = combineSameComponents(reorderParts(body))
224
--> 225 writeBOM(orderedBody, projectname)
226
227
/home/jesse/Digi-Parser/SheetOrganizer.py in writeBOM(parts, projectname)
192 sheet.cell(row = r+i, column = 3).value = part.qty
193 projectBOMname = projectname + 'BOM' + '.xlsx'
--> 194 wb.save(projectBOMname)
195
196
/usr/local/lib/python2.7/dist-packages/openpyxl/workbook/workbook.pyc in save(self, filename)
265 save_dump(self, filename)
266 else:
--> 267 save_workbook(self, filename)
/usr/local/lib/python2.7/dist-packages/openpyxl/writer/excel.pyc in save_workbook(workbook, filename)
183 """
184 writer = ExcelWriter(workbook)
--> 185 writer.save(filename)
186 return True
187
/usr/local/lib/python2.7/dist-packages/openpyxl/writer/excel.pyc in save(self, filename)
166 """Write data into the archive."""
167 archive = ZipFile(filename, 'w', ZIP_DEFLATED)
--> 168 self.write_data(archive)
169 archive.close()
170
/usr/local/lib/python2.7/dist-packages/openpyxl/writer/excel.pyc in write_data(self, archive)
78 archive.writestr(ARC_WORKBOOK_RELS, write_workbook_rels(self.workbook))
79 archive.writestr(ARC_APP, write_properties_app(self.workbook))
---> 80 archive.writestr(ARC_CORE, write_properties_core(self.workbook.properties))
81 if self.workbook.loaded_theme:
82 archive.writestr(ARC_THEME, self.workbook.loaded_theme)
/usr/local/lib/python2.7/dist-packages/openpyxl/writer/workbook.pyc in write_properties_core(properties)
65 SubElement(root, '{%s}created' % DCTERMS_NS,
66 {'{%s}type' % XSI_NS: '%s:W3CDTF' % DCTERMS_PREFIX}).text = \
---> 67 datetime_to_W3CDTF(properties.created)
68 SubElement(root, '{%s}modified' % DCTERMS_NS,
69 {'{%s}type' % XSI_NS: '%s:W3CDTF' % DCTERMS_PREFIX}).text = \
/usr/local/lib/python2.7/dist-packages/openpyxl/date_time.pyc in datetime_to_W3CDTF(dt)
54 def datetime_to_W3CDTF(dt):
55 """Convert from a datetime to a timestamp string."""
---> 56 return datetime.datetime.strftime(dt, W3CDTF_FORMAT)
57
58
ValueError: year=1899 is before 1900; the datetime strftime() methods require year >= 1900
I cannot figure out how to fix this. I don't need Excel installed on my computer, do I? The issue seems to be in date_time.py in the openpyxl package, where the variable dt ends up with a year of 1899 for some reason.
Thank you for any help.
As discussed on the mailing list, this looks like a limitation of Python 2's datetime library: strftime() refuses years before 1900.
https://docs.python.org/2/library/datetime.html#strftime-strptime-behavior
There is no such problem with Python 3.3 or 3.4.
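If upgrading Python is not an option, a possible workaround, sketched under the assumption that the template's core properties carry the pre-1900 creation date the traceback shows, is to reset that timestamp before saving:

import datetime

# The save fails while serializing workbook.properties.created, so
# replacing the 1899 timestamp sidesteps Python 2's strftime() limit.
wb.properties.created = datetime.datetime.now()
wb.save(projectBOMname)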