Can anyone please help me move forward with my modeling? I have no idea where the .lower attribute is being called or how to fix it. I'd appreciate any help.
HERE IS THE ONLY PART WHERE I APPLIED .LOWER
# Shared lemmatizer instance used by create_tokens().
wordnet_lemmatizer = WordNetLemmatizer()


def create_tokens(df2):
    """Add text-preprocessing columns to ``df2`` in place.

    Starting from the ``'Movie'`` column, the following columns are written:

    * ``'low'``           -- the text lower-cased.
    * ``'stopwords_out'`` -- ``'low'`` with every whitespace-separated word
      found in ``stops`` removed, re-joined with single spaces.
    * ``'tokenized'``     -- ``nltk.word_tokenize`` applied to
      ``'stopwords_out'``.
    * ``'eng_only'``      -- only the tokens for which ``str.isalpha()`` is
      true.
    * ``'lemmatized'``    -- each remaining token passed through
      ``wordnet_lemmatizer.lemmatize``.

    The last three columns hold a *list* of tokens per row, not a string.

    Args:
        df2: DataFrame with a string column named ``'Movie'``.

    Returns:
        None. ``df2`` is modified in place.
    """
    # NOTE(review): `stops` is not defined in this snippet; it is presumably a
    # collection of stop words created elsewhere -- confirm.
    df2['low'] = df2['Movie'].str.lower()
    df2['stopwords_out'] = df2['low'].apply(
        lambda text: " ".join(word for word in text.split() if word not in stops)
    )
    # Only one column is needed, so apply on that column rather than row-wise
    # over the whole frame.
    df2['tokenized'] = df2['stopwords_out'].apply(nltk.word_tokenize)
    df2['eng_only'] = df2['tokenized'].apply(
        lambda tokens: [word for word in tokens if word.isalpha()]
    )
    df2['lemmatized'] = df2['eng_only'].apply(
        lambda tokens: [wordnet_lemmatizer.lemmatize(word) for word in tokens]
    )
HERE IS WHEN I HAVE CHANGED MY LEMMATIZED COLUMN TO LIST
# Collect the per-row token lists, flatten them into one list of tokens, and
# count how often each token occurs across the whole column.
a = df2.lemmatized.to_list()
b = [token for tokens in a for token in tokens]
bow = Counter(b)
HERE IS WHEN I TRY TO CREATE TF IDF AND WHERE THE ERROR APPEARS
# NOTE: each row of df2.lemmatized is a list of tokens (see create_tokens), but
# the traceback below shows CountVectorizer's preprocessing calling .lower()
# on every document, i.e. it expects one string per row. That mismatch is what
# raises "AttributeError: 'list' object has no attribute 'lower'".
# NOTE(review): an integer max_df is an absolute document count, so max_df=1
# would drop every term that occurs in more than one document; max_df=1.0
# (a proportion) may have been intended -- confirm.
cv = CountVectorizer(min_df=0, max_df=1)
tf = cv.fit_transform(df2.lemmatized)
THE ERROR
AttributeError Traceback (most recent call last)
C:\AppData\Local\Temp/ipykernel_24552/1530549768.py in
2
3 cv = CountVectorizer(min_df=0, max_df=1)
----> 4 tf = cv.fit_transform(df2.lemmatized)
5
6 print(df2.lemmatized)
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in fit_transform(self, raw_documents, y)
1200 max_features = self.max_features
1201
-> 1202 vocabulary, X = self.count_vocab(raw_documents,
1203 self.fixed_vocabulary)
1204
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in _count_vocab(self, raw_documents, fixed_vocab)
1112 for doc in raw_documents:
1113 feature_counter = {}
-> 1114 for feature in analyze(doc):
1115 try:
1116 feature_idx = vocabulary[feature]
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in _analyze(doc, analyzer, tokenizer, ngrams, preprocessor, decoder, stop_words)
102 else:
103 if preprocessor is not None:
--> 104 doc = preprocessor(doc)
105 if tokenizer is not None:
106 doc = tokenizer(doc)
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in _preprocess(doc, accent_function, lower)
67 """
68 if lower:
---> 69 doc = doc.lower()
70 if accent_function is not None:
71 doc = accent_function(doc)
AttributeError: 'list' object has no attribute 'lower'
# Inspect the column that was passed to CountVectorizer.
print(df2.lemmatized)
I'm a new Pyomo user. I am trying to convert an AMPL model to Pyomo and am finding it very hard.
When I create the variable x using the set lev, which is indexed by another set A, I get this error:
Cannot apply a Set operator to an indexed Set component (lev).
Thanks in advance.
The AMPL model and the Pyomo code are shown below.
Ampl
set T=1..7; # set of epochs
set A ordered; # set of appliances
set L; # set of energy consumption levels
set Lev {A} within L; # for each appliance a in A, its set of energy consumption levels (a subset of L)
var x{a in A, l in Lev[a], t in T}, binary; # 1 if appliance a operates at consumption level l at epoch t
Pyomo
model=pyo.AbstractModel()
# NOTE(review): the AMPL model above uses T=1..7 but this range is 1..48 -- confirm which is intended.
model.T = pyo.RangeSet(1,48)
model.A=pyo.Set(ordered=True)
model.L=pyo.Set()
# lev is an indexed Set: one subset of L for each element of A.
model.lev=pyo.Set(model.A,within=model.L)
model.y=pyo.Var(model.A,model.T,domain=pyo.Binary)
# NOTE: this line raises the TypeError shown below, because the indexed Set
# model.lev is passed directly as one of the Var's index sets.
model.x=pyo.Var(model.A,model.lev,model.T,domain=pyo.Binary)
error warning
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-74-f8fe2ec9e77d> in <module>()
10
11 model.y=pyo.Var(model.A,model.T,domain=pyo.Binary)
---> 12 model.x=pyo.Var(model.A,model.lev,model.T,domain=pyo.Binary)
C:\Users\HaichengLing\anaconda3\envs\nilmtk-env\lib\site-packages\pyomo\core\base\var.py in __init__(self, *args, **kwd)
522 #
523 kwd.setdefault('ctype', Var)
--> 524 IndexedComponent.__init__(self, *args, **kwd)
525 #
526 # Determine if the domain argument is a functor or other object
C:\Users\HaichengLing\anaconda3\envs\nilmtk-env\lib\site-packages\pyomo\core\base\indexed_component.py in __init__(self, *args, **kwds)
215 # "transferred" to the model).
216 #
--> 217 tmp = [process_setarg(x) for x in args]
218 self._implicit_subsets = tmp
219 self._index = tmp[0].cross(*tmp[1:])
C:\Users\HaichengLing\anaconda3\envs\nilmtk-env\lib\site-packages\pyomo\core\base\indexed_component.py in <listcomp>(.0)
215 # "transferred" to the model).
216 #
--> 217 tmp = [process_setarg(x) for x in args]
218 self._implicit_subsets = tmp
219 self._index = tmp[0].cross(*tmp[1:])
C:\Users\HaichengLing\anaconda3\envs\nilmtk-env\lib\site-packages\pyomo\core\base\set.py in process_setarg(arg)
118 raise TypeError("Cannot apply a Set operator to an "
119 "indexed %s component (%s)"
--> 120 % (arg.ctype.__name__, arg.name,))
121 elif isinstance(arg, Component):
122 raise TypeError("Cannot apply a Set operator to a non-Set "
TypeError: Cannot apply a Set operator to an indexed Set component (lev)
The easiest way to do this is to use an intermediate set:
model.A = pyo.Set(ordered=True)
model.L = pyo.Set()
# Indexed Set: one subset of L for each element of A.
model.lev = pyo.Set(model.A, within=model.L)


def AL_rule(m):
    """Return every valid (a, l) pair: each a in m.A paired with each l in m.lev[a]."""
    return [(a, l) for a in m.A for l in m.lev[a]]


# Flatten the indexed Set into one plain Set of (a, l) pairs. An indexed Set
# cannot be used directly as an index set of a Var, but this intermediate Set can.
model.AL = pyo.Set(within=model.A * model.L, initialize=AL_rule)
model.x = pyo.Var(model.AL, model.T, domain=pyo.Binary)
# NOTE(review): X_train appears to be a NumPy array here rather than a Tensor -- confirm.
mean , variance = tf.nn.moments(X_train, axes = 1, keep_dims = True)
I am trying to get the mean and variance using tf.nn.moments() as shown above. However, I am encountering the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-43-fc383f99b15b> in <module>()
33 Y_train = Y_train.reshape(1,355)
34 X_mean = tf.reduce_mean(X_train, axis = 1, keepdims = True)
---> 35 mean , variance = tf.nn.moments(X_train, axes = 1, keep_dims = True)
36 X_train = tf.divide(tf.subtract(X_train,mean),tf.sqrt(variance))
37 #Y_train = Y_train/(Y_train.max(axis = 1, keepdims = True))
/Users/abhinandanchiney/anaconda2/lib/python2.7/site- packages/tensorflow/python/ops/nn_impl.pyc in moments(x, axes, shift, name, keep_dims)
664 # sufficient statistics. As a workaround we simply perform the operations
665 # on 32-bit floats before converting the mean and variance back to fp16
--> 666 y = math_ops.cast(x, dtypes.float32) if x.dtype == dtypes.float16 else x
667 # Compute true mean while keeping the dims for proper broadcasting.
668 mean = math_ops.reduce_mean(y, axes, keepdims=True, name="mean")
TypeError: data type not understood
Kindly help where I am going wrong.
tf.nn.moments is expecting a tensor, not a numpy array:
Args:
x: A Tensor.
Try this:
# Convert the input to a Tensor first, then compute the moments on that Tensor.
x = tf.convert_to_tensor(X_train)
# NOTE(review): the traceback above shows reduce_mean taking `keepdims`; newer
# TensorFlow releases may expect `keepdims` here as well -- confirm against the
# installed version.
mean , variance = tf.nn.moments(x, axes = 1, keep_dims = True)
Maintainer note: This question as-is is obsolete, since the bokeh.charts API was deprecated and removed years ago. But see the answer below for how to create grouped bar charts with the stable bokeh.plotting API in newer versions of Bokeh
I want to create a simple bar chart (like the one on the official example page).
I tried executing the code in this old answer, "Plotting Bar Charts with Bokeh",
but it shows the error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-2-ba53ce344126> in <module>()
11
12 bar = Bar(xyvalues, cat, title="Stacked bars",
---> 13 xlabel="category", ylabel="language")
14
15 output_file("stacked_bar.html")
/usr/local/lib/python2.7/dist-packages/bokeh/charts/builders/bar_builder.pyc in Bar(data, label, values, color, stack, group, agg, xscale, yscale, xgrid, ygrid, continuous_range, **kw)
318 kw['y_range'] = y_range
319
--> 320 chart = create_and_build(BarBuilder, data, **kw)
321
322 # hide x labels if there is a single value, implying stacking only
/usr/local/lib/python2.7/dist-packages/bokeh/charts/builder.pyc in create_and_build(builder_class, *data, **kws)
60 # create the new builder
61 builder_kws = {k: v for k, v in kws.items() if k in builder_props}
---> 62 builder = builder_class(*data, **builder_kws)
63
64 # create a chart to return, since there isn't one already
/usr/local/lib/python2.7/dist-packages/bokeh/charts/builder.pyc in __init__(self, *args, **kws)
280
281 # handle input attrs and ensure attrs have access to data
--> 282 attributes = self._setup_attrs(data, kws)
283
284 # remove inputs handled by dimensions and chart attributes
/usr/local/lib/python2.7/dist-packages/bokeh/charts/builder.pyc in _setup_attrs(self, data, kws)
331 attributes[attr_name].iterable = custom_palette
332
--> 333 attributes[attr_name].setup(data=source, columns=attr)
334
335 else:
/usr/local/lib/python2.7/dist-packages/bokeh/charts/attributes.pyc in setup(self, data, columns)
193
194 if columns is not None and self.data is not None:
--> 195 self.set_columns(columns)
196
197 if self.columns is not None and self.data is not None:
/usr/local/lib/python2.7/dist-packages/bokeh/charts/attributes.pyc in set_columns(self, columns)
185 # assume this is now the iterable at this point
186 self.iterable = columns
--> 187 self._setup_default()
188
189 def setup(self, data=None, columns=None):
/usr/local/lib/python2.7/dist-packages/bokeh/charts/attributes.pyc in _setup_default(self)
142 def _setup_default(self):
143 """Stores the first value of iterable into `default` property."""
--> 144 self.default = next(self._setup_iterable())
145
146 def _setup_iterable(self):
/usr/local/lib/python2.7/dist-packages/bokeh/charts/attributes.pyc in _setup_iterable(self)
320
321 def _setup_iterable(self):
--> 322 return iter(self.items)
323
324 def get_levels(self, columns):
TypeError: 'NoneType' object is not iterable
The official example did work:
URL: http://docs.bokeh.org/en/0.11.0/docs/user_guide/charts.html#userguide-charts-data-types
from bokeh.charts import Bar, output_file, show
from bokeh.sampledata.autompg import autompg as df
# Bar chart of the median 'mpg' per 'yr', grouped by 'origin' (see the title).
p = Bar(df, label='yr', values='mpg', agg='median', group='origin',
title="Median MPG by YR, grouped by ORIGIN", legend='top_right')
# Send the output to bar.html, then display the chart.
output_file("bar.html")
show(p)
BUT, I don't want to use pandas, I want to use a simple python dictionary like this:
# Plain dict input: each key is a group name, each list holds that group's four values.
my_simple_dict = {
'Group 1': [22,33,44,55],
'Group 2': [44,66,0,24],
'Group 3': [2,99,33,51]
}
How can I achieve a Bar chart that shows the three groups (Group 1, Group 2, Group 3) with the x-axis going from 1 to 4?
NOTE: I am working with python 2.7
The question and other answers are obsolete, as bokeh.charts was deprecated and removed several years ago. However, support for grouped and stacked bar charts using the stable bokeh.plotting API has improved greatly since then:
https://docs.bokeh.org/en/latest/docs/user_guide/categorical.html
Here is a full example:
from bokeh.io import show
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.plotting import figure

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
years = ['2015', '2016', '2017']

data = {'fruits' : fruits,
        '2015'   : [2, 1, 4, 3, 2, 4],
        '2016'   : [5, 3, 3, 2, 4, 6],
        '2017'   : [3, 2, 4, 4, 5, 3]}

# Nested categorical x coordinates, one (fruit, year) pair per bar:
# [ ("Apples", "2015"), ("Apples", "2016"), ("Apples", "2017"), ("Pears", "2015"), ... ]
x = [(fruit, year) for fruit in fruits for year in years]

# Interleave the per-year columns so that counts[i] belongs to x[i]
# (like an hstack). The columns are taken from `years` instead of being
# hard-coded, so adding a year only requires touching `years` and `data`.
counts = sum(zip(*(data[year] for year in years)), ())

source = ColumnDataSource(data=dict(x=x, counts=counts))

# `height` is used instead of the older `plot_height` keyword, which newer
# Bokeh releases no longer accept.
p = figure(x_range=FactorRange(*x), height=250, title="Fruit Counts by Year",
           toolbar_location=None, tools="")

p.vbar(x='x', top='counts', width=0.9, source=source)

# Cosmetics: bars start at zero, a little padding around the categories,
# rotated fruit labels, and no vertical grid lines.
p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None

show(p)
For now the solution I found is changing the dict structure
from bokeh.charts import Bar, output_file, show, hplot
import pandas as pd

# Wide input: group name -> that group's values, in x-axis order.
my_simple_dict = {
    'Group 1': [22,33,44,55],
    'Group 2': [44,66,0,24],
    'Group 3': [2,99,33,51]
}

# Long form handed to Bar: three parallel lists with one entry per bar.
my_data_transformed_dict = {
    'x-axis': [],
    'value': [],
    'group-name': [],
}

for group, group_list in my_simple_dict.iteritems():
    # x positions are numbered from 1 within each group.
    for x_axis, item in enumerate(group_list, 1):
        my_data_transformed_dict['x-axis'].append(x_axis)
        my_data_transformed_dict['value'].append(item)
        my_data_transformed_dict['group-name'].append(group)

my_bar = Bar(my_data_transformed_dict, values='value',label='x-axis',group='group-name',legend='top_right')
output_file("grouped_bar.html")
show(my_bar)
If someone knows a better way please tell me
I have groups constructed from a dataframe & trying to understand why this doesn't work:
# NOTE: 'group%s' % i builds the *string* 'group1', 'group2', ...; df.ix then
# looks that string up as a label in df. It does not refer to the variable
# named group1, which is why the first iteration raises KeyError.
for i in range(1,12):
out1=df.ix['group%s'% i]
out1.to_csv('group%s.csv' % i)
out1.pl.describe()
An example of a group_i is:
(a and b are boolean masks built from the float column "ptdelta")
# Boolean masks selecting the rows with 0 <= ptdelta < 5. They are defined
# first because the selection below uses them.
a = df["ptdelta"] >= 0
b = df["ptdelta"] < 5
group1 = df["ptdelta"][a & b]
The traceback gives KeyError: group1 (meaning the first try)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-87-11399ea315df> in <module>()
6 '''
7 for i in range(1,12):
----> 8 out1=df.ix['group%s'% i]
9 out1.to_csv('group%s.csv' %i)
10 out1.pl.describe()
C:\Python27\lib\site-packages\pandas\core\indexing.pyc in __getitem__(self, key)
32 return self._getitem_tuple(key)
33 else:
---> 34 return self._getitem_axis(key, axis=0)
35
36 def _get_label(self, label, axis=0):
C:\Python27\lib\site-packages\pandas\core\indexing.pyc in _getitem_axis(self, key, axis)
343 return self._get_loc(key, axis=0)
344
--> 345 return self._get_label(idx, axis=0)
346 else:
347 labels = self.obj._get_axis(axis)
You are trying to index with the string value 'group1' (then 'group2', and so on), which is not a valid index label in your dataframe. If you already have the groups in a list, and each group is a series with a correctly sized index, you can try:
# Number the output files from 1 (group1.csv, group2.csv, ...); enumerate
# keeps the counter in step with the loop without manual bookkeeping.
# NOTE(review): `.ix` has been removed from recent pandas releases; on a
# current pandas this lookup would need `.loc` instead -- confirm the
# installed version.
for i, group in enumerate(groups, 1):
    out1 = df.ix[group]
    out1.to_csv('group%s.csv' % i)
    out1.pl.describe()