Something is wrong with my Jukebox. What should I do? - audio-player

Something is wrong with my Jukebox: the sampler is not working right. The call
"zs = _sample(zs, labels, sampling_kwargs, [None, None, top_prior], [2], hps)"
leads to
"AssertionError Traceback (most recent call last)
in ()
16 x = load_prompts(audio_files, duration, hps)
17 zs = top_prior.encode(x, start_level=0, end_level=len(priors), bs_chunks=x.shape[0])
---> 18 zs = _sample(zs, labels, sampling_kwargs, [None, None, top_prior], [2], hps)
19 else:
20 raise ValueError(f'Unknown sample mode {sample_hps.mode}.')"
8 frames
/usr/local/lib/python3.6/dist-packages/jukebox/prior/conditioners.py in forward(self, pos_start, pos_end)
89 # Check if [pos_start,pos_end] in [pos_min, pos_max)
90 assert len(pos_start.shape) == 2, f"Expected shape with 2 dims, got {pos_start.shape}"
---> 91 assert (self.pos_min <= pos_start).all() and (pos_start < self.pos_max).all(), f"Range is [{self.pos_min},{self.pos_max}), got {pos_start}"
92 pos_start = pos_start.float()
93 if pos_end is not None:
AssertionError: Range is [786744.0,26460000.0), got tensor([[240.]], device='cuda:0')"
and
"KeyError Traceback (most recent call last)
in ()
16 x = load_prompts(audio_files, duration, hps)
17 zs = top_prior.encode(x, start_level=0, end_level=len(priors), bs_chunks=x.shape[0])
---> 18 zs = _sample(zs, labels, sampling_kwargs, [None, None, top_prior], [2], hps)
19 else:
20 raise ValueError(f'Unknown sample mode {sample_hps.mode}.')
2 frames
/usr/local/lib/python3.6/dist-packages/jukebox/sample.py in sample_single_window(zs, labels, sampling_kwargs, level, prior, start, hps)
58 empty_cache()
59
---> 60 max_batch_size = sampling_kwargs['max_batch_size']
61 del sampling_kwargs['max_batch_size']
62
KeyError: 'max_batch_size'"
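Taking the two failures in turn. The KeyError is visible in the traceback itself: sample_single_window reads sampling_kwargs['max_batch_size'] and then deletes that key, so the dict must contain 'max_batch_size' on every call, and re-running the sampling cell with the same (now mutated) dict will raise exactly this KeyError. The AssertionError comes from the prior's range conditioner, whose bounds [786744, 26460000) are in samples, roughly 17.84 to 600 seconds at 44.1 kHz; a start value of 240 suggests the total length baked into the labels is far shorter than the model's minimum duration. A minimal hedged sketch (names follow the Jukebox sampling notebook; the values are placeholders, not from the post):
hps.sample_length = 1048576  # placeholder; the total length must be >= ~786744 samples (~17.84 s at 44.1 kHz)
sampling_kwargs = dict(temp=0.98, fs=44100, max_batch_size=16)  # 'max_batch_size' must be present on every call
zs = _sample(zs, labels, sampling_kwargs, [None, None, top_prior], [2], hps)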

Related

AttributeError: 'list' object has no attribute 'lower' in TF IDF modeling

Can anyone please help me move forward with my modeling? I have no idea where the .lower attribute I supposedly called is coming from, or how to fix it. I'd appreciate any help.
HERE IS THE ONLY PART WHERE I APPLIED .LOWER
wordnet_lemmatizer = WordNetLemmatizer()
def create_tokens(df2):
    df2['low'] = df2['Movie'].str.lower()
    df2['stopwords_out'] = df2['low'].apply(lambda x: " ".join([word for word in x.split() if word not in stops]))
    df2['tokenized'] = df2.apply(lambda row: nltk.word_tokenize(row['stopwords_out']), axis=1)
    df2['eng_only'] = df2['tokenized'].apply(lambda x: [word for word in x if word.isalpha()])
    df2['lemmatized'] = df2['eng_only'].apply(lambda x: [wordnet_lemmatizer.lemmatize(word) for word in x])
HERE IS WHEN I HAVE CHANGED MY LEMMATIZED COLUMN TO LIST
a = df2.lemmatized.to_list()
b = (list(itertools.chain.from_iterable(a)))
bow = Counter(b)
HERE IS WHEN I TRY TO CREATE TF IDF AND WHERE THE ERROR APPEARS
cv = CountVectorizer(min_df=0, max_df=1)
tf = cv.fit_transform(df2.lemmatized)
THE ERROR
AttributeError Traceback (most recent call last)
C:\AppData\Local\Temp/ipykernel_24552/1530549768.py in
2
3 cv = CountVectorizer(min_df=0, max_df=1)
----> 4 tf = cv.fit_transform(df2.lemmatized)
5
6 print(df2.lemmatized)
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in fit_transform(self, raw_documents, y)
1200 max_features = self.max_features
1201
-> 1202 vocabulary, X = self._count_vocab(raw_documents,
1203 self.fixed_vocabulary_)
1204
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in _count_vocab(self, raw_documents, fixed_vocab)
1112 for doc in raw_documents:
1113 feature_counter = {}
-> 1114 for feature in analyze(doc):
1115 try:
1116 feature_idx = vocabulary[feature]
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in _analyze(doc, analyzer, tokenizer, ngrams, preprocessor, decoder, stop_words)
102 else:
103 if preprocessor is not None:
--> 104 doc = preprocessor(doc)
105 if tokenizer is not None:
106 doc = tokenizer(doc)
~\Anaconda3\lib\site-packages\sklearn\feature_extraction\text.py in _preprocess(doc, accent_function, lower)
67 """
68 if lower:
---> 69 doc = doc.lower()
70 if accent_function is not None:
71 doc = accent_function(doc)
AttributeError: 'list' object has no attribute 'lower'
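The traceback shows where the .lower() actually happens: CountVectorizer's default preprocessor calls doc.lower() on each document, and df2.lemmatized holds lists of tokens rather than strings, so the call comes from inside sklearn, not from your code. A minimal sketch of two common fixes (same names as above):
# Fix 1: join each token list back into a single string before vectorizing
cv = CountVectorizer()
tf = cv.fit_transform(df2['lemmatized'].apply(' '.join))
# Fix 2: tell CountVectorizer the input is already tokenized,
# which bypasses its preprocessor and tokenizer entirely
cv = CountVectorizer(analyzer=lambda tokens: tokens)
tf = cv.fit_transform(df2['lemmatized'])
Separately, note that max_df=1 is an integer, which sklearn reads as "appears in at most one document"; max_df=1.0 is the fraction that keeps everything.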

Data Type not understood in tensorflow

mean , variance = tf.nn.moments(X_train, axes = 1, keep_dims = True)
I am trying to get the mean and variance using tf.nn.moments() as shown above. However, I am encountering the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-43-fc383f99b15b> in <module>()
33 Y_train = Y_train.reshape(1,355)
34 X_mean = tf.reduce_mean(X_train, axis = 1, keepdims = True)
---> 35 mean , variance = tf.nn.moments(X_train, axes = 1, keep_dims = True)
36 X_train = tf.divide(tf.subtract(X_train,mean),tf.sqrt(variance))
37 #Y_train = Y_train/(Y_train.max(axis = 1, keepdims = True))
/Users/abhinandanchiney/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/nn_impl.pyc in moments(x, axes, shift, name, keep_dims)
664 # sufficient statistics. As a workaround we simply perform the operations
665 # on 32-bit floats before converting the mean and variance back to fp16
--> 666 y = math_ops.cast(x, dtypes.float32) if x.dtype == dtypes.float16 else x
667 # Compute true mean while keeping the dims for proper broadcasting.
668 mean = math_ops.reduce_mean(y, axes, keepdims=True, name="mean")
TypeError: data type not understood
Kindly help me see where I am going wrong.
tf.nn.moments is expecting a tensor, not a numpy array:
Args:
x: A Tensor.
Try this:
x = tf.convert_to_tensor(X_train)
mean, variance = tf.nn.moments(x, axes=1, keep_dims=True)
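For completeness, a self-contained sketch of the fix with stand-in data (TF 1.x argument names, matching the question; TF 2.x renamed keep_dims to keepdims):
import numpy as np
import tensorflow as tf

X_train = np.random.rand(3, 355)                     # stand-in for the question's data
x = tf.convert_to_tensor(X_train, dtype=tf.float32)  # converting (and casting) up front avoids the failed dtype comparison
mean, variance = tf.nn.moments(x, axes=[1], keep_dims=True)  # axes expects a list of ints
X_norm = tf.divide(tf.subtract(x, mean), tf.sqrt(variance))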

Trying to find the second largest value after grouping by even number python

import numpy as np
import pandas as pd
values = np.array([1, 3, 2, 4, 1, 6, 4])
example_df = pd.DataFrame({
    'value': values,
    'even': values % 2 == 0,
    'above_three': values > 3
}, index=['a', 'b', 'c', 'd', 'e', 'f', 'g'])
# Find second largest value in each group
if True:
    def second_largest(xs):
        sorted_xs = xs.sort(inplace=False, ascending=False)
        return sorted_xs.iloc[1]

    grouped_data = example_df.groupby('even')
    print grouped_data['value'].apply(second_largest)
The traceback says the following:
AttributeError Traceback (most recent call last)
<ipython-input-94-251c7e3ea488> in ()
14
15 grouped_data = example_df.groupby('even')
---> 16 print grouped_data['value'].apply(second_largest)
C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\groupby.pyc in apply(self, func, *args, **kwargs)
714 # ignore SettingWithCopy here in case the user mutates
715 with option_context('mode.chained_assignment', None):
--> 716 return self._python_apply_general(f)
717
718 def _python_apply_general(self, f):
C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\groupby.pyc in _python_apply_general(self, f)
718 def _python_apply_general(self, f):
719 keys, values, mutated = self.grouper.apply(f, self._selected_obj,
--> 720 self.axis)
721
722 return self._wrap_applied_output(
C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\groupby.pyc in apply(self, f, data, axis)
1800 # group might be modified
1801 group_axes = _get_axes(group)
-> 1802 res = f(group)
1803 if not _is_indexed_like(res, group_axes):
1804 mutated = True
in second_largest(xs)
9 if True:
10 def second_largest(xs):
---> 11 sorted_xs = xs.sort(inplace=False, ascending=False)
12 print sorted_xs
13 return sorted_xs.iloc[1]
C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\generic.pyc in __getattr__(self, name)
3079 if name in self._info_axis:
3080 return self[name]
-> 3081 return object.__getattribute__(self, name)
3082
3083 def __setattr__(self, name, value):
AttributeError: 'Series' object has no attribute 'sort'
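Series.sort was deprecated in pandas 0.17 and later removed, which is why the attribute lookup fails; sort_values is the replacement. A minimal sketch against the example_df above:
def second_largest(xs):
    # sort_values replaces the removed Series.sort and returns a new Series
    sorted_xs = xs.sort_values(ascending=False)
    return sorted_xs.iloc[1]

grouped_data = example_df.groupby('even')
print(grouped_data['value'].apply(second_largest))

# Equivalent one-liner: take the last of the two largest values per group
print(example_df.groupby('even')['value'].apply(lambda xs: xs.nlargest(2).iloc[-1]))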

Why I am getting the following AttributeError in Python?

I am using sklearn's GradientBoostingRegressor. After fitting it with 2000 estimators, I wanted to add more estimators to it. Since rerunning the entire fitting process takes too long, I used the set_params() method. Note that it is a multi-target problem, meaning I have 3 targets to fit. So I am using the following code to add more estimators.
'''parameters: models (list of length 3 in our case)
   train_X, train_y [n_samples x 3], test
   n_estimators : previous + 500 (default) [additional estimators]
   warm_start : True (default)
'''
def addMoreEstimators(train_X, train_y, test, models, n_estimators=500, warm_start=True):
    params = {'n_estimators': n_estimators, 'warm_start': warm_start}
    gbm_pred = pd.DataFrame()
    for (i, stars), clf in zip(enumerate(['*', '**', '***']), models):
        clf.set_params(**params)
        %time clf.fit(train_X.todense(), train_y[stars])
        %time gbm_pred[stars] = clf.predict(test.todense())
    gbm_pred = gbm_pred.as_matrix()
    gbm_dict = {'model': gbm, 'prediction': gbm_pred}
    return gbm_dict
Note: the models parameter is a list of 3 fitted models for the 3 targets.
When I ran it for the first time using 2500 estimators (originally I had 2000), it ran fine and gave me an output.
When I run the same function using 3000 estimators, I get an AttributeError. Here models contained the 3 fitted models. Below is the traceback of the error (it's kinda long):
AttributeError Traceback (most recent call last)
<ipython-input-104-9418ada3b36f> in <module>()
7 test = val_X_tfidf[:,shortened_col_index],
8 models = models,
----> 9 n_estimators = 3000)
10
11 reduced_features_gbm_pred_3000_2_lr_1_msp_2 = reduced_features_gbm_model_3000_2_lr_1_msp_2['prediction']
<ipython-input-103-e15a4fb70b50> in addMoreEstimators(train_X, train_y, test, models, n_estimators, warm_start)
15
16 clf.set_params(**params)
---> 17 get_ipython().magic(u'time clf.fit(train_X.todense(),train_y[stars])')
18 print 'starting prediction'
19
//anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in magic(self, arg_s)
2305 magic_name, _, magic_arg_s = arg_s.partition(' ')
2306 magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2307 return self.run_line_magic(magic_name, magic_arg_s)
2308
2309 #-------------------------------------------------------------------------
//anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in run_line_magic(self, magic_name, line)
2226 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
2227 with self.builtin_trap:
-> 2228 result = fn(*args,**kwargs)
2229 return result
2230
//anaconda/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in time(self, line, cell, local_ns)
//anaconda/lib/python2.7/site-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k)
191 # but it's overkill for just that one bit of state.
192 def magic_deco(arg):
--> 193 call = lambda f, *a, **k: f(*a, **k)
194
195 if callable(arg):
//anaconda/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in time(self, line, cell, local_ns)
1160 if mode=='eval':
1161 st = clock2()
-> 1162 out = eval(code, glob, local_ns)
1163 end = clock2()
1164 else:
<timed eval> in <module>()
//anaconda/lib/python2.7/site-packages/sklearn/ensemble/gradient_boosting.pyc in fit(self, X, y, sample_weight, monitor)
973 self.estimators_.shape[0]))
974 begin_at_stage = self.estimators_.shape[0]
--> 975 y_pred = self._decision_function(X)
976 self._resize_state()
977
//anaconda/lib/python2.7/site-packages/sklearn/ensemble/gradient_boosting.pyc in _decision_function(self, X)
1080 # not doing input validation.
1081 score = self._init_decision_function(X)
-> 1082 predict_stages(self.estimators_, X, self.learning_rate, score)
1083 return score
1084
sklearn/ensemble/_gradient_boosting.pyx in sklearn.ensemble._gradient_boosting.predict_stages (sklearn/ensemble/_gradient_boosting.c:2502)()
AttributeError: 'int' object has no attribute 'tree_'
Sorry for the long traceback, but I think without it, it wouldn't be possible to give me meaningful feedback.
Again, why am I getting this error?
Any help would be greatly appreciated.
Thanks
Edit
Below is the code that generates the models that were one of the inputs to the function above.
from sklearn import ensemble

def updated_runGBM(train_X, train_y, test,
                   n_estimators=100,
                   max_depth=1,
                   min_samples_split=1,
                   learning_rate=0.01,
                   loss='ls',
                   warm_start=True):
    '''train_X : n_samples x m_features
       train_y : n_samples x k_targets (multiple targets allowed)
       test : n_samples x m_features
       warm_start : True (originally the default is False, but I want to add trees)
    '''
    params = {'n_estimators': n_estimators, 'max_depth': max_depth, 'min_samples_split': min_samples_split,
              'learning_rate': learning_rate, 'loss': loss, 'warm_start': warm_start}
    gbm1 = ensemble.GradientBoostingRegressor(**params)
    gbm2 = ensemble.GradientBoostingRegressor(**params)
    gbm3 = ensemble.GradientBoostingRegressor(**params)
    gbm = [gbm1, gbm2, gbm3]
    gbm_pred = pd.DataFrame()
    for (i, stars), clf in zip(enumerate(['*', '**', '***']), gbm):
        %time clf.fit(train_X.todense(), train_y[stars])
        %time gbm_pred[stars] = clf.predict(test.todense())
    gbm_pred = gbm_pred.as_matrix()
    gbm_pred = np.clip(gbm_pred, 0, np.inf)
    gbm_dict = {'model': gbm, 'prediction': gbm_pred}
    return gbm_dict
NOTE In the code above, I have removed some of the print statements to reduce clutter.
These are the two functions I am using, nothing else (apart from the code to split up the data).
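The traceback says predict_stages found a plain int where a fitted tree was expected, i.e. estimators_ on the model being refit no longer holds a valid ensemble. One thing visibly off in addMoreEstimators is its return value: gbm_dict = {'model': gbm, ...} refers to gbm, which is not defined inside that function, so it resolves to whatever leftover global gbm exists in the notebook; if the models fed into the 3000-estimator call came from that dict, they may not be the regressors that were actually grown to 2500. Returning {'model': models, ...} would keep the warm-started regressors flowing between calls. For reference, the plain warm-start pattern looks like this (a self-contained sketch with toy data, not the poster's setup):
import numpy as np
from sklearn import ensemble

X = np.random.rand(200, 5)   # toy stand-ins for the TF-IDF matrices
y = np.random.rand(200)

gbm = ensemble.GradientBoostingRegressor(n_estimators=2000, warm_start=True)
gbm.fit(X, y)

# To grow the same fitted model, raise n_estimators and call fit again;
# only the 500 new stages are trained.
gbm.set_params(n_estimators=2500)
gbm.fit(X, y)
assert gbm.estimators_.shape[0] == 2500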

Outputting dataframes to csv using a loop

I have groups constructed from a dataframe and am trying to understand why this doesn't work:
for i in range(1,12):
    out1 = df.ix['group%s' % i]
    out1.to_csv('group%s.csv' % i)
    out1.pl.describe()
An example of a group_i is (a and b are boolean masks over the float column ptdelta):
a = df["ptdelta"] >= 0
b = df["ptdelta"] < 5
group1 = df["ptdelta"][a & b]
The traceback gives KeyError: 'group1' (i.e., it fails on the first iteration):
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-87-11399ea315df> in <module>()
6 '''
7 for i in range(1,12):
----> 8 out1=df.ix['group%s'% i]
9 out1.to_csv('group%s.csv' %i)
10 out1.pl.describe()
C:\Python27\lib\site-packages\pandas\core\indexing.pyc in __getitem__(self, key)
32 return self._getitem_tuple(key)
33 else:
---> 34 return self._getitem_axis(key, axis=0)
35
36 def _get_label(self, label, axis=0):
C:\Python27\lib\site-packages\pandas\core\indexing.pyc in _getitem_axis(self, key, axis)
343 return self._get_loc(key, axis=0)
344
--> 345 return self._get_label(idx, axis=0)
346 else:
347 labels = self.obj._get_axis(axis)
You are trying to index with string labels like 'group1', which are not in your dataframe's index. If you already have the groups in a list, with each group a series/mask carrying the correct index, you can try:
i = 1
for group in groups:
    out1 = df.ix[group]
    out1.to_csv('group%s.csv' % i)
    out1.pl.describe()
    i += 1
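A hedged variant of the same idea for current pandas (.ix has since been removed; .loc with a boolean mask is the modern equivalent, and enumerate replaces the manual counter). It assumes groups is a list of boolean masks aligned with df's index and that the column of interest is named pl, as in the question:
for i, group in enumerate(groups, start=1):
    out = df.loc[group]
    out.to_csv('group%s.csv' % i)
    print(out['pl'].describe())  # inside a loop, describe() only shows if printed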