loading data using tf.from_tensor_slices yields OperatorNotAllowedInGraphError - list

I am trying to use tf.data pipeline to get finer control over loading image data but I receive the following error which I think is because of usage of list comprehension. My Code looks like this:
def load_files(data_dir: str, val_split=0.2):
assert len(os.listdir(os.path.join(data_dir + 'images/'))) == \
len(os.listdir(os.path.join(data_dir, 'ground_truth/'))), print("No. of image files != No. of gt files")
image_count = len(os.listdir(os.path.join(data_dir + 'images/')))
files = os.listdir(os.path.join(data_dir + 'images/'))
image_files = [os.path.join(data_dir + 'images/', file) for file in files]
image_files = np.array(image_files)
ds = tf.data.Dataset.from_tensor_slices(files)
ds = ds.map(process_data)
# train_ds = ds.skip(int(val_split * image_count))
# val_ds = ds.take(int(val_split * image_count))
return ds
def process_data(file_path):
image, = tf.io.read_file(file_path)
image = tf.io.decode_jpeg(image, channels=3)
label = tf.strings.split(file_path)
label = tf.io.decode_png(label, channels=0, dtype=tf.uint8)
return image, label
some_dir = "../../../TuSimple_lane_detection/"
img_dir = some_dir + "images/"
mask_dir = some_dir + "ground_truth/"
data_train = load_files(some_dir)
for f in data_train.take(5):
print(f.numpy())
The error looks like this:
File "E:\Datasets\KITTI_3D_Object_detection\venv\PycharmProjects\lib\site-packages\tensorflow\python\eager\function.py", line 3210, in _get_concrete_function_garbage_collected
graph_function, _ = self._maybe_define_function(args, kwargs)
File "E:\Datasets\KITTI_3D_Object_detection\venv\PycharmProjects\lib\site-packages\tensorflow\python\eager\function.py", line 3557, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "E:\Datasets\KITTI_3D_Object_detection\venv\PycharmProjects\lib\site-packages\tensorflow\python\eager\function.py", line 3392, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "E:\Datasets\KITTI_3D_Object_detection\venv\PycharmProjects\lib\site-packages\tensorflow\python\framework\func_graph.py", line 1143, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "E:\Datasets\KITTI_3D_Object_detection\venv\PycharmProjects\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 4510, in wrapped_fn
ret = wrapper_helper(*args)
File "E:\Datasets\KITTI_3D_Object_detection\venv\PycharmProjects\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 4440, in wrapper_helper
ret = autograph.tf_convert(self._func, ag_ctx)(*nested_args)
File "E:\Datasets\KITTI_3D_Object_detection\venv\PycharmProjects\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 699, in wrapper
raise e.ag_error_metadata.to_exception(e)
tensorflow.python.framework.errors_impl.OperatorNotAllowedInGraphError: in user code:
File "E:/Datasets/KITTI_3D_Object_detection/KITTI_2D/EndToEndLaneDetection/Dataloader.py", line 21, in process_data *
image, = tf.io.read_file(file_path)
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed in Graph execution. Use Eager execution or decorate this function with #tf.function.
I did not have the problem when I was working with TF1.12 before, when I shifted o TF 2.3 I am encountering this error.

Related

Django - Pulp Optimisation AttributeError: 'str' object has no attribute 'hash'

I am writing a script for pulp Optimisation to engage with my Django database. The problem contains a few thousand variables to be optimised and several hundred constraints which vary depending upon the values of a,b,c.
var_names[]
var_values{}
for _foo_ in list_1:
for _bar_ in list_2:
for _var_ in list_3:
for _eet_ list_4:
var_name = str(_foo_)+str(_bar_)+str(_var_)+str(_eet_)
var_names.append(var_name)
exec(str(_foo_)+str(_bar_)+str(_var_)+str(_eet_) + "= LpVariable("str(_foo_)+str(_bar_)+str(_var_)+str(_eet_)+", lowBound=0, cat='Integer')")
var_value = DataBase.objects.get(column_A = str(_foo_)+str(_var_)).value
var_values.append(var_value)
obj_func = LpAffineExpression([var_names[i],var_values[i] for in in range(len(var_names))])
problem = LpProblem(name="name", sense=LpMinimise)
#Example of constraints
exec("problem += (" + str(function(a1,b1,c1)_) +str(function(a1,b1,c2)) +" >= Database_2.objects.get(column_A = z1).value")
problem += obj_func
problem.sovle()
The code works in jupyter notebook when I load the database info as a dataframe. However, I keep receiving this following error code when using in Djagno:
File "/path/to/files/prob.py", line 1610, in <module>
problem.solve()
File "/path/to/files/lib/python3.9/site-packages/pulp/pulp.py", line 1913, in solve
status = solver.actualSolve(self, **kwargs)
File "/path/to/files/lib/python3.9/site-packages/pulp/apis/coin_api.py", line 137, in actualSolve
return self.solve_CBC(lp, **kwargs)
File "/path/to/files/lib/python3.9/site-packages/pulp/apis/coin_api.py", line 153, in solve_CBC
vs, variablesNames, constraintsNames, objectiveName = lp.writeMPS(
File "/path/to/files/lib/python3.9/site-packages/pulp/pulp.py", line 1782, in writeMPS
return mpslp.writeMPS(self, filename, mpsSense=mpsSense, rename=rename, mip=mip)
File "/path/to/files/lib/python3.9/site-packages/pulp/mps_lp.py", line 204, in writeMPS
constrNames, varNames, cobj.name = LpProblem.normalisedNames()
File "/path/to/files/lib/python3.9/site-packages/pulp/pulp.py", line 1546, in normalisedNames
_variables = self.variables()
File "/path/to/files/lib/python3.9/site-packages/pulp/pulp.py", line 1624, in variables
self.addVariables(list(self.objective.keys()))
File "/path/to/files/lib/python3.9/site-packages/pulp/pulp.py", line 1614, in addVariables
self.addVariable(v)
File "/path/to/files/lib/python3.9/site-packages/pulp/pulp.py", line 1603, in addVariable
if variable.hash not in self._variable_ids:
AttributeError: 'str' object has no attribute 'hash'
I have the code stored as .py file which is called views.py.
I believe it may be an issue with the namespace of the LpVariable creations. I have tried to:
define the whole problem encapsulated as a function with no entry variable and returns a dict of the solution.
Define the problem as a class with problem.create and problem.solve as methods to create the variables and solve the function.
Update the exec() code to store variables in the globals diction.
exec(str(_foo_)+str(_bar_)+str(_var_)+str(_eet_) + "= LpVariable("str(_foo_)+str(_bar_)+str(_var_)+str(_eet_)+", lowBound=0, cat='Integer')", globals())
And alternately creating a local dict and executing the above code with locals(),local_dict .
Used LpVariable.dict
variables = LpVariable.dicts("variables", [(_foo_, _bar_, _var_, _eet_), for _foo_ in list1 for _bar_ in list2 for _var_ in list3 for _eet_ in list4], lowBound=o, cat="Integer")
This does create all the variables, however the function used in the constraints references the variables as per the name str(foo)+str(bar)+str(var)+str(eet) and not variable[i] , which then generates undefined variable errors.
As mentioned, this code does work in jupyter, I am just at a loss as to what the error may be a result of.
LpAffineExpression expects LpVariable, not var_names[i].
var_items = []
# exec(str(_foo_)+str(_bar_)+str(_var_)+str(_eet_) + "= LpVariable(" + str(_foo_)+str(_bar_)+str(_var_)+str(_eet_)+", lowBound=0, cat='Integer')")
# var_value = DataBase.objects.get(column_A = str(_foo_)+str(_var_)).value
# var_values.append(var_value)
var_value = LpVariable(var_name, lowBound=0, cat='Integer')
var_coeff = DataBase.objects.get(column_A = str(_foo_)+str(_var_)).value
var_items.append((var_value, var_coeff))
# obj_func = LpAffineExpression([var_names[i],var_values[i] for i in range(len(var_names))])
obj_func = LpAffineExpression(var_items)
Reference: https://coin-or.github.io/pulp/technical/pulp.html#pulp.LpAffineExpression

Python Zipline : "pandas_datareader._utils.RemoteDataError" & local data

It's my first post, I hope it will be well done.
I'm trying to run the following ZipLine Algo with local AAPL data :
import pandas as pd
from collections import OrderedDict
import pytz
from zipline.api import order, symbol, record, order_target
from zipline.algorithm import TradingAlgorithm
data = OrderedDict()
data['AAPL'] = pd.read_csv('AAPL.csv', index_col=0, parse_dates=['Date'])
panel = pd.Panel(data)
panel.minor_axis = ['Open', 'High', 'Low', 'Close', 'Volume', 'Price']
panel.major_axis = panel.major_axis.tz_localize(pytz.utc)
print panel["AAPL"]
def initialize(context):
context.security = symbol('AAPL')
def handle_data(context, data):
MA1 = data[context.security].mavg(50)
MA2 = data[context.security].mavg(100)
date = str(data[context.security].datetime)[:10]
current_price = data[context.security].price
current_positions = context.portfolio.positions[symbol('AAPL')].amount
cash = context.portfolio.cash
value = context.portfolio.portfolio_value
current_pnl = context.portfolio.pnl
# code (this will come under handle_data function only)
if (MA1 > MA2) and current_positions == 0:
number_of_shares = int(cash / current_price)
order(context.security, number_of_shares)
record(date=date, MA1=MA1, MA2=MA2, Price=
current_price, status="buy", shares=number_of_shares, PnL=current_pnl, cash=cash, value=value)
elif (MA1 < MA2) and current_positions != 0:
order_target(context.security, 0)
record(date=date, MA1=MA1, MA2=MA2, Price=current_price, status="sell", shares="--", PnL=current_pnl, cash=cash,
value=value)
else:
record(date=date, MA1=MA1, MA2=MA2, Price=current_price, status="--", shares="--", PnL=current_pnl, cash=cash,
value=value)
#initializing trading enviroment
algo_obj = TradingAlgorithm(initialize=initialize, handle_data=handle_data)
#run algo
perf_manual = algo_obj.run(panel)
#code
#calculation
print "total pnl : " + str(float(perf_manual[["PnL"]].iloc[-1]))
buy_trade = perf_manual[["status"]].loc[perf_manual["status"] == "buy"].count()
sell_trade = perf_manual[["status"]].loc[perf_manual["status"] == "sell"].count()
total_trade = buy_trade + sell_trade
print "buy trade : " + str(int(buy_trade)) + " sell trade : " + str(int(sell_trade)) + " total trade : " + str(int(total_trade))
I was inspired by https://www.quantinsti.com/blog/introduction-zipline-python/ and https://www.quantinsti.com/blog/importing-csv-data-zipline-backtesting/.
I get this error :
Traceback (most recent call last):
File "C:/Users/main/Desktop/docs/ALGO_TRADING/_DATAS/_zipline_data_bundle /temp.py", line 51, in <module>
algo_obj = TradingAlgorithm(initialize=initialize, handle_data=handle_data)
File "C:\Python27-32\lib\site-packages\zipline\algorithm.py", line 273, in __init__
self.trading_environment = TradingEnvironment()
File "C:\Python27-32\lib\site-packages\zipline\finance\trading.py", line 99, in __init__
self.bm_symbol,
File "C:\Python27-32\lib\site-packages\zipline\data\loader.py", line 166, in load_market_data
environ,
File "C:\Python27-32\lib\site-packages\zipline\data\loader.py", line 230, in ensure_benchmark_data
last_date,
File "C:\Python27-32\lib\site-packages\zipline\data\benchmarks.py", line 50, in get_benchmark_returns
last_date
File "C:\Python27-32\lib\site-packages\pandas_datareader\data.py", line 137, in DataReader
session=session).read()
File "C:\Python27-32\lib\site-packages\pandas_datareader\base.py", line 181, in read
params=self._get_params(self.symbols))
File "C:\Python27-32\lib\site-packages\pandas_datareader\base.py", line 79, in _read_one_data
out = self._read_url_as_StringIO(url, params=params)
File "C:\Python27-32\lib\site-packages\pandas_datareader\base.py", line 90, in _read_url_as_StringIO
response = self._get_response(url, params=params)
File "C:\Python27-32\lib\site-packages\pandas_datareader\base.py", line 139, in _get_response
raise RemoteDataError('Unable to read URL: {0}'.format(url))
pandas_datareader._utils.RemoteDataError: Unable to read URL: http://www.google.com/finance/historical?q=SPY&startdate=Dec+29%2C+1989&enddate=Dec+20%2C+2017&output=csv
I don't understand : "http://www.google.com/finance/historical?q=SPY&startdate=Dec+29%2C+1989&enddate=Dec+20%2C+2017&output=csv".
I don't ask for online data request... and not 'SPY' stock but 'APPL'...
What does this error mean to you ?
Thanks a lot for your help !
C.
Only reference and workaround I found regarding this issue is here:
from pandas_datareader.google.daily import GoogleDailyReader
#property
def url(self):
return 'http://finance.google.com/finance/historical'
GoogleDailyReader.url = url
do:
pip install fix_yahoo_finance
then modify the file: zipline/lib/pythonx.x/site-packages/zipline/data/benchmarks.py
add the following two statements to the file:
import fix_yahoo_finance as yf
yf.pdr_override ()
then change following instruction:
data = pd_reader.DataReader (symbol, 'Google' first_date, last_date)
to:
data = pd_reader.get_data_yahoo(symbol,first_date, last_date)

type matching error in theano [ Cannot convert Type Generic (of Variable <Generic>) into Type TensorType]

thisfile.py
import cPickle
import gzip
import os
import numpy
import theano
import theano.tensor as T
def load_data(dataset):
f = gzip.open(dataset, 'rb')
train_set, valid_set, test_set = cPickle.load(f)
f.close()
def shared_dataset(data_xy, borrow=True):
data_x, data_y = data_xy
shared_x = theano.shared(numpy.asarray(data_x,
dtype=theano.config.floatX),
borrow=borrow)
shared_y = theano.shared(numpy.asarray(data_y,
dtype=theano.config.floatX),
borrow=borrow)
return shared_x, T.cast(shared_y, 'int32')
test_set_x, test_set_y = shared_dataset(test_set)
valid_set_x, valid_set_y = shared_dataset(valid_set)
train_set_x, train_set_y = shared_dataset(train_set)
rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
(test_set_x, test_set_y)]
return rval
class PCA(object):
def __init__(self):
self.param = 0
def dimemsion_transform(self, X):
m_mean = T.mean(X, axis=0)
X = X - m_mean ##################### this line makes error
return X
if __name__ == '__main__':
dataset = 'mnist.pkl.gz'
# load the MNIST data
data = load_data(dataset)
X = T.matrix('X')
m_pca = PCA()
transform = theano.function(
inputs=[],
outputs=m_pca.dimemsion_transform(X),
givens={
X: data
}
)
error showing like below
Traceback (most recent call last):
File ".../thisfile.py", line 101, in <module>
X: data
File ".../Theano/theano/compile/function.py", line 322, in function
output_keys=output_keys)
File ".../Theano/theano/compile/pfunc.py", line 443, in pfunc
no_default_updates=no_default_updates)
File ".../Theano/theano/compile/pfunc.py", line 219, in rebuild_collect_shared
cloned_v = clone_v_get_shared_updates(v, copy_inputs_over)
File ".../Theano/theano/compile/pfunc.py", line 93, in clone_v_get_shared_updates
clone_v_get_shared_updates(i, copy_inputs_over)
File ".../Theano/theano/compile/pfunc.py", line 93, in clone_v_get_shared_updates
clone_v_get_shared_updates(i, copy_inputs_over)
File ".../Theano/theano/compile/pfunc.py", line 93, in clone_v_get_shared_updates
clone_v_get_shared_updates(i, copy_inputs_over)
File ".../Theano/theano/compile/pfunc.py", line 96, in clone_v_get_shared_updates
[clone_d[i] for i in owner.inputs], strict=rebuild_strict)
File ".../Theano/theano/gof/graph.py", line 242, in clone_with_new_inputs
new_inputs[i] = curr.type.filter_variable(new)
File ".../Theano/theano/tensor/type.py", line 234, in filter_variable
self=self))
TypeError: Cannot convert Type Generic (of Variable <Generic>) into Type TensorType(float64, matrix). You can try to manually convert <Generic> into a TensorType(float64, matrix).
I am making PCA function with theano but have a problem.
mean value is subtracted from MNIST data in dimension_transform in PCA class
I do not get why it gives type matching error and how do I fix it
Your problem comes from these lines:
data = load_data(dataset)
Here data is a list (as this is what load_data() returns).
transform = theano.function(
inputs=[],
outputs=m_pca.dimemsion_transform(X),
givens={
X: data
}
)
And here you pass it as a value. You have to extract the item you want from the return value of load_data() like so:
[(train_set_x, train_set_y), (valid_set_x, valid_set_y),
(test_set_x, test_set_y)] = load_data(dataset)
and then use
givens={
X: train_set_x
}
or one of the other values.

got error when dump object to json

Hi guys I want ask about json.dump
I use scikit to tune some method with parameters and I want dump it to json, but I got some error here :
I have parameter with method KNN:
KNeighborsClassifier(algorithm=u'auto', leaf_size=30, metric=u'manhattan',
metric_params=None, n_jobs=-1, n_neighbors=300, p=2,
weights=u'distance')
But got error like this:
Traceback (most recent call last):
File "jamu.py", line 1018, in <module>
main(argv)
File "jamu.py", line 863, in main
json.dumps(meta_clf, f)
File "C:\Python27\lib\json\__init__.py", line 250, in d
sort_keys=sort_keys, **kw).encode(obj)
File "C:\Python27\lib\json\encoder.py", line 207, in en
chunks = self.iterencode(o, _one_shot=True)
File "C:\Python27\lib\json\encoder.py", line 270, in it
return _iterencode(o, 0)
File "C:\Python27\lib\json\encoder.py", line 184, in de
raise TypeError(repr(o) + " is not JSON serializable"
TypeError: KNeighborsClassifier(algorithm=u'auto', leaf_s
tan',
metric_params=None, n_jobs=-1, n_neighbors=300
weights=u'distance') is not JSON serializable
Is anything wrong with my code?
It is what it says on the tin, KNeighborsClassifier cannot be serialised with json.
You'll have to use a different way to serialise a model. For example you can use joblib:
from sklearn.external import joblib
# Suppose your KNeighborsClassifier model is called knn
joblib.dump(knn, 'some/kind/of/path/knn.joblib')
Loading a model is equally simple:
knn = joblib.load('some/kind/of/path/knn.joblib')
Check the joblib docs for what else it is good for.
finally i use this way:
meta_clf = KNeighborsClassifier(algorithm=u'auto', leaf_size=30, metric=u'manhattan',
metric_params=None, n_jobs=-1, n_neighbors=300, p=2,
weights=u'distance')
def print_to_json(meta_clf):
meta_clf_str = str(meta_clf)
meta_clf_str = meta_clf_str[meta_clf_str.index("(") + 1:meta_clf_str.rindex(")")]
meta_clf_str = meta_clf_str.replace('\n ', '')
meta_clf_str = meta_clf_str.replace(' ', '')
meta_clf_str = meta_clf_str.replace('=u\'', '=\'')
meta_clf_str = meta_clf_str.replace('\'', '')
meta_clf_str_list = meta_clf_str.split(',')
meta_clf_str_list_len = len(meta_clf_str_list)
meta_clf_str_lists = []
params = {}
for x in meta_clf_str_list:
meta_clf_str_list = x.split('=')
if meta_clf_str_list[1].isdigit() == True:
meta_clf_str_list[1] = int(meta_clf_str_list[1])
meta_clf_str_lists.append(meta_clf_str_list)
params[meta_clf_str_list[0]] = meta_clf_str_list[1]
return params
it's enough for me.
thanks for the asnwer thomas, i appreciate it.

debug:decoding Unicode is not supported

My code can run,but when I debug,it can enter the Subroutine.And the error is:"decoding Unicode is not supported".
I use anaconda.When I open the untitled0.py,the encoding is UTF-8 at the bottom of the screen,but when I open the fhmm_exact.py,the encoding is UTF-8-GUESSED.
Traceback (most recent call last):
File "<ipython-input-1-f6910c2dfa77>", line 1, in <module>
debugfile('/home/wenwu/untitled0.py', wdir='/home/wenwu')
File "/home/wenwu/anaconda/lib/python2.7/site-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 702, in debugfile
debugger.run("runfile(%r, args=%r, wdir=%r)" % (filename, args, wdir))
File "/home/wenwu/anaconda/lib/python2.7/bdb.py", line 400, in run
exec cmd in globals, locals
File "<string>", line 1, in <module>
File "/home/wenwu/anaconda/lib/python2.7/site-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 682, in runfile
execfile(filename, namespace)
File "/home/wenwu/anaconda/lib/python2.7/site-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 78, in execfile
builtins.execfile(filename, *where)
File "/home/wenwu/untitled0.py", line 37, in <module>
fhmm.disaggregate(test_elec.mains(),output,sample_period = 60)
File "/home/wenwu/nilmtk/nilmtk/disaggregate/fhmm_exact.py", line 287, in disaggregate
mains_data_location = '{}/elec/meter1'.format(building_path)
File "/home/wenwu/nilmtk/nilmtk/disaggregate/fhmm_exact.py", line 287, in disaggregate
mains_data_location = '{}/elec/meter1'.format(building_path)
File "/home/wenwu/anaconda/lib/python2.7/bdb.py", line 49, in trace_dispatch
return self.dispatch_line(frame)
File "/home/wenwu/anaconda/lib/python2.7/bdb.py", line 67, in dispatch_line
self.user_line(frame)
File "/home/wenwu/anaconda/lib/python2.7/pdb.py", line 158, in user_line
self.interaction(frame, None)
File "/home/wenwu/anaconda/lib/python2.7/site-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 488, in interaction
self.notify_spyder(frame) #-----Spyder-specific-------------------------
File "/home/wenwu/anaconda/lib/python2.7/site-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 432, in notify_spyder
fname = unicode(fname, "utf-8")
TypeError: decoding Unicode is not supported
The following is code.
untitled0.py*
from matplotlib import rcParams
import matplotlib.pyplot as plt
rcParams['figure.figsize'] = (13,6)
plt.style.use('ggplot')
from nilmtk import DataSet,TimeFrame,MeterGroup,HDFDataStore
train = DataSet('/home/wenwu/redd.h5')
test = DataSet('/home/wenwu/redd.h5')
building = 1
train.set_window(end = '30-4-2011')
test.set_window(start = '30-4-2011')
train_elec = train.buildings[1].elec
test_elec = test.buildings[1].elec
fridge_meter = train_elec['fridge']
fridge_df = fridge_meter.load().next()
fridge_df.head()
mains = train_elec.mains()
mains_df = mains.load().next()
top_5_train_elec = train_elec.submeters().select_top_k(k = 5)
from nilmtk.disaggregate import fhmm_exact
from nilmtk.metrics import f1_score
fhmm = fhmm_exact.FHMM()
fhmm.train(top_5_train_elec,sample_period = 60)
disag_filename = '/home/wenwu/redd-disag-fhmm.h5'
output = HDFDataStore(disag_filename,'w')
fhmm.disaggregate(test_elec.mains(),output,sample_period = 60)
output.close()
disag_fhmm = DataSet(disag_filename)
disag_fhmm_elec = disag_fhmm.buildings[building].elec
f1_fhmm = f1_score(disag_fhmm_elec,test_elec)
f1_fhmm.plot(kind = 'barh')
disaggreate part of fhmm_exact.py
def disaggregate(self, mains, output_datastore, **load_kwargs):
'''Disaggregate mains according to the model learnt previously.
Parameters
----------
mains : nilmtk.ElecMeter or nilmtk.MeterGroup
output_datastore : instance of nilmtk.DataStore subclass
For storing power predictions from disaggregation algorithm.
output_name : string, optional
The `name` to use in the metadata for the `output_datastore`.
e.g. some sort of name for this experiment. Defaults to
"NILMTK_FHMM_<date>"
resample_seconds : number, optional
The desired sample period in seconds.
**load_kwargs : key word arguments
Passed to `mains.power_series(**kwargs)`
'''
import warnings
warnings.filterwarnings("ignore", category=Warning)
MIN_CHUNK_LENGTH = 100
if not self.model:
raise RuntimeError(
"The model needs to be instantiated before"
" calling `disaggregate`. For example, the"
" model can be instantiated by running `train`.")
# Extract optional parameters from load_kwargs
date_now = datetime.now().isoformat().split('.')[0]
output_name = load_kwargs.pop('output_name', 'NILMTK_FHMM_' + date_now)
resample_seconds = load_kwargs.pop('resample_seconds', 60)
resample_rule = '{:d}S'.format(resample_seconds)
timeframes = []
building_path = '/building{}'.format(mains.building())
mains_data_location = '{}/elec/meter1'.format(building_path)
data_is_available = False
for chunk in mains.power_series(**load_kwargs):
# Check that chunk is sensible size before resampling
if len(chunk) < MIN_CHUNK_LENGTH:
continue
# Record metadata
timeframes.append(chunk.timeframe)
measurement = chunk.name
chunk = chunk.resample(rule=resample_rule)
# Check chunk size *again* after resampling
if len(chunk) < MIN_CHUNK_LENGTH:
continue
# Start disaggregation
predictions = self.disaggregate_chunk(chunk)
for meter in predictions.columns:
data_is_available = True
meter_instance = meter.instance()
cols = pd.MultiIndex.from_tuples([chunk.name])
predicted_power = predictions[[meter]]
output_df = pd.DataFrame(predicted_power)
output_df.columns = pd.MultiIndex.from_tuples([chunk.name])
output_datastore.append('{}/elec/meter{}'
.format(building_path, meter_instance),
output_df)
# Copy mains data to disag output
output_datastore.append(key=mains_data_location,
value=pd.DataFrame(chunk, columns=cols))
if not data_is_available:
return
##################################
# Add metadata to output_datastore
# TODO: `preprocessing_applied` for all meters
# TODO: split this metadata code into a separate function
# TODO: submeter measurement should probably be the mains
# measurement we used to train on, not the mains measurement.
# DataSet and MeterDevice metadata:
meter_devices = {
'FHMM': {
'model': 'FHMM',
'sample_period': resample_seconds,
'max_sample_period': resample_seconds,
'measurements': [{
'physical_quantity': measurement[0],
'type': measurement[1]
}]
},
'mains': {
'model': 'mains',
'sample_period': resample_seconds,
'max_sample_period': resample_seconds,
'measurements': [{
'physical_quantity': measurement[0],
'type': measurement[1]
}]
}
}
merged_timeframes = merge_timeframes(timeframes, gap=resample_seconds)
total_timeframe = TimeFrame(merged_timeframes[0].start,
merged_timeframes[-1].end)
dataset_metadata = {'name': output_name, 'date': date_now,
'meter_devices': meter_devices,
'timeframe': total_timeframe.to_dict()}
output_datastore.save_metadata('/', dataset_metadata)
# Building metadata
# Mains meter:
elec_meters = {
1: {
'device_model': 'mains',
'site_meter': True,
'data_location': mains_data_location,
'preprocessing_applied': {}, # TODO
'statistics': {
'timeframe': total_timeframe.to_dict()
}
}
}
# TODO: FIX THIS! Ugly hack for now
# Appliances and submeters:
appliances = []
for i, meter in enumerate(self.meters):
meter_instance = meter.instance()
for app in meter.appliances:
appliance = {
'meters': [meter_instance],
'type': app.identifier.type,
'instance': app.identifier.instance
# TODO this `instance` will only be correct when the
# model is trained on the same house as it is tested on.
# https://github.com/nilmtk/nilmtk/issues/194
}
appliances.append(appliance)
elec_meters.update({
meter_instance: {
'device_model': 'FHMM',
'submeter_of': 1,
'data_location': ('{}/elec/meter{}'
.format(building_path, meter_instance)),
'preprocessing_applied': {}, # TODO
'statistics': {
'timeframe': total_timeframe.to_dict()
}
}
})
# Setting the name if it exists
if meter.name:
if len(meter.name) > 0:
elec_meters[meter_instance]['name'] = meter.name
building_metadata = {
'instance': mains.building(),
'elec_meters': elec_meters,
'appliances': appliances
}
output_datastore.save_metadata(building_path, building_metadata)