format dictionary using python - python-2.7

I am new to Python. Can someone tell me how to change the data shown first below into the formatted structure shown after it?
The data is stored in a dictionary, and I want to store the formatted data in a dictionary as well.
I am using Python 2.7.
Is this possible?
Can the changes be made in the same dictionary, or do we need to create a new one?
The code I have :
{
"roles":
{
"AA":[
{
"date":"20150101",
"total":6.0
},
{
"date":"20150201",
"total":14.5
},
{
"date":"20150301",
"total":14.5
}
],
"AB":[
{
"date":"20150301",
"total":1.6
},
{
"date":"20150401",
"total":1.6
},
{
"date":"20150501",
"total":7.24
}
]
}
}
The code I want :
{
"key" : "roles",
"values" : [
{
"key": "AA",
"values": [
{
"date": "20150101",
"total": 6
},
{
"date": "20150201",
"total": 14.5
},
{
"date": "20150301",
"total": 14.5
}
]
},
{
"key": "AB",
"values": [
{
"date": "20150301",
"total": 1.6
},
{
"date": "20150401",
"total": 1.6
},
{
"date": "20150501",
"total": 7.24
}
]
}
]
}

This is not necessarily the most robust solution, but it gets the job done as long as the input format stays consistent with the example you provided. (pprint is used only to format the output; the transformation itself works without it.)
from pprint import pprint
# Re-shape {outer_key: {inner_key: rows}} into the nested
# {'key': ..., 'values': [...]} layout. If input_dict has more than one
# top-level key, each pass overwrites 'key' and 'values', so only the entry
# visited last is kept.
output = {}
for key, value in input_dict.items():
    nested = []
    for inner_key, inner_value in value.items():
        # The inner list of rows is reused as-is, not copied.
        nested.append({'key': inner_key, 'values': inner_value})
    output['key'] = key
    output['values'] = nested
pprint(output)
Outputs
{'key': 'roles',
'values': [{'key': 'AA',
'values': [{'date': '20150101', 'total': 6.0},
{'date': '20150201', 'total': 14.5},
{'date': '20150301', 'total': 14.5}]},
{'key': 'AB',
'values': [{'date': '20150301', 'total': 1.6},
{'date': '20150401', 'total': 1.6},
{'date': '20150501', 'total': 7.24}]}]}

Related

Boto3 create glue triggers with different types in one workflow

Can anyone please guide me through the steps to create triggers of different types (one conditional and one scheduled) in a single workflow?
So far I have used the create_trigger function, but I am not sure how to address the requirement above.
Can anyone help, please?
I tried the syntax below, but it didn't work:
# Attempt to define a SCHEDULED trigger and a CONDITIONAL trigger in one call.
# NOTE: this cannot work as written. `Type` and `Actions` are each passed
# twice, and Python rejects a repeated keyword argument with a SyntaxError
# before any request is sent.
response = client.create_trigger(
Name='two_triggers',
WorkflowName='wf_With_two_tirggers',
# First trigger definition: scheduled via a cron expression.
Type='SCHEDULED',
Schedule='cron(0 12 * * ? *)',
Actions=[
{
'JobName': 'abc_dev',
'Arguments': {
'string': 'string'
},
'Timeout': 123,
'SecurityConfiguration': 'string',
'NotificationProperty': {
'NotifyDelayAfter': 123
},
'Trigger': 'string'
},
],
# Second trigger definition: `Type` is repeated here (and `Actions` below).
Type='CONDITIONAL',
Predicate={
'Logical': 'ANY',
'Conditions': [
{
'LogicalOperator': 'EQUALS',
'JobName': 'def_dev',
'State': 'SUCCEEDED'
},
]
},
Actions=[
{
'JobName': 'ghi_dev',
'Arguments': {
'string': 'string'
},
'Timeout': 123,
'SecurityConfiguration': 'string',
'NotificationProperty': {
'NotifyDelayAfter': 123
},
'CrawlerName': 'string'
},
],
Description='string',
StartOnCreation=True,
Tags={
'string': 'string'
}
)
Below is the workflow design I am struggling to write code for. I tried the code above for this design using boto3, but it didn't work.
Yes, I figured out an answer. Below is the code for the design given in the question:
import boto3
import os
import logging
# Glue client bound to us-east-1; every call below goes through it.
glue = boto3.client(service_name="glue", region_name='us-east-1')


def _job_action(job_name):
    """Return the trigger action entry that runs the Glue job *job_name*."""
    return {
        "JobName": job_name,
        "Arguments": {"string": "string"},
        "Timeout": 123,
        "SecurityConfiguration": "string",
        "NotificationProperty": {"NotifyDelayAfter": 123},
    }


def _job_succeeded(job_name):
    """Return a predicate condition that is met when *job_name* SUCCEEDED."""
    return {
        "LogicalOperator": "EQUALS",
        "JobName": job_name,
        "State": "SUCCEEDED",
    }


# The workflow that both triggers are attached to.
response = glue.create_workflow(Name="dual_trigger_wf")

# Scheduled trigger: one cron schedule fanning out to jobs "abc" and "def".
response1 = glue.create_trigger(
    Name="trigger_one_to_many",
    WorkflowName="dual_trigger_wf",
    Type="SCHEDULED",
    Schedule="cron(0 8 * * ? *)",
    Actions=[_job_action("abc"), _job_action("def")],
    Description="string",
    StartOnCreation=False,
)

# Conditional trigger: runs "ghi" once both "abc" AND "def" have succeeded.
response2 = glue.create_trigger(
    Name="trigger_many_to_one",
    WorkflowName="dual_trigger_wf",
    Type="CONDITIONAL",
    Predicate={
        "Logical": "AND",
        "Conditions": [_job_succeeded("abc"), _job_succeeded("def")],
    },
    Actions=[_job_action("ghi")],
    Description="string",
    StartOnCreation=False,
)

Elastic Search Sort

I have a table for some activities like
[
{
"id": 123,
"name": "Ram",
"status": 1,
"activity": "Poster Design"
},
{
"id": 123,
"name": "Ram",
"status": 1,
"activity": "Poster Design"
},
{
"id": 124,
"name": "Leo",
"categories": [
"A",
"B",
"C"
],
"status": 1,
"activity": "Brochure"
},
{
"id": 134,
"name": "Levin",
"categories": [
"A",
"B",
"C"
],
"status": 1,
"activity": "3D Printing"
}
]
I want to get this data from Elasticsearch 5.5 sorted on the field activity, but within a single query I need all the documents with name = "Ram" first, followed by the remaining ones.
You can use a function_score query to boost the results that match a filter (in this case, "ram" in name).
The following query should work for you:
POST sort_index/_search
{
"query": {
"function_score": {
"query": {
"match_all": {}
},
"boost": "5",
"functions": [{
"filter": {
"match": {
"name": "ram"
}
},
"random_score": {},
"weight": 1000
}],
"score_mode": "max"
}
},
"sort": [{
"activity.keyword": {
"order": "desc"
}
}]
}
I would suggest using a bool query combined with a should clause.
You will also need to use the sort clause on your field.

alter the structure Tastypie uses in the list view

I have a JSON list view that looks like this:
{
"objects": [
{
"active": false,
"id": 4,
},
{
"active": false,
"id": 5,
}
]
}
I want to get rid of "objects" word, so that structure will look like this:
{
[
{
"active": false,
"id": 4,
},
{
"active": false,
"id": 5,
}
]
}
The linked documentation gives no hint on how to do this.
That's impossible: {} denotes a dict, and every dict entry needs both a key and a value.
I guess what you need is:
[
{
"active": false,
"id": 4,
},
{
"active": false,
"id": 5,
}
]
If so, override the Resource.alter_list_data_to_serialize method:
def alter_list_data_to_serialize(self, request, data):
    """Return just the object list, dropping the dict that wraps it.

    ``data`` is indexed with ``self._meta.collection_name`` and the value
    stored under that key is returned as the payload to serialize.
    ``request`` is accepted but not used.
    """
    collection_key = self._meta.collection_name
    return data[collection_key]
This relies on the paginated data being a dict with a key named Resource._meta.collection_name (the "objects" key in the example above).

MongoDB Aggregate Regex Match or Full Text Search returns whole Document

Ex. Record
[
{
"_id": "5528cfd2e71144e020cb6494",
"__v": 11,
"Product": [
{
"_id": "5528cfd2e71144e020cb6495",
"isFav": true,
"quantity": 27,
"price": 148,
"description": "100g",
"brand": "JaldiLa",
"name": "Grapes",
"sku": "GRP"
},
{
"_id": "552963ed63d867b81e18d357",
"isFav": false,
"quantity": 13,
"price": 290,
"description": "100g",
"brand": "JaldiLa",
"name": "Apple",
"sku": "APL"
}
],
"brands": [
"Whole Foods",
"Costco",
"Bee's",
"Masons"
],
"sku": "FRT",
"name": "Fruits"
}
]
My Mongoose function to return query from AngularJS(http://localhost:8080/api/search?s=)
// GET /search?s=<term>: runs a full-text search over Dept documents and
// regroups the matching documents' products.
// NOTE(review): no callback or exec() is shown in this excerpt, so `res` is
// never used here; the response handling is presumably omitted.
router.route('/search')
.get(function(req, res) {
Dept.aggregate(
// $text selects whole documents, so every Product element of a matching
// document is carried into the following stages.
{ $match: { $text: { $search: req.query.s } } },
// Keep only the department name/_id and each product's name/_id.
{ $project : { name : 1, _id : 1, 'Product.name' : 1, 'Product._id' : 1} },
// One output document per Product element.
{ $unwind : "$Product" },
// Re-assemble per department; nothing between $unwind and $group filters
// the products, so all of them are pushed back.
{ $group : {
_id : "$_id",
Category : { $addToSet : "$name"},
Product : { $push : "$Product"}
}}
)
});
RESULT: e.g. http://localhost:8080/api/search?s=Apple / Grape / Carrot, result is same for all.
[
{
"_id": "5528cfd2e71144e020cb6494",
"Category": ["Fruits"],
"Product": [
{
"_id": "5528cfd2e71144e020cb6495",
"name": "Grapes"
},
{
"_id": "552963ed63d867b81e18d357",
"name": "Apple"
},
{
"_id": "552e61920c530fb848c61510",
"name": "Carrots"
}
]
}
]
PROBLEM: On a query of "apple", it returns all objects within Product instead of just the matching one ("Apple"). I think putting a $match after the $unwind, or using $regex, might do the trick.
WHAT I WANT: e.g. for a searchString of "grape"
Also I want it to start sending results as soon as I send in the first two letters of my query.
[{
"_id": ["5528cfd2e71144e020cb6494"], //I want this in array as it messes my loop up
"Category": "Fruits", //Yes I do not want this in array like I'm getting in my resutls
"Product": [{
"_id": "5528cfd2e71144e020cb6495",
"name": "Grapes"
}]
}]
Thanks for being patient.
Use the following aggregation pipeline:
// Term to look for in the product names.
var search = "apple";

// Case-insensitive regex condition on Product.name, used by both $match stages.
var nameMatches = {
    "Product.name": { "$regex": search, "$options": "i" }
};

var pipeline = [
    // 1. Keep only documents that have at least one matching product.
    { "$match": nameMatches },
    // 2. Emit one document per element of the Product array.
    { "$unwind": "$Product" },
    // 3. Drop the unwound products that do not match.
    { "$match": nameMatches },
    // 4. Shape the output: department name as Category plus product _id and name.
    {
        "$project": {
            "Category": "$name",
            "Product._id": 1,
            "Product.name": 1
        }
    }
];

db.collection.aggregate(pipeline);
With the above sample document and a regex (case-insensitive) search for "apple" on the name field of the Product array, the above aggregation pipeline produces the result:
Output:
/* 1 */
{
"result" : [
{
"_id" : "5528cfd2e71144e020cb6494",
"Product" : {
"_id" : "552963ed63d867b81e18d357",
"name" : "Apple"
},
"Category" : "Fruits"
}
],
"ok" : 1
}

django-haystack autocomplete returns too wide results

I have created an Index with field title_auto:
# Search index whose three fields are all populated from the model's `title`
# attribute.
class GameIndex(indexes.SearchIndex, indexes.Indexable):
# The field flagged with document=True.
text = indexes.CharField(document=True, model_attr='title')
# Plain character copy of the title.
title = indexes.CharField(model_attr='title')
# N-gram field built from the title; this is the field the autocomplete and
# filter queries below search on.
title_auto = indexes.NgramField(model_attr='title')
Elastic search settings look like this:
# Elasticsearch index settings: two custom analyzers, plus the n-gram
# tokenizers and token filters declared alongside them.
ELASTICSEARCH_INDEX_SETTINGS = {
'settings': {
"analysis": {
# Both analyzers tokenize with "lowercase" and then apply one n-gram
# token filter from the "filter" section below.
"analyzer": {
"ngram_analyzer": {
"type": "custom",
"tokenizer": "lowercase",
"filter": ["haystack_ngram"],
# NOTE(review): token_chars is usually a tokenizer option; verify it has
# any effect on an analyzer definition.
"token_chars": ["letter", "digit"]
},
"edgengram_analyzer": {
"type": "custom",
"tokenizer": "lowercase",
"filter": ["haystack_edgengram"]
}
},
# NOTE(review): neither tokenizer defined here is referenced by the
# analyzers above, which both use "lowercase"; confirm they are needed.
"tokenizer": {
"haystack_ngram_tokenizer": {
"type": "nGram",
"min_gram": 1,
"max_gram": 15,
},
"haystack_edgengram_tokenizer": {
"type": "edgeNGram",
"min_gram": 1,
"max_gram": 15,
"side": "front"
}
},
# Token filters used by the analyzers. min_gram is 1, so the smallest
# grams produced are single characters.
"filter": {
"haystack_ngram": {
"type": "nGram",
"min_gram": 1,
"max_gram": 15
},
"haystack_edgengram": {
"type": "edgeNGram",
"min_gram": 1,
"max_gram": 15
}
}
}
}
}
I try to do autocomplete search, it works, however returns too many irrelevant results:
qs = SearchQuerySet().models(Game).autocomplete(title_auto=search_phrase)
OR
qs = SearchQuerySet().models(Game).filter(title_auto=search_phrase)
Both of them produce the same output.
If search_phrase is "monopoly", the first results contain "Monopoly" in their titles; however, although only 2 items are relevant, the search returns 51. The others have nothing to do with "Monopoly" at all.
So my question is - how can I change relevance of the results?
It's hard to tell for sure since I haven't seen your full mapping, but I suspect the problem is that the analyzer (one of them) is being used for both indexing and searching. So when you index a document, lots of ngram terms get created and indexed. If you search and your search text is also analyzed the same way, lots of search terms get generated. Since your smallest ngram is a single letter, pretty much any query is going to match a lot of documents.
We wrote a blog post about using ngrams for autocomplete that you might find helpful, here: http://blog.qbox.io/multi-field-partial-word-autocomplete-in-elasticsearch-using-ngrams. But I'll give you a simpler example to illustrate what I mean. I'm not super familiar with haystack so I probably can't help you there, but I can explain the issue with ngrams in Elasticsearch.
First I'll set up an index that uses an ngram analyzer for both indexing and searching:
PUT /test_index
{
"settings": {
"number_of_shards": 1,
"analysis": {
"filter": {
"nGram_filter": {
"type": "nGram",
"min_gram": 1,
"max_gram": 15,
"token_chars": [
"letter",
"digit",
"punctuation",
"symbol"
]
}
},
"analyzer": {
"nGram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"nGram_filter"
]
}
}
}
},
"mappings": {
"doc": {
"properties": {
"title": {
"type": "string",
"analyzer": "nGram_analyzer"
}
}
}
}
}
and add some docs:
PUT /test_index/_bulk
{"index":{"_index":"test_index","_type":"doc","_id":1}}
{"title":"monopoly"}
{"index":{"_index":"test_index","_type":"doc","_id":2}}
{"title":"oligopoly"}
{"index":{"_index":"test_index","_type":"doc","_id":3}}
{"title":"plutocracy"}
{"index":{"_index":"test_index","_type":"doc","_id":4}}
{"title":"theocracy"}
{"index":{"_index":"test_index","_type":"doc","_id":5}}
{"title":"democracy"}
and run a simple match search for "poly":
POST /test_index/_search
{
"query": {
"match": {
"title": "poly"
}
}
}
it returns all five documents:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 4.729521,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "2",
"_score": 4.729521,
"_source": {
"title": "oligopoly"
}
},
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 4.3608603,
"_source": {
"title": "monopoly"
}
},
{
"_index": "test_index",
"_type": "doc",
"_id": "3",
"_score": 1.0197333,
"_source": {
"title": "plutocracy"
}
},
{
"_index": "test_index",
"_type": "doc",
"_id": "4",
"_score": 0.31496215,
"_source": {
"title": "theocracy"
}
},
{
"_index": "test_index",
"_type": "doc",
"_id": "5",
"_score": 0.31496215,
"_source": {
"title": "democracy"
}
}
]
}
}
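This is because the search term "poly" is itself run through the nGram analyzer and broken into n-grams, including the single-letter terms "p", "o", "l", and "y". Since the "title" field of every document was also indexed down to single-letter terms, each document shares at least one term with the query and therefore matches.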
If we rebuild the index with this mapping instead (same analyzer and docs):
"mappings": {
"doc": {
"properties": {
"title": {
"type": "string",
"index_analyzer": "nGram_analyzer",
"search_analyzer": "standard"
}
}
}
}
the query will return what we expect:
POST /test_index/_search
{
"query": {
"match": {
"title": "poly"
}
}
}
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1.5108256,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 1.5108256,
"_source": {
"title": "monopoly"
}
},
{
"_index": "test_index",
"_type": "doc",
"_id": "2",
"_score": 1.5108256,
"_source": {
"title": "oligopoly"
}
}
]
}
}
Edge ngrams work similarly, except that only terms that start at the beginning of the words will be used.
Here is the code I used for this example:
http://sense.qbox.io/gist/b24cbc531b483650c085a42963a49d6a23fa5579
Unfortunately, at this point in time there seems to be no way (apart from implementing a custom backend) to configure search analyzers and index analyzers separately through Django-Haystack.
If Django-Haystack autocomplete returns results that are too broad, you can use the score value provided with each search result to prune the output.
if search_query != "":
    # Narrow the SearchQuerySet with an autocomplete query or a filter.
    results_filtered = results_filtered.filter(text=search_query)
    # Walk the filtered results and exclude every hit whose score is
    # below the configured threshold.
    for hit in results_filtered:
        if not hit.score < SEARCH_SCORE_THRESHOLD:
            continue
        results_filtered = results_filtered.exclude(id=hit.id)
It worked reasonably well for me without having to define my own backend and schema building.