Elixir Phoenix - protocol error during unit testing

I have some simple data:
{
  "name": "example task",
  "description": "example description",
  "type": "task",
  "details": {
    "__type__": "task",
    "amount": 20.00
  },
  "publish": true
}
When I test my API with Postman, everything works. But when I write a unit test for the controller, I get this error:
1) test create_task: assign task (OkBackendWeb.TaskControllerTest)
test/ok_backend_web/controllers/task_controller_test.exs:87
** (Protocol.UndefinedError) protocol Phoenix.Param not implemented for 20.0 of type Float. This protocol is implemented for the following type(s): Any, Atom, BitString, Integer, Map
code: conn = post(conn, Routes.item_path(conn, :create_all, @create_attrs))
stacktrace:
(phoenix 1.5.13) lib/phoenix/param.ex:121: Phoenix.Param.Any.to_param/1
(plug 1.12.1) lib/plug/conn/query.ex:275: Plug.Conn.Query.encode_value/2
(plug 1.12.1) lib/plug/conn/query.ex:246: Plug.Conn.Query.encode_pair/3
(plug 1.12.1) lib/plug/conn/query.ex:262: anonymous fn/3 in Plug.Conn.Query.encode_kv/3
(elixir 1.13.1) lib/enum.ex:1213: anonymous fn/3 in Enum.flat_map/2
(stdlib 3.14) maps.erl:233: :maps.fold_1/3
(elixir 1.13.1) lib/enum.ex:2408: Enum.flat_map/2
(plug 1.12.1) lib/plug/conn/query.ex:266: Plug.Conn.Query.encode_kv/3
(plug 1.12.1) lib/plug/conn/query.ex:262: anonymous fn/3 in Plug.Conn.Query.encode_kv/3
(elixir 1.13.1) lib/enum.ex:4086: Enum.flat_map_list/2
(elixir 1.13.1) lib/enum.ex:4087: Enum.flat_map_list/2
(plug 1.12.1) lib/plug/conn/query.ex:266: Plug.Conn.Query.encode_kv/3
(plug 1.12.1) lib/plug/conn/query.ex:204: Plug.Conn.Query.encode/2
(ok_backend 0.1.0) lib/ok_backend_web/router.ex:1: OkBackendWeb.Router.Helpers.segments/5
(ok_backend 0.1.0) lib/ok_backend_web/router.ex:1: OkBackendWeb.Router.Helpers.item_path/3
test/ok_backend_web/controllers/task_controller_test.exs:91: (test)
This is my test file:
defmodule ExampleWeb.TaskControllerTest do
  use ExampleWeb.ConnCase

  alias Example.Users
  alias Example.Items

  @create_attrs %{
    "name" => "milk with cow",
    "description" => "cow are cute.",
    "type" => "task",
    "details" => %{
      "amount" => 20.00
    },
    "publish" => true
  }

  setup %{conn: conn} do
    {:ok, conn: put_req_header(conn, "accept", "application/json")}
  end

  describe "create_task:" do
    test "create task", %{conn: conn} do
      %{token: [token]} = get_poster_token(conn)
      conn = post(conn, Routes.item_path(conn, :create_all, @create_attrs))
      assert json_response(conn, 200)["status"] == 1
    end
  end

  defp get_poster_token(conn) do
    conn = post(conn, Routes.passwordless_path(conn, :user_auth, @user_poster))
    token = get_resp_header(conn, "authorization")
    %{token: token}
  end
end
How can I resolve this?

You're passing the POST data as an extra argument to the route helper, so Phoenix tries to encode it into the URL's query string, which calls Phoenix.Param.to_param/1 on every value, and that protocol is not implemented for floats (hence the error on 20.0). The body should be a separate argument to post/3:
post(conn, Routes.item_path(conn, :create_all), item: @create_attrs)


DataStore - Data won't be synchronized. No GraphQL endpoint configured. Did you forget `Amplify.configure(awsconfig)`?

Please help, I've spent two days simply trying to use Amplify's DataStore. I'm building an NPM package to wrap an Amplify project backend. Auth works fine, but DataStore does not.
I'm testing the code with ts-node and Jest. It is a package, so I don't have a framework.
When I call the save function, it doesn't work and throws this warning:
[WARN] 24:08.886 DataStore - Data won't be synchronized. No GraphQL endpoint configured. Did you forget `Amplify.configure(awsconfig)`? {
config: {
authProviders: undefined,
maxRecordsToSync: 10000,
syncPageSize: 1000
}
}
My API endpoint is 100% correct, as is the API key. The backend works in another project built with React, but in my case (an NPM package) it does not.
The script I'm trying to run:
import { DataStore } from '@aws-amplify/datastore';
import { AccountDetails } from '@/models';
import { SignUp } from "@/Authentication/SignUp";
import InvalidAccountRequirementsException from "@/Exceptions/InvalidAccountRequirementsException";
import "@/app.config";

/**
 * @name CreateAccount
 * @description:
 * build the correct pattern to check specific string type.
 *
 * @type function
 * @param email
 * @param password
 * @param hasOwnCompany
 * @param optInMarketingEmail
 * @param tacAgreed
 * @constructor
 * @return {Promise<AccountDetails>}
 */
const CreateAccount = async (
  email: string,
  password: string,
  hasOwnCompany: boolean,
  optInMarketingEmail: boolean,
  tacAgreed: boolean,
): Promise<AccountDetails> => {
  if (!tacAgreed) {
    throw new InvalidAccountRequirementsException('Terms and conditions must be agreed');
  }
  const user = await SignUp(email, password);
  return await DataStore.save(
    new AccountDetails({
      cognitoSubId: user.userSub,
      email: email,
      optInMarketingEmail: optInMarketingEmail,
      tacAgreed: tacAgreed,
      hasOwnCompany: hasOwnCompany,
    })
  );
}

export default CreateAccount;
app.conf.ts
import Amplify from 'aws-amplify';
import awsExports from '@/aws-exports';

Amplify.configure(awsExports);
package.json
{
  "name": "@financiallease/driver-seat-amplify-api",
  "version": "1.0.0",
  "description": "",
  "author": "itsupport@financiallease.nl",
  "main": "dist/financiallease.cjs.js",
  "module": "dist/financiallease.es.js",
  "browser": "dist/financiallease.js",
  "license": "ISC",
  "scripts": {
    "build": "rollup -c",
    "lint": "eslint --config eslint.config.js '{src,test}/**/*.ts' --no-ignore",
    "autoformat": "eslint --config eslint.config.js '{src,test}/**/*.ts' --no-ignore --fix",
    "test": "jest -c jest.config.ts",
    "coverage": "jest --collectCoverage --coverageDirectory=\"./coverage\" --ci --reporters=default --reporters=jest-junit --watchAll=false",
    "docs:generate": "typedoc --readme README.md --entryPoints src --entryPointStrategy expand --out docs --theme hierarchy --name \"Driver Seat Amplify Api - docs\" --includeVersion",
    "amplify-modelgen": "node amplify/scripts/amplify-modelgen.js",
    "amplify-push": "node amplify/scripts/amplify-push.js"
  },
  "publishConfig": {
    "@financiallease:registry": "https://gitlab.com/api/v4/projects/35071033/packages/npm/"
  },
  "dependencies": {
    "@aws-amplify/core": "^4.5.1",
    "@aws-amplify/datastore": "^3.11.0",
    "@rollup/plugin-alias": "^3.1.9",
    "aws-amplify": "^4.3.19",
    "dotenv": "^16.0.0",
    "ts-node": "^10.7.0",
    "typescript": "^4.6.3"
  },
  "devDependencies": {
    "@babel/core": "^7.17.9",
    "@babel/preset-env": "^7.16.11",
    "@babel/preset-typescript": "^7.16.7",
    "@rollup/plugin-babel": "^5.3.1",
    "@rollup/plugin-commonjs": "^21.0.3",
    "@rollup/plugin-node-resolve": "^13.1.3",
    "@rollup/plugin-typescript": "^8.3.1",
    "@types/amplify": "^1.1.25",
    "@types/jest": "^27.4.1",
    "@types/node": "^17.0.30",
    "@typescript-eslint/eslint-plugin": "^5.18.0",
    "@typescript-eslint/parser": "^5.18.0",
    "babel-jest": "^28.0.3",
    "babel-plugin-module-resolver": "^4.1.0",
    "eslint": "^8.12.0",
    "eslint-import-resolver-alias": "^1.1.2",
    "eslint-plugin-react": "^7.29.4",
    "esm": "^3.2.25",
    "jest": "^27.5.1",
    "jest-junit": "^13.2.0",
    "jsdoc": "^3.6.10",
    "nodemon": "^2.0.16",
    "rollup": "^2.70.1",
    "rollup-plugin-terser": "^7.0.2",
    "ts-jest": "^27.1.4",
    "tsconfig-paths": "^3.14.1",
    "tslib": "^2.3.1",
    "typedoc": "^0.22.15",
    "typedoc-theme-hierarchy": "^1.1.1",
    "ini": "^1.3.5",
    "inquirer": "^6.5.1"
  }
}
Any ideas?

Converting Set to List Yields Unpredictable Number of Elements

Terraform v0.13.7
provider registry.terraform.io/hashicorp/aws v2.70.0 / v3.59.0
provider registry.terraform.io/hashicorp/template v2.2.0
Hi,
I'm trying to move from version 2.70.0 to version 3.X of the AWS provider plug-in. That entails dealing with a change in the domain_validation_options attribute of aws_acm_certificate, which becomes a set rather than a list.
I have code managing a certificate with 3 SANs in addition to the main certificate name. With version 2.70.0 of the AWS provider plug-in, as expected, this produces a four-element list, as can be seen in this output from a terraform show -json planfile:
"domain_validation_options": [
{
"domain_name": "example.net",
"resource_record_name": "blah-1.example.net.",
"resource_record_type": "CNAME",
"resource_record_value": "blah-1.acm-validations.aws."
},
{
"domain_name": "*.a.example.net",
"resource_record_name": "blah-2.a.example.net.",
"resource_record_type": "CNAME",
"resource_record_value": "blah-2.acm-validations.aws."
},
{
"domain_name": "*.b.example.net",
"resource_record_name": "blah-3.b.example.net.",
"resource_record_type": "CNAME",
"resource_record_value": "blah-3.acm-validations.aws."
},
{
"domain_name": "*.c.example.net",
"resource_record_name": "blah-4.c.example.net.",
"resource_record_type": "CNAME",
"resource_record_value": "blah-4.acm-validations.aws."
}
],
Also as expected, I can address each one of these list elements by its index, e.g.:
aws_acm_certificate.sslcert.domain_validation_options[0]
When I install a 3.X version of the plug-in, however, a set is returned. I am trying to make things easy by converting it to a list with the tolist() function. That returns a lexicographically ordered list--which is expected.
"domain_validation_options": [
{
"domain_name": "*.a.example.net",
"resource_record_name": "blah-2.a.example.net.",
"resource_record_type": "CNAME",
"resource_record_value": "blah-2.acm-validations.aws."
},
{
"domain_name": "*.b.example.net",
"resource_record_name": "blah-3.b.example.net.",
"resource_record_type": "CNAME",
"resource_record_value": "blah-3.acm-validations.aws."
},
{
"domain_name": "*.c.example.net",
"resource_record_name": "blah-4.c.example.net.",
"resource_record_type": "CNAME",
"resource_record_value": "blah-4.acm-validations.aws."
},
{
"domain_name": "example.net",
"resource_record_name": "blah-1.example.net.",
"resource_record_type": "CNAME",
"resource_record_value": "blah-1.acm-validations.aws."
},
],
What is unexpected is that Terraform reports it as containing two elements rather than four. When I try to access what appears to be the last element in the second terraform show -json planfile output quoted above ("domain_name": "example.net"), it gives me the following error:
on common/certificate/certificate.tf line 14, in locals:
14: vopt = tolist(aws_acm_certificate.sslcert.domain_validation_options)[3]
aws_acm_certificate.sslcert.domain_validation_options is set of object with 2 elements
Can anyone help me understand why this is happening? And is there a more reliable way to inspect variables than browsing the output of terraform show -json planfile?
Thanks!
UPDATE
Sorry, I probably should have shared the code that is generating my problem. Here is an anonymized version of it.
Calling module:
module "sslcert-example_net" {
source = "./common/certificate"
name = "example.net"
SAN = [
"*.a.example.net",
"*.b.example.net",
"*.c.example.net",
]
}
Child module:
resource "aws_acm_certificate" "sslcert" {
domain_name = var.name
subject_alternative_names = var.SAN
validation_method = "DNS"
lifecycle {
create_before_destroy = true
}
}
locals {
vopt = tolist(aws_acm_certificate.sslcert.domain_validation_options)[3]
}
resource "ns1_record" "dnsvalidation-sslcert" {
depends_on = [aws_acm_certificate.sslcert]
zone = var.name
domain = substr(
local.vopt["resource_record_name"],
0,
length(local.vopt["resource_record_name"]) - 1,
)
type = "CNAME"
answers {
answer = local.vopt["resource_record_value"]
}
}

How to determine if an AWS s3 bucket has at least one public object?

Let's suppose that I have a bucket with many folders and objects.
This bucket has "Objects can be public" as its access policy. If I want to know whether there is at least one public object, or list all public objects, how should I do this? Is there a way to do this automatically?
It appears that you would need to loop through every object and call GetObjectAcl().
You'd preferably do it in a programming language, but here is an example with the AWS CLI:
aws s3api get-object-acl --bucket my-bucket --key foo.txt
{
    "Owner": {
        "DisplayName": "...",
        "ID": "..."
    },
    "Grants": [
        {
            "Grantee": {
                "DisplayName": "...",
                "ID": "...",
                "Type": "CanonicalUser"
            },
            "Permission": "FULL_CONTROL"
        },
        {
            "Grantee": {
                "Type": "Group",
                "URI": "http://acs.amazonaws.com/groups/global/AllUsers"
            },
            "Permission": "READ"
        }
    ]
}
I granted the READ permission by using Make Public in the S3 management console. Please note that objects could also be made public via a Bucket Policy, which would not show up in the ACL.
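If you only want to know whether the bucket policy makes anything public, there is also a single call for that. Here is a minimal boto3 sketch (the bucket name is a placeholder), using GetBucketPolicyStatus; note it evaluates only the bucket policy, so the per-object ACL loop above is still needed for ACL-based access:
import boto3
from botocore.exceptions import ClientError

s3 = boto3.client("s3")

try:
    # Evaluates only the *bucket policy*, not per-object ACLs.
    resp = s3.get_bucket_policy_status(Bucket="my-bucket")  # placeholder bucket name
    print(resp["PolicyStatus"]["IsPublic"])
except ClientError as err:
    # A bucket with no policy at all raises NoSuchBucketPolicy,
    # in which case the policy cannot be making the bucket public.
    print(err.response["Error"]["Code"])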
Use the listObjects method from the AWS SDK for JavaScript: https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#listObjects-property. There is an SDK for every major language (Java, etc.); use the one that you know.
var params = {
  Bucket: "examplebucket",
  MaxKeys: 2
};
s3.listObjectsV2(params, function(err, data) {
  if (err) console.log(err, err.stack); // an error occurred
  else {
    // bucket isn't empty
    if (data.Contents.length != 0)
      console.log(data); // successful response
  }
  /*
  data = {
    Contents: [
      {
        ETag: "\"70ee1738b6b21e2c8a43f3a5ab0eee71\"",
        Key: "happyface.jpg",
        LastModified: <Date Representation>,
        Size: 11,
        StorageClass: "STANDARD"
      },
      {
        ETag: "\"becf17f89c30367a9a44495d62ed521a-1\"",
        Key: "test.jpg",
        LastModified: <Date Representation>,
        Size: 4192256,
        StorageClass: "STANDARD"
      }
    ],
    IsTruncated: true,
    KeyCount: 2,
    MaxKeys: 2,
    Name: "examplebucket",
    NextContinuationToken: "1w41l63U0xa8q7smH50vCxyTQqdxo69O3EmK28Bi5PcROI4wI/EyIJg==",
    Prefix: ""
  }
  */
});
Building off of John's answer, you might find this helpful:
import concurrent.futures

import boto3

BUCKETS = [
    "TODO"
]


def get_num_objs(bucket):
    num_objs = 0
    s3_client = boto3.client("s3")
    paginator = s3_client.get_paginator("list_objects_v2")
    for res in paginator.paginate(
        Bucket=bucket,
    ):
        if "Contents" not in res:
            print(f"""No contents in res={res}""")
            continue
        num_objs += len(res["Contents"])
    return num_objs


for BUCKET in BUCKETS:
    print(f"Analyzing bucket={BUCKET}...")
    num_objs = get_num_objs(BUCKET)
    print(f"BUCKET={BUCKET} has num_objs={num_objs}")
    # if num_objs > 10_000:
    #     raise Exception(f"num_objs={num_objs}")

    s3_client = boto3.client("s3")

    def assert_no_public_obj(res):
        if res["ResponseMetadata"]["HTTPStatusCode"] != 200:
            raise Exception(res)
        if "Contents" not in res:
            print(f"""No contents in res={res}""")
            return
        print(f"""Fetched page with {len(res["Contents"])} objs...""")
        for i, obj in enumerate(res["Contents"]):
            if i % 100 == 0:
                print(f"""Fetching {i}-th obj in page...""")
            res = s3_client.get_object_acl(Bucket=BUCKET, Key=obj["Key"])
            for grant in res["Grants"]:
                # Amazon S3 considers a bucket or object ACL public if it grants any permissions
                # to members of the predefined AllUsers or AuthenticatedUsers groups.
                # https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-control-block-public-access.html#access-control-block-public-access-policy-status
                uri = grant["Grantee"].get("URI")
                if not uri:
                    continue
                if "AllUsers" in uri or "AuthenticatedUsers" in uri:
                    raise Exception(f"""Grantee={grant["Grantee"]} found for {BUCKET}/{obj["Key"]}""")

    paginator = s3_client.get_paginator("list_objects_v2")
    with concurrent.futures.ThreadPoolExecutor() as executor:
        for res in paginator.paginate(
            Bucket=BUCKET,
        ):
            executor.submit(assert_no_public_obj, res)

How to pass a list to a nested stack parameter in AWS CloudFormation?

I'm using nested stacks to create ELB and application stacks, and I need to pass a list of subnets to the ELB and application stacks.
The main JSON has the below code:
"Mappings":{
"params":{
"Subnets": {
"dev":[
"subnet-1”,
"subnet-2”
],
"test":[
"subnet-3”,
"subnet-4”,
"subnet-5”,
"subnet-6”
],
"prod":[
"subnet-7”,
"subnet-8”,
"subnet-9”
]
}
}
},
"Parameters":{
"Environment":{
"AllowedValues":[
"prod",
"preprod",
"dev"
],
"Default":"prod",
"Description":"What environment type is it (prod, preprod, test, dev)?",
"Type":"String"
}
},
Resources:{
"ELBStack": {
"Type": "AWS::CloudFormation::Stack",
"Properties": {
"TemplateURL": {
"Fn::Join":[
"",
[
"https://s3.amazonaws.com/",
"myS3bucket",
"/ELB.json"
]
]
},
"Parameters": {
"Environment":{"Ref":"Environment"},
"ELBSHORTNAME":{"Ref":"ELBSHORTNAME"},
"Subnets":{"Fn::FindInMap":[
"params",
"Subnets",
{
"Ref":"Environment"
}
]},
"S3Bucket":{"Ref":"S3Bucket"},
},
"TimeoutInMinutes": "60"
}
}
Now when I run this JSON using Lambda or CloudFormation, I get the below error under the CloudFormation Events tab:
CREATE_FAILED AWS::CloudFormation::Stack ELBStack Value of property Parameters must be an object with String (or simple type) properties
using the Lambda below:
import boto3
import time

date = time.strftime("%Y%m%d")
time = time.strftime("%H%M%S")
stackname = 'FulfillSNSELB'
client = boto3.client('cloudformation')
response = client.create_stack(
    StackName=(stackname + '-' + date + '-' + time),
    TemplateURL='https://s3.amazonaws.com/****/**/myapp.json',
    Parameters=[
        {
            'ParameterKey': 'Environment',
            'ParameterValue': 'dev',
            'UsePreviousValue': False
        }
    ]
)

def lambda_handler(event, context):
    return(response)
You can't pass a list to a nested stack. You have to pass a concatenation of items with the intrinsic function Join like this: !Join ["separator", [item1, item2, …]].
In the nested stack, the type of the parameter needs to be List<Type>.
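The same principle applies when you pass parameters from your Lambda: CloudFormation list parameters cross the API boundary as a single comma-joined string. A rough boto3 sketch (the Subnets parameter, values, and template URL are assumptions based on your template), assuming the template declares Subnets as CommaDelimitedList or a List<...> type that CloudFormation splits back into a list:
import boto3

subnets = ["subnet-1", "subnet-2"]  # example values from your dev mapping

client = boto3.client("cloudformation")
client.create_stack(
    StackName="example",
    TemplateURL="https://s3.amazonaws.com/myS3bucket/ELB.json",
    Parameters=[
        {
            # Declared as CommaDelimitedList (or List<AWS::EC2::Subnet::Id>) in the template
            "ParameterKey": "Subnets",
            "ParameterValue": ",".join(subnets),
            "UsePreviousValue": False,
        },
    ],
)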
Your JSON is not well-formed. Running it through aws cloudformation validate-template (or even jsonlint.com) quickly reveals several basic syntax errors:
- Resources:{ requires the key to be surrounded by quotes: "Resources": {
- Some of your quotation marks are invalid 'smart quotes' ("subnet-1”) and need to be replaced with standard ASCII quotes: "subnet-1",
- (This is the one your error message refers to) The "Parameters" object in your "ELBStack" resource has a trailing comma after its last element ("S3Bucket": {"Ref": "S3Bucket"},) that needs to be removed.
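If you would rather run that validation from Python, since you are already using boto3 in Lambda, a small sketch (reusing the placeholder template URL from your Lambda):
import boto3

cfn = boto3.client("cloudformation")

# Raises a ClientError with a ValidationError code describing
# the first syntax problem found in the template.
resp = cfn.validate_template(
    TemplateURL="https://s3.amazonaws.com/****/**/myapp.json"
)
print(resp.get("Parameters", []))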

ElasticSearch: Getting old visitor data into an index

I'm learning ElasticSearch in the hopes of dumping my business data into ES and viewing it with Kibana. After a week of various issues I finally have ES and Kibana working (1.7.0 and 4 respectively) on 2 Ubuntu 14.04 desktop machines (clustered).
The issue I'm having now is how best to get the data into ES. The data flow is that I capture the PHP global variables $_REQUEST and $_SERVER for each visit to a text file named with a unique ID. From there, if they fill in a form, I capture that data in a text file also named with that unique ID, in a different directory. Then my customers tell me whether that form fill was any good, with a delay of up to 50 days.
So I'm starting with the visitor data - $_REQUEST and $_SERVER. A lot of it is redundant so I'm really just attempting to capture the timestamp of their arrival, their IP, the IP of the server they visited, the domain they visited, the unique ID, and their User Agent. So I created this mapping:
time_date_mapping = { 'type': 'date_time' }
str_not_analyzed = { 'type': 'string' }  # Originally this included 'index': 'not_analyzed' as well

visit_mapping = {
    'properties': {
        'uniqID': str_not_analyzed,
        'pages': str_not_analyzed,
        'domain': str_not_analyzed,
        'Srvr IP': str_not_analyzed,
        'Visitor IP': str_not_analyzed,
        'Agent': { 'type': 'string' },
        'Referrer': { 'type': 'string' },
        'Entrance Time': time_date_mapping,  # Stored as a Unix timestamp
        'Request Time': time_date_mapping,   # Stored as a Unix timestamp
        'Raw': { 'type': 'string', 'index': 'not_analyzed' },
    },
}
I then enter it into ES with:
es.index(
    index=Visit_to_ElasticSearch.INDEX,
    doc_type=Visit_to_ElasticSearch.DOC_TYPE,
    id=self.uniqID,
    timestamp=int(math.floor(self._visit['Entrance Time'])),
    body=visit
)
When I look at the data in the index on ES, only Entrance Time, _id, _type, domain, and uniqID are indexed for searching (according to Kibana). All of the data is present in the document, but most of the fields show "Unindexed fields can not be searched."
Additionally, I was attempting to get a pie chart of the Agents, but I couldn't figure out how to get it visualized, because no matter which boxes I click, the Agent field is never an option for aggregation. I mention it because the fields which are indexed do show up.
I've been attempting to mimic the mapping examples in the elasticsearch.py example which pulls in GitHub data. Can someone correct how I'm using that mapping?
Thanks
------------ Mapping -------------
{
  "visits": {
    "mappings": {
      "visit": {
        "properties": {
          "Agent": {
            "type": "string"
          },
          "Entrance Time": {
            "type": "date",
            "format": "dateOptionalTime"
          },
          "Raw": {
            "properties": {
              "Entrance Time": {
                "type": "double"
              },
              "domain": {
                "type": "string"
              },
              "uniqID": {
                "type": "string"
              }
            }
          },
          "Referrer": {
            "type": "string"
          },
          "Request Time": {
            "type": "string"
          },
          "Srvr IP": {
            "type": "string"
          },
          "Visitor IP": {
            "type": "string"
          },
          "domain": {
            "type": "string"
          },
          "uniqID": {
            "type": "string"
          }
        }
      }
    }
  }
}
------------- Update and New Mapping -----------
So I deleted the index and recreated it. The original index had some data in it from before I knew anything about mapping the data to specific field types; recreating it seemed to fix the issue of only a few fields being indexed.
However, parts of my mapping appear to be ignored. Specifically, the Agent string mapping:
visit_mapping = {
    'properties': {
        'uniqID': str_not_analyzed,
        'pages': str_not_analyzed,
        'domain': str_not_analyzed,
        'Srvr IP': str_not_analyzed,
        'Visitor IP': str_not_analyzed,
        'Agent': { 'type': 'string', 'index': 'not_analyzed' },
        'Referrer': { 'type': 'string' },
        'Entrance Time': time_date_mapping,
        'Request Time': time_date_mapping,
        'Raw': { 'type': 'string', 'index': 'not_analyzed' },
    },
}
Here's the output of http://localhost:9200/visits_test2/_mapping
{
  "visits_test2": {
    "mappings": {
      "visit": {
        "properties": {
          "Agent": {"type": "string"},
          "Entrance Time": {"type": "date", "format": "dateOptionalTime"},
          "Raw": {
            "properties": {
              "Entrance Time": {"type": "double"},
              "domain": {"type": "string"},
              "uniqID": {"type": "string"}
            }
          },
          "Referrer": {"type": "string"},
          "Request Time": {"type": "date", "format": "dateOptionalTime"},
          "Srvr IP": {"type": "string"},
          "Visitor IP": {"type": "string"},
          "domain": {"type": "string"},
          "uniqID": {"type": "string"}
        }
      }
    }
  }
}
Note that I've used an entirely new index. The reason is that I wanted to make sure nothing was carrying over from one to the next.
Note that I'm using the Python library elasticsearch.py and following their examples for mapping syntax.
--------- Python Code for Entering Data into ES, per comment request -----------
Below is a file named mapping.py. I have not yet fully commented the code, since this was just code to test whether this method of data entry into ES was viable. If it is not self-explanatory, let me know and I'll add additional comments.
Note: I programmed in PHP for years before picking up Python. In order to get up and running faster with Python, I created a couple of files with basic string and file manipulation functions and made them into a package. They are written in Python and meant to mimic the behavior of built-in PHP functions, so when you see a call to php_basic_* it is one of those functions.
# Standard Library Imports
import json, copy, datetime, time, enum, os, sys, numpy, math
from datetime import datetime
from enum import Enum, unique

from elasticsearch import Elasticsearch

# My Library
import basicconfig, mybasics
from mybasics.cBaseClass import BaseClass, BaseClassErrors
from mybasics.cHelpers import HandleErrors, LogLvl

# This imports several constants, a couple of functions, and a helper class
from basicconfig.startup_config import *

# Connect to ElasticSearch
es = Elasticsearch([{'host': 'localhost', 'port': '9200'}])

# Create mappings of a visit
time_date_mapping = { 'type': 'date_time' }
str_not_analyzed = { 'type': 'string' }  # This originally included 'index': 'not_analyzed' as well

visit_mapping = {
    'properties': {
        'uniqID': str_not_analyzed,
        'pages': str_not_analyzed,
        'domain': str_not_analyzed,
        'Srvr IP': str_not_analyzed,
        'Visitor IP': str_not_analyzed,
        'Agent': { 'type': 'string', 'index': 'not_analyzed' },
        'Referrer': { 'type': 'string' },
        'Entrance Time': time_date_mapping,
        'Request Time': time_date_mapping,
        'Raw': { 'type': 'string', 'index': 'not_analyzed' },
        'Pages': { 'type': 'string', 'index': 'not_analyzed' },
    },
}


class Visit_to_ElasticSearch(object):
    """
    """

    INDEX = 'visits'
    DOC_TYPE = 'visit'

    def __init__(self, fname, index=True):
        """
        """
        self._visit = json.loads(php_basic_files.file_get_contents(fname))
        self._pages = self._visit.pop('pages')
        self.uniqID = self._visit['uniqID']
        self.domain = self._visit['domain']
        self.entrance_time = self._convert_time(self._visit['Entrance Time'])

        # Get a list of the page IDs
        self.pages = self._pages.keys()

        # Extract IPs and such from a single page
        page = self._pages[self.pages[0]]
        srvr = page['SERVER']
        req = page['REQUEST']
        self.visitor_ip = srvr['REMOTE_ADDR']
        self.srvr_ip = srvr['SERVER_ADDR']
        self.request_time = self._convert_time(srvr['REQUEST_TIME'])
        self.agent = srvr['HTTP_USER_AGENT']

        # Now go grab data that might not be there...
        self._extract_optional()

        if index is True:
            self.index_with_elasticsearch()

    def _convert_time(self, ts):
        """
        """
        try:
            dt = datetime.fromtimestamp(ts)
        except TypeError:
            dt = datetime.fromtimestamp(float(ts))
        return dt.strftime('%Y-%m-%dT%H:%M:%S')

    def _extract_optional(self):
        """
        """
        self.referrer = ''

    def index_with_elasticsearch(self):
        """
        """
        visit = {
            'uniqID': self.uniqID,
            'pages': [],
            'domain': self.domain,
            'Srvr IP': self.srvr_ip,
            'Visitor IP': self.visitor_ip,
            'Agent': self.agent,
            'Referrer': self.referrer,
            'Entrance Time': self.entrance_time,
            'Request Time': self.request_time,
            'Raw': self._visit,
            'Pages': php_basic_str.implode(', ', self.pages),
        }
        es.index(
            index=Visit_to_ElasticSearch.INDEX,
            doc_type=Visit_to_ElasticSearch.DOC_TYPE,
            id=self.uniqID,
            timestamp=int(math.floor(self._visit['Entrance Time'])),
            body=visit
        )


es.indices.create(
    index=Visit_to_ElasticSearch.INDEX,
    body={
        'settings': {
            'number_of_shards': 5,
            'number_of_replicas': 1,
        }
    },
    # ignore already existing index
    ignore=400
)
In case it matters, this is the simple loop I use to dump the data into ES:
for f in all_files:
    try:
        visit = mapping.Visit_to_ElasticSearch(f)
    except IOError:
        pass
where all_files is a list of all the visit files (full path) I have in my test data set.
Here is a sample visit file from a Google Bot visit:
{u'Entrance Time': 1407551587.7385,
u'domain': u'############',
u'pages': {u'6818555600ccd9880bf7acef228c5d47': {u'REQUEST': [],
u'SERVER': {u'DOCUMENT_ROOT': u'/var/www/####/',
u'Entrance Time': 1407551587.7385,
u'GATEWAY_INTERFACE': u'CGI/1.1',
u'HTTP_ACCEPT': u'*/*',
u'HTTP_ACCEPT_ENCODING': u'gzip,deflate',
u'HTTP_CONNECTION': u'Keep-alive',
u'HTTP_FROM': u'googlebot(at)googlebot.com',
u'HTTP_HOST': u'############',
u'HTTP_IF_MODIFIED_SINCE': u'Fri, 13 Jun 2014 20:26:33 GMT',
u'HTTP_USER_AGENT': u'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
u'PATH': u'/usr/local/bin:/usr/bin:/bin',
u'PHP_SELF': u'/index.php',
u'QUERY_STRING': u'',
u'REDIRECT_SCRIPT_URI': u'http://############/',
u'REDIRECT_SCRIPT_URL': u'############',
u'REDIRECT_STATUS': u'200',
u'REDIRECT_URL': u'############',
u'REMOTE_ADDR': u'############',
u'REMOTE_PORT': u'46271',
u'REQUEST_METHOD': u'GET',
u'REQUEST_TIME': u'1407551587',
u'REQUEST_URI': u'############',
u'SCRIPT_FILENAME': u'/var/www/PIAN/index.php',
u'SCRIPT_NAME': u'/index.php',
u'SCRIPT_URI': u'http://############/',
u'SCRIPT_URL': u'/############/',
u'SERVER_ADDR': u'############',
u'SERVER_ADMIN': u'admin#############',
u'SERVER_NAME': u'############',
u'SERVER_PORT': u'80',
u'SERVER_PROTOCOL': u'HTTP/1.1',
u'SERVER_SIGNATURE': u'<address>Apache/2.2.22 (Ubuntu) Server at ############ Port 80</address>\n',
u'SERVER_SOFTWARE': u'Apache/2.2.22 (Ubuntu)',
u'uniqID': u'bbc398716f4703cfabd761cc8d4101a1'},
u'SESSION': {u'Entrance Time': 1407551587.7385,
u'uniqID': u'bbc398716f4703cfabd761cc8d4101a1'}}},
u'uniqID': u'bbc398716f4703cfabd761cc8d4101a1'}
Now I understand better why the Raw field is an object instead of a simple string since it is assigned self._visit which in turn was initialized with json.loads(php_basic_files.file_get_contents(fname)).
Anyway, based on all the information you've given above, my take is that the mapping was never installed via put_mapping. From there on, there's no way anything else can work the way you like. I suggest you modify your code to install the mapping before you index your first visit document.
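For what it's worth, here is a minimal sketch of that order of operations, assuming the 1.x elasticsearch.py client and the visit_mapping dict from your code above:
from elasticsearch import Elasticsearch

es = Elasticsearch([{'host': 'localhost', 'port': '9200'}])

# 1. Create the index (ignore 400 if it already exists).
es.indices.create(
    index='visits',
    body={'settings': {'number_of_shards': 5, 'number_of_replicas': 1}},
    ignore=400
)

# 2. Install the mapping BEFORE indexing the first document.
#    Note: the core field type is 'date' (optionally with a 'format'),
#    not 'date_time', so time_date_mapping should be {'type': 'date'}.
es.indices.put_mapping(
    index='visits',
    doc_type='visit',
    body={'visit': visit_mapping}
)

# 3. Only then index documents with es.index(...) as you already do.
Otherwise ES dynamically generates a mapping from the first document it sees, which matches the behavior you observed (Request Time inferred as string in the first dump, and Agent analyzed despite your 'not_analyzed' setting).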