Related
I am trying to use extended grep to extract data from a JSON. The regex I use is functional on my regexr instance, but for some reason it doesn't work in bash.
I tried many things, notably the bare double dash and various minor edits to the regex for escaping.
#!/bin/bash
networks='{ "networks": [ { "admin_state_up": true, "availability_zone_hints": [], "availability_zones": [], "created_at": "2019-03-12T23:45:13Z", "description": "", "id": "7188504a-72cb-4590-a9b0-414732017837", "ipv4_address_scope": null, "ipv6_address_scope": null, "is_default": false, "mtu": 1450, "name": "BLUE", "port_security_enabled": true, "project_id": "187d635aec4c43fe8e8918afb3a5c82e", "provider:network_type": "vxlan", "provider:physical_network": null, "provider:segmentation_id": 86, "revision_number": 2, "router:external": false, "shared": false, "status": "ACTIVE", "subnets": [], "tags": [], "tenant_id": "187d635aec4c43fe8e8918afb3a5c82e", "updated_at": "2019-03-12T23:45:13Z" }, { "admin_state_up": true, "availability_zone_hints": [], "availability_zones": [], "created_at": "2019-03-12T23:45:13Z", "description": "", "id": "ed82083f-0a7c-4322-a4fb-de8db23e2bae", "ipv4_address_scope": null, "ipv6_address_scope": null, "is_default": false, "mtu": 1450, "name": "RED", "port_security_enabled": true, "project_id": "187d635aec4c43fe8e8918afb3a5c82e", "provider:network_type": "vxlan", "provider:physical_network": null, "provider:segmentation_id": 108, "revision_number": 2, "router:external": false, "shared": false, "status": "ACTIVE", "subnets": [], "tags": [], "tenant_id": "187d635aec4c43fe8e8918afb3a5c82e", "updated_at": "2019-03-12T23:45:13Z" }, { "admin_state_up": true, "availability_zone_hints": [], "availability_zones": [], "created_at": "2019-03-12T23:45:13Z", "description": "", "id": "1eb6647e-869e-4e83-9468-43e2c320bccc", "ipv4_address_scope": null, "ipv6_address_scope": null, "is_default": false, "mtu": 1450, "name": "public", "port_security_enabled": true, "project_id": "187d635aec4c43fe8e8918afb3a5c82e", "provider:network_type": "vxlan", "provider:physical_network": null, "provider:segmentation_id": 32, "revision_number": 2, "router:external": false, "shared": false, "status": "ACTIVE", "subnets": [], "tags": [], "tenant_id": "187d635aec4c43fe8e8918afb3a5c82e", "updated_at": "2019-03-12T23:45:13Z" } ] }'
result=`echo $networks | grep -oE '"(id|name)": "([\w+-]+)"'`
echo $result
The aforementioned code doesn't work but if I switch to the following regex, it works. I just need to add extraction for id field too to be able to extract ids and names using \2 back reference (group 2)
grep -oE '"(id|name)": "(\w+)"'
Can you help me understand why the script doesn't work?
Full formatted JSON
{
"networks": [{
"admin_state_up": true,
"availability_zone_hints": [],
"availability_zones": [],
"created_at": "2019-03-12T23:45:13Z",
"description": "",
"id": "7188504a-72cb-4590-a9b0-414732017837",
"ipv4_address_scope": null,
"ipv6_address_scope": null,
"is_default": false,
"mtu": 1450,
"name": "BLUE",
"port_security_enabled": true,
"project_id": "187d635aec4c43fe8e8918afb3a5c82e",
"provider:network_type": "vxlan",
"provider:physical_network": null,
"provider:segmentation_id": 86,
"revision_number": 2,
"router:external": false,
"shared": false,
"status": "ACTIVE",
"subnets": [],
"tags": [],
"tenant_id": "187d635aec4c43fe8e8918afb3a5c82e",
"updated_at": "2019-03-12T23:45:13Z"
}, {
"admin_state_up": true,
"availability_zone_hints": [],
"availability_zones": [],
"created_at": "2019-03-12T23:45:13Z",
"description": "",
"id": "ed82083f-0a7c-4322-a4fb-de8db23e2bae",
"ipv4_address_scope": null,
"ipv6_address_scope": null,
"is_default": false,
"mtu": 1450,
"name": "RED",
"port_security_enabled": true,
"project_id": "187d635aec4c43fe8e8918afb3a5c82e",
"provider:network_type": "vxlan",
"provider:physical_network": null,
"provider:segmentation_id": 108,
"revision_number": 2,
"router:external": false,
"shared": false,
"status": "ACTIVE",
"subnets": [],
"tags": [],
"tenant_id": "187d635aec4c43fe8e8918afb3a5c82e",
"updated_at": "2019-03-12T23:45:13Z"
}, {
"admin_state_up": true,
"availability_zone_hints": [],
"availability_zones": [],
"created_at": "2019-03-12T23:45:13Z",
"description": "",
"id": "1eb6647e-869e-4e83-9468-43e2c320bccc",
"ipv4_address_scope": null,
"ipv6_address_scope": null,
"is_default": false,
"mtu": 1450,
"name": "public",
"port_security_enabled": true,
"project_id": "187d635aec4c43fe8e8918afb3a5c82e",
"provider:network_type": "vxlan",
"provider:physical_network": null,
"provider:segmentation_id": 32,
"revision_number": 2,
"router:external": false,
"shared": false,
"status": "ACTIVE",
"subnets": [],
"tags": [],
"tenant_id": "187d635aec4c43fe8e8918afb3a5c82e",
"updated_at": "2019-03-12T23:45:13Z"
}]
}
According to man grep:
The Backslash Character and Special Expressions
The symbol \w is a synonym for [[:alnum:]] and \W is a synonym for [^[:alnum:]]. ... A bracket expression is a list of characters enclosed by [ and ]. ... To include a literal ] place it first in the list. Similarly, to include a literal ^ place it anywhere but first. Finally, to include a literal - place it last.
Basically, \w is literally replaced by those characters when evaluated, giving you "([[[:alnum:]]+-]+)", which in a US standard locale gives you "([[a-zA-Z0-9]+-]+)".
Since a bracket expression is truncated by the first ] it sees (unless it is the first element of a bracket expression), the group is only [[[:alnum:]]+, or "1 or more of a digit, letter, and [. This expression is followed by -]+, meaning "exactly one hyphen and one or more ]". This is obviously pretty terrible.
If you try
echo $networks | grep -oE '"(id|name)": "([[:alnum:]+-]+)"'
I.e., \w without the outer bracket expression, the relevant part means "a group (surrounded by ") comprised of one or more digits, letters, hyphens, and plus signs", which outputs:
"id": "7188504a-72cb-4590-a9b0-414732017837"
"name": "BLUE"
"id": "ed82083f-0a7c-4322-a4fb-de8db23e2bae"
"name": "RED"
"id": "1eb6647e-869e-4e83-9468-43e2c320bccc"
"name": "public"
Using PERL (-P) instead of Extended (-E) regexp, looks like the \w is interpreted as expected, without escaping issue: note the -oP
result=$( echo $networks | grep -oP '"(id|name)": "([\w+-]+)"' ) ;
echo $result
"id": "7188504a-72cb-4590-a9b0-414732017837" "name": "BLUE" "id": "ed82083f-0a7c-4322-a4fb-de8db23e2bae" "name": "RED" "id": "1eb6647e-869e-4e83-9468-43e2c320bccc" "name": "public"
As a workaround (it does not resolve the "escaping \w issue)
result=$( echo $networks | grep -oE '"(id|name)": "([a-zA-Z_+-]+)"' ) ;
echo $result
Prints me:
"name": "BLUE" "name": "RED" "name": "public"
Note: prefer using $( ) syntax to execute sub shells rather than the backtick.
I have a model UserB2b extended from User. I have enabled authorization using server.authEnabled in the boot script so that all requests pass through the auth middleware.
Problem: Whenever I have an non-numeric username in my UserB2b model, I get a NaN userId from AccessToken.findForRequest in loopback/common/model/access-token.js.
Here are my models.
AccessToken
{
"name": "AccessToken",
"base": "AccessToken",
"idInjection": false,
"options": {
"validateUpsert": true
},
"mysql": {
"table": "AccessToken"
},
"properties": {
"ttl": {
"type": "Number",
"required": false,
"length": null,
"precision": 10,
"scale": 0,
"mysql": {
"columnName": "ttl",
"dataType": "int",
"dataLength": null,
"dataPrecision": 10,
"dataScale": 0,
"nullable": "Y"
},
"_selectable": true
},
"created": {
"type": "Date",
"required": false,
"length": null,
"precision": null,
"scale": null,
"mysql": {
"columnName": "created",
"dataType": "datetime",
"dataLength": null,
"dataPrecision": null,
"dataScale": null,
"nullable": "Y"
},
"_selectable": true
},
"userId": {
"type": "String",
"required": false,
"length": null,
"precision": 10,
"scale": 0,
"mysql": {
"columnName": "userId",
"dataType": "varchar",
"dataLength": null,
"dataPrecision": 10,
"dataScale": 0,
"nullable": "Y"
},
"_selectable": true
},
"id": {
"type": "String",
"id": true,
"required": false,
"length": 255,
"precision": null,
"scale": null,
"mysql": {
"columnName": "id",
"dataType": "varchar",
"dataLength": 255,
"dataPrecision": null,
"dataScale": null,
"nullable": "N"
},
"_selectable": false
},
"model": {
"type": "string",
"id": false,
"length": 100,
"precision": null,
"scale": null,
"mysql": {
"columnName": "model",
"dataType": "varchar",
"dataLength": 100,
"dataPrecision": null,
"dataScale": null,
"nullable": "N"
},
"_selectable": false
}
},
"validations": [],
"relations": {
"userB2B":{
"type":"belongsTo",
"model":"UserB2b",
"foreignKey":"userId"
}
},
"acls": [
{
"accessType": "*",
"principalType": "ROLE",
"principalId": "$everyone",
"permission": "DENY"
},
{
"accessType": "*",
"principalType": "ROLE",
"principalId":"$authenticated",
"permission": "ALLOW"
}
],
"methods": {}
}
UserB2b
{
"name": "UserB2b",
"base": "User",
"idInjection": false,
"options": {
"validateUpsert": true
},
"mysql": {
"table": "user_b2b"
},
"scope": {
"where": {
"deleted": 0
}
},
"properties": {
"username": {
"type": "String",
"id": true,
"required": true,
"length": 255,
"precision": null,
"scale": null,
"mysql": {
"columnName": "username",
"dataType": "varchar",
"dataLength": 255,
"dataPrecision": null,
"dataScale": null,
"nullable": "N"
},
"_selectable": false
},
"password": {
"type": "String",
"required": true,
"length": 1000,
"precision": null,
"scale": null,
"mysql": {
"columnName": "password",
"dataType": "varchar",
"dataLength": 1000,
"dataPrecision": null,
"dataScale": null,
"nullable": "N"
},
"_selectable": false
},
"name": {
"type": "String",
"required": true,
"length": 255,
"precision": null,
"scale": null,
"mysql": {
"columnName": "name",
"dataType": "varchar",
"dataLength": 255,
"dataPrecision": null,
"dataScale": null,
"nullable": "N"
},
"_selectable": false
},
"contactNumber": {
"type": "String",
"required": true,
"length": 255,
"precision": null,
"scale": null,
"mysql": {
"columnName": "contact_number",
"dataType": "varchar",
"dataLength": 255,
"dataPrecision": null,
"dataScale": null,
"nullable": "N"
},
"_selectable": false
},
"createdAt": {
"type": "Date",
"required": false,
"length": null,
"precision": null,
"scale": null,
"mysql": {
"columnName": "created_at",
"dataType": "timestamp",
"dataLength": null,
"dataPrecision": null,
"dataScale": null,
"nullable": "Y"
},
"_selectable": true
},
"updatedAt": {
"type": "Date",
"required": false,
"length": null,
"precision": null,
"scale": null,
"mysql": {
"columnName": "updated_at",
"dataType": "timestamp",
"dataLength": null,
"dataPrecision": null,
"dataScale": null,
"nullable": "Y"
},
"_selectable": true
},
"deleted": {
"type": "Number",
"required": false,
"length": null,
"precision": 3,
"scale": 0,
"mysql": {
"columnName": "deleted",
"dataType": "tinyint",
"dataLength": null,
"dataPrecision": 3,
"dataScale": 0,
"nullable": "Y"
},
"_selectable": true
},
"email": false,
"created": false,
"lastUpdated": false,
"credentials": false,
"challenges": false,
"status": false,
"verificationToken": false,
"realm": false,
"emailVerified": false
},
"validations": [],
"relations": {
"accessTokens": {
"type": "hasMany",
"model": "Accesstoken",
"foreignKey": "userName"
}
},
"methods": {}
}
Here is the raw packet that I get from the debugger
<-- RowDataPacket
RowDataPacket {
id: 'BJVqc7CtRXzPVVtTyJqmUabijgLRRHBe3sqcCh0bh5NuGBIZCnY8nwLIGeB6dILv',
ttl: 12096000,
created: Tue May 24 2016 05:44:18 GMT+0000 (UTC),
model: 'UserB2b',
userId: 'max1' }
and the query that loopback fires after that is
SELECT `username`,`password`,`name`,`contact_number`,`created_at`,`updated_at`,`deleted` FROM `user_b2b` WHERE (`username`=?) AND (`deleted`=?) ORDER BY `username` LIMIT 1, params: ["NaN",0]
Is there something that needs to be corrected here ?
it seems something wrong here in your access token:
"userId": {
"type": "String",
"required": false,
"length": null,
"precision": 10,
"scale": 0,
"mysql": {
"columnName": "userId",
"dataType": "int",
The type you are using int and you are passing as varchar item and this will throw exception that string is not a number(NAN).
Either change the target type to string/varchar or change the source data type that you are using that matched with target.
Hope this will help you.
I do the following in ColdFusion script:
svc = new http();
svc.setMethod('post');
svc.setCharset('utf-8');
svc.setUrl('https://api.stripe.com/v1/charges');
svc.addParam(type='header', name='Authorization', value='Bearer #Stripe.mySecretKey#');
svc.addParam(type='formfield', name='amount', value=form.amount);
svc.addParam(type='formfield', name='currency', value='usd');
svc.addParam(type='formfield', name='card', value='#form.card#');
svc.addParam(type='formfield', name='description', value='#form.email#');
prefix = svc.send().getPrefix();
response = prefix.FileContent;
WriteOutput(response & '<br>');
WriteOutput(IsSimpleValue(response) & '<br>');
WriteOutput(IsJSON(response));
dump(response);
And I'm getting a string that looks like this:
{ "id": "ch_6HAwRK92OsQPoA", "object": "charge", "created": 1432149035, "livemode": false, "paid": true, "status": "paid", "amount": 100, "currency": "usd", "refunded": false, "source": { "id": "card_6HAwNGtbdzFdq0", "object": "card", "last4": "4242", "brand": "Visa", "funding": "credit", "exp_month": 12, "exp_year": 2015, "fingerprint": "I2nSF7gS79j9Zhei", "country": "US", "name": null, "address_line1": null, "address_line2": null, "address_city": null, "address_state": null, "address_zip": null, "address_country": null, "cvc_check": "pass", "address_line1_check": null, "address_zip_check": null, "dynamic_last4": null, "metadata": {}, "customer": null }, "captured": true, "card": { "id": "card_6HAwNGtbdzFdq0", "object": "card", "last4": "4242", "brand": "Visa", "funding": "credit", "exp_month": 12, "exp_year": 2015, "fingerprint": "I2nSF7gS79j9Zhei", "country": "US", "name": null, "address_line1": null, "address_line2": null, "address_city": null, "address_state": null, "address_zip": null, "address_country": null, "cvc_check": "pass", "address_line1_check": null, "address_zip_check": null, "dynamic_last4": null, "metadata": {}, "customer": null }, "balance_transaction": "txn_6HAw4bAUUZ6trA", "failure_message": null, "failure_code": null, "amount_refunded": 0, "customer": null, "invoice": null, "description": "PhillipSenn#gmail.com", "dispute": null, "metadata": {}, "statement_descriptor": null, "fraud_details": {}, "receipt_email": null, "receipt_number": null, "shipping": null, "destination": null, "application_fee": null, "refunds": { "object": "list", "total_count": 0, "has_more": false, "url": "/v1/charges/ch_6HAwRK92OsQPoA/refunds", "data": [] } }
As well as "YES" and "YES".
If I do a DeserializeJSON(response), then I get a 500 error.
So, how do I put this simple value string, which looks like json, in a ColdFusion structure so that I can reference response.id and response.paid?
Something's wrong in your CF or web server setup.
I tested your json with CF11 on tryCF and it works.
<cfscript>
json = '{ "id": "ch_6HAwRK92OsQPoA", "object": "charge", "created": 1432149035, "livemode": false, "paid": true, "status": "paid", "amount": 100, "currency": "usd", "refunded": false, "source": { "id": "card_6HAwNGtbdzFdq0", "object": "card", "last4": "4242", "brand": "Visa", "funding": "credit", "exp_month": 12, "exp_year": 2015, "fingerprint": "I2nSF7gS79j9Zhei", "country": "US", "name": null, "address_line1": null, "address_line2": null, "address_city": null, "address_state": null, "address_zip": null, "address_country": null, "cvc_check": "pass", "address_line1_check": null, "address_zip_check": null, "dynamic_last4": null, "metadata": {}, "customer": null }, "captured": true, "card": { "id": "card_6HAwNGtbdzFdq0", "object": "card", "last4": "4242", "brand": "Visa", "funding": "credit", "exp_month": 12, "exp_year": 2015, "fingerprint": "I2nSF7gS79j9Zhei", "country": "US", "name": null, "address_line1": null, "address_line2": null, "address_city": null, "address_state": null, "address_zip": null, "address_country": null, "cvc_check": "pass", "address_line1_check": null, "address_zip_check": null, "dynamic_last4": null, "metadata": {}, "customer": null }, "balance_transaction": "txn_6HAw4bAUUZ6trA", "failure_message": null, "failure_code": null, "amount_refunded": 0, "customer": null, "invoice": null, "description": "PhillipSenn#gmail.com", "dispute": null, "metadata": {}, "statement_descriptor": null, "fraud_details": {}, "receipt_email": null, "receipt_number": null, "shipping": null, "destination": null, "application_fee": null, "refunds": { "object": "list", "total_count": 0, "has_more": false, "url": "/v1/charges/ch_6HAwRK92OsQPoA/refunds", "data": [] } }';
writeOutput(isJSon(json));
writeDump(DeserializeJSON(json));
</cfscript>
Run: http://trycf.com/gist/cb798ff697ac80396284/acf11
I have a Tweepy Stream Api json file that is included 33K tweets. All of them have non-whitespace space that I couldn't find solution. If you use copy and past it to pro.jsonlint.com, you can see that line 217 has "EOF" problem. I can fix them using notepad++ but first I need to understand that what is the problem and how I can fix it?
If somebody can tell me where is the EOF problem and how fix it with notepad++.
Thanks,
{
"created_at": "Tue Mar 31 20:50:08 +0000 2015",
"id": 583008398612029440,
"id_str": "583008398612029440",
"text": "RT #kamalakmustafa: Hain bir sald\u0131r\u0131 sonucu \u015fehit d\u00fc\u015fen Savc\u0131m\u0131z #MehmetSelimKiraz 'a Allah'tan rahmet, ailesine sab\u0131rlar diliyorum. Millet\u2026",
"source": "\u003ca href=\"http:\/\/www.twitter.com\" rel=\"nofollow\"\u003eTwitter for Windows Phone\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 1337409816,
"id_str": "1337409816",
"name": "Atakan \u00c7etin",
"screen_name": "BrownnChild",
"location": "",
"url": null,
"description": "Bir nefesine bile h\u00fckmedemedi\u011fimiz bu d\u00fcnya i\u00e7in boyun mu b\u00fckece\u011fiz? M\u0130LL\u0130 G\u00d6R\u00dc\u015e!",
"protected": false,
"verified": false,
"followers_count": 246,
"friends_count": 116,
"listed_count": 0,
"favourites_count": 1077,
"statuses_count": 2613,
"created_at": "Mon Apr 08 19:31:06 +0000 2013",
"utc_offset": 10800,
"time_zone": "Athens",
"geo_enabled": true,
"lang": "tr",
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "131516",
"profile_background_image_url": "http:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif",
"profile_background_image_url_https": "https:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif",
"profile_background_tile": true,
"profile_link_color": "009999",
"profile_sidebar_border_color": "EEEEEE",
"profile_sidebar_fill_color": "EFEFEF",
"profile_text_color": "333333",
"profile_use_background_image": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/579347764670296064\/6yuzFJPG_normal.jpg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/579347764670296064\/6yuzFJPG_normal.jpg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/1337409816\/1427653680",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"retweeted_status": {
"created_at": "Tue Mar 31 20:38:15 +0000 2015",
"id": 583005408828252160,
"id_str": "583005408828252160",
"text": "Hain bir sald\u0131r\u0131 sonucu \u015fehit d\u00fc\u015fen Savc\u0131m\u0131z #MehmetSelimKiraz 'a Allah'tan rahmet, ailesine sab\u0131rlar diliyorum. Milletimizin ba\u015f\u0131 sa\u011folsun.",
"source": "\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 338288811,
"id_str": "338288811",
"name": "Mustafa Kamalak",
"screen_name": "kamalakmustafa",
"location": "Ankara, T\u00fcrkiye",
"url": "http:\/\/www.saadet.org.tr\/kisi\/mustafa-kamalak",
"description": "Saadet Partisi Genel Ba\u015fkan\u0131\n Prof. Dr. Mustafa Kamalak",
"protected": false,
"verified": false,
"followers_count": 52358,
"friends_count": 2,
"listed_count": 163,
"favourites_count": 0,
"statuses_count": 1574,
"created_at": "Tue Jul 19 10:48:16 +0000 2011",
"utc_offset": 10800,
"time_zone": "Istanbul",
"geo_enabled": false,
"lang": "tr",
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "FFFFFF",
"profile_background_image_url": "http:\/\/pbs.twimg.com\/profile_background_images\/378800000181217982\/FBktDfqo.jpeg",
"profile_background_image_url_https": "https:\/\/pbs.twimg.com\/profile_background_images\/378800000181217982\/FBktDfqo.jpeg",
"profile_background_tile": false,
"profile_link_color": "0084B4",
"profile_sidebar_border_color": "000000",
"profile_sidebar_fill_color": "DDFFCC",
"profile_text_color": "333333",
"profile_use_background_image": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/436119860225908737\/76GmiIPj_normal.jpeg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/436119860225908737\/76GmiIPj_normal.jpeg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/338288811\/1423922000",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"retweet_count": 122,
"favorite_count": 152,
"entities": {
"hashtags": [
{
"text": "MehmetSelimKiraz",
"indices": [
45,
62
]
}
],
"trends": [
],
"urls": [
],
"user_mentions": [
],
"symbols": [
]
},
"favorited": false,
"retweeted": false,
"possibly_sensitive": false,
"filter_level": "low",
"lang": "tr"
},
"retweet_count": 0,
"favorite_count": 0,
"entities": {
"hashtags": [
{
"text": "MehmetSelimKiraz",
"indices": [
65,
82
]
}
],
"trends": [
],
"urls": [
],
"user_mentions": [
{
"screen_name": "kamalakmustafa",
"name": "Mustafa Kamalak",
"id": 338288811,
"id_str": "338288811",
"indices": [
3,
18
]
}
],
"symbols": [
]
},
"favorited": false,
"retweeted": false,
"possibly_sensitive": false,
"filter_level": "low",
"lang": "tr",
"timestamp_ms": "1427835008658"
}
{
"created_at": "Tue Mar 31 20:50:08 +0000 2015",
"id": 583008398649769984,
"id_str": "583008398649769984",
"text": "RT #BekirDeveli: #MehmetSelimKiraz",
"source": "\u003ca href=\"http:\/\/twitterhizmetcisi.com\" rel=\"nofollow\"\u003eTranslation Mobile\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 1284184062,
"id_str": "1284184062",
"name": "K\u00fcrsad Celik",
"screen_name": "KrsadC",
"location": "Istanbul-Den\u0131zl\u0131",
"url": null,
"description": null,
"protected": false,
"verified": false,
"followers_count": 166,
"friends_count": 452,
"listed_count": 1,
"favourites_count": 1892,
"statuses_count": 2723,
"created_at": "Wed Mar 20 20:23:46 +0000 2013",
"utc_offset": 10800,
"time_zone": "Athens",
"geo_enabled": true,
"lang": "tr",
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "FFF04D",
"profile_background_image_url": "http:\/\/pbs.twimg.com\/profile_background_images\/378800000168605771\/dgYo8Miy.jpeg",
"profile_background_image_url_https": "https:\/\/pbs.twimg.com\/profile_background_images\/378800000168605771\/dgYo8Miy.jpeg",
"profile_background_tile": true,
"profile_link_color": "0099CC",
"profile_sidebar_border_color": "FFFFFF",
"profile_sidebar_fill_color": "F6FFD1",
"profile_text_color": "333333",
"profile_use_background_image": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/437899583348572160\/FamxSRwB_normal.jpeg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/437899583348572160\/FamxSRwB_normal.jpeg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/1284184062\/1393238552",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"retweeted_status": {
"created_at": "Tue Mar 31 20:39:47 +0000 2015",
"id": 583005791340404737,
"id_str": "583005791340404737",
"text": "#MehmetSelimKiraz",
"source": "\u003ca href=\"http:\/\/twitter.com\/download\/iphone\" rel=\"nofollow\"\u003eTwitter for iPhone\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 607600160,
"id_str": "607600160",
"name": "Bekir Develi",
"screen_name": "BekirDeveli",
"location": "\u0130stanbul\/Turkey",
"url": "http:\/\/www.bekirdeveli.com",
"description": null,
"protected": false,
"verified": false,
"followers_count": 225574,
"friends_count": 2{
"created_at": "Tue Mar 31 20:50:14 +0000 2015",
"id": 583008421152198656,
"id_str": "583008421152198656",
"text": "#mehmetselimkiraz https:\/\/t.co\/ly9MROZ6Yg",
"source": "\u003ca href=\"http:\/\/instagram.com\" rel=\"nofollow\"\u003eInstagram\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 110186170,
"id_str": "110186170",
"name": "\u015eeyma Ceylan",
"screen_name": "cylnm",
"location": "ceylan",
"url": null,
"description": "\u0648\u0642\u062f\u0645 \u0643\u0644 \u0627\u0644\u0627\u0646\u0632\u0639\u0627\u062c \u0631\u0627\u062d\u0629 \u0627\u0644\u0644\u0647I http:\/\/Instagram.com\/seymaceylan_",
"protected": false,
"verified": false,
"followers_count": 1759,
"friends_count": 780,
"listed_count": 15,
"favourites_count": 7472,
"statuses_count": 9036,
"created_at": "Sun Jan 31 17:13:48 +0000 2010",
"utc_offset": -36000,
"time_zone": "Hawaii",
"geo_enabled": true,
"lang": "tr",
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "FF6699",
"profile_background_image_url": "http:\/\/pbs.twimg.com\/profile_background_images\/866467818\/24dcf5a87af2b92f9ff76fead916e4ef.jpeg",
"profile_background_image_url_https": "https:\/\/pbs.twimg.com\/profile_background_images\/866467818\/24dcf5a87af2b92f9ff76fead916e4ef.jpeg",
"profile_background_tile": true,
"profile_link_color": "9266CC",
"profile_sidebar_border_color": "FFFFFF",
"profile_sidebar_fill_color": "E5507E",
"profile_text_color": "362720",
"profile_use_background_image": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/574664598248419329\/Budj4Oq3_normal.jpeg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/574664598248419329\/Budj4Oq3_normal.jpeg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/110186170\/1410798690",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"retweet_count": 0,
"favorite_count": 0,
"entities": {
"hashtags": [
{
"text": "mehmetselimkiraz",
"indices": [
0,
17
]
}
],
"trends": [
],
"urls": [
{
"url": "https:\/\/t.co\/ly9MROZ6Yg",
"expanded_url": "https:\/\/instagram.com\/p\/05-6PQPQ-j\/",
"display_url": "instagram.com\/p\/05-6PQPQ-j\/",
"indices": [
18,
41
]
}
],
"user_mentions": [
],
"symbols": [
]
},
"favorited": false,
"retweeted": false,
"possibly_sensitive": true,
"filter_level": "low",
"lang": "und",
"timestamp_ms": "1427835014032"
}
You need to match individual JSON objects, and I think you can just match them and copy wherever you need using this regex:
\{(?:[^{}]+|(?0))*\}
It works in Notepad++ 6.7.5, and it will select the text from the first { up to the corresponding closing }.
I am using Python to stream Twitter's Tweets via API. For example, the word "car" generates the following results:
{
"created_at": "Fri Sep 05 00:15:32 +0000 2014",
"id": 507683414255108096,
"id_str": "507683414255108096",
"text": "I put 'or nah' in my cousins car and Brenda & I are singing along a",
"source": "\u003ca href=\"http:\/\/www.cloudhopper.com\/\" rel=\"nofollow\"\u003eCloudhopper\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 84729292,
"id_str": "84729292",
"name": "Tracy Ochoa",
"screen_name": "TracyMayy",
"location": "",
"url": "http:\/\/whythefuckinfucknot.tumblr.com",
"description": "New York 16 Instagram - TracyMayy Tumblr - http:\/\/whythefuckinfucknot.tumblr.com http:\/\/ask.fm\/tracyochoa",
"protected": false,
"verified": false,
"followers_count": 1045,
"friends_count": 453,
"listed_count": 22,
"favourites_count": 46035,
"statuses_count": 44720,
"created_at": "Sat Oct 24 00:42:23 +0000 2009",
"utc_offset": -14400,
"time_zone": "Eastern Time (US & Canada)",
"geo_enabled": true,
"lang": "en",
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "000000",
"profile_background_image_url": "http:\/\/pbs.twimg.com\/profile_background_images\/497215144607236096\/AharMORU.png",
"profile_background_image_url_https": "https:\/\/pbs.twimg.com\/profile_background_images\/497215144607236096\/AharMORU.png",
"profile_background_tile": true,
"profile_link_color": "000000",
"profile_sidebar_border_color": "FFFFFF",
"profile_sidebar_fill_color": "09B6D9",
"profile_text_color": "050505",
"profile_use_background_image": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/504330955637919745\/JAHlbkiS_normal.jpeg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/504330955637919745\/JAHlbkiS_normal.jpeg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/84729292\/1409681919",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"retweet_count": 0,
"favorite_count": 0,
"entities": {
"hashtags": [],
"trends": [],
"urls": [],
"user_mentions": [],
"symbols": []
},
"favorited": false,
"retweeted": false,
"possibly_sensitive": false,
"filter_level": "medium",
"lang": "en",
"timestamp_ms": "1409876132921"
}
It looks to me the Twitter user who wrote this tweet has an id "507683414255108096", is there a way to export Twitter's API Tweets with the username of the actual Twitter users who write the corresponding Tweets?
If not through API, do I need to actually follow the people to get streams of Tweets with usernames? Or is there another way around?