Twitter streaming API, where to find originator's name? - python-2.7

I am using Python to stream Twitter's Tweets via API. For example, the word "car" generates the following results:
{
"created_at": "Fri Sep 05 00:15:32 +0000 2014",
"id": 507683414255108096,
"id_str": "507683414255108096",
"text": "I put 'or nah' in my cousins car and Brenda & I are singing along a",
"source": "\u003ca href=\"http:\/\/www.cloudhopper.com\/\" rel=\"nofollow\"\u003eCloudhopper\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 84729292,
"id_str": "84729292",
"name": "Tracy Ochoa",
"screen_name": "TracyMayy",
"location": "",
"url": "http:\/\/whythefuckinfucknot.tumblr.com",
"description": "New York 16 Instagram - TracyMayy Tumblr - http:\/\/whythefuckinfucknot.tumblr.com http:\/\/ask.fm\/tracyochoa",
"protected": false,
"verified": false,
"followers_count": 1045,
"friends_count": 453,
"listed_count": 22,
"favourites_count": 46035,
"statuses_count": 44720,
"created_at": "Sat Oct 24 00:42:23 +0000 2009",
"utc_offset": -14400,
"time_zone": "Eastern Time (US & Canada)",
"geo_enabled": true,
"lang": "en",
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "000000",
"profile_background_image_url": "http:\/\/pbs.twimg.com\/profile_background_images\/497215144607236096\/AharMORU.png",
"profile_background_image_url_https": "https:\/\/pbs.twimg.com\/profile_background_images\/497215144607236096\/AharMORU.png",
"profile_background_tile": true,
"profile_link_color": "000000",
"profile_sidebar_border_color": "FFFFFF",
"profile_sidebar_fill_color": "09B6D9",
"profile_text_color": "050505",
"profile_use_background_image": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/504330955637919745\/JAHlbkiS_normal.jpeg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/504330955637919745\/JAHlbkiS_normal.jpeg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/84729292\/1409681919",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"retweet_count": 0,
"favorite_count": 0,
"entities": {
"hashtags": [],
"trends": [],
"urls": [],
"user_mentions": [],
"symbols": []
},
"favorited": false,
"retweeted": false,
"possibly_sensitive": false,
"filter_level": "medium",
"lang": "en",
"timestamp_ms": "1409876132921"
}
It looks to me as though the Twitter user who wrote this tweet has the id "507683414255108096". Is there a way to export Tweets from Twitter's API together with the username of the Twitter user who wrote each Tweet?
If not through the API, do I need to actually follow the people to get streams of Tweets with usernames? Or is there another way around this?

Related

Django Framework Rest match calendar users with current user

I am doing an exercise where the goal is to match my current calendar with the calendars of other users.
To do this, I created a UserProfile App and Schedule App. Each user has a profile that can have multiple intervals.
Considering my current calendar:
{
"count": 1,
"next": null,
"previous": null,
"results": [
{
"id": 3,
"user": {
"id": 3,
"username": "john.doe",
"first_name": "John",
"last_name": "Doe"
},
"calendar": [
{
"id": 1,
"mon": true,
"tue": true,
"wed": true,
"thu": true,
"fri": true,
"sat": true,
"sun": true,
"start_date": "09:30",
"end_date": "12:20"
},
{
"id": 2,
"mon": true,
"tue": true,
"wed": true,
"thu": true,
"fri": true,
"sat": true,
"sun": true,
"start_date": "14:00",
"end_date": "23:00"
}
]
}
]}
When I call the endpoint /api/search/users, it returns all user profiles along with the info for each user.
example:
{
"count": 99,
"next": "http://localhost:8000/api/search/users?page=2",
"previous": null,
"results": [
{
"id": 1,
"user": {
"id": 1,
"username": "john.bender.99",
"first_name": "John",
"last_name": "Bender"
},
"calendar": [
{
"id": 2,
"mon": true,
"tue": true,
"wed": true,
"thu": false,
"fri": true,
"sat": false,
"sun": false,
"start_date": "09:30",
"end_date": "12:20"
},
{
"id": 55,
"mon": false,
"tue": true,
"wed": true,
"thu": false,
"fri": true,
"sat": false,
"sun": false,
"start_date": "14:30",
"end_date": "19:20"
}
]
}
]}
Now, what I actually want to do is search for users whose calendars overlap with mine, so that I know on which days/hours we have a match.
When I call the endpoint /api/search/users?related=self, I want to see this:
{
"count": 2,
"results": [
{
"id": 87,
"user": {
"id": 87,
"username": "diana.taller",
"first_name": "Diana",
"last_name": "Taller"
},
"calendar": [
{
"id": 2,
"mon": true,
"tue": true,
"wed": true,
"thu": false,
"fri": true,
"sat": false,
"sun": false,
"start_date": "10:30",
"end_date": "11:20"
},
{
"id": 55,
"mon": false,
"tue": true,
"wed": true,
"thu": false,
"fri": true,
"sat": false,
"sun": false,
"start_date": "16:30",
"end_date": "17:20"
}
]
},{
"id": 128,
"user": {
"id": 128,
"username": "therockjosh",
"first_name": "Josh",
"last_name": "Bail"
},
"calendar": [
{
"id": 2,
"mon": false,
"tue": false,
"wed": false,
"thu": false,
"fri": true,
"sat": false,
"sun": false,
"start_date": "10:30",
"end_date": "11:20"
},
{
"id": 55,
"mon": false,
"tue": false,
"wed": false,
"thu": false,
"fri": true,
"sat": true,
"sun": true,
"start_date": "14:30",
"end_date": "17:00"
}
]
}
]}
The intersection between my availability and that of the other users is computed per day, and then per interval, to see when we have a match.
Inside my Search App, I created this:
if related == "self":
current_user_profile = UserProfile.objects.filter(user=self.request.user)
related_users = UserProfile.objects.filter(calendar__in=current_user_profile.calendar.all())
return related_users
If I evaluate current_user_profile, it returns the current user's data as shown above.
If I call UserProfile.objects.all(), it returns the users' data as shown above.
But for some reason, I can't access calendar via current_user_profile.calendar, as this image shows.
Does anyone have an idea how I could do this?
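I think you need to use the get function if you want to get the object itself: filter() returns a QuerySet rather than a single UserProfile instance, so the calendar attribute is not available on its result.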
if related == "self":
# not UserProfile.objects.filter in order to get the UserProfile object.
current_user_profile = UserProfile.objects.get(user=self.request.user)
related_users = UserProfile.objects.filter(calendar__in=current_user_profile.calendar.all())
return related_users
Here we have the solution I found to exclude my user from the search.
current_user_profile = UserProfile.objects.get(user=self.request.user)
related_users = UserProfile.objects\
.filter(calendar__in=current_user_profile.calendar.all()) \
.exclude(user_id=current_user_profile.id)
return related_users

Egrep special expressions like \w in bracket expressions []

I am trying to use extended grep to extract data from a JSON. The regex I use is functional on my regexr instance, but for some reason it doesn't work in bash.
I tried many things, notably the bare double dash and various minor edits to the regex for escaping.
#!/bin/bash
networks='{ "networks": [ { "admin_state_up": true, "availability_zone_hints": [], "availability_zones": [], "created_at": "2019-03-12T23:45:13Z", "description": "", "id": "7188504a-72cb-4590-a9b0-414732017837", "ipv4_address_scope": null, "ipv6_address_scope": null, "is_default": false, "mtu": 1450, "name": "BLUE", "port_security_enabled": true, "project_id": "187d635aec4c43fe8e8918afb3a5c82e", "provider:network_type": "vxlan", "provider:physical_network": null, "provider:segmentation_id": 86, "revision_number": 2, "router:external": false, "shared": false, "status": "ACTIVE", "subnets": [], "tags": [], "tenant_id": "187d635aec4c43fe8e8918afb3a5c82e", "updated_at": "2019-03-12T23:45:13Z" }, { "admin_state_up": true, "availability_zone_hints": [], "availability_zones": [], "created_at": "2019-03-12T23:45:13Z", "description": "", "id": "ed82083f-0a7c-4322-a4fb-de8db23e2bae", "ipv4_address_scope": null, "ipv6_address_scope": null, "is_default": false, "mtu": 1450, "name": "RED", "port_security_enabled": true, "project_id": "187d635aec4c43fe8e8918afb3a5c82e", "provider:network_type": "vxlan", "provider:physical_network": null, "provider:segmentation_id": 108, "revision_number": 2, "router:external": false, "shared": false, "status": "ACTIVE", "subnets": [], "tags": [], "tenant_id": "187d635aec4c43fe8e8918afb3a5c82e", "updated_at": "2019-03-12T23:45:13Z" }, { "admin_state_up": true, "availability_zone_hints": [], "availability_zones": [], "created_at": "2019-03-12T23:45:13Z", "description": "", "id": "1eb6647e-869e-4e83-9468-43e2c320bccc", "ipv4_address_scope": null, "ipv6_address_scope": null, "is_default": false, "mtu": 1450, "name": "public", "port_security_enabled": true, "project_id": "187d635aec4c43fe8e8918afb3a5c82e", "provider:network_type": "vxlan", "provider:physical_network": null, "provider:segmentation_id": 32, "revision_number": 2, "router:external": false, "shared": false, "status": "ACTIVE", "subnets": [], "tags": [], "tenant_id": 
"187d635aec4c43fe8e8918afb3a5c82e", "updated_at": "2019-03-12T23:45:13Z" } ] }'
result=`echo $networks | grep -oE '"(id|name)": "([\w+-]+)"'`
echo $result
The aforementioned code doesn't work but if I switch to the following regex, it works. I just need to add extraction for id field too to be able to extract ids and names using \2 back reference (group 2)
grep -oE '"(id|name)": "(\w+)"'
Can you help me understand why the script doesn't work?
Full formatted JSON
{
"networks": [{
"admin_state_up": true,
"availability_zone_hints": [],
"availability_zones": [],
"created_at": "2019-03-12T23:45:13Z",
"description": "",
"id": "7188504a-72cb-4590-a9b0-414732017837",
"ipv4_address_scope": null,
"ipv6_address_scope": null,
"is_default": false,
"mtu": 1450,
"name": "BLUE",
"port_security_enabled": true,
"project_id": "187d635aec4c43fe8e8918afb3a5c82e",
"provider:network_type": "vxlan",
"provider:physical_network": null,
"provider:segmentation_id": 86,
"revision_number": 2,
"router:external": false,
"shared": false,
"status": "ACTIVE",
"subnets": [],
"tags": [],
"tenant_id": "187d635aec4c43fe8e8918afb3a5c82e",
"updated_at": "2019-03-12T23:45:13Z"
}, {
"admin_state_up": true,
"availability_zone_hints": [],
"availability_zones": [],
"created_at": "2019-03-12T23:45:13Z",
"description": "",
"id": "ed82083f-0a7c-4322-a4fb-de8db23e2bae",
"ipv4_address_scope": null,
"ipv6_address_scope": null,
"is_default": false,
"mtu": 1450,
"name": "RED",
"port_security_enabled": true,
"project_id": "187d635aec4c43fe8e8918afb3a5c82e",
"provider:network_type": "vxlan",
"provider:physical_network": null,
"provider:segmentation_id": 108,
"revision_number": 2,
"router:external": false,
"shared": false,
"status": "ACTIVE",
"subnets": [],
"tags": [],
"tenant_id": "187d635aec4c43fe8e8918afb3a5c82e",
"updated_at": "2019-03-12T23:45:13Z"
}, {
"admin_state_up": true,
"availability_zone_hints": [],
"availability_zones": [],
"created_at": "2019-03-12T23:45:13Z",
"description": "",
"id": "1eb6647e-869e-4e83-9468-43e2c320bccc",
"ipv4_address_scope": null,
"ipv6_address_scope": null,
"is_default": false,
"mtu": 1450,
"name": "public",
"port_security_enabled": true,
"project_id": "187d635aec4c43fe8e8918afb3a5c82e",
"provider:network_type": "vxlan",
"provider:physical_network": null,
"provider:segmentation_id": 32,
"revision_number": 2,
"router:external": false,
"shared": false,
"status": "ACTIVE",
"subnets": [],
"tags": [],
"tenant_id": "187d635aec4c43fe8e8918afb3a5c82e",
"updated_at": "2019-03-12T23:45:13Z"
}]
}
According to man grep:
The Backslash Character and Special Expressions
The symbol \w is a synonym for [[:alnum:]] and \W is a synonym for [^[:alnum:]]. ... A bracket expression is a list of characters enclosed by [ and ]. ... To include a literal ] place it first in the list. Similarly, to include a literal ^ place it anywhere but first. Finally, to include a literal - place it last.
Basically, \w is literally replaced by those characters when evaluated, giving you "([[[:alnum:]]+-]+)", which in a US standard locale gives you "([[a-zA-Z0-9]+-]+)".
Since a bracket expression is terminated by the first ] it sees (unless that ] is the first element of the bracket expression), the group is only [[[:alnum:]]+, or "one or more of a digit, letter, or [". This expression is followed by -]+, meaning "exactly one hyphen and one or more ]". This is obviously pretty terrible.
If you try
echo $networks | grep -oE '"(id|name)": "([[:alnum:]+-]+)"'
I.e., \w without the outer bracket expression, the relevant part means "a group (surrounded by ") comprised of one or more digits, letters, hyphens, and plus signs", which outputs:
"id": "7188504a-72cb-4590-a9b0-414732017837"
"name": "BLUE"
"id": "ed82083f-0a7c-4322-a4fb-de8db23e2bae"
"name": "RED"
"id": "1eb6647e-869e-4e83-9468-43e2c320bccc"
"name": "public"
Using Perl-compatible (-P) instead of extended (-E) regular expressions, it looks like \w is interpreted as expected, without any escaping issue (note the -oP):
result=$( echo $networks | grep -oP '"(id|name)": "([\w+-]+)"' ) ;
echo $result
"id": "7188504a-72cb-4590-a9b0-414732017837" "name": "BLUE" "id": "ed82083f-0a7c-4322-a4fb-de8db23e2bae" "name": "RED" "id": "1eb6647e-869e-4e83-9468-43e2c320bccc" "name": "public"
As a workaround (it does not resolve the "escaping \w" issue):
result=$( echo $networks | grep -oE '"(id|name)": "([a-zA-Z_+-]+)"' ) ;
echo $result
This prints:
"name": "BLUE" "name": "RED" "name": "public"
Note: prefer the $( ) syntax over backticks for command substitution.

Parsing complex JSON using Kinesis Analytics

I have the following JSON stream coming from Twitter.
{
"created_at": "Thu Sep 27 21:02:00 +0000 2018",
"id": 1045418301336244224,
"id_str": "1045418301336244224",
"text": "Conditional Branching Now Supported in AWS Systems Manager Automation - #awscloud #amazon #aws",
"source": "Buffer",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 14687423,
"id_str": "14687423",
"name": "Casey Becking",
"screen_name": "caseybecking",
"location": "Huntington Beach, CA",
"url": "http://caseybecking.com",
"description": "I do stuff with computers for #rackspace , geek at heart! play and watch to much hockey, someday I'll make a personal website.",
"translator_type": "none",
"protected": false,
"verified": false,
"followers_count": 4191,
"friends_count": 2412,
"listed_count": 90,
"favourites_count": 794,
"statuses_count": 12995,
"created_at": "Wed May 07 15:03:23 +0000 2008",
"utc_offset": null,
"time_zone": null,
"geo_enabled": true,
"lang": "en",
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "000000",
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme15/bg.png",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme15/bg.png",
"profile_background_tile": false,
"profile_link_color": "ABB8C2",
"profile_sidebar_border_color": "000000",
"profile_sidebar_fill_color": "000000",
"profile_text_color": "000000",
"profile_use_background_image": false,
"profile_image_url": "http://pbs.twimg.com/profile_images/981617292546060289/RMX0GQFe_normal.jpg",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/981617292546060289/RMX0GQFe_normal.jpg",
"profile_banner_url": "https://pbs.twimg.com/profile_banners/14687423/1439137746",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"is_quote_status": false,
"quote_count": 0,
"reply_count": 0,
"retweet_count": 0,
"favorite_count": 0,
"entities": {
"hashtags": [{
"text": "amazon",
"indices": [106, 113]
}, {
"text": "aws",
"indices": [114, 118]
}],
"urls": [{
"url": "",
"expanded_url": "https://buff.ly/2zwRyBx",
"display_url": "buff.ly/2zwRyBx",
"indices": [72, 95]
}],
"user_mentions": [{
"screen_name": "awscloud",
"name": "Amazon Web Services",
"id": 66780587,
"id_str": "66780587",
"indices": [96, 105]
}],
"symbols": []
},
"favorited": false,
"retweeted": false,
"possibly_sensitive": false,
"filter_level": "low",
"lang": "en",
"timestamp_ms": "1538082120628",
"emoticons": [],
"sentiments": "Neutral"
}
How do I parse, analyze and process this JSON using Kinesis Analytics?
The arrays should be flattened. This is very doable in Hive, but I need to do the same in Kinesis Analytics.

Converting a json looking simple value string to a ColdFusion structure

I do the following in ColdFusion script:
svc = new http();
svc.setMethod('post');
svc.setCharset('utf-8');
svc.setUrl('https://api.stripe.com/v1/charges');
svc.addParam(type='header', name='Authorization', value='Bearer #Stripe.mySecretKey#');
svc.addParam(type='formfield', name='amount', value=form.amount);
svc.addParam(type='formfield', name='currency', value='usd');
svc.addParam(type='formfield', name='card', value='#form.card#');
svc.addParam(type='formfield', name='description', value='#form.email#');
prefix = svc.send().getPrefix();
response = prefix.FileContent;
WriteOutput(response & '<br>');
WriteOutput(IsSimpleValue(response) & '<br>');
WriteOutput(IsJSON(response));
dump(response);
And I'm getting a string that looks like this:
{ "id": "ch_6HAwRK92OsQPoA", "object": "charge", "created": 1432149035, "livemode": false, "paid": true, "status": "paid", "amount": 100, "currency": "usd", "refunded": false, "source": { "id": "card_6HAwNGtbdzFdq0", "object": "card", "last4": "4242", "brand": "Visa", "funding": "credit", "exp_month": 12, "exp_year": 2015, "fingerprint": "I2nSF7gS79j9Zhei", "country": "US", "name": null, "address_line1": null, "address_line2": null, "address_city": null, "address_state": null, "address_zip": null, "address_country": null, "cvc_check": "pass", "address_line1_check": null, "address_zip_check": null, "dynamic_last4": null, "metadata": {}, "customer": null }, "captured": true, "card": { "id": "card_6HAwNGtbdzFdq0", "object": "card", "last4": "4242", "brand": "Visa", "funding": "credit", "exp_month": 12, "exp_year": 2015, "fingerprint": "I2nSF7gS79j9Zhei", "country": "US", "name": null, "address_line1": null, "address_line2": null, "address_city": null, "address_state": null, "address_zip": null, "address_country": null, "cvc_check": "pass", "address_line1_check": null, "address_zip_check": null, "dynamic_last4": null, "metadata": {}, "customer": null }, "balance_transaction": "txn_6HAw4bAUUZ6trA", "failure_message": null, "failure_code": null, "amount_refunded": 0, "customer": null, "invoice": null, "description": "PhillipSenn#gmail.com", "dispute": null, "metadata": {}, "statement_descriptor": null, "fraud_details": {}, "receipt_email": null, "receipt_number": null, "shipping": null, "destination": null, "application_fee": null, "refunds": { "object": "list", "total_count": 0, "has_more": false, "url": "/v1/charges/ch_6HAwRK92OsQPoA/refunds", "data": [] } }
As well as "YES" and "YES".
If I do a DeserializeJSON(response), then I get a 500 error.
So, how do I put this simple value string, which looks like json, in a ColdFusion structure so that I can reference response.id and response.paid?
Something's wrong in your CF or web server setup.
I tested your json with CF11 on tryCF and it works.
<cfscript>
json = '{ "id": "ch_6HAwRK92OsQPoA", "object": "charge", "created": 1432149035, "livemode": false, "paid": true, "status": "paid", "amount": 100, "currency": "usd", "refunded": false, "source": { "id": "card_6HAwNGtbdzFdq0", "object": "card", "last4": "4242", "brand": "Visa", "funding": "credit", "exp_month": 12, "exp_year": 2015, "fingerprint": "I2nSF7gS79j9Zhei", "country": "US", "name": null, "address_line1": null, "address_line2": null, "address_city": null, "address_state": null, "address_zip": null, "address_country": null, "cvc_check": "pass", "address_line1_check": null, "address_zip_check": null, "dynamic_last4": null, "metadata": {}, "customer": null }, "captured": true, "card": { "id": "card_6HAwNGtbdzFdq0", "object": "card", "last4": "4242", "brand": "Visa", "funding": "credit", "exp_month": 12, "exp_year": 2015, "fingerprint": "I2nSF7gS79j9Zhei", "country": "US", "name": null, "address_line1": null, "address_line2": null, "address_city": null, "address_state": null, "address_zip": null, "address_country": null, "cvc_check": "pass", "address_line1_check": null, "address_zip_check": null, "dynamic_last4": null, "metadata": {}, "customer": null }, "balance_transaction": "txn_6HAw4bAUUZ6trA", "failure_message": null, "failure_code": null, "amount_refunded": 0, "customer": null, "invoice": null, "description": "PhillipSenn#gmail.com", "dispute": null, "metadata": {}, "statement_descriptor": null, "fraud_details": {}, "receipt_email": null, "receipt_number": null, "shipping": null, "destination": null, "application_fee": null, "refunds": { "object": "list", "total_count": 0, "has_more": false, "url": "/v1/charges/ch_6HAwRK92OsQPoA/refunds", "data": [] } }';
writeOutput(isJSon(json));
writeDump(DeserializeJSON(json));
</cfscript>
Run: http://trycf.com/gist/cb798ff697ac80396284/acf11

Notepad++ close non-whitespace

I have a JSON file from the Tweepy streaming API that contains 33K tweets. All of them have a "non-whitespace" problem that I couldn't find a solution for. If you copy and paste the text into pro.jsonlint.com, you can see that line 217 has an "EOF" problem. I can fix them using Notepad++, but first I need to understand what the problem is and how I can fix it.
Can somebody tell me where the EOF problem is and how to fix it with Notepad++?
Thanks,
{
"created_at": "Tue Mar 31 20:50:08 +0000 2015",
"id": 583008398612029440,
"id_str": "583008398612029440",
"text": "RT #kamalakmustafa: Hain bir sald\u0131r\u0131 sonucu \u015fehit d\u00fc\u015fen Savc\u0131m\u0131z #MehmetSelimKiraz 'a Allah'tan rahmet, ailesine sab\u0131rlar diliyorum. Millet\u2026",
"source": "\u003ca href=\"http:\/\/www.twitter.com\" rel=\"nofollow\"\u003eTwitter for Windows Phone\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 1337409816,
"id_str": "1337409816",
"name": "Atakan \u00c7etin",
"screen_name": "BrownnChild",
"location": "",
"url": null,
"description": "Bir nefesine bile h\u00fckmedemedi\u011fimiz bu d\u00fcnya i\u00e7in boyun mu b\u00fckece\u011fiz? M\u0130LL\u0130 G\u00d6R\u00dc\u015e!",
"protected": false,
"verified": false,
"followers_count": 246,
"friends_count": 116,
"listed_count": 0,
"favourites_count": 1077,
"statuses_count": 2613,
"created_at": "Mon Apr 08 19:31:06 +0000 2013",
"utc_offset": 10800,
"time_zone": "Athens",
"geo_enabled": true,
"lang": "tr",
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "131516",
"profile_background_image_url": "http:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif",
"profile_background_image_url_https": "https:\/\/abs.twimg.com\/images\/themes\/theme14\/bg.gif",
"profile_background_tile": true,
"profile_link_color": "009999",
"profile_sidebar_border_color": "EEEEEE",
"profile_sidebar_fill_color": "EFEFEF",
"profile_text_color": "333333",
"profile_use_background_image": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/579347764670296064\/6yuzFJPG_normal.jpg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/579347764670296064\/6yuzFJPG_normal.jpg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/1337409816\/1427653680",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"retweeted_status": {
"created_at": "Tue Mar 31 20:38:15 +0000 2015",
"id": 583005408828252160,
"id_str": "583005408828252160",
"text": "Hain bir sald\u0131r\u0131 sonucu \u015fehit d\u00fc\u015fen Savc\u0131m\u0131z #MehmetSelimKiraz 'a Allah'tan rahmet, ailesine sab\u0131rlar diliyorum. Milletimizin ba\u015f\u0131 sa\u011folsun.",
"source": "\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 338288811,
"id_str": "338288811",
"name": "Mustafa Kamalak",
"screen_name": "kamalakmustafa",
"location": "Ankara, T\u00fcrkiye",
"url": "http:\/\/www.saadet.org.tr\/kisi\/mustafa-kamalak",
"description": "Saadet Partisi Genel Ba\u015fkan\u0131\n Prof. Dr. Mustafa Kamalak",
"protected": false,
"verified": false,
"followers_count": 52358,
"friends_count": 2,
"listed_count": 163,
"favourites_count": 0,
"statuses_count": 1574,
"created_at": "Tue Jul 19 10:48:16 +0000 2011",
"utc_offset": 10800,
"time_zone": "Istanbul",
"geo_enabled": false,
"lang": "tr",
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "FFFFFF",
"profile_background_image_url": "http:\/\/pbs.twimg.com\/profile_background_images\/378800000181217982\/FBktDfqo.jpeg",
"profile_background_image_url_https": "https:\/\/pbs.twimg.com\/profile_background_images\/378800000181217982\/FBktDfqo.jpeg",
"profile_background_tile": false,
"profile_link_color": "0084B4",
"profile_sidebar_border_color": "000000",
"profile_sidebar_fill_color": "DDFFCC",
"profile_text_color": "333333",
"profile_use_background_image": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/436119860225908737\/76GmiIPj_normal.jpeg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/436119860225908737\/76GmiIPj_normal.jpeg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/338288811\/1423922000",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"retweet_count": 122,
"favorite_count": 152,
"entities": {
"hashtags": [
{
"text": "MehmetSelimKiraz",
"indices": [
45,
62
]
}
],
"trends": [
],
"urls": [
],
"user_mentions": [
],
"symbols": [
]
},
"favorited": false,
"retweeted": false,
"possibly_sensitive": false,
"filter_level": "low",
"lang": "tr"
},
"retweet_count": 0,
"favorite_count": 0,
"entities": {
"hashtags": [
{
"text": "MehmetSelimKiraz",
"indices": [
65,
82
]
}
],
"trends": [
],
"urls": [
],
"user_mentions": [
{
"screen_name": "kamalakmustafa",
"name": "Mustafa Kamalak",
"id": 338288811,
"id_str": "338288811",
"indices": [
3,
18
]
}
],
"symbols": [
]
},
"favorited": false,
"retweeted": false,
"possibly_sensitive": false,
"filter_level": "low",
"lang": "tr",
"timestamp_ms": "1427835008658"
}
{
"created_at": "Tue Mar 31 20:50:08 +0000 2015",
"id": 583008398649769984,
"id_str": "583008398649769984",
"text": "RT #BekirDeveli: #MehmetSelimKiraz",
"source": "\u003ca href=\"http:\/\/twitterhizmetcisi.com\" rel=\"nofollow\"\u003eTranslation Mobile\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 1284184062,
"id_str": "1284184062",
"name": "K\u00fcrsad Celik",
"screen_name": "KrsadC",
"location": "Istanbul-Den\u0131zl\u0131",
"url": null,
"description": null,
"protected": false,
"verified": false,
"followers_count": 166,
"friends_count": 452,
"listed_count": 1,
"favourites_count": 1892,
"statuses_count": 2723,
"created_at": "Wed Mar 20 20:23:46 +0000 2013",
"utc_offset": 10800,
"time_zone": "Athens",
"geo_enabled": true,
"lang": "tr",
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "FFF04D",
"profile_background_image_url": "http:\/\/pbs.twimg.com\/profile_background_images\/378800000168605771\/dgYo8Miy.jpeg",
"profile_background_image_url_https": "https:\/\/pbs.twimg.com\/profile_background_images\/378800000168605771\/dgYo8Miy.jpeg",
"profile_background_tile": true,
"profile_link_color": "0099CC",
"profile_sidebar_border_color": "FFFFFF",
"profile_sidebar_fill_color": "F6FFD1",
"profile_text_color": "333333",
"profile_use_background_image": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/437899583348572160\/FamxSRwB_normal.jpeg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/437899583348572160\/FamxSRwB_normal.jpeg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/1284184062\/1393238552",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"retweeted_status": {
"created_at": "Tue Mar 31 20:39:47 +0000 2015",
"id": 583005791340404737,
"id_str": "583005791340404737",
"text": "#MehmetSelimKiraz",
"source": "\u003ca href=\"http:\/\/twitter.com\/download\/iphone\" rel=\"nofollow\"\u003eTwitter for iPhone\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 607600160,
"id_str": "607600160",
"name": "Bekir Develi",
"screen_name": "BekirDeveli",
"location": "\u0130stanbul\/Turkey",
"url": "http:\/\/www.bekirdeveli.com",
"description": null,
"protected": false,
"verified": false,
"followers_count": 225574,
"friends_count": 2{
"created_at": "Tue Mar 31 20:50:14 +0000 2015",
"id": 583008421152198656,
"id_str": "583008421152198656",
"text": "#mehmetselimkiraz https:\/\/t.co\/ly9MROZ6Yg",
"source": "\u003ca href=\"http:\/\/instagram.com\" rel=\"nofollow\"\u003eInstagram\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 110186170,
"id_str": "110186170",
"name": "\u015eeyma Ceylan",
"screen_name": "cylnm",
"location": "ceylan",
"url": null,
"description": "\u0648\u0642\u062f\u0645 \u0643\u0644 \u0627\u0644\u0627\u0646\u0632\u0639\u0627\u062c \u0631\u0627\u062d\u0629 \u0627\u0644\u0644\u0647I http:\/\/Instagram.com\/seymaceylan_",
"protected": false,
"verified": false,
"followers_count": 1759,
"friends_count": 780,
"listed_count": 15,
"favourites_count": 7472,
"statuses_count": 9036,
"created_at": "Sun Jan 31 17:13:48 +0000 2010",
"utc_offset": -36000,
"time_zone": "Hawaii",
"geo_enabled": true,
"lang": "tr",
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "FF6699",
"profile_background_image_url": "http:\/\/pbs.twimg.com\/profile_background_images\/866467818\/24dcf5a87af2b92f9ff76fead916e4ef.jpeg",
"profile_background_image_url_https": "https:\/\/pbs.twimg.com\/profile_background_images\/866467818\/24dcf5a87af2b92f9ff76fead916e4ef.jpeg",
"profile_background_tile": true,
"profile_link_color": "9266CC",
"profile_sidebar_border_color": "FFFFFF",
"profile_sidebar_fill_color": "E5507E",
"profile_text_color": "362720",
"profile_use_background_image": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/574664598248419329\/Budj4Oq3_normal.jpeg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/574664598248419329\/Budj4Oq3_normal.jpeg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/110186170\/1410798690",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"retweet_count": 0,
"favorite_count": 0,
"entities": {
"hashtags": [
{
"text": "mehmetselimkiraz",
"indices": [
0,
17
]
}
],
"trends": [
],
"urls": [
{
"url": "https:\/\/t.co\/ly9MROZ6Yg",
"expanded_url": "https:\/\/instagram.com\/p\/05-6PQPQ-j\/",
"display_url": "instagram.com\/p\/05-6PQPQ-j\/",
"indices": [
18,
41
]
}
],
"user_mentions": [
],
"symbols": [
]
},
"favorited": false,
"retweeted": false,
"possibly_sensitive": true,
"filter_level": "low",
"lang": "und",
"timestamp_ms": "1427835014032"
}
You need to match individual JSON objects, and I think you can just match them and copy wherever you need using this regex:
\{(?:[^{}]+|(?0))*\}
It works in Notepad++ 6.7.5, and it will select the text from the first { up to the corresponding closing }.