Not able to post to solr server using python and requests - python-2.7

This is the code I am trying to implement:-
import requests
import tornado.ioloop
import tornado.web
import tornado.autoreload
import json
class MainHandler(tornado.web.RequestHandler):
    """Handle ``GET /`` by sending one sample book document to Solr."""

    def get(self):
        """POST the sample document as JSON and echo Solr's reply text."""
        book = {
            "id": "978-0641723445",
            "cat": ["book", "hardcover"],
            "name": "The Lightning Thief",
            "author": "Rick Riordan",
            "series_t": "Percy Jackson Olympians",
            "sequence_i": 1,
            "genre_s": "fantasy",
            "inStock": True,
            "price": 12.50,
            "pages_i": 384,
        }
        # The request body is a one-element JSON list holding the document.
        # No commit parameter is sent along with this request.
        body = json.dumps([book])
        solr_reply = requests.post(
            'http://localhost:8983/solr/update/json',
            data=body,
            headers={'content-type': 'application/json'},
        )
        self.write(solr_reply.text)
class QueryHandler(tornado.web.RequestHandler):
    """Handle ``GET /query`` by running a fixed search against Solr."""

    def get(self):
        """Query ``collection1`` for ``q=a`` and write back the decoded JSON."""
        select_url = 'http://localhost:8983/solr/collection1/select'
        # Sent as the query string: q=a, wt=json, indent=True.
        query = dict(q='a', wt='json', indent=True)
        solr_reply = requests.get(select_url, params=query)
        self.write(solr_reply.json())
# Routing table: "/" posts the sample document, "/query" runs the search.
application = tornado.web.Application(
    [
        (r"/", MainHandler),
        (r"/query", QueryHandler),
    ]
)

if __name__ == "__main__":
    # Serve on port 8888 and hand the loop to tornado.autoreload before
    # starting it.
    application.listen(8888)
    io_loop = tornado.ioloop.IOLoop.instance()
    tornado.autoreload.start(io_loop)
    io_loop.start()
I am able to query the solr server on localhost:8888/query
but on localhost:8888 where I am trying to post the data, I get this response from solr:-
{
responseHeader: {
status: 0,
QTime: 46
}
}
Data is not getting posted to the solr server.
Any suggestions ??

The request never tells Solr to commit. Documents become searchable only after a commit, so the update is accepted (status 0) but the data does not show up in queries. Add a commitWithin value (in milliseconds) to the update request. The following example POSTs data to Solr: it sets the JSON Content-Type header and passes the document, together with the commitWithin time, as a JSON string in the data parameter.
# Solr update request: the "add" command carries the document together with
# "commitWithin" (1000 here), all sent as one JSON string in the request body.
requests.post(
    "http://localhost:8983/solr/collection1/update?wt=json",
    headers={"Content-Type": "application/json"},
    data='{"add":{ "doc":{"id" : 14, "log_type" : "debug", "log_text" : "A transaction of debug from Kimy"},"boost":1.0,"overwrite":true, "commitWithin": 1000}}',
)
Response :
{"responseHeader":{"status":0,"QTime":128}}

Related

flask keep blocking post request sent with "fetch" in js [closed]

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 4 days ago.
Improve this question
I am trying to make a Chrome extension that sends DOM content back to the Flask server shown below, but I am facing this error: POST http://localhost:5000/api/json net::ERR_BLOCKED_BY_CLIENT (content.js:10), followed by: Uncaught (in promise) TypeError: Failed to fetch.
here is the code:
server.py:
from flask import Flask, request
from flask_cors import CORS
app = Flask(__name__)
cors = CORS(app, resources={r"/api/*": {"origins": "*"}})
app.config['WTF_CSRF_CHECK_DEFAULT'] = False
@app.route('/api/json', methods=['POST'])
def json():
    """Report selected fields of the posted JSON body as plain text.

    Picks ``language``, ``framework``, ``version_info.python``, the first
    entry of ``examples`` and ``boolean_test`` out of the request's JSON
    payload.  A field that is absent is reported as ``None``.

    Returns:
        str: a multi-line plain-text summary (the response is not JSON).
    """
    # An empty or missing payload behaves like an empty mapping: every
    # lookup below then falls through to None.
    request_data = request.get_json() or {}

    language = request_data['language'] if 'language' in request_data else None
    framework = request_data['framework'] if 'framework' in request_data else None

    python_version = None
    if 'version_info' in request_data:
        if 'python' in request_data['version_info']:
            python_version = request_data['version_info']['python']

    example = None
    if 'examples' in request_data:
        examples = request_data['examples']
        # Only a non-empty list has an element at index 0.
        if isinstance(examples, list) and examples:
            example = examples[0]

    boolean_test = request_data['boolean_test'] if 'boolean_test' in request_data else None

    return '''
The language value is: {}
The framework value is: {}
The Python version is: {}
The item at index 0 in the example list is: {}
The boolean value is: {}'''.format(language, framework, python_version, example, boolean_test)
# Start the server only when this file is executed directly.
if __name__ == '__main__':
    # NOTE(review): host="0.0.0.0" together with debug=True — confirm this is
    # only ever run for local development.
    app.run(host="0.0.0.0",port=5000,debug=True)
content.js:
// POST a fixed sample payload to the local endpoint, parse the reply as JSON
// and log it. Failures are not caught here; they surface as unhandled promise
// rejections.
(async () => {
  const payload = {
    "language" : "Python",
    "framework" : "Flask",
    "website" : "Scotch",
    "version_info" : {
      "python" : "3.9.0",
      "flask" : "1.1.2"
    },
    "examples" : ["query", "form", "json"],
    "boolean_test" : true
  };
  const requestOptions = {
    method: 'POST',
    headers: {
      'Accept': 'application/json',
      'Content-Type': 'application/json'
    },
    body: JSON.stringify(payload)
  };
  const reply = await fetch('http://localhost:5000/api/json', requestOptions);
  const parsed = await reply.json();
  console.log(JSON.stringify(parsed));
})();
and finally manifest.json:
{
"manifest_version": 3,
"name": "test extension",
"version": "1.0",
"content_scripts": [{
"matches":["*://*.google.com/*"],
"js": ["content.js"]
}],
"action": {
"default_title": "Click",
"default_popup": "popup.html"
},
"background":{
"service_worker":"background.js"
},
"permissions":["activeTab"]
}
I tried disabling CORS and CSRF protection, but neither worked. I couldn't send a POST request with Postman either, with these attributes set:
changed the type to "POST"; on the Body tab, switched to raw and selected JSON from the drop-down.
body:
{
"language" : "Python",
"framework" : "Flask",
"website" : "Scotch",
"version_info" : {
"python" : "3.9.0",
"flask" : "1.1.2"
},
"examples" : ["query", "form", "json"],
"boolean_test" : true
}
You are trying to parse a response in JSON format within your fetch call. But the server doesn't send any JSON. Use jsonify inside your endpoint. You can find documentation here.
from flask import jsonify
@app.post('/api/json')
def json():
    """Return selected fields of the posted JSON body as a JSON response.

    Picks ``language``, ``framework``, ``version_info.python``, the first
    entry of ``examples`` and ``boolean_test`` out of the request's JSON
    payload.  A field that is absent is returned as ``null``.

    Returns:
        A ``jsonify`` response with the keys ``language``, ``framework``,
        ``python_version``, ``example`` and ``boolean_test``.
    """
    # An empty or missing payload behaves like an empty mapping: every
    # lookup below then falls through to None.
    request_data = request.get_json() or {}

    language = request_data['language'] if 'language' in request_data else None
    framework = request_data['framework'] if 'framework' in request_data else None

    python_version = None
    if 'version_info' in request_data \
            and 'python' in request_data['version_info']:
        python_version = request_data['version_info']['python']

    example = None
    if 'examples' in request_data:
        examples = request_data['examples']
        # Only a non-empty list has an element at index 0.
        if isinstance(examples, list) and examples:
            example = examples[0]

    boolean_test = request_data['boolean_test'] if 'boolean_test' in request_data else None

    # ``example`` was computed but left out of the response before.
    return jsonify(
        language=language,
        framework=framework,
        python_version=python_version,
        example=example,
        boolean_test=boolean_test
    )

How to bind a path variable to the request body parameter?

I want to post a HTTP request like this:
http://localhost/context/{{name}}/{{age}}
And I want to bind these path variables to request body, if my request body is :
{
"name": "Frank",
"age": 18
}
the final request I want to send is:
http://localhost/context/Frank/18
so how to achieve this function in POSTMAN?
postman request
Provisioning your request in Postman (non-parametric url):
Parametric url
I don't think you need to pass variables in your route, since you're already passing them in the request body. However, here's a brief outline.
If you're working with Node.js (using Express) or any JS library, you can send the request like this (using axios):
// Request payload; its fields are also used to build the URL path.
const body = {
  "name": "Frank",
  "age": 18
};

/**
 * POST `body` to a URL whose path segments are taken from the body itself.
 *
 * @param {string} backendToken - bearer token sent in the Authorization header.
 * @returns {Promise<object>} the axios response.
 */
const requestHandler = async (backendToken) => {
  // Encode the values so they are safe to use as path segments.
  const name = encodeURIComponent(body.name);
  const age = encodeURIComponent(body.age);
  // The second argument of axios.post is the request body itself; wrapping it
  // as {data: body} would send a nested object instead.
  const serverResponse = await axios.post(`http://localhost/context/${name}/${age}`, body, {
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${backendToken}`
    }
  });
  return serverResponse;
};
Then, on the server side, with proper route definitions (for either the parametric or the non-parametric form of the URL), you'd create a route to handle the request as follows:
Using Express
import {Router} from "express";
const yourServerRouter = Router();
yourServerRouter.post(`yourPrimaryDomain/:name/:age`, function-to-handle-request)
If you're working with a python framework (Flask or Django), you can do thus:
Flask (using flask_restful):
urls = [f"your_base_url/name/age"]
from flask_restful import Resource, request
import json
class BioData(Resource):
    """Resource that handles POST requests whose URL contains ``context``."""

    def post(self):
        """Parse the JSON request body and pass it to ``context_handler``.

        Returns:
            The handler's result when the request URL contains ``"context"``;
            ``None`` otherwise.
        """
        if "context" not in request.url:
            return None
        # The parsed body was previously bound to ``request_body`` while the
        # next line read the undefined name ``request_data``.
        request_data = json.loads(request.data)
        return self.context_handler(request_data)

    def context_handler(self, request_data):
        """Extract ``name`` and ``age`` from the parsed request body."""
        # Look the values up by key: unpacking a dict directly yields its
        # keys, not its values.
        name, age = request_data["name"], request_data["age"]
        ...  # application-specific handling of name and age goes here
flask_app = Flask(__name__)
flask_app.add_resource(BioData, *urls)
Django (Using DRF - viewsets)
from rest_framework import viewsets, routers
class BioDataViewsets(viewsets.ModelViewSets):
#action(methods=["POST"], detail=False, url_name="context", url_path="context")
def context(self, *args, **kwargs):
clients_request = json.loads(self.request.body)
**define your path as above (for flask)**
context_router = routers.DefaultRouter()
context_router.register("name/age/", company_viewsets.CompanyViewSets)
url_patterns = [path(f"your_base_url/context", include(context_router.urls())]
Eventually I got some clues from this page. The request body can be parsed through Pre-request-Script, and the attributes of interest can be set as variables and referenced in URL.
// Pre-request script: expose fields of the raw JSON body as variables so the
// request URL can reference them as {{name}} and {{age}}.
// Reads the body through pm.request rather than the legacy `request` object.
const parsedBody = JSON.parse(pm.request.body.raw);
pm.variables.set("name", parsedBody.name);
pm.variables.set("age", parsedBody.age);
And use below form to apply variables set in the Pre-request-Script:
http://localhost/context/{{name}}/{{age}}
the request body is :
{
"name": "Frank",
"age": 18
}
postman request

Not able to connect to the translation API

url: https://translatorappeagle.cognitiveservices.azure.com/
I'm also using the key from my resource's page. I get a 404 (Not Found) error. I copied the code from the "Getting started" section for the Translator API.
Please let me know what to do.
import os, requests, uuid, json
def _require_env(var_name):
    """Return the value of environment variable *var_name*.

    Raises:
        Exception: if the variable is not present in the environment.
    """
    if var_name not in os.environ:
        raise Exception('Please set/export the environment variable: {}'.format(var_name))
    return os.environ[var_name]


# Names of the environment variables that supply the key and the endpoint.
key_var_name = 'TRANSLATOR_TEXT_SUBSCRIPTION_KEY'
endpoint_var_name = 'TRANSLATOR_TEXT_ENDPOINT'

subscription_key = _require_env(key_var_name)
endpoint = _require_env(endpoint_var_name)

# If you encounter any issues with the base_url or path, make sure
# that you are using the latest endpoint: https://learn.microsoft.com/azure/cognitive-services/translator/reference/v3-0-translate
path = '/translate?api-version=3.0'
params = '&to=de&to=it'
# The URL is the plain concatenation endpoint + path + params.
constructed_url = ''.join((endpoint, path, params))

headers = {
    'Ocp-Apim-Subscription-Key': subscription_key,
    'Content-type': 'application/json',
    # A fresh random UUID is generated for every run.
    'X-ClientTraceId': str(uuid.uuid4()),
}

# You can pass more than one object in body.
body = [{'text': 'Hello World!'}]

# POST the body as JSON, decode the JSON reply and pretty-print it.
request = requests.post(constructed_url, headers=headers, json=body)
response = request.json()
print(json.dumps(response, sort_keys=True, indent=4, separators=(',', ': ')))

How run an spider sequentially to sites that use session in scrapy

I want to scrape a web page that first sends an AjaxFormPost request, which opens a session, and then sends a _SearchResultGridPopulate request to populate the control I need to scrape; the response is JSON.
this is a fragment of my code:
def parse_AjaxFormPost(self, response):
self.logger.info("parse_AjaxFormPost")
page = response.meta['page']
header = {
'Accept':'*/*',
'Accept-Encoding':'gzip, deflate, br',
'Accept-Language':'en-US,en;q=0.8',
'Connection':'keep-alive',
'Content-Length':'14',
'Content-Type':'application/x-www-form-urlencoded',
'Cookie':'ASP.NET_SessionId=gq4dgcsl500y32xb1n2ciexq',
.
.
.
}
url = '<url>/Search/AjaxFormPost'
cities = ['city1','city2',...]
for city in cities:
formData = {
'City':city
}
re = scrapy.FormRequest(
url,
formdata=formData,
headers=header,
dont_filter=True,
callback=self.parse_GridPopulate
)
yield re
def parse_GridPopulate(self,response):
self.logger.info("parse_LookupPermitTypeDetails")
url = '<url>/Search//_SearchResultGridPopulate?Grid-page=2&Grid-size=10&Grid-CERT_KEYSIZE=128&Grid-CERT_SECRETKEYSIZE=2048&Grid-HTTPS_KEYSIZE=128&Grid-HTTPS_SECRETKEYSIZE=2048'
header = {
'Accept':'*/*',
'Accept-Encoding':'gzip, deflate, br',
'Accept-Language':'en-US,en;q=0.8',
'Connection':'keep-alive',
'Content-Length':'23',
'Content-Type':'application/x-www-form-urlencoded',
'Cookie':'ASP.NET_SessionId=gq4dgcsl500y32xb1n2ciexq',
.
.
.
}
formData = {
'page':'1',
'size':'10'
}
re = scrapy.FormRequest(
url,
formdata=formData,
headers=header,
dont_filter=True,
callback=self.parse
)
yield re
def parse(self, response):
    """Yield one ``RedmondPermitItem`` per entry of the response's ``data`` list.

    The response body is decoded as JSON; each row is logged and its
    ``item1`` and ``item2`` values are copied into a new item.
    """
    self.logger.info("parse_permit")
    rows = json.loads(response.body)["data"]
    for record in rows:
        self.logger.info(record)
        permit = RedmondPermitItem()
        for field in ("item1", "item2"):
            permit[field] = record[field]
        yield permit
The problem is that Scrapy sends requests concurrently. Each request in parse_AjaxFormPost opens a session, so by the time parse_GridPopulate (logged as parse_LookupPermitTypeDetails) runs, I get the session of the last request made in parse_AjaxFormPost. So if I have 10 cities, I end up with the information of the last city 10 times.
In settings I changed the configuration:
CONCURRENT_REQUESTS_PER_DOMAIN = 1
CONCURRENT_REQUESTS_PER_IP = 1
And it doesn't work. On other hand I thought in run the spider only for one city every time something like
from twisted.internet import reactor, defer
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
class MySpider(scrapy.Spider):
# Your first spider definition
...
...
configure_logging()
runner = CrawlerRunner()
@defer.inlineCallbacks
def crawl():
    """Run the spider once per city, one crawl after another, then stop the reactor."""
    cities = ['city1','city2',...]  # `...` stands in for the remaining city names
    for city in cities:
        # Yielding the Deferred makes inlineCallbacks wait for this crawl to
        # finish before the next one is started.
        yield runner.crawl(MySpider, city=city)
    reactor.stop()
crawl()
reactor.run() # the script will block here until the last crawl call is finished
Maybe this is the only solution, but I'm not sure. I would like to create a procedure for every site with this characteristic.
Any suggestions on how to solve this? Is it possible to achieve this only by configuring settings?
Thanks in advance.
Update1
I change the title because is important that is for sites that use session
This is a matter of understanding how concurrency works: since this isn't parallelism, you can still work sequentially, chaining the requests from one callback to the next. I would suggest something like this:
def parse_AjaxFormPost(self, response):
...
cities = ['city1','city2',...]
formData = {
'City':cities[0]
}
re = scrapy.FormRequest(
url,
formdata=formData,
headers=header,
dont_filter=True,
callback=self.parse_remaining_cities,
meta={'remaining_cities': cities[1:]}, # check the meta argument
)
yield re
def parse_remaining_cities(self, response):
    """Handle one city's response, then request the next remaining city.

    The cities still to be requested travel in
    ``response.meta['remaining_cities']``; each call takes the first one and
    passes the rest on, so only one request is scheduled at a time.
    """
    remaining_cities = response.meta['remaining_cities']
    if not remaining_cities:
        # Every city has been requested: end the chain here instead of
        # indexing an empty list.
        return
    current_city = remaining_cities[0]
    ...
    yield Request(
        ...,
        meta={'remaining_cities': remaining_cities[1:]},
        callback=self.parse_remaining_cities)
This way you are doing one request at a time and in a row from city to city.

passing post data to python api to make a twilio phone call

I would like to send some POST data to the url that will be called once the connection is made in a twilio call. Here is my code:
import urllib, urllib2
from twilio.rest import TwilioRestClient
# Twilio credentials (placeholders) and the REST client built from them.
account = "xxx"
token = "xxx"
client = TwilioRestClient(account, token)
# Base URL of the server that should receive the POST data.
server_url = "http://ec2-xx.xx.xx.compute-1.amazonaws.com/"
# Form fields to send; note the key named 'client' is unrelated to the
# TwilioRestClient variable above.
values = dict(name='mytime', \
appt_time='2:30 PM', \
location='Arizona Location', \
client = "Suwanee",
)
# URL-encode the fields into a form body.
data = urllib.urlencode(values)
# NOTE(review): this Request object is only constructed here; nothing in this
# snippet opens or sends it.
req = urllib2.Request(server_url, data)
# Place the call. NOTE(review): the `url` value has no "http://" scheme,
# unlike server_url above — confirm this is intended.
call = client.calls.create(to="123456789",
from_="987654321",
url="ec2-xx.xx.xx.compute-1.amazonaws.com/hello/")
How would I pass the urlencoded data to the url as a post?
ec2-xx.xx.xx.compute-1.amazonaws.com is running Django, and this server is able to see the POST data when I send the following command:
curl -X POST -d "client=mytime+Suwanee&time=2%3A30+PM&location=Suwanee+Location&name=mytime2" "http://127.0.0.1:8000/remind/"
How do I replicate this same behavior in the code snippet provided in the very beginning? I want to use POST only (not GET).
For your functionality, I would recommend you to use requests library.
Example of making a POST request using that library:
>>> payload = {'key1': 'value1', 'key2': 'value2'}
>>> r = requests.post("http://httpbin.org/post", data=payload)
>>> print r.text
{
...
"form": {
"key2": "value2",
"key1": "value1"
},
...
}