Django Scrapy custom pipeline save images to model - django

I have a scrapy custom pipeline to save images to django model. I have overide some methods from the Scrapy ImagePipeline but still images don't saved to model.What method i must ovveride to save images to the django model ?
class SearchImagesPipeline(ImagesPipeline):
def file_path(self, request, response=None, info=None, *, item=None):
filename = request.url.split('/')[-1]
domain = urlparse(request.url).netloc
return f"{domain}/{filename}"
#domain = urlparse(request.url).netloc
#return f"full/{domain}/{filename}"
def file_downloaded(self, response, request, info, *, item=None):
return self.image_downloaded(response, request, info, item=item)
def image_downloaded(self, response, request, info, *, item=None):
checksum = None
for path, image, buf in self.get_images(response, request, info, item=item):
if checksum is None:
buf.seek(0)
checksum = md5sum(buf)
width, height = image.size
self.store.persist_file(
path,
buf,
info,
meta={"width": width, "height": height},
headers={"Content-Type": "image/jpeg"},
)
# create the domai model
ddomain, d_created = Domain.objects.get_or_create(
title=urlparse(request.url).netloc
)
# create the page model
page_obj, c_created = Page.objects.get_or_create(
domain=ddomain,
url=request.url,
)
# save image to the MediaFile model
media_content = ContentFile(path)
MediaFile.objects.create(
page=page_obj,
media_file=media_content
)
return checksum

Related

How to pass data saved from a POST method to the GET method using REST API Django (without a model)?

I have created an API that allows me to upload an image using the POST method in POSTMAN. After submission, I want to display that image name after making a GET request. I am not using any model and I don't intend to grab the image from the directory it is stored in; since I will be uploading images in a server later.
I have looked at multiple sources. A few examples are this, and this.
This is my current code so far but not successful:
views.py:
class API(APIView):
parser_classes = (MultiPartParser,)
def get(self, request, *args, **kwargs):
name = self.request.GET.get('image')
if name:
return Response({"img_name": name}, status=200)
return Response({"img_name" : None}, status = 400)
def post(self, request):
file = self.request.data
img_file = file['image'] #store the image data in this variable
if img_file:
uploaded_file = img_file
img = [{"image_name": uploaded_file}]
serializer = ImgSerializer(img, many = True).data
return Response(serializer, status = 200)
else:
return Response("Please upload", status = 400)
serializers.py:
from rest_framework import serializers
class ImgSerializer(serializers.Serializer):
image_name = serializers.CharField()
My expected result within GET request should be like this:
{'image_name' : 'image_name_from_POST_Request'}
But I am getting this result instead:
None
How can I pass data from the POST request to the GET request using Django's rest framework? Is there an efficient way to deploy this requirement without using a model?
I figured it out. I just created a JSON file in the POST method and stored the necessary data in it. Finally, in order to view the data within the GET method, I opened the file and returned it as a Response.
views.py:
class API(APIView):
parser_classes = (MultiPartParser,)
def get(self, request):
with open('data.txt') as json_file:
data = json.load(json_file)
if data:
return Response(data, status=200)
return Response({"name" : None}, status = 400)
def post(self, request):
posted_file = self.request.data
img_file = posted_file['image']
if img_file:
uploaded_file = img_file
data = [{"image_name": uploaded_file}]
json_data = {"image_name": uploaded_file}
data = {}
data['key'] = []
data['key'].append(json_data)
with open('data.txt', 'w') as outfile:
json.dump(image, outfile)
serializer = ImgSerializer(image, many = True).data
return Response(serializer, status = 200)
else:
return Response(serializer.errors, status = 400)

Vimeo 'Replace' API Endpoint Not Changing Thumbnail

I am using Vimeo's API for the users of my app to upload videos, or replace their existing video with a new one. I am using a Vimeo Client to help me make the calls in my Django Application. Uploading works without any issues, but when I try to replace an existing video with a new one, the thumbnail stays as the old video. If you play the video, it will play the new one, but the thumbnail never changes.
Model Method that Uploads/Replaces
def vimeo_upload(self):
media_file = self.video_file
if media_file and os.path.exists(media_file.path):
v = vimeo.VimeoClient(token=settings.VIMEO_ACCESS_TOKEN, key=settings.VIMEO_API_KEY,
secret=settings.VIMEO_API_SECRET)
if self.video_url is None:
try:
video_uri = v.upload(media_file.path)
except AssertionError as exc:
logging.error('Vimeo Error: %s Video: %s' % (exc, media_file.path))
else:
self.video_url = video_uri
else:
try:
v.replace(video_uri=self.video_url, filename=media_file.path)
except Exception as exc:
self.video_url = None
logging.error('Vimeo Replace Error: %s Video: %s' % (exc, media_file.path))
# set the video title, description, etc.
if self.video_url:
try:
# convert locale from en-us form to en
v.patch(self.video_url, data={'description': self.customer.full_name, })
except Exception as exc:
logging.error('Vimeo Patch Error: %s Video: %s' % (exc, media_file.path))
Vimeo Client Model, and UploadVideoMixin
class UploadVideoMixin(object):
"""Handle uploading a new video to the Vimeo API."""
UPLOAD_ENDPOINT = '/me/videos'
REPLACE_ENDPOINT = '{video_uri}/files'
def upload(self, filename, upgrade_to_1080=False):
"""Upload the named file to Vimeo."""
ticket = self.post(
self.UPLOAD_ENDPOINT,
data={'type': 'streaming',
'upgrade_to_1080': 'true' if upgrade_to_1080 else 'false'},
params={'fields': 'upload_link,complete_uri'})
return self._perform_upload(filename, ticket)
def replace(self, video_uri, filename, upgrade_to_1080=False):
"""Replace the video at the given uri with the named source file."""
uri = self.REPLACE_ENDPOINT.format(video_uri=video_uri)
ticket = self.put(
uri,
data={'type': 'streaming',
'upgrade_to_1080': 'true' if upgrade_to_1080 else 'false'},
params={'fields': 'upload_link,complete_uri'})
return self._perform_upload(filename, ticket)
def _perform_upload(self, filename, ticket):
"""Take an upload ticket and perform the actual upload."""
if ticket.status_code != 201:
raise UploadTicketCreationFailure(ticket, "Failed to create an upload ticket")
ticket = ticket.json()
# Perform the actual upload.
target = ticket['upload_link']
last_byte = 0
# Try to get size of obj by path. If provided obj is not a file path
# find the size of file-like object.
try:
size = os.path.getsize(filename)
with io.open(filename, 'rb') as f:
while last_byte < size:
try:
self._make_pass(target, f, size, last_byte)
except requests.exceptions.Timeout:
# If there is a timeout here, we are okay with it, since
# we'll check and resume.
pass
last_byte = self._get_progress(target, size)
except TypeError:
size = len(filename.read())
f = filename
while last_byte < size:
try:
self._make_pass(target, f, size, last_byte)
except requests.exceptions.Timeout:
# If there is a timeout here, we are okay with it, since
# we'll check and resume.
pass
last_byte = self._get_progress(target, size)
# Perform the finalization and get the location.
finalized_resp = self.delete(ticket['complete_uri'])
if finalized_resp.status_code != 201:
raise VideoCreationFailure(finalized_resp, "Failed to create the video")
return finalized_resp.headers.get('Location', None)
def _get_progress(self, upload_target, filesize):
"""Test the completeness of the upload."""
progress_response = self.put(
upload_target,
headers={'Content-Range': 'bytes */*'})
range_recv = progress_response.headers.get('Range', None)
_, last_byte = range_recv.split('-')
return int(last_byte)
def _make_pass(self, upload_target, f, size, last_byte):
"""Make a pass at uploading.
This particular function may do many things. If this is a large upload
it may terminate without having completed the upload. This can also
occur if there are network issues or any other interruptions. These
can be recovered from by checking with the server to see how much it
has and resuming the connection.
"""
response = self.put(
upload_target,
timeout=None,
headers={
'Content-Length': str(size),
'Content-Range': 'bytes: %d-%d/%d' % (last_byte, size, size)
}, data=f)
if response.status_code != 200:
raise VideoUploadFailure(response, "Unexpected status code on upload")
class VimeoClient(ClientCredentialsMixin, AuthorizationCodeMixin, UploadMixin):
"""Client handle for the Vimeo API."""
API_ROOT = "https://api.vimeo.com"
HTTP_METHODS = set(('head', 'get', 'post', 'put', 'patch', 'options', 'delete'))
ACCEPT_HEADER = "application/vnd.vimeo.*;version=3.2"
USER_AGENT = "pyvimeo 0.3.10; (http://developer.vimeo.com/api/docs)"
def __init__(self, token=None, key=None, secret=None, *args, **kwargs):
"""Prep the handle with the authentication information."""
self.token = token
self.app_info = (key, secret)
self._requests_methods = dict()
# Make sure we have enough info to be useful.
assert token is not None or (key is not None and secret is not None)
# Internally we back this with an auth mechanism for Requests.
#property
def token(self):
return self._token.token
#token.setter
def token(self, value):
self._token = _BearerToken(value) if value else None
def __getattr__(self, name):
"""This is where we get the function for the verb that was just
requested.
From here we can apply the authentication information we have.
"""
if name not in self.HTTP_METHODS:
raise AttributeError("%r is not an HTTP method" % name)
# Get the Requests based function to use to preserve their defaults.
request_func = getattr(requests, name, None)
if request_func is None:
raise AttributeError(
"%r could not be found in the backing lib" % name
)
#wraps(request_func)
def caller(url, jsonify=True, **kwargs):
"""Hand off the call to Requests."""
headers = kwargs.get('headers', dict())
headers['Accept'] = self.ACCEPT_HEADER
headers['User-Agent'] = self.USER_AGENT
if jsonify \
and 'data' in kwargs \
and isinstance(kwargs['data'], (dict, list)):
kwargs['data'] = json.dumps(kwargs['data'])
headers['Content-Type'] = 'application/json'
kwargs['timeout'] = kwargs.get('timeout', (1, 30))
kwargs['auth'] = kwargs.get('auth', self._token)
kwargs['headers'] = headers
if not url[:4] == "http":
url = self.API_ROOT + url
response = request_func(url, **kwargs)
if response.status_code == 429:
raise APIRateLimitExceededFailure(
response, 'Too many API requests'
)
return response
return caller
class _BearerToken(requests.auth.AuthBase):
"""Model the bearer token and apply it to the request."""
def __init__(self, token):
self.token = token
def __call__(self, request):
request.headers['Authorization'] = 'Bearer ' + self.token
return request

errors when using tastypie resource in view

here is my resource:
class ImageResource(ModelResource):
album = fields.ForeignKey(AlbumResource, 'album')
upload_by = fields.ForeignKey(UserResource, 'upload_by')
class Meta:
always_return_data=True
filtering = {
"album": ('exact',),
}
queryset = Image.objects.all()
cache = SimpleCache(timeout=100)
resource_name = 'image'
authorization = ImageAuthorization()
class ImageAuthorization(Authorization):
def read_list(self, object_list, bundle):
# This assumes a ``QuerySet`` from ``ModelResource``.
userprofile = UserProfile.objects.get(user = bundle.request.user)
album = Album.objects.filter(family=userprofile.family)
return object_list.filter(album__in=album)
and when I try to use ImageResource in view like:
#csrf_exempt
def upload(request):
if request.method == 'POST':
if request.FILES:
uploadedfile = request.FILES
file = uploadedfile['item']
album = Album.objects.get(pk=int(request.POST['album_id']))
img = Image(
album = album,
name = file.name,
src=file,
upload_by = request.user,
)
# img.save()
ir = ImageResource()
uploaded_img = ir.obj_get(src=file)
print uploaded_img
return HttpResponse('True')
this will always rasie an error says
obj_get() takes exactly 2 arguments (1 given)
what's wrong with my code??? and how can I get the just uploaded image's resouce
Why are you trying to create instances of ImageResource? That makes no sense.
obj_get is a method for a tastypie resource, which is part of the resource flow chart. It expects a bundle object.
obj_get(self, bundle, **kwargs): ...
You do not have to create a resource on the fly for every image you upload, you don't even need to instantiate one, as the url module does this for you.
I recommend you re-read the documentation and register an ImageResource and/or AlbumResource accordingly. Those resources will pickup uploaded images or albums automatically after you register the resources to your urls module.

Adding root element to json response (django-rest-framework)

I am trying to determine the best way to add a root element to all json responses using django and django-rest-framework.
I think adding a custom renderer is the best way to accomplish what I want to achieve and this is what I have come up with so far:
from rest_framework.renderers import JSONRenderer
class CustomJSONRenderer(JSONRenderer):
#override the render method
def render(self, data, accepted_media_type=None, renderer_context=None):
#call super, as we really just want to mess with the data returned
json_str = super(CustomJSONRenderer, self).render(data, accepted_media_type, renderer_context)
root_element = 'contact'
#wrap the json string in the desired root element
ret = '{%s: %s}' % (root_element, json_str)
return ret
The tricky part now is dynamically setting the root_element based on the view that render() is being called from.
Any pointers/advice would be greatly appreciated,
Cheers
For posterity, below is the final solution. It has grown slightly from the original as it now reformats paginated results as well.
Also I should have specified before, that the reason for the JSON root element is for integration with an Ember front end solution.
serializer:
from rest_framework.serializers import ModelSerializer
from api.models import Contact
class ContactSerializer(ModelSerializer):
class Meta:
model = Contact
#define the resource we wish to use for the root element of the response
resource_name = 'contact'
fields = ('id', 'first_name', 'last_name', 'phone_number', 'company')
renderer:
from rest_framework.renderers import JSONRenderer
class CustomJSONRenderer(JSONRenderer):
"""
Override the render method of the django rest framework JSONRenderer to allow the following:
* adding a resource_name root element to all GET requests formatted with JSON
* reformatting paginated results to the following structure {meta: {}, resource_name: [{},{}]}
NB: This solution requires a custom pagination serializer and an attribute of 'resource_name'
defined in the serializer
"""
def render(self, data, accepted_media_type=None, renderer_context=None):
response_data = {}
#determine the resource name for this request - default to objects if not defined
resource = getattr(renderer_context.get('view').get_serializer().Meta, 'resource_name', 'objects')
#check if the results have been paginated
if data.get('paginated_results'):
#add the resource key and copy the results
response_data['meta'] = data.get('meta')
response_data[resource] = data.get('paginated_results')
else:
response_data[resource] = data
#call super to render the response
response = super(CustomJSONRenderer, self).render(response_data, accepted_media_type, renderer_context)
return response
pagination:
from rest_framework import pagination, serializers
class CustomMetaSerializer(serializers.Serializer):
next_page = pagination.NextPageField(source='*')
prev_page = pagination.PreviousPageField(source='*')
record_count = serializers.Field(source='paginator.count')
class CustomPaginationSerializer(pagination.BasePaginationSerializer):
# Takes the page object as the source
meta = CustomMetaSerializer(source='*')
results_field = 'paginated_results'
Credit to ever.wakeful for getting me 95% of the way there.
Personally, I wanted to add meta data to every api request for a certain object, regardless of whether or not it was paginated. I also wanted to simply pass in a dict object that I defined manually.
Tweaked Custom Renderer
class CustomJSONRenderer(renderers.JSONRenderer):
def render(self, data, accepted_media_type=None, renderer_context=None):
response_data = {}
# Name the object list
object_list = 'results'
try:
meta_dict = getattr(renderer_context.get('view').get_serializer().Meta, 'meta_dict')
except:
meta_dict = dict()
try:
data.get('paginated_results')
response_data['meta'] = data['meta']
response_data[object_list] = data['results']
except:
response_data[object_list] = data
response_data['meta'] = dict()
# Add custom meta data
response_data['meta'].update(meta_dict)
# Call super to render the response
response = super(CustomJSONRenderer, self).render(response_data, accepted_media_type, renderer_context)
return response
Parent Serializer and View Example
class MovieListSerializer(serializers.ModelSerializer):
class Meta:
model = Movie
meta_dict = dict()
meta_dict['foo'] = 'bar'
class MovieViewSet(generics.ListAPIView):
queryset = Movie.objects.exclude(image__exact = "")
serializer_class = MovieListSerializer
permission_classes = (IsAdminOrReadOnly,)
renderer_classes = (CustomJSONRenderer,)
pagination_serializer_class = CustomPaginationSerializer
paginate_by = 10

Django file upload progress get cache returns None

Django file upload progress process json request getting null response
view.py
def upload_progress(request):
"""
A view to report back on upload progress.
Return JSON object with information about the progress of an upload.
Copied from:
http://djangosnippets.org/snippets/678/
See upload.py for file upload handler.
"""
#import ipdb
#ipdb.set_trace()
progress_id = ''
if 'X-Progress-ID' in request.GET:
progress_id = request.GET['X-Progress-ID']
elif 'X-Progress-ID' in request.META:
progress_id = request.META['X-Progress-ID']
if progress_id:
from django.utils import simplejson
cache_key = "%s_%s" % (request.META['REMOTE_ADDR'], progress_id)
data = cache.get(cache_key)
return HttpResponse(simplejson.dumps(data))
UploadProgressCachedHandler.py
from django.core.files.uploadhandler import FileUploadHandler
from django.core.cache import cache
class UploadProgressCachedHandler(FileUploadHandler):
"""
Tracks progress for file uploads.
The http post request must contain a header or query parameter, 'X-Progress-ID'
which should contain a unique string to identify the upload to be tracked.
Copied from:
http://djangosnippets.org/snippets/678/
See views.py for upload_progress function...
"""
def __init__(self, request=None):
super(UploadProgressCachedHandler, self).__init__(request)
self.progress_id = None
self.cache_key = None
def handle_raw_input(self, input_data, META, content_length, boundary, encoding=None):
self.content_length = content_length
if 'X-Progress-ID' in self.request.GET :
self.progress_id = self.request.GET['X-Progress-ID']
elif 'X-Progress-ID' in self.request.META:
self.progress_id = self.request.META['X-Progress-ID']
if self.progress_id:
self.cache_key = "%s_%s" % (self.request.META['REMOTE_ADDR'], self.progress_id )
cache.set(self.cache_key, {
'length': self.content_length,
'uploaded' : 0
})
def new_file(self, field_name, file_name, content_type, content_length, charset=None):
pass
def receive_data_chunk(self, raw_data, start):
if self.cache_key:
data = cache.get(self.cache_key)
data['uploaded'] += self.chunk_size
cache.set(self.cache_key, data)
#cache.set(self.cache_key, 5000)
return raw_data
def file_complete(self, file_size):
pass
def upload_complete(self):
if self.cache_key:
cache.delete(self.cache_key)
Iam setting cache with uploadProgressCacheHandler. But when tries to retrieve via json request .The data returning None object.'cache_key' is generating correctly.
Please help.
I guess you are trying to make the JSON request when the flow already has reached upload_complete. At that point cache_key has been deleted from cache (that's why it is empty).
Are you using the Django development server? I think this built-in server only allows to handle one request at time, which in your situation means that first the upload is completed and following the JSON request is processed. You can try with another server (ex: Apache) able to handle multiple requests.