Is there any way to create excel objects using pywin32 even if MS-OFFICE suite is not installed on windows based system ?
This will do what you want.
from openpyxl import Workbook
wb = Workbook()
# grab the active worksheet
ws = wb.active
# Data can be assigned directly to cells
ws['A1'] = 42
# Rows can also be appended
ws.append([1, 2, 3])
# Python types will automatically be converted
import datetime
ws['A2'] = datetime.datetime.now()
# Save the file
wb.save("C:\\Users\\your_path_here\\Desktop\\sample.xlsx")
See this link for details.
https://pypi.python.org/pypi/openpyxl
Related
I'm using the psychopg2 module to make queries against QuestDB from Python. I have had some trouble using the copy_from() cursor object to get CSV data into a table. What's the best way to get this into the database?
I'm trying the following:
import pandas as pd
import numpy as np
import psycopg2
import os
conn = psycopg2.connect(user="admin",
password="quest",
host="127.0.0.1",
port="8812",
database="qdb")
cursor = conn.cursor()
dest_table = "eur_fr_bulk"
temp_dataframe = "./temp_dataframe.csv"
# input
df = pd.read_csv("./data/eur_fr.csv")
df.to_csv(temp_dataframe, index_label='id', header=False)
f = open(temp_dataframe, 'r')
cursor = conn.cursor()
try:
cursor.copy_from(f, dest_table)
conn.commit()
except (Exception, psycopg2.DatabaseError) as error:
os.remove(temp_dataframe)
print("Error: %s" % error)
conn.rollback()
cursor.close()
cursor.close()
The copy_from() wrapper in psychopg2 is executing some SQL in the background that's not yet supported in QuestDB as of yet, specifically, it will run
COPY my_table FROM stdin WITH DELIMITER AS ' ' NULL AS '\\N'
The DELIMITER keyword is not yet implemented. As a workaround, you can either make the request via HTTP in python, which might be the most convenient:
import requests
csv = {'data': ('my_table_import', open('./data/eur_fr.csv', 'r'))}
server = 'http://localhost:9000/imp'
response = requests.post(server, files=csv)
print(response.text)
or you can specify a copy directory in the server.conf file which allows loading CSV files. This is documented on the COPY documentation page.
I'm trying to speed up a process of webscraping by sending raw data to python in lieu of correctly formatted data.
Current data is received as an excel file with data formatted as:
26 EXAMPLE RD EXAMPLEVILLE SA 5000
Data is formatted in excel via macros to:
Replace all spaces with hyphen
Change all text to lower-case
Paste text onto end of http://example.com/property/
Formatted data is http://www.example.com/property/26-example-rd-exampleville-sa-5000
What i'm trying to accomplish:
Get python to go into excel sheet and follow formatting rules listed above, then pass the records to the scraper.
Here is the code I have been trying to compile - please go easy i am VERY new.
Any advice or reading sources related to python formatting would be appreciated.
#!/usr/bin/python
# -*- coding: utf-8 -*-
import requests
import csv
from lxml import html
import xlrd
# URL_BUILDER
# Source File for UNFORMATTED DATA
file_location = "C:\Python27\Projects\REA_SCRAPER\NewScraper\ScrapeFile.xlsx"
workbook = xlrd.open_workbook(file_location)
sheet = workbook.sheet_by_name('((PythonScraperDNC))')
# REA_SCRAPER
# Pass Data from URL_BUILDER to URL_LIST []
URL_LIST = []
# Search Phrase to capture suitable URL's for Scraping
text2search = \
'''<p class="property-value__title">
RECENTLY SOLD
</p>'''
# Write Sales .CSV file
with open('Results.csv', 'wb') as csv_file:
writer = csv.writer(csv_file)
for (index, url) in enumerate(URL_LIST):
page = requests.get(url)
print '<Scanning Url For Sale>'
if text2search in page.text:
tree = html.fromstring(page.content)
(title, ) = (x.text_content() for x in tree.xpath('//title'))
(price, ) = (x.text_content() for x in tree.xpath('//div[#class="property-value__price"]'))
(sold, ) = (x.text_content().strip() for x intree.xpath('//p[#class="property-value__agent"]'))
writer.writerow([title, price, sold])
else:
writer.writerow(['No Sale'])
If you're just trying to figure out how to do the formatting in Python:
text = '26 EXAMPLE RD EXAMPLEVILLE SA 5000'
url = 'http://example.com/property/' + text.replace(' ', '-').lower()
print(url)
# Output:
# http://example.com/property/26-example-rd-exampleville-sa-5000
right so I'm working on a Python script (Python 2.7) that will extract the metadata from OLE files. I am using OleFileIO_PL and it work perfectly file with OLE files 97 - 2003, but any later then that it just says that it is not an OLE2 file type.
Any way I can modify my code to support both .doc and .docx ? Same with .ppt and .pptx etc.
Thank you in advance
Source Code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import OleFileIO_PL
import StringIO
import optparse
import sys
import os
def printMetadata(fileName):
data = open(fileName, 'rb').read()
f = StringIO.StringIO(data)
OLEFile = OleFileIO_PL.OleFileIO(f)
meta = OLEFile.get_metadata()
print('Author:', meta.author)
print('Title:', meta.title)
print('Creation date:', meta.create_time)
meta.dump()
OLEFile.close()
def main():
parser = optparse.OptionParser('usage = -F + Name of the OLE file with the extention For example: python Ms Office Metadata Extraction Script.py -F myfile.docx ')
parser.add_option('-F', dest='fileName', type='string',\
help='specify OLE (MS Office) file name')
(options, args) = parser.parse_args()
fileName = options.fileName
if fileName == None:
print parser.usage
exit(0)
else:
printMetadata(fileName)
if __name__ == '__main__':
main()
To answer your question, this is because the newer MS Office 2007+ files (docx, xlsx, xlsb, pptx, etc) have a completely different structure from the legacy MS Office 97-2003 formats.
It is mainly a collection of XML files within a Zip archive. So with a little bit of work, you can extract everything you need using zipfile and ElementTree from the standard library.
If openxmllib does not work for you, you may try other solutions:
officedissector: https://www.officedissector.com/
python-opc: https://pypi.python.org/pypi/python-opc
openpack: https://pypi.python.org/pypi/openpack
paradocx: https://pypi.python.org/pypi/paradocx
BTW, OleFileIO_PL has been renamed to olefile, and the new project page is https://github.com/decalage2/olefile
I was looking at the answer given here How do I call an Excel macro from Python using xlwings?. I implemented that solution, but testing about the possibilities of calling a VBA I was wondering if would be possible to call a VBA function that uses RunPython from my python script.
To illustrate this I have three files in the folder
-- myproject.xlsm
-- myproject.py
-- Hello_World.py
VBA in myproject.xlsm
Sub SampleCall()
RunPython ("import myproject; myproject.xl_main()")
End Sub
Sub Hello()
RunPython ("import Hello_World; Hello_World.xl_test()")
End Sub
Sub Message()
MsgBox ("ok")
End Sub
myproject.py
from xlwings import Workbook, Range, Application
import os
import sys
def xl_main():
# Create a WorkBook Object
wb = Workbook('myproject.xlsm')
Range('B1').value = 17
#Call a VBA Function
Application(wb).xl_app.Run("Message")
Application(wb).xl_app.Run("Hello")
if __name__ == "__main__":
if not hasattr(sys, 'frozen'):
# The next two lines are here to run the example from Python
# Ignore them when called in the frozen/standalone version
#TODO: Change the name of excel file
path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'myproject.xlsm'))
Workbook.set_mock_caller(path)
xl_main()
Hello_World.py
import xlwings as xw
from xlwings import Workbook, Range
import os
def xl_test():
# Create a WorkBook Object
wb = xw.Workbook.caller()
Range('A1').value = "Hello World"
if __name__ == '__main__':
# To run from Python, not needed when called from Excel.
# Expects the Excel file next to this source file, adjust accordingly.
path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'myproject.xlsm'))
xw.Workbook.set_mock_caller(path)
xl_test()
The problem:
The problem is that at the time I run the Hello_World.py script It works well, but when I executed the SampleCall() Sub in VBA it crash and gives a blank error.
you can do something like the following
xlwings - python code
wb.xl_workbook.Application.Run("someFunction", param1)
vba fucntion
Public Sub someFunction(param1)
...
End Sub
I've read though the other stack overflow questions regarding this but it doesn't answer my issue, so down vote away. Its version 2.7.
All I want to do is use python to convert a PDF to a Word doc. At minimum convert to text so I can copy and paste into a word doc.
This is the code I have so far. All it prints is the female gender symbol.
Is my code wrong? Am I approaching this wrong? Do some PDFs just not work with PDFMiner? Do you know of any other alternatives to accomplish my goal of converting a PDF to Word, besides using PyPDF2 or PDFMiner?
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from cStringIO import StringIO
def convert_pdf_to_txt(path):
rsrcmgr = PDFResourceManager()
retstr = StringIO()
codec = 'utf-8'
laparams = LAParams()
device = TextConverter(rsrcmgr, retstr, codec=codec, laparams=laparams)
fp = file('Bottom Dec.pdf', 'rb')
interpreter = PDFPageInterpreter(rsrcmgr, device)
password = ""
maxpages = 0
caching = True
pagenos=set()
for page in PDFPage.get_pages(fp, pagenos, maxpages=maxpages, password=password,caching=caching, check_extractable=True):
interpreter.process_page(page)
text = retstr.getvalue()
fp.close()
device.close()
retstr.close()
return text
print convert_pdf_to_txt(1)
from pdf2docx import Converter
pdf_file = 'E:\Muhammad UMER LAR.pdf'
doc_file= 'E:\Lari.docx'
c=Converter(pdf_file)
c.convert(doc_file)
c.close()
Another alternative solution is Aspose.Words Cloud SDK for Python, you can install it from pip for PDF to DOC conversion.
import asposewordscloud
import asposewordscloud.models.requests
api_client = asposewordscloud.ApiClient()
api_client.configuration.host = 'https://api.aspose.cloud'
# Get AppKey and AppSID from https://dashboard.aspose.cloud/
api_client.configuration.api_key['api_key'] = 'xxxxxxxxxxxxxxxxxxxxx' # Put your appKey here
api_client.configuration.api_key['app_sid'] = 'xxxxxxxxx-xxxx-xxxxx-xxxx-xxxxxxxxxx' # Put your appSid here
words_api = asposewordscloud.WordsApi(api_client)
filename = '02_pages.pdf'
remote_name = 'TestPostDocumentSaveAs.pdf'
dest_name = 'TestPostDocumentSaveAs.doc'
#upload PDF file to storage
request_stoarge = asposewordscloud.models.requests.UploadFileRequest(filename,remote_name)
response = words_api.upload_file(request_stoarge)
#Convert PDF to DOC and save to storage
save_options = asposewordscloud.SaveOptionsData(save_format='doc', file_name=dest_name)
request = asposewordscloud.models.requests.SaveAsRequest(remote_name, save_options)
result = words_api.save_as(request)
print("Result {}".format(result))
I'm developer evangelist at Aspose.