Regular expression to find function definitions in Python file - regex
I have a python project with function definitions written in CamelCase style.
I'm trying to write a script to convert them to snake_case style.
CaseFormatter class
import re
class CaseFormatter:
def __init__(self, file_directory):
self.first_cap_re = re.compile('(.)([A-Z][a-z]+)')
self.all_cap_re = re.compile('([a-z0-9])([A-Z])')
self.functions_dict = {}
self.file_directory = file_directory
self.file = open(file_directory, "r", encoding="UTF-8")
self.file_content = self.file.read()
self.names_dictionary = {}
def convert_camel_case_to_snake_case(self, name):
"""this function convert a camel case name to a snake case name """
s1 = self.first_cap_re.sub(r'\1_\2', name)
self.names_dictionary[name] = self.all_cap_re.sub(r'\1_\2', s1).lower()
def find_camel_case_functions(self):
"""this function finds camel case functions in a file """
s1 = re.findall("^\s*def (\S+)\s*\(\s*\S+\s*(?:,\s*\S+)*\):$", self.file_content)
return s1
def replace_functions(self):
# file_content = open(self.file_directory, "r", encoding="UTF-8").read()
self.file_content = self.file_content.replace(";", "")
for key, val in self.names_dictionary.items():
self.file_content = self.file_content.replace(key, val)
# print(self.file_content)
self.file = open(self.file_directory, "w", encoding="UTF-8")
print(self.file_content)
self.file.write(self.file_content)
testing the CaseFormatter class
import os
from CaseFormatter import *
walk_dir = 'some dirctory'
print('walk_dir = ' + walk_dir)
print('walk_dir (absolute) = ' + os.path.abspath(walk_dir))
for root, subDirs, files in os.walk(walk_dir):
print('--\nroot = ' + root)
for filename in files:
file_path = os.path.join(root, filename)
if filename.endswith('.py') and filename != "__init__.py":
print('\t- file %s (full path: %s)' % (filename, file_path))
case_formatter = CaseFormatter(file_path)
# print(case_formatter.find_camel_case_functions())
for function_name in case_formatter.find_camel_case_functions():
case_formatter.convert_camel_case_to_snake_case(function_name)
print(case_formatter.names_dictionary)
case_formatter.replace_functions()
I found the RegEx to find function definitions here.
When I tried it on my project it gave me no results, the RegEx didn't work as I think.
As an example of one of the files in the project:
class UnvoweledPattern(object):
String = ''
Rules = []
IDs = []
def __init__(self, string, rules, ids):
self.String = string
self.Rules = rules
self.IDs = ids
pass
def GetRootsStringsAndRules(self, string):
if (string == None):
string = self.String
rootStrings = []
rootRules = []
for j in range(len(self.Rules)):
rootRule = ''
rootString = ''
for k in range(len(self.Rules[j])):
rootRule += self.Rules[j][k]
if self.Rules[j][k].isdigit():
rootString += string[int(self.Rules[j][k]) - 1]
else:
rootString += self.Rules[j][k]
rootStrings.append(rootString)
rootRules.append(rootRule)
return [rootStrings, rootRules]
Related
\u0000 cannot be converted to text in django/postgreSQl
i have a project with django .on the host when i want to upload an image sometime error occurred(problem with specific images)! the below show how i resize uploaded images: def save_files_to_media(request, is_public=False, klass=None, conversation=None): from apps.file.models import File fs = FileSystemStorage() file_items = {} for data_item in request.data: file_match = re.search('^fileToUpload\[(\d+)\]$', data_item) if file_match and file_match.groups(): item_index = file_match.groups()[0] if item_index not in file_items: file_items[item_index] = {} file_items[item_index]['file_to_upload'] = request.data[data_item] else: optimize_match = re.search('^optimizeType\[(\d+)\]$', data_item) if optimize_match and optimize_match.groups(): item_index = optimize_match.groups()[0] if item_index not in file_items: file_items[item_index] = {} file_items[item_index]['optimize_type'] = request.data[data_item] files = [] for file_item_key in file_items: input_file = file_items[file_item_key]['file_to_upload'] # TODO: checking validation. if input_file.name is not exist optimize_type = file_items[file_item_key].get('optimize_type') file_uuid = str(uuid4()) if is_public: orig_filename, file_ext = splitext(basename(input_file.name)) directory_name = join(settings.MEDIA_ROOT, file_uuid) filename = file_uuid + file_ext else: directory_name = join(settings.MEDIA_ROOT, file_uuid) mkdir(directory_name) filename = input_file.name filepath = join(directory_name, filename) fs.save(filepath, input_file) is_optimized = False if optimize_type == 'image': is_success, filepath = image_optimizer(filepath) filename = basename(filepath) is_optimized = is_success file_obj = File( orig_name=filename, uuid=file_uuid, md5sum=get_md5sum(filepath), filesize=get_filesize(filepath), meta=get_meta_info(filepath), is_optimized=is_optimized, creator=request.user ) if is_public: file_obj.is_public = True else: file_obj.klass = klass file_obj.conversation = conversation file_obj.save() files.append(file_obj) return files here is the error i got with some images: unsupported Unicode escape sequence LINE 1: ..., 'ada90ead20f7994837dced344266cc51', 145216, '', '{"FileTyp... ^ DETAIL: \u0000 cannot be converted to text. CONTEXT: JSON data, line 1: ...ecTimeDigitized": 506779, "MakerNoteUnknownText": its funny that in my local but not in host. for more information i must tell you guys my postgreSQL version is 11.3 and host postgreSQl is 9.5.17 . where you think is problem? as error it's seems for postgreSQL. thank you
How do I limit pyqt4 clipboard just only print text or image path?
def tt(self): cb=QApplication.clipboard() data=cb.mimeData() #if data.hasImage(): #for path in data.urls(): #print path if data.hasText(): tex =unicode (data.text()) print tex if tex != "": r = QtCore.QStringList([]) for ct in tex: py = slug(ct, style=pypinyin.TONE, errors='ignore') if py != '': w = ct + '(' + py + ')' else: w = ct r.append(w) str = r.join("") self.ui.textEdit.setText(QtCore.QString(str)) I use python2.7 and pyqt4 to make something like Chinese characters to Pinyin. So when I copy string, it's fine, the job ding very well. but when I copy image, I just want only print its path . but tex still work, slug() will go error. how do I limit it.
You can use QMimeData.hasUrls() and QMimeData.urls(). The latter returns a list of QUrl objects (which are also used for file-paths): if data.hasUrls() or data.hasImage(): for url in data.urls(): filepath = unicode(url.toLocalFile()) print(filepath) elif data.hasText(): tex =unicode (data.text()) ... EDIT: Here is a test script to get clipboard information: import sys from PyQt4 import QtCore, QtGui class Window(QtGui.QWidget): def __init__(self): super(Window, self).__init__() self.button = QtGui.QPushButton('Get Clipboard Info', self) self.button.clicked.connect(self.handleButton) self.edit = QtGui.QTextEdit(self) layout = QtGui.QVBoxLayout(self) layout.addWidget(self.edit) layout.addWidget(self.button) def handleButton(self): cb = QtGui.QApplication.clipboard() data = cb.mimeData() output = [] if data.hasImage(): image = QtGui.QImage(data.imageData()) output.append('Image: size %s' % image.byteCount()) elif data.hasUrls(): output.append('Urls: count %s' % len(data.urls())) for url in data.urls(): filepath = unicode(url.toLocalFile()) output.append(' %s' % filepath) elif data.hasText(): output.append('Text: length %s' % len(data.text())) output.append('') output.append('Formats: count %s' % len(data.formats())) for fmt in data.formats(): output.append(' %s' % fmt) self.edit.setText('\n'.join(output)) if __name__ == '__main__': app = QtGui.QApplication(sys.argv) window = Window() window.setGeometry(600, 50, 300, 400) window.show() sys.exit(app.exec_())
Replace string before brace in Python-script
I modified jtaubers python script to convert betababel-code to the original greek letters. After some modifications (and help!) I got the script to run. # beta2unicode.py # # Version 2004-11-23 # # James Tauber # http://jtauber.com/ # # You are free to redistribute this, but please inform me of any errors # # USAGE: # # trie = beta2unicodeTrie() # beta = "LO/GOS\n"; # unicode, remainder = trie.convert(beta) # # - to get final sigma, string must end in \n # - remainder will contain rest of beta if not all can be converted class Trie: def __init__(self): self.root = [None, {}] def add(self, key, value): curr_node = self.root for ch in key: curr_node = curr_node[1].setdefault(ch, [None, {}]) curr_node[0] = value def find(self, key): curr_node = self.root for ch in key: try: curr_node = curr_node[1][ch] except KeyError: return None return curr_node[0] def findp(self, key): curr_node = self.root remainder = key for ch in key: try: curr_node = curr_node[1][ch] except KeyError: return (curr_node[0], remainder) remainder = remainder[1:] return (curr_node[0], remainder) def convert(self, keystring): valuestring = "" key = keystring while key: value, key = self.findp(key) if not value: return (valuestring, key) valuestring += value return (valuestring, key) def beta2unicodeTrie(): t = Trie() t.add("*A", u"\u0391") t.add("*B", u"\u0392") t.add("*G", u"\u0393") t.add("*D", u"\u0394") t.add("*E", u"\u0395") t.add("*Z", u"\u0396") t.add("*H", u"\u0397") t.add("*Q", u"\u0398") t.add("*I", u"\u0399") t.add("*K", u"\u039A") t.add("*L", u"\u039B") t.add("*M", u"\u039C") t.add("*N", u"\u039D") t.add("*C", u"\u039E") t.add("*O", u"\u039F") t.add("*P", u"\u03A0") t.add("*R", u"\u03A1") t.add("*S", u"\u03A3") t.add("*T", u"\u03A4") t.add("*U", u"\u03A5") t.add("*F", u"\u03A6") t.add("*X", u"\u03A7") t.add("*Y", u"\u03A8") t.add("*W", u"\u03A9") t.add("A", u"\u03B1") t.add("B", u"\u03B2") t.add("G", u"\u03B3") t.add("D", u"\u03B4") t.add("E", u"\u03B5") t.add("Z", u"\u03B6") t.add("H", u"\u03B7") t.add("Q", u"\u03B8") t.add("I", u"\u03B9") t.add("K", u"\u03BA") t.add("L", u"\u03BB") t.add("M", u"\u03BC") t.add("N", u"\u03BD") t.add("C", u"\u03BE") t.add("O", u"\u03BF") t.add("P", u"\u03C0") t.add("R", u"\u03C1") t.add("S\n", u"\u03C2") t.add("S,", u"\u03C2,") t.add("S.", u"\u03C2.") t.add("S:", u"\u03C2:") t.add("S}", u"\u03C2:") t.add("S;", u"\u03C2;") t.add("S]", u"\u03C2]") t.add("S#", u"\u03C2#") t.add("S_", u"\u03C2_") t.add("S", u"\u03C3") t.add("T", u"\u03C4") t.add("U", u"\u03C5") t.add("F", u"\u03C6") t.add("X", u"\u03C7") t.add("Y", u"\u03C8") t.add("W", u"\u03C9") t.add("I+", U"\u03CA") t.add("U+", U"\u03CB") t.add("A)", u"\u1F00") t.add("A(", u"\u1F01") t.add("A)\\", u"\u1F02") t.add("A(\\", u"\u1F03") t.add("A)!", u"\u1F02") t.add("A(!", u"\u1F03") t.add("A)/", u"\u1F04") t.add("A(/", u"\u1F05") t.add("E)", u"\u1F10") t.add("E(", u"\u1F11") t.add("E)\\", u"\u1F12") t.add("E(\\", u"\u1F13") t.add("E)!", u"\u1F12") t.add("E(!", u"\u1F13") t.add("E)/", u"\u1F14") t.add("E(/", u"\u1F15") t.add("H)", u"\u1F20") t.add("H(", u"\u1F21") t.add("H)\\", u"\u1F22") t.add("H(\\", u"\u1F23") t.add("H)!", u"\u1F22") t.add("H(!", u"\u1F23") t.add("H)/", u"\u1F24") t.add("H(/", u"\u1F25") t.add("I)", u"\u1F30") t.add("I(", u"\u1F31") t.add("I)\\", u"\u1F32") t.add("I(\\", u"\u1F33") t.add("I)!", u"\u1F32") t.add("I(!", u"\u1F33") t.add("I)/", u"\u1F34") t.add("I(/", u"\u1F35") t.add("O)", u"\u1F40") t.add("O(", u"\u1F41") t.add("O)\\", u"\u1F42") t.add("O(\\", u"\u1F43") t.add("O)!", u"\u1F42") t.add("O(!", u"\u1F43") t.add("O)/", u"\u1F44") t.add("O(/", u"\u1F45") t.add("U)", u"\u1F50") t.add("U(", u"\u1F51") t.add("U)\\", u"\u1F52") t.add("U(\\", u"\u1F53") t.add("U)!", u"\u1F52") t.add("U(!", u"\u1F53") t.add("U)/", u"\u1F54") t.add("U(/", u"\u1F55") t.add("W)", u"\u1F60") t.add("W(", u"\u1F61") t.add("W)\\", u"\u1F62") t.add("W(\\", u"\u1F63") t.add("W)!", u"\u1F62") t.add("W(!", u"\u1F63") t.add("W)/", u"\u1F64") t.add("W(/", u"\u1F65") t.add("A)=", u"\u1F06") t.add("A(=", u"\u1F07") t.add("H)=", u"\u1F26") t.add("H(=", u"\u1F27") t.add("I)=", u"\u1F36") t.add("I(=", u"\u1F37") t.add("U)=", u"\u1F56") t.add("U(=", u"\u1F57") t.add("W)=", u"\u1F66") t.add("W(=", u"\u1F67") t.add("*A)", u"\u1F08") t.add("*)A", u"\u1F08") t.add("*A(", u"\u1F09") t.add("*(A", u"\u1F09") # t.add("*(\A", u"\u1F0B") t.add("*A)/", u"\u1F0C") t.add("*)/A", u"\u1F0C") t.add("*A(/", u"\u1F0F") t.add("*(/A", u"\u1F0F") t.add("*E)", u"\u1F18") t.add("*)E", u"\u1F18") t.add("*E(", u"\u1F19") t.add("*(E", u"\u1F19") # t.add("*(\E", u"\u1F1B") t.add("*E)/", u"\u1F1C") t.add("*)/E", u"\u1F1C") t.add("*E(/", u"\u1F1D") t.add("*(/E", u"\u1F1D") t.add("*H)", u"\u1F28") t.add("*)H", u"\u1F28") t.add("*H(", u"\u1F29") t.add("*(H", u"\u1F29") t.add("*H)\\", u"\u1F2A") t.add(")\\*H", u"\u1F2A") t.add("*)\\H", u"\u1F2A") t.add("*H)!", u"\u1F2A") t.add(")!*H", u"\u1F2A") t.add("*)!H", u"\u1F2A") # t.add("*H)/", u"\u1F2C") t.add("*)/H", u"\u1F2C") # t.add("*)=H", u"\u1F2E") t.add("(/*H", u"\u1F2F") t.add("*(/H", u"\u1F2F") t.add("*I)", u"\u1F38") t.add("*)I", u"\u1F38") t.add("*I(", u"\u1F39") t.add("*(I", u"\u1F39") # # t.add("*I)/", u"\u1F3C") t.add("*)/I", u"\u1F3C") # # t.add("*I(/", u"\u1F3F") t.add("*(/I", u"\u1F3F") # t.add("*O)", u"\u1F48") t.add("*)O", u"\u1F48") t.add("*O(", u"\u1F49") t.add("*(O", u"\u1F49") # # t.add("*(\O", u"\u1F4B") t.add("*O)/", u"\u1F4C") t.add("*)/O", u"\u1F4C") t.add("*O(/", u"\u1F4F") t.add("*(/O", u"\u1F4F") # t.add("*U(", u"\u1F59") t.add("*(U", u"\u1F59") # t.add("*(/U", u"\u1F5D") # t.add("*(=U", u"\u1F5F") t.add("*W)", u"\u1F68") t.add("*W(", u"\u1F69") t.add("*(W", u"\u1F69") # # t.add("*W)/", u"\u1F6C") t.add("*)/W", u"\u1F6C") t.add("*W(/", u"\u1F6F") t.add("*(/W", u"\u1F6F") t.add("*A)=", u"\u1F0E") t.add("*)=A", u"\u1F0E") t.add("*A(=", u"\u1F0F") t.add("*W)=", u"\u1F6E") t.add("*)=W", u"\u1F6E") t.add("*W(=", u"\u1F6F") t.add("*(=W", u"\u1F6F") t.add("A\\", u"\u1F70") t.add("A!", u"\u1F70") t.add("A/", u"\u1F71") t.add("E\\", u"\u1F72") t.add("E!", u"\u1F72") t.add("E/", u"\u1F73") t.add("H\\", u"\u1F74") t.add("H!", u"\u1F74") t.add("H/", u"\u1F75") t.add("I\\", u"\u1F76") t.add("I!", u"\u1F76") t.add("I/", u"\u1F77") t.add("O\\", u"\u1F78") t.add("O!", u"\u1F78") t.add("O/", u"\u1F79") t.add("U\\", u"\u1F7A") t.add("U!", u"\u1F7A") t.add("U/", u"\u1F7B") t.add("W\\", u"\u1F7C") t.add("W!", u"\u1F7C") t.add("W/", u"\u1F7D") t.add("A)/|", u"\u1F84") t.add("A(/|", u"\u1F85") t.add("H)|", u"\u1F90") t.add("H(|", u"\u1F91") t.add("H)/|", u"\u1F94") t.add("H)=|", u"\u1F96") t.add("H(=|", u"\u1F97") t.add("W)|", u"\u1FA0") t.add("W(=|", u"\u1FA7") t.add("A=", u"\u1FB6") t.add("H=", u"\u1FC6") t.add("I=", u"\u1FD6") t.add("U=", u"\u1FE6") t.add("W=", u"\u1FF6") t.add("I\\+", u"\u1FD2") t.add("I!+", u"\u1FD2") t.add("I/+", u"\u1FD3") t.add("I+/", u"\u1FD3") t.add("U\\+", u"\u1FE2") t.add("U!+", u"\u1FE2") t.add("U/+", u"\u1FE3") t.add("A|", u"\u1FB3") t.add("A/|", u"\u1FB4") t.add("H|", u"\u1FC3") t.add("H/|", u"\u1FC4") t.add("W|", u"\u1FF3") t.add("W|/", u"\u1FF4") t.add("W/|", u"\u1FF4") t.add("A=|", u"\u1FB7") t.add("H=|", u"\u1FC7") t.add("W=|", u"\u1FF7") t.add("R(", u"\u1FE4") t.add("*R(", u"\u1FEC") t.add("*(R", u"\u1FEC") # t.add("~", u"~") # t.add("-", u"-") # t.add("(null)", u"(null)") # t.add("&", "&") t.add("0", u"0") t.add("1", u"1") t.add("2", u"2") t.add("3", u"3") t.add("4", u"4") t.add("5", u"5") t.add("6", u"6") t.add("7", u"7") t.add("8", u"8") t.add("9", u"9") t.add("#", u"#") t.add("$", u"$") t.add(" ", u" ") t.add(".", u".") t.add(",", u",") t.add("'", u"'") t.add(":", u":") t.add(";", u";") t.add("_", u"_") t.add("[", u"[") t.add("]", u"]") t.add("\n", u"") return t import sys t = beta2unicodeTrie() import re BCODE = re.compile(r'\\bcode{[^}]*}') for line in open(sys.argv[1]): matches = BCODE.search(line) for match in BCODE.findall(line): bcode = match[7:-1] a, b = t.convert(bcode.upper()) if b: raise IOError("failed conversion '%s' in '%s'" % (b, line)) converted = a.encode("utf-8") line = line.replace(match, converted) print(line.rstrip()) There is one thing left though. The final sigma "ς" gets not converted like this when it stands at the end of the \bcode{}-Makro. For example here: \bcode{ei)=dos} The script converts it to the normal sigma "σ" Unicode: U+03C3 How do I make the script recognize to convert the "s" when it stands right before the end-brace to the Unicode U+03C2? This: t.add("S}", u"\u03C2}") does not do the trick
Using Python Tkinter .config() method
I am trying to use the Python Tkinter .config() method to update some message text. I can't get it to work. What might I be doing wrong (see the update_message method): #!/usr/bin/python import alsaaudio as aa import audioop import Tkinter as tk import tkFont import threading import Queue # styles BACKROUND_COLOR = '#000000' TYPEFACE = 'Unit-Bold' FONT_SIZE = 50 TEXT_COLOR = '#777777' TEXTBOX_WIDTH = 400 # text TITLE = 'listen closely' SCORE_MESSAGE = 'your score:\n ' END_MESSAGE = 'too loud!\ntry again' # configuration DEVICE = 'hw:1' # hardware sound card index CHANNELS = 1 SAMPLE_RATE = 8000 # Hz // 44100 PERIOD = 256 # Frames // 256 FORMAT = aa.PCM_FORMAT_S8 # Sound format NOISE_THRESHOLD = 3 class Display(object): def __init__(self, parent, queue): self.parent = parent self.queue = queue self._geom = '200x200+0+0' parent.geometry("{0}x{1}+0+0".format( parent.winfo_screenwidth(), parent.winfo_screenheight())) parent.overrideredirect(1) parent.title(TITLE) parent.configure(background=BACKROUND_COLOR) parent.displayFont = tkFont.Font(family=TYPEFACE, size=FONT_SIZE) self.process_queue() def process_queue(self): try: score = self.queue.get(0) self.print_message(score) except Queue.Empty: pass self.parent.after(100, self.update_queue) def update_queue(self): try: score = self.queue.get(0) self.update_message(score) except Queue.Empty: pass self.parent.after(100, self.update_queue) def print_message(self, messageString): print 'message', messageString displayString = SCORE_MESSAGE + str(messageString) self.message = tk.Message( self.parent, text=displayString, bg=BACKROUND_COLOR, font=self.parent.displayFont, fg=TEXT_COLOR, width=TEXTBOX_WIDTH, justify="c") self.message.place(relx=.5, rely=.5, anchor="c") def update_message(self, messageString): print 'message', messageString displayString = SCORE_MESSAGE + str(messageString) self.message.config(text=displayString) def setup_audio(queue, stop_event): data_in = aa.PCM(aa.PCM_CAPTURE, aa.PCM_NONBLOCK, 'hw:1') data_in.setchannels(2) data_in.setrate(44100) data_in.setformat(aa.PCM_FORMAT_S16_LE) data_in.setperiodsize(256) while not stop_event.is_set(): # Read data from device l, data = data_in.read() if l: # catch frame error try: max_vol = audioop.rms(data, 2) scaled_vol = max_vol // 4680 print scaled_vol if scaled_vol <= 3: # Too quiet, ignore continue queue.put(scaled_vol) except audioop.error, e: if e.message != "not a whole number of frames": raise e def main(): root = tk.Tk() queue = Queue.Queue() window = Display(root, queue) stop_event = threading.Event() audio_thread = threading.Thread(target=setup_audio, args=[queue, stop_event]) audio_thread.start() try: root.mainloop() finally: stop_event.set() audio_thread.join() pass if __name__ == '__main__': main() I don't want to be laying down a new message every time I update. If the .config() doesn't work, is there another method to update the text configuration of the message?
I would use string variables, first create your string variable then set it to want you want it to display at the start next make your object and in text put the sting variable then when you want to change the text in the object change the string variable. self.messaget = StringVar() self.messaget.set("") self.message = tk.Message( self.parent, textvariable=self.messaget, bg=BACKROUND_COLOR, font=self.parent.displayFont, fg=TEXT_COLOR, width=TEXTBOX_WIDTH, justify="c").grid() #note renember to palce the object after you have created it either using #.grid(row = , column =) or .pack() #note that it is textvariable instead of text if you put text instead it will run but #but will show PY_Var instead of the value of the variable edit to change the text without recreating the object you do the name of the string variable you have used and .set self.messaget.set("hi")
django - HTML to PDF in Indian languages with pisa
I'm converting a HTML file into PDF in Django using Pisa. It is working when the content is only in English. But here the content will be in English and five other Indian languages(Tamil, Hindi, Telugu, Malayalam, and Kannada). I have given my code below. views.py def render_to_pdf1(template_src, context_dict): template = get_template(template_src) context = Context(context_dict) html = template.render(context) result = StringIO.StringIO() pdf = pisa.pisaDocument(StringIO.StringIO(html.encode('UTF-8')), result) return result.getvalue() def print_pdf(request): message = Message.objects.get(id = 1) html_table_string = '' html_table_string += '%s' % message.english html_table_string += '%s' % message.tamil html_table_string += '%s' % message.hindi html_table_string += '%s' % message.telugu html_table_string += '%s' % message.kannada html_table_string += '%s' % message.malayalam fileread = str(settings.TEMPLATE_DIRS[0])+str('/base_file.html') fr = open(fileread, "r").read() fr = fr.replace('message_content', html_table_string) result = StringIO.StringIO() filewrite = str(settings.TEMPLATE_DIRS[0]) + str('/temp_file.html') empty = "" fw = open(filewrite, 'w') fw.write(empty) fw.write(fr) fw.close() pdf_contents = render_to_pdf1('temp_file.html',result) file_to_be_saved = ContentFile(pdf_contents) name = (str(request.user.email) + ".pdf").replace("#", '') pdf = Pdf.objects.create(name = name, user = request.user, created_by = request.user) pdf.name.save(name ,file_to_be_saved) file_path = Pdf.objects.get(user = request.user).name pdf_file = str(file_path).split("media")[1] return HttpResponseRedirect('/site_media' + pdf_file) Here what I'm doing is: Having a base template base_file.html. Getting the message object by ID (ID will be dynamically supplied). Then replacing the message_content with current content. Writing it in a file temp_file.html. Converting temp_file.html into a PDF file. The converted PDF will be containing the message in English, Tamil, Hindi, Telugu, Kannada, and Malayalam. But I couldn't write the other language contents in the HTML file and couldn't convert it. The error I'm getting is 'ascii' codec can't encode characters in position 1066-1075: ordinal not in range(128) and occurs in the line fw.write(fr). So how can I achieve this? I want to print the PDF file with content in all these languages.