I wrote an executable example - you can test it. When you start this program you will get three QPushButton()-objects and one QLineEdit()-object. There you can install or deinstall/uninstall the event filter or close the application. Please install the event filter and type a text. You will see what I want. I want the example program to protect the space key. In this current version the user can't press the space key more than 2 times. This program does work.
But I have a little problem. When I write a text in the QLineEdit()-object and then I highlight the text and I press the delete or return key, nothing happens. I am not able to delete the text. I am also not able to copy the marked text.
Whats wrong with the code below?
#!/usr/bin/env python
import sys
from PyQt4.QtCore import QEvent, Qt
from PyQt4.QtGui import QMainWindow, QWidget, QApplication, QVBoxLayout, QLineEdit, QPushButton
class Window(QMainWindow):
def __init__(self, parent=None):
QMainWindow.__init__(self, parent)
self.count_space_pressed = 0
self.current_pos = None
self.init_ui()
self.init_signal_slot_push_button()
def init_ui(self):
centralwidget = QWidget(self)
self.input_line_edit = QLineEdit(self)
self.close_push = QPushButton(self)
self.close_push.setEnabled(False)
self.close_push.setText("Close")
self.push_install = QPushButton(self)
self.push_install.setText("Install eventFilter")
self.push_deinstall = QPushButton(self)
self.push_deinstall.setText("Deinstall eventFilter")
layout = QVBoxLayout(centralwidget)
layout.addWidget(self.input_line_edit)
layout.addWidget(self.push_install)
layout.addWidget(self.push_deinstall)
layout.addWidget(self.close_push)
self.setCentralWidget(centralwidget)
return
def install_filter_event(self, widget_object):
widget_object.installEventFilter(self)
return
def deinstall_filter_event(self, widget_object):
widget_object.removeEventFilter(self)
return
def init_signal_slot_push_button(self):
self.close_push.clicked.connect(self.close)
self.push_install.clicked.connect(lambda: self.install_filter_event(self.input_line_edit))
self.push_deinstall.clicked.connect(lambda: self.deinstall_filter_event(self.input_line_edit))
return
def strip_string(self, content, site=None):
if site == "right":
return content.rstrip()
elif site == "right_left":
return content.strip()
elif site == "left":
return content.lstrip()
def eventFilter(self, received_object, event):
content_line_edit = unicode(received_object.text())
if event.type() == QEvent.KeyPress:
if event.key() == Qt.Key_Space:
'''
Yes, the user did press the Space-Key. We
count how often he pressed the space key.
'''
self.count_space_pressed = self.count_space_pressed + 1
if int(self.count_space_pressed) > 1:
'''
The user did press the space key more than 1 time.
'''
self.close_push.setEnabled(False)
'''
Now we know the user did press the
space key more than 1 time. We take a look,
if variablenamed (sel.current_pos) is None.
That means, no current position is saved.
'''
if self.current_pos is None:
'''
Well no current position is saved,
that why we save the new position anf
then we set the position of the cursor.
'''
self.current_pos = received_object.cursorPosition()
received_object.setCursorPosition(int(self.current_pos))
received_object.clear()
received_object.setText(self.strip_string(content_line_edit, site="right"))
else:
'''
Well the user press the space key again, for
example 3, 4, 5, 6 times we want to keep the
old position of the cursor until he press
no space key.
'''
received_object.setCursorPosition(int(self.current_pos))
'''
We have to remove all spaces in a string
on the right side and set the content on QLineEdit-widget.
'''
received_object.clear()
received_object.setText(self.strip_string(content_line_edit, site="right"))
else: pass
else:
'''
No the user didn't press the space key.
So we set all setting on default.
'''
self.close_push.setEnabled(True)
self.current_pos = None
self.count_space_pressed = 0
received_object.clear()
received_object.setText(self.strip_string(content_line_edit, site="left"))
# Call Base Class Method to Continue Normal Event Processing
return QMainWindow.eventFilter(self, received_object, event)
if __name__ == '__main__':
app = QApplication(sys.argv)
window = Window()
window.show()
app.exec_()
EDIT:
import sys, re
from PyQt4 import QtCore, QtGui
class Window(QtGui.QWidget):
def __init__(self):
super(Window, self).__init__()
self.edit = QtGui.QLineEdit(self)
self.edit.textChanged.connect(self.handleTextChanged)
layout = QtGui.QVBoxLayout(self)
layout.addWidget(self.edit)
# First we save the the regular expression pattern
# in a variable named regex.
## This means: one whitespace character, followed by
## one or more whitespaces chatacters
regex = r"\s\s+"
# Now we comple the pattern.
# After then we save the compiled patter
# as result in a variable named compiled_re.
self.compiled_re = re.compile(regex)
def handleTextChanged(self, text):
# When the text of a widget-object is changed,
# we do something.
# Here I am really not sure.
# Do you want to look if the given text isn't empty?
## No, we want to search the string to see if it
## contains any runs of multiple spaces
if self.compiled_re.search(text):
# We know that given text is a QString-object.
# So we have to convert the given text
# into a python-string, because we want to work
# with them in python.
text = unicode(text)
# NOTICE: Do replacements before and after cursor pos
# We save the current and correct cursor position
# of a QLineEdit()-object in the variable named pos.
pos = self.edit.cursorPosition()
# Search and Replace: Here the sub()-method
# replaces all occurrences of the RE pattern
# in string with text.
# And then it returns modified string and saves
# it in the variables prefix and suffix.
# BUT I am not sure If I understand this: [:pos]
# and [pos:]. I will try to understnand.
# I think we are talking about slicing, right?
# And I think the slicing works like string[start:end]:
# So text[:pos] means, search and replace all whitesapce
# at the end of the text-string. And the same again, but
# text[pos:] means, search and replace all whitesapce
# at the start of the string-text.
## Right, but the wrong way round. text[:pos] means from
## the start of the string up to pos (the prefix); and
## text[pos:] means from pos up to the end of the string
## (the suffix)
prefix = self.compiled_re.sub(' ', text[:pos])
suffix = self.compiled_re.sub(' ', text[pos:])
# NOTICE: Cursor might be between spaces
# Now we take a look if the variable prefix ends
# with a whitespace and we check if suffix starts
# with a whitespace.
# BUT, why we do that?
## Imagine that the string is "A |B C" (with the cursor
## shown as "|"). If "B" is deleted, we will get "A | C"
## with the cursor left between multiple spaces. But
## when the string is split into prefix and suffix,
## each part will contain only *one* space, so the
## regexp won't replace them.
if prefix.endswith(' ') and suffix.startswith(' '):
# Yes its True, so we overwrite the variable named
# suffix and slice it. suffix[1:] means, we starts
# at 1 until open end.
## This removes the extra space at the start of the
## suffix that was missed by the regexp (see above)
suffix = suffix[1:]
# Now we have to set the text of the QLineEdit()-object,
# so we put the both varialbes named prefix and suffix
# together.
self.edit.setText(prefix + suffix)
# After then, we have to set the cursor position.
# I know that the len()-method returns the length of the
# variable named prefix.
# BUT why we have to do that?
## When the text is set, it will clear the cursor. The
## prefix and suffix gives the text before and after the
## old cursor position. Removing spaces may have shifted
## the old position, so the new postion is calculated
## from the length of the current prefix
self.edit.setCursorPosition(len(prefix))
if __name__ == '__main__':
app = QtGui.QApplication(sys.argv)
window = Window()
window.setGeometry(500, 150, 300, 100)
window.show()
sys.exit(app.exec_())
EDIT 2:
Two question:
First Question: in the if.condition, where we take a look if prefix ends and suffix starts with sapces, there we are about to remove the extra space at the start of the suffix. But why don't we also remove the extra space at start of the prefix?
Imagine: The user types " Prefix and Suffix " - with extra whitespaces at start and end. Don't we have to remove the extra space at start of the prefix - like:
prefix= prefix[:1]?
Second Question: At the end of the handleTextChanged()-method, we have to calculate the new position of the cursor. In the current case we use prefix to get the length of the string. Why not the len from the new modified text, that is a part from prefix and suffix?
Example: The old string is " Prefix and Suffix ", the user removes the word 'and". Now our string looks like " Prefix | Suffix ". After all whitespaces are removed we get the new modified text: "Prefix Suffix". Why don't we calculate the new position from the modified text? Or did I miss something?
EDIT 3:
I am sorry, I still don't understand the situation.
First situation: When the user types the following string: "A B C |" (| it is shown as cursor). Now the user presses the space key more than 2 times, we get a prefix that contains "A B C |" - and no suffix. And currently the length of the prexis is 6 - suffix has no lenght, because its empty. And the whole word is length 6. The current position of the cursor is 7.
Second situation: The user types "A B D E F |". And now he is realizing that a letter is missing: C. He moves his cursor back between B and D and types C and then he is about to press the space key 2 times. Now we have prefix that contains "A B C " and suffix which content "D E F". The length of prefix is 6 and of suffix is 5. The length of the whole word is 11. And in this moment the current position of the cursor is 7. In this situation you take the length of prefix and set the cursor position, right?
Filtering key-presses is not enough if you really want to prevent multiple spaces.
For instance, the user can simply drag and drop multiple spaces; or paste them either with the mouse, the built-in context menu, or with the standard keyboard shortcuts.
It's also very easy to break your space-key counting method: for example, just type A B C then move back two places and delete B!
A much more robust way to do this is to connect to the textChanged signal and use a regexp to check if there's any multiple spaces. If there are, use the same regexp to replace them, and then restore the cursor to it's original position.
Here's a demo:
import sys, re
from PyQt4 import QtCore, QtGui
class Window(QtGui.QWidget):
def __init__(self):
super(Window, self).__init__()
self.edit = QtGui.QLineEdit(self)
self.edit.textChanged.connect(self.handleTextChanged)
layout = QtGui.QVBoxLayout(self)
layout.addWidget(self.edit)
self.regexp = re.compile(r'\s\s+')
def handleTextChanged(self, text):
if self.regexp.search(text):
text = unicode(text)
# do replacements before and after cursor pos
pos = self.edit.cursorPosition()
prefix = self.regexp.sub(' ', text[:pos])
suffix = self.regexp.sub(' ', text[pos:])
# cursor might be between spaces
if prefix.endswith(' ') and suffix.startswith(' '):
suffix = suffix[1:]
self.edit.setText(prefix + suffix)
self.edit.setCursorPosition(len(prefix))
if __name__ == '__main__':
app = QtGui.QApplication(sys.argv)
window = Window()
window.setGeometry(500, 150, 300, 100)
window.show()
sys.exit(app.exec_())
if you are using python and you have created button for removing the last character, do the following
self.PB_Back.clicked.connect(self.Keypad_Back)
def Keypad_Back(self):
self.LE_Edit.setText(self.LE_Edit.text()[:-1])
this will remove last character one at a time
to delete all the character at once, do the following
self.PB_DeleteResult.clicked.connect(self.Keypad_DeleteResult)
def Keypad_DeleteResult(self):
self.LE_Edit.setText("")
Related
Hello i can only seem to print letters from an entry not the full entry (sentence).
I simply want to click a button using tkinter and after typing a sentence into a box, the button will store the sentences in a list.
Then I want to create a second button that will then print a random sentence from that list.
When I try this it only prints letters of the stored sentence.
Any ideas would be greatly appreciated as I’ve looked around for the last two days before asking.
Best wishes to you all
from tkinter import *
import random
def super_function():
out = map(Entry.get, entr)
clear_entry_1()
def clear_entry_1():
Entry_field.delete(0,END)
def super_function_2():
print(random.choice(entr))
root = Tk()
root.geometry('400x400')
entr = []
for i in range(10):
entr.append(Entry(root))
Entry_field = Entry()
Entry_field.pack()
Button1 = Button(root, text = 'Add your idea!', command =
super_function)
Button1.pack()
Button2 = Button(root, text='Generate an idea!',
command=super_function_2)
Button2.pack()
root.mainloop()
The for loop is useless and should be removed as it just appends 10 invisible entries.
What you need is to append the sentence inside super_function() instead:
def super_function():
# save the sentence
entr.append(Entry_field.get())
clear_entry_1()
I am trying to populate a Tkinter combobox with pre-defined values to select from. It is populating and I am able to type in and get suggestions. However, in order to do this I have to definitely know the first few characters. If I know some text in the middle or end of the string, its of no use because the combobox does only a 'LIKE%' search and not a '%LIKE%' search.
Expected Output (Typing the word "Ceramic" fetches all names containing the string. Note: This is not a Tkinter screenshot):
This is my adaptation of the code till now, if anyone can suggest how to modify the AutocompleteCombobox class to do a LIKE search, it would be great.
The below working piece of code, as an example, has values "Cranberry" and "Strawberry" , my requirement is to type "berry" and get suggestions of both fruits.
import Tkinter
import ttk
import sqlite3
class AutocompleteCombobox(ttk.Combobox):
def set_completion_list(self, completion_list):
"""Use our completion list as our drop down selection menu, arrows move through menu."""
self._completion_list = sorted(completion_list, key=str.lower) # Work with a sorted list
self._hits = []
self._hit_index = 0
self.position = 0
self.bind('<KeyRelease>', self.handle_keyrelease)
self['values'] = self._completion_list # Setup our popup menu
def autocomplete(self, delta=0):
"""autocomplete the Combobox, delta may be 0/1/-1 to cycle through possible hits"""
if delta: # need to delete selection otherwise we would fix the current position
self.delete(self.position, Tkinter.END)
else: # set position to end so selection starts where textentry ended
self.position = len(self.get())
# collect hits
_hits = []
for element in self._completion_list:
if element.lower().startswith(self.get().lower()): # Match case insensitively
_hits.append(element)
# if we have a new hit list, keep this in mind
if _hits != self._hits:
self._hit_index = 0
self._hits=_hits
# only allow cycling if we are in a known hit list
if _hits == self._hits and self._hits:
self._hit_index = (self._hit_index + delta) % len(self._hits)
# now finally perform the auto completion
if self._hits:
self.delete(0,Tkinter.END)
self.insert(0,self._hits[self._hit_index])
self.select_range(self.position,Tkinter.END)
def handle_keyrelease(self, event):
"""event handler for the keyrelease event on this widget"""
if event.keysym == "BackSpace":
self.delete(self.index(Tkinter.INSERT), Tkinter.END)
self.position = self.index(Tkinter.END)
if event.keysym == "Left":
if self.position < self.index(Tkinter.END): # delete the selection
self.delete(self.position, Tkinter.END)
else:
self.position = self.position-1 # delete one character
self.delete(self.position, Tkinter.END)
if event.keysym == "Right":
self.position = self.index(Tkinter.END) # go to end (no selection)
if len(event.keysym) == 1:
self.autocomplete()
# No need for up/down, we'll jump to the popup
# list at the position of the autocompletion
def test(test_list):
"""Run a mini application to test the AutocompleteEntry Widget."""
root = Tkinter.Tk(className='AutocompleteCombobox')
combo = AutocompleteCombobox(root)
combo.set_completion_list(test_list)
combo.pack()
combo.focus_set()
# I used a tiling WM with no controls, added a shortcut to quit
root.bind('<Control-Q>', lambda event=None: root.destroy())
root.bind('<Control-q>', lambda event=None: root.destroy())
root.mainloop()
if __name__ == '__main__':
test_list = ('apple', 'banana', 'Cranberry', 'dogwood', 'alpha', 'Acorn', 'Anise', 'Strawberry' )
test(test_list)
I suspect you need
if self.get().lower() in element.lower():
instead of
if element.lower().startswith(self.get().lower()):
to get data like with %LIKE% in database
But I don't know if you get good effect because this Combobox replaces text with suggestion so if you type be then it finds Cranberry and put in place be and you can't write ber.
Maybe you should display Cranberry as separated (dropdown) list, or popup tip.
Or maybe you will have to use string.find() to highlight correct place in Cranberry and continue to type ber in correct place.
EDIT: example how to use Entry and Listbox to display filtered list
In listbox_update I added sorting list (comparing lower case strings)
#!/usr/bin/env python3
import tkinter as tk
def on_keyrelease(event):
# get text from entry
value = event.widget.get()
value = value.strip().lower()
# get data from test_list
if value == '':
data = test_list
else:
data = []
for item in test_list:
if value in item.lower():
data.append(item)
# update data in listbox
listbox_update(data)
def listbox_update(data):
# delete previous data
listbox.delete(0, 'end')
# sorting data
data = sorted(data, key=str.lower)
# put new data
for item in data:
listbox.insert('end', item)
def on_select(event):
# display element selected on list
print('(event) previous:', event.widget.get('active'))
print('(event) current:', event.widget.get(event.widget.curselection()))
print('---')
# --- main ---
test_list = ('apple', 'banana', 'Cranberry', 'dogwood', 'alpha', 'Acorn', 'Anise', 'Strawberry' )
root = tk.Tk()
entry = tk.Entry(root)
entry.pack()
entry.bind('<KeyRelease>', on_keyrelease)
listbox = tk.Listbox(root)
listbox.pack()
#listbox.bind('<Double-Button-1>', on_select)
listbox.bind('<<ListboxSelect>>', on_select)
listbox_update(test_list)
root.mainloop()
At start with full list
Later only with filtered items
EDIT: 2020.07.21
If you want to use <KeyPress> then you have to change on_keyrelease and use event.char, event.keysym and/or event.keycode because KeyPress is executed before tkinter update text in Entry and you have to add event.char to text in Entry (or remove last char when you press backspace)
if event.keysym == 'BackSpace':
value = event.widget.get()[:-1] # remove last char
else:
value = event.widget.get() + event.char # add new char at the end
It may need other changes for other special keys Ctrl+A, Ctrl+X, Ctrl+C, Ctrl+E, etc. and it makes big problem.
#!/usr/bin/env python3
import tkinter as tk
def on_keypress(event):
print(event)
print(event.state & 4) # Control
print(event.keysym == 'a')
# get text from entry
if event.keysym == 'BackSpace':
# remove last char
value = event.widget.get()[:-1]
elif (event.state & 4): # and (event.keysym in ('a', 'c', 'x', 'e')):
value = event.widget.get()
else:
# add new char at the end
value = event.widget.get() + event.char
#TODO: other special keys
value = value.strip().lower()
# get data from test_list
if value == '':
data = test_list
else:
data = []
for item in test_list:
if value in item.lower():
data.append(item)
# update data in listbox
listbox_update(data)
def listbox_update(data):
# delete previous data
listbox.delete(0, 'end')
# sorting data
data = sorted(data, key=str.lower)
# put new data
for item in data:
listbox.insert('end', item)
def on_select(event):
# display element selected on list
print('(event) previous:', event.widget.get('active'))
print('(event) current:', event.widget.get(event.widget.curselection()))
print('---')
# --- main ---
test_list = ('apple', 'banana', 'Cranberry', 'dogwood', 'alpha', 'Acorn', 'Anise', 'Strawberry' )
root = tk.Tk()
entry = tk.Entry(root)
entry.pack()
entry.bind('<KeyPress>', on_keypress)
listbox = tk.Listbox(root)
listbox.pack()
#listbox.bind('<Double-Button-1>', on_select)
listbox.bind('<<ListboxSelect>>', on_select)
listbox_update(test_list)
root.mainloop()
BTW:
You can also use textvariable in Entry with StringVar and trace which executes function when StringVar changes content.
var_text = tk.StringVar()
var_text.trace('w', on_change)
entry = tk.Entry(root, textvariable=var_text)
entry.pack()
#!/usr/bin/env python3
import tkinter as tk
def on_change(*args):
#print(args)
value = var_text.get()
value = value.strip().lower()
# get data from test_list
if value == '':
data = test_list
else:
data = []
for item in test_list:
if value in item.lower():
data.append(item)
# update data in listbox
listbox_update(data)
def listbox_update(data):
# delete previous data
listbox.delete(0, 'end')
# sorting data
data = sorted(data, key=str.lower)
# put new data
for item in data:
listbox.insert('end', item)
def on_select(event):
# display element selected on list
print('(event) previous:', event.widget.get('active'))
print('(event) current:', event.widget.get(event.widget.curselection()))
print('---')
# --- main ---
test_list = ('apple', 'banana', 'Cranberry', 'dogwood', 'alpha', 'Acorn', 'Anise', 'Strawberry' )
root = tk.Tk()
var_text = tk.StringVar()
var_text.trace('w', on_change)
entry = tk.Entry(root, textvariable=var_text)
entry.pack()
listbox = tk.Listbox(root)
listbox.pack()
#listbox.bind('<Double-Button-1>', on_select)
listbox.bind('<<ListboxSelect>>', on_select)
listbox_update(test_list)
root.mainloop()
I am working in Python 3.6 with NLTK 3.2.
I am trying to write a program which takes raw text as input and outputs any (maximum) series of consecutive words beginning with the same letter (i.e. alliterative sequences).
When searching for sequences, I want to ignore certain words and punctuation (for instance, 'it', 'that', 'into', ''s', ',', and '.'), but to include them in the output.
For example, inputting
"The door was ajar. So it seems that Sam snuck into Sally's subaru."
should yield
["so", "it", "seems", "that", "sam", "snuck", "into", "sally's", "subaru"]
I am new to programming and the best I could come up with is:
import nltk
from nltk import word_tokenize
raw = "The door was ajar. So it seems that Sam snuck into Sally's subaru."
tokened_text = word_tokenize(raw) #word tokenize the raw text with NLTK's word_tokenize() function
tokened_text = [w.lower() for w in tokened_text] #make it lowercase
for w in tokened_text: #for each word of the text
letter = w[0] #consider its first letter
allit_str = []
allit_str.append(w) #add that word to a list
pos = tokened_text.index(w) #let "pos" be the position of the word being considered
for i in range(1,len(tokened_text)-pos): #consider the next word
if tokened_text[pos+i] in {"the","a","an","that","in","on","into","it",".",",","'s"}: #if it's one of these
allit_str.append(tokened_text[pos+i]) #add it to the list
i=+1 #and move on to the next word
elif tokened_text[pos+i][0] == letter: #or else, if the first letter is the same
allit_str.append(tokened_text[pos+i]) #add the word to the list
i=+1 #and move on to the next word
else: #or else, if the letter is different
break #break the for loop
if len(allit_str)>=2: #if the list has two or more members
print(allit_str) #print it
which outputs
['ajar', '.']
['so', 'it', 'seems', 'that', 'sam', 'snuck', 'into', 'sally', "'s", 'subaru', '.']
['seems', 'that', 'sam', 'snuck', 'into', 'sally', "'s", 'subaru', '.']
['sam', 'snuck', 'into', 'sally', "'s", 'subaru', '.']
['snuck', 'into', 'sally', "'s", 'subaru', '.']
['sally', "'s", 'subaru', '.']
['subaru', '.']
This is close to what I want, except that I don't know how to restrict the program to only print the maximum sequences.
So my questions are:
How can I modify this code to only print the maximum sequence
['so', 'it', 'seems', 'that', 'sam', 'snuck', 'into', 'sally', "'s", 'subaru', '.']?
Is there an easier way to do this in Python, maybe with regular expression or more elegant code?
Here are similar questions asked elsewhere, but which have not helped me modify my code:
How do you effectively use regular expressions to find alliterative expressions?
A reddit challenge asking for a similar program
4chan question regarding counting instances of alliteration
Blog about finding most common alliterative strings in a corpus
(I also think it would be nice to have this question answered on this site.)
Interesting task. Personally, I'd loop through without the use of indices, keeping track of the previous word to compare it with the current word.
Additionally, it's not enough to compare letters; you have to take into account that 's' and 'sh' etc don't alliterate. Here's my attempt:
import nltk
from nltk import word_tokenize
from nltk import sent_tokenize
from nltk.corpus import stopwords
import string
from collections import defaultdict, OrderedDict
import operator
raw = "The door was ajar. So it seems that Sam snuck into Sally's subaru. She seems shy sometimes. Someone save Simon."
# Get the English alphabet as a list of letters
letters = [letter for letter in string.ascii_lowercase]
# Here we add some extra phonemes that are distinguishable in text.
# ('sailboat' and 'shark' don't alliterate, for instance)
# Digraphs go first as we need to try matching these before the individual letters,
# and break out if found.
sounds = ["ch", "ph", "sh", "th"] + letters
# Use NLTK's built in stopwords and add "'s" to them
stopwords = stopwords.words('english') + ["'s"] # add extra stopwords here
stopwords = set(stopwords) # sets are MUCH faster to process
sents = sent_tokenize(raw)
alliterating_sents = defaultdict(list)
for sent in sents:
tokenized_sent = word_tokenize(sent)
# Create list of alliterating word sequences
alliterating_words = []
previous_initial_sound = ""
for word in tokenized_sent:
for sound in sounds:
if word.lower().startswith(sound): # only lowercasing when comparing retains original case
initial_sound = sound
if initial_sound == previous_initial_sound:
if len(alliterating_words) > 0:
if previous_word == alliterating_words[-1]: # prevents duplication in chains of more than 2 alliterations, but assumes repetition is not alliteration)
alliterating_words.append(word)
else:
alliterating_words.append(previous_word)
alliterating_words.append(word)
else:
alliterating_words.append(previous_word)
alliterating_words.append(word)
break # Allows us to treat sh/s distinctly
# This needs to be at the end of the loop
# It sets us up for the next iteration
if word not in stopwords: # ignores stopwords for the purpose of determining alliteration
previous_initial_sound = initial_sound
previous_word = word
alliterating_sents[len(alliterating_words)].append(sent)
sorted_alliterating_sents = OrderedDict(sorted(alliterating_sents.items(), key=operator.itemgetter(0), reverse=True))
# OUTPUT
print ("A sorted ordered dict of sentences by number of alliterations:")
print (sorted_alliterating_sents)
print ("-" * 15)
max_key = max([k for k in sorted_alliterating_sents]) # to get sent with max alliteration
print ("Sentence(s) with most alliteration:", sorted_alliterating_sents[max_key])
This produces a sorted ordered dictionary of sentences with their alliteration counts as its keys. The max_key variable contains the count for the highest alliterating sentence or sentences, and can be used to access the sentences themselves.
The accepted answer is very comprehensive, but I would suggest using Carnegie Mellon's pronouncing dictionary. This is partly because it makes life easier, and partly because identical sounding syllables that are not necessarily identical letter-to-letter are also considered alliterations. An example I found online (https://examples.yourdictionary.com/alliteration-examples.html) is "Finn fell for Phoebe".
# nltk.download('cmudict') ## download CMUdict for phoneme set
# The phoneme dictionary consists of ARPABET which encode
# vowels, consonants, and a representitive stress-level (wiki/ARPABET)
phoneme_dictionary = nltk.corpus.cmudict.dict()
stress_symbols = ['0', '1', '2', '3...', '-', '!', '+', '/',
'#', ':', ':1', '.', ':2', '?', ':3']
# nltk.download('stopwords') ## download stopwords (the, a, of, ...)
# Get stopwords that will be discarded in comparison
stopwords = nltk.corpus.stopwords.words("english")
# Function for removing all punctuation marks (. , ! * etc.)
no_punct = lambda x: re.sub(r'[^\w\s]', '', x)
def get_phonemes(word):
if word in phoneme_dictionary:
return phoneme_dictionary[word][0] # return first entry by convention
else:
return ["NONE"] # no entries found for input word
def get_alliteration_level(text): # alliteration based on sound, not only letter!
count, total_words = 0, 0
proximity = 2 # max phonemes to compare to for consideration of alliteration
i = 0 # index for placing phonemes into current_phonemes
lines = text.split(sep="\n")
for line in lines:
current_phonemes = [None] * proximity
for word in line.split(sep=" "):
word = no_punct(word) # remove punctuation marks for correct identification
total_words += 1
if word not in stopwords:
if (get_phonemes(word)[0] in current_phonemes): # alliteration occurred
count += 1
current_phonemes[i] = get_phonemes(word)[0] # update new comparison phoneme
i = 0 if i == 1 else 1 # update storage index
alliteration_score = count / total_words
return alliteration_score
Above is the proposed script. The variable proximity is introduced so that we consider syllables in alliteration, that are otherwise separated by multiple words. The stress_symbols variables reflect stress levels indicated on the CMU dictionary, and it could be easily incorporated in to the function.
I have some data with name and ethnicity
j-bte letourneau scotish
jane mc-earthar french
amabil bonneau english
I then normalize the name as such by replacing the space with "#" and add trailing "?" to standardize the total length of the name entries. I would like to use sequential three-letter substring as my feature to predict ethnicity.
name_filled substr1 substr2 substr3 \
0 j-bte#letourneau??????????????????????????? j-b -bt bte
1 jane#mc-earthar???????????????????????????? jan ane ne#
2 amabil#bonneau????????????????????????????? ama mab abi
Here is my code for data manipulation to this point:
import pandas as pd
from pandas import DataFrame
import re
# Get csv file into data frame
data = pd.read_csv("C:\Users\KubiK\Desktop\OddNames_sampleData.csv")
frame = DataFrame(data)
frame.columns = ["name", "ethnicity"]
name = frame.name
ethnicity = frame.ethnicity
# Remove missing ethnicity data cases
index_missEthnic = frame.ethnicity.isnull()
index_missName = frame.name.isnull()
frame2 = frame.loc[~index_missEthnic, :]
frame3 = frame2.loc[~index_missName, :]
# Make all letters into lowercase
frame3.loc[:, "name"] = frame3["name"].str.lower()
frame3.loc[:, "ethnicity"] = frame3["ethnicity"].str.lower()
# Remove all non-alphabetical characters in Name
frame3.loc[:, "name"] = frame3["name"].str.replace(r'[^a-zA-Z\s\-]', '') # Retain space and hyphen
# Replace empty space as "#"
frame3.loc[:, "name"] = frame3["name"].str.replace('[\s]', '#')
# Find the longest name in the dataset
##frame3["name_length"] = frame3["name"].str.len()
##nameLength = frame3.name_length
##print nameLength.max() # Longest name has !!!40 characters!!! including spaces and hyphens
# Add "?" to fill spaces up to 43 characters
frame3["name_filled"] = frame3["name"].str.pad(side="right", width=43, fillchar="?")
# Split into three-character strings
for i in range(1, 41):
substr = "substr" + str(i)
frame3[substr] = frame3["name_filled"].str[i-1:i+2]
My question is, would it be a problem to store my 3-character substring this way to run the machine learning algorithm? This could be a problem as the example below.
Imagine two Chinese people both with the last name Chan, but one is called "Li Chan" and the other is called "Joseph Chan".
The Chan will be split into "cha" and "han", but for the first case, the "cha" will be stored in the str4 while the other will be stored in the str8 because the first name pushes it to be stored much later. I wonder if I could and should store the 3-character substrings into just one single variable as a list (for example: ["j-b", "-bt", "bte"] for substr variable for case 0), and if the substrings are stored into one single variable, can it be run with machine learning algorithms to predict ethnicity?
def nextItem(self):
active = self.skill_list_listbox.get(tk.ACTIVE)
listbox_contents = self.skill_list_listbox.get(0, tk.END)
current_pos = listbox_contents.index(active)
if current_pos + 1 < len(listbox_contents):
new_pos = current_pos + 1
self.skill_list_listbox.activate(new_pos)
self.skill_list_listbox.selection_set(tk.ACTIVE)
From what I can see within documentation this should highlight and activate the next item in the listbox. If I omit the selection_set I get what I'm looking for but there's no indicator of what's active. Adding it highlights an item, but if you continue to click the "next" button it simply adds to the highlight instead of just highlighting one item creating a long section of highlighted items, which I don't want. I've tried several different methods and this has got me the closest. If there was a 'clear selection' method I suppose I could get my desired effect of just having the next item selected and highlighted, but 3 calls just to do that seems a bit much for a common task? Any thoughts, or suggestions?
Below is an example of what I think you are trying to accomplish, using a button to select the next item in a Listbox. The gist of it is in the button's callback function, which calls selection_clear then selection_set.
Updated the example, hopefully a bit clearer as to what it happening
import Tkinter
class Application(Tkinter.Frame):
def __init__(self, master):
Tkinter.Frame.__init__(self, master)
self.master.minsize(width=256, height=256)
self.master.config()
self.pack()
self.main_frame = Tkinter.Frame()
self.some_list = [
'One',
'Two',
'Three',
'Four'
]
self.some_listbox = Tkinter.Listbox(self.main_frame)
self.some_listbox.pack(fill='both', expand=True)
self.main_frame.pack(fill='both', expand=True)
# insert our items into the list box
for i, item in enumerate(self.some_list):
self.some_listbox.insert(i, item)
# add a button to select the next item
self.some_button = Tkinter.Button(
self.main_frame, text="Next", command=self.next_selection)
self.some_button.pack(side='top')
# not really necessary, just make things look nice and centered
self.main_frame.place(in_=self.master, anchor='c', relx=.5, rely=.5)
def next_selection(self):
selection_indices = self.some_listbox.curselection()
# default next selection is the beginning
next_selection = 0
# make sure at least one item is selected
if len(selection_indices) > 0:
# Get the last selection, remember they are strings for some reason
# so convert to int
last_selection = int(selection_indices[-1])
# clear current selections
self.some_listbox.selection_clear(selection_indices)
# Make sure we're not at the last item
if last_selection < self.some_listbox.size() - 1:
next_selection = last_selection + 1
self.some_listbox.activate(next_selection)
self.some_listbox.selection_set(next_selection)
root = Tkinter.Tk()
app = Application(root)
app.mainloop()