Computer Science
15-110, Spring 2011
Class Notes: File IO Practice
This code requires that you download words.txt and place it on your Desktop.
# File IO Practice
#########################################################
# File IO code from course website
#########################################################
import os
def getDesktopPath(filename = ""):
    """Return the path of `filename` inside the current user's Desktop folder.

    The home folder is taken from the USERPROFILE environment variable,
    or from HOME when USERPROFILE is unset or empty.  If neither variable
    is available, os.path.expanduser("~") is used instead of failing with
    a TypeError on None + str.

    With the default empty filename the result is the Desktop folder
    itself, ending with a path separator.
    """
    # Checking both variables is odd, but works in Windows/Mac/Linux.
    homepath = os.getenv('USERPROFILE') or os.getenv('HOME')
    if not homepath:
        # Neither variable is set: let the standard library find the home.
        homepath = os.path.expanduser("~")
    return os.sep.join([homepath, "Desktop", filename])
def fileExists(filename):
    """Tell whether something exists on disk at the path `filename`.

    Returns True or False, exactly as os.path.exists reports it.
    """
    found = os.path.exists(filename)
    return found
def deleteFile(filename):
    """Remove the file at `filename`; do nothing when no such path exists."""
    if not fileExists(filename):
        # Nothing on disk, so there is nothing to delete.
        return
    os.remove(filename)
def readTextFile(filename):
    """Return the entire contents of the text file `filename` as one string.

    If the path does not exist, a "File does not exist" message is printed
    and None is returned, so callers must check for None.
    """
    if not fileExists(filename):
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print("File does not exist: %s" % (filename,))
        return None
    # The with-block closes the file even if read() raises.
    with open(filename, "rt") as fileHandler:
        return fileHandler.read()
def readTextFileAsList(filename):
    """Return the lines of the text file `filename` as a list of strings.

    Returns None when readTextFile returns None (the file is missing).
    """
    contents = readTextFile(filename)
    # Split on "\n" instead of calling readlines(), which would keep the
    # '\n' character at the end of every line.
    return None if contents is None else contents.split("\n")
def writeTextFile(text, filename):
    """Write the string `text` to the file `filename`.

    The file is opened in "wt" mode, so any existing contents are replaced.
    """
    # The with-block closes the file even if write() raises.
    with open(filename, "wt") as fileHandler:
        fileHandler.write(text)
#########################################################
# Basic dictionary lookup
#########################################################
def testDictionary():
    """Interactively look up words in the words.txt file on the Desktop.

    Prompts repeatedly for a word and reports whether its lowercase form
    appears in the word list.  An empty entry ends the loop and prints
    "bye!".  If the word list cannot be read, the function returns
    immediately.
    """
    wordList = readTextFileAsList(getDesktopPath("words.txt"))
    if wordList is None:
        # readTextFile has already printed the "File does not exist" message;
        # without a word list there is nothing to look up.
        return
    # Build a set once so each lookup does not scan the whole list.
    knownWords = set(wordList)
    prompt = "Enter a word to lookup (or just <enter> to quit): "
    while True:
        word = raw_input(prompt)
        if word == "":
            break
        lookupWord = word.lower() # dictionary is all-lowercase
        if lookupWord in knownWords:
            print("%s is in the dictionary!" % word)
        else:
            print("%s is NOT in the dictionary!!!" % word)
    print("bye!")
# Start the interactive dictionary lookup as soon as this file is run.
testDictionary()
#########################################################
# LevenshteinEditDistance
# From http://code.activestate.com/recipes/572156-bk-tree/
# Required for spell checker (below)
#########################################################
def levenshteinEditDistance(s1, s2):
    """Return the Levenshtein edit distance between s1 and s2.

    The distance is the smallest number of single-element insertions,
    deletions and substitutions that turns one sequence into the other.
    Only two rows of the dynamic-programming table are kept at a time.
    """
    if s1 == s2:
        return 0
    # Make s1 the shorter sequence so that each row spans the longer one.
    if len(s1) > len(s2):
        s1, s2 = s2, s1
    # previous[j] is the distance between the prefix of s1 handled so far
    # and the first j elements of s2.  list() keeps the row assignable on
    # Python 3, where range() is not a list.
    previous = list(range(len(s2) + 1))
    current = [0] * len(previous)
    for i, c1 in enumerate(s1, 1):
        # Turning the first i elements of s1 into nothing takes i deletions.
        current[0] = i
        for j, c2 in enumerate(s2):
            if c1 == c2:
                # Matching elements cost nothing: carry the diagonal value.
                current[j + 1] = previous[j]
            else:
                # One edit on top of the cheapest of the three neighbours
                # (left, diagonal, above).
                current[j + 1] = 1 + min(current[j], previous[j], previous[j + 1])
        # The finished row becomes the previous row; the old one is reused.
        previous, current = current, previous
    return previous[-1]
import time
def spellCheck(targetWord, wordList, maxLengthDifference=1):
    """Return the words in wordList that are closest to targetWord.

    Only words whose length differs from targetWord's by at most
    maxLengthDifference are considered (raise it to 2 or 3 for more
    matches).  Among those, every word with the smallest Levenshtein edit
    distance is returned, in wordList order.  The result is an empty list
    when no word passes the length filter, including when wordList is
    empty.

    The elapsed time of the search is printed before returning.
    """
    # time.clock does not exist on newer Python 3 versions; use
    # perf_counter when it is available and time.clock otherwise.
    timer = getattr(time, "perf_counter", None) or time.clock
    time0 = timer()
    closestList = []
    # No baseline yet: the first word that passes the filter sets it.
    # Seeding it from wordList[0] could let a word that fails the length
    # filter hide every real candidate.
    closestDistance = None
    targetLength = len(targetWord)
    for word in wordList:
        if abs(len(word) - targetLength) > maxLengthDifference:
            continue
        distance = levenshteinEditDistance(targetWord, word)
        if closestDistance is None or distance < closestDistance:
            # Strictly better match: start a new list of closest words.
            closestDistance = distance
            closestList = [word]
        elif distance == closestDistance:
            closestList.append(word)
    time1 = timer()
    print("time:  %s" % (time1 - time0))
    return closestList
def testSpellChecker():
    """Interactively spell-check words against words.txt on the Desktop.

    Prompts repeatedly for a word.  If its lowercase form is in the word
    list this is reported; otherwise the nearest words found by spellCheck
    are printed.  An empty entry ends the loop and prints "bye!".  If the
    word list cannot be read, the function returns immediately.
    """
    wordList = readTextFileAsList(getDesktopPath("words.txt"))
    if wordList is None:
        # readTextFile has already printed the "File does not exist" message;
        # without a word list there is nothing to check against.
        return
    # Build a set once so each lookup does not scan the whole list;
    # spellCheck still receives the original list.
    knownWords = set(wordList)
    prompt = "Enter a word to lookup (or just <enter> to quit): "
    while True:
        word = raw_input(prompt)
        if word == "":
            break
        lookupWord = word.lower() # dictionary is all-lowercase
        if lookupWord in knownWords:
            print("%s is in the dictionary!" % word)
        else:
            print("%s is NOT in the dictionary!!!" % word)
            # Run the search before printing the label, so the timing line
            # printed by spellCheck does not land in the middle of the
            # "nearest words:" line.
            suggestions = spellCheck(lookupWord, wordList)
            print("nearest words: %s" % (suggestions,))
    print("bye!")
# Start the interactive spell checker as soon as this file is run.
testSpellChecker()