import java.io.*; import java.util.StringTokenizer; public class SpellChecker { HashTable dictionary; public static void main(String argv[]) { SpellChecker checker = new SpellChecker(); } public SpellChecker() { // create a hash table. The number of buckets (initial size) should // be prime (http://www.concentric.net/~Ttwang/tech/hashsize.htm) dictionary = new HashTable(53); try { String s, token; // 2.a. Read dictionary from file BufferedReader dictReader = new BufferedReader(new FileReader("dictionary.dat")); while ((s = dictReader.readLine())!= null) { StringTokenizer st = new StringTokenizer(s); while (st.hasMoreTokens()) { token = st.nextToken(); // key and value are identical dictionary.put(token, token); } } dictReader.close(); // 2.b. Read file with commonly misspelled words. BufferedReader misspellings = new BufferedReader(new FileReader("dictionary2.dat")); String wrongSpelling, rightSpelling; while ((s = misspellings.readLine()) != null) { StringTokenizer st = new StringTokenizer(s); // should really check that both of these exist wrongSpelling = st.nextToken(); rightSpelling = st.nextToken(); // key is the wrong spelling, value is the correct spelling dictionary.put(wrongSpelling, rightSpelling); } misspellings.close(); // 2.c-f. Read in the text file to be checked & output corrected file. BufferedReader inputFile = new BufferedReader(new FileReader("testfile.dat")); BufferedWriter outputFile = new BufferedWriter(new FileWriter("checked.dat")); // Preserve original line breaks by reading in one line at a time. Note, however, that // we do not preserve other whitespace, nor do we handle punctuation. while ((s = inputFile.readLine()) != null) { StringTokenizer st = new StringTokenizer(s); while (st.hasMoreTokens()) { String inputWord = st.nextToken(); String outputWord = spellCheckWord(inputWord); outputFile.write(outputWord+" "); } outputFile.newLine(); } inputFile.close(); outputFile.close(); } catch (IOException e) { System.out.println("Error -- " + e.toString()); e.printStackTrace(); System.exit(-1); } } public String spellCheckWord(String wordToCheck) { String lookup, uninflectedWord; String word = wordToCheck.toLowerCase(); // if spelt correctly, output as is // if it is a common mispelling, output the corrected word if ((lookup = (String)dictionary.get(word)) != null) return lookup; // Remove inflections at end of word and try again ("es", "s", "ing", "ed") int length = word.length(); // first check for final 's'. if (length > 1 && word.substring(length - 1).equals("s")) { uninflectedWord = word.substring(0, length-1); if ((lookup = (String)dictionary.get(uninflectedWord)) != null) return lookup + "s"; // don't fail yet. fall through to 'es' check } if (length > 2 && word.substring(length-2).equals("es")) { uninflectedWord = word.substring(0, length-2); if ((lookup = (String)dictionary.get(uninflectedWord)) != null) return lookup + "es"; else // not found return word.toUpperCase(); } if (length > 3 && word.substring(length - 3).equals("ing")) { uninflectedWord = word.substring(0, length-3); if ((lookup = (String)dictionary.get(uninflectedWord)) != null) return lookup + "ing"; else // not found return word.toUpperCase(); } if (length > 2 && word.substring(length - 2).equals("ed")) { uninflectedWord = word.substring(0, length-2); if ((lookup = (String)dictionary.get(uninflectedWord)) != null) return lookup + "ed"; else // not found return word.toUpperCase(); } // word was not found, even after "uninflecting". Assume it is misspelt and return // it in ALL CAPS. return word.toUpperCase(); } }