(!!!Noble Peace Prize nomination here please!!!)
More importantly, I wanted to spell out each of the steps to make it easier to understand the concept. I also wanted to use native libraries so that you can dive into the spelling corrector concepts quickly without first having to learn other technologies like LINQ etc.
Please feel free to post any comments, suggestions for improvement and any bugs you find.
Copy the text below into a VB class file and get the BIG.txt file (http://norvig.com/big.txt).
' VB .NET Implementation of Peter Norvig's Spelling Corrector.
' VERSION 1.0, last updated 03 Mar 10.
' Peter Norvig's original article located at
' Copyright (c) 2010 Shantanu Inamdar
' Permission is hereby granted, free of charge, to any person obtaining a copy
' of this software and associated documentation files (the "Software"), to deal
' in the Software without restriction, including without limitation the rights
' to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
' copies of the Software, and to permit persons to whom the Software is
' furnished to do so, subject to the following conditions:
' The above copyright notice and this permission notice shall be included in
' all copies or substantial portions of the Software.
' THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
' EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
' MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
' IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
' ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
' CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
' WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
' Some Background: The idea behind this implementation was not build the shortest
' or the fastest version of the spelling corrector.
' I looked at the list of languages that this was implemented in and found VB .NET
' missing. So I decided to fill that void. I also thought of it as a way to unite
' the VB .NET programmers with the others.
' (!!!Noble Peace Prize nomination here please!!!)
' More importantly, I wanted to spell out each of the steps to make it easier to
' understand the concept. I also wanted to use native libraries
' so that you can dive into the spelling corrector concepts quickly
' without first having to learn other technologies like LINQ etc.
' Please feel free to post any comments, suggestions for improvement and any bugs
' you find.
Public Class SimpleSpellingCorrector
Private Const COMMAASCII As Integer = 44
Private Const ALPHABET As String = "abcdefghijklmnopqrstuvwxyz"
Private nWords As New Dictionary(Of String, Integer)
Public Function getCorrection(ByVal word As String) As String
Dim c As String = String.Empty
Dim maxc As Integer = -1
Dim wc As Integer = 0
Dim candidates As System.Collections.Generic.List(Of String)
'Train the model with word occurences in our "dictionary"
nWords = getModel()
'Choose the most probable word with the shortest edit distance
For ed As Integer = 0 To 2
'If we have found a correction, exit loop
If String.Empty <> c Then
'Otherwise, start over
c = String.Empty
wc = 0
maxc = -1
candidates = getCandidates(word, ed)
For Each cd As String In candidates
wc = getWordCount(cd)
If wc > maxc Then
maxc = wc
c = cd
'If no match is found, just send the same word back!
If String.Empty = c Then
c = word
'Get the count of how often the word is found in our "dictionary"
'Return 1 for a "new" word
Private Function getWordCount(ByVal word As String) As Integer
If nWords.ContainsKey(word) Then
'Get the big.txt file from http://norvig.com/big.txt
Private Function getModel() As Dictionary(Of String, Integer)
Dim model As New Dictionary(Of String, Integer)
For Each f As System.Text.RegularExpressions.Match In System.Text.RegularExpressions.Regex.Matches(System.IO.File.ReadAllText("big.txt").ToLower(), "[a-z]+", System.Text.RegularExpressions.RegexOptions.Compiled)
If model.ContainsKey(f.Value) Then
model.Item(f.Value) += 1
'Get Candidate words that are at the given edit distance
Private Function getCandidates(ByVal word As String, ByVal edits As Integer) As List(Of String)
Dim c As New List(Of String)
Select Case edits
Dim wl As New List(Of String)
'Get words that are at an edit distance of 1
Private Function getEdits1(ByVal word As String) As List(Of String)
Dim e1 As New List(Of String)
Dim splits As New List(Of String)
'Create a list of comma separated tuples of all possible ways to split the word
For i As Integer = 0 To word.Length
splits.Add(word.Substring(0, i) & "," & word.Substring(i))
For Each s As String In splits
If String.Empty <> s.Split(Chr(COMMAASCII))(1) Then
e1.Add(s.Split(Chr(COMMAASCII))(0) & s.Split(Chr(COMMAASCII))(1).Substring(1))
If 1 <> s.Split(Chr(COMMAASCII))(1) Then
e1.Add(s.Split(Chr(COMMAASCII))(0) & c & s.Split(Chr(COMMAASCII))(1).Substring(1))
For Each c As Char In ALPHABET.ToCharArray
e1.Add(s.Split(Chr(COMMAASCII))(0) & c & s.Split(Chr(COMMAASCII))(1))
'Get Known words that have edit distance of 2
Private Function getKnownEdits2(ByVal word As String) As List(Of String)
Dim ke2 As New List(Of String)
For Each e1 As String In getEdits1(word)
For Each e2 As String In getEdits1(e1)
If nWords.ContainsKey(e2) Then
'Get Known words; get rid of unknown words
Private Function getKnown(ByRef words As List(Of String)) As List(Of String)
Dim k As New List(Of String)
For Each w As String In words
If nWords.ContainsKey(w) Then