Data Structures & C Programming: Need help identifying what is wrong with spellc
ID: 3726855 • Letter: D
Question
Data Structures & C Programming: Need help identifying what is wrong with spellchecker program for hashtables.
The code compiles and works correctly when a word is input that is spelled correctly.
The issue is when I try a word that is incorrectly spelled, the program only displays "did you mean ...? (null)" instead of the word with the lowest levenshtein distance.
Code and hashtable.c code is provided below!
/********* Spellchecker.c ***************/
#include "hashMap.h"
#include <assert.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/**
* Allocates a string for the next word in the file and returns it. This string
* is null terminated. Returns NULL after reaching the end of the file.
* @param file
* @return Allocated string or NULL.
*/
char* nextWord(FILE* file)
{
int maxLength = 16;
int length = 0;
char* word = malloc(sizeof(char) * maxLength);
while (1)
{
char c = fgetc(file);
if ((c >= '0' && c <= '9') ||
(c >= 'A' && c <= 'Z') ||
(c >= 'a' && c <= 'z') ||
c == ''')
{
if (length + 1 >= maxLength)
{
maxLength *= 2;
word = realloc(word, maxLength);
}
word[length] = c;
length++;
}
else if (length > 0 || c == EOF)
{
break;
}
}
if (length == 0)
{
free(word);
return NULL;
}
word[length] = '';
return word;
}
/**
* Loads the contents of the file into the hash map.
* @param file
* @param map
*/
void loadDictionary(FILE* file, HashMap* map)
{ // FIXME: implement
char* word = nextWord(file);
while (word) {
hashMapPut(map, word, 1);
free(word);
word = nextWord(file);
}
free(word);
}
/*********************
* min2 function
* ********************/
int min2(int a, int b) {
int min;
if (a < b)
min = a;
else
min = b;
return min;
}
/**********************************
* HELPER FUNCTION
* code sourced from https://en.wikipedia.org/wiki/Levenshtein_distance#Computing_Levenshtein_distance
* code has been modified to work with hashtables
* *********************************/
#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))
int levenshtein(char *s1, char *s2) {
unsigned int s1len, s2len, x, y, lastdiag, olddiag;
s1len = strlen(s1);
s2len = strlen(s2);
unsigned int column[s1len+1];
for (y = 1; y <= s1len; y++)
column[y] = y;
for (x = 1; x <= s2len; x++) {
column[0] = x;
for (y = 1, lastdiag = x-1; y <= s1len; y++) {
olddiag = column[y];
column[y] = MIN3(column[y] + 1, column[y-1] + 1, lastdiag + (s1[y-1] == s2[x-1] ? 0 : 1));
lastdiag = olddiag;
}
}
return(column[s1len]);
}
/**
* Prints the concordance of the given file and performance information. Uses
* the file input1.txt by default or a file name specified as a command line
* argument.
* @param argc
* @param argv
* @return
*/
int main(int argc, const char** argv)
{ // FIXME: implement
HashMap* map = hashMapNew(1000);
FILE* file = fopen("dictionary.txt", "r");
//if file not found
if(file==NULL)
{
perror("Error");
}
clock_t timer = clock();
loadDictionary(file, map);
timer = clock() - timer;
printf("Dictionary loaded in %f seconds ", (float)timer / (float)CLOCKS_PER_SEC);
fclose(file);
char inputBuffer[256];
int quit = 0;
while (!quit) //Step 4: continue to prompt user for word until they type quit
{
printf("Enter a word or "quit" to quit: ");
scanf("%s", inputBuffer);
// Implement the spell checker code here..
/*****
* Compare input Buffer to words in the dictionary, computing their Levenshtein distance.
* Store that distance as the value for each key in the table
* *********************/
for (int i = 0; i < map->capacity; i++)
{
HashLink* link = map->table[i];
if (link != NULL)
{
while (link != NULL)
{
char* word = link->key;
int minVal = levenshtein(word, inputBuffer);
hashMapPut(map, word, minVal);
link = link->next;
}
}
}
/*****
* Traverse down hash table, checking each bucket.
* Jump out if you find an exact matching dictionary word
* print a message that "word spelled correctly"
* **********************/
if (hashMapContainsKey(map, inputBuffer))
{
printf("That word is spelled correctly! ");
}
/************
* NEED HELP WITH THIS CODE!!!
* If the input Buffer did not match any of the dictionary words exactly,
* generate an array of 5 words that are the closest matches to input Buffer based
* on the LOWEST Levenshtein distance.
* print the array INCLUDING the message "did you mean ...?"
* ********************/
else
{
const char* array[6];
for(int idx = 0; idx < 6; idx++){
int i = 0;
HashLink* link = map->table[i];
HashLink* link2 = map->table[i+1];
while (link != NULL && link2 != NULL) {
int val1 = *(hashMapGet(map, link->key));
int val2 =*(hashMapGet(map, link2->key));
int minim = min2(val1, val2);
if(minim == val1) {
array[idx] = link->key;
}
link = link->next;
link2 = link->next;
}
}
/* Print the new map */
for (int i = 0; i < 6; i++)
{
printf("Did you mean ...? %s ", array[i]);
}
}
if (strcmp(inputBuffer, "quit") == 0)
{
quit = 1;
}
}
hashMapDelete(map);
return 0;
}
/***************** HashMap.c *****************/
#include "hashMap.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>
int hashFunction1(const char* key)
{
int r = 0;
for (int i = 0; key[i] != ''; i++)
{
r += key[i];
}
return r;
}
int hashFunction2(const char* key)
{
int r = 0;
for (int i = 0; key[i] != ''; i++)
{
r += (i + 1) * key[i];
}
return r;
}
/**
* Creates a new hash table link with a copy of the key string.
* @param key Key string to copy in the link.
* @param value Value to set in the link.
* @param next Pointer to set as the link's next.
* @return Hash table link allocated on the heap.
*/
HashLink* hashLinkNew(const char* key, int value, HashLink* next)
{
HashLink* link = malloc(sizeof(HashLink));
link->key = malloc(sizeof(char) * (strlen(key) + 1));
strcpy(link->key, key);
link->value = value;
link->next = next;
return link;
}
/**
* Free the allocated memory for a hash table link created with hashLinkNew.
* @param link
*/
static void hashLinkDelete(HashLink* link)
{
free(link->key);
free(link);
}
/**
* Initializes a hash table map, allocating memory for a link pointer table with
* the given number of buckets.
* @param map
* @param capacity The number of table buckets.
*/
void hashMapInit(HashMap* map, int capacity)
{
map->capacity = capacity;
map->size = 0;
map->table = malloc(sizeof(HashLink*) * capacity);
for (int i = 0; i < capacity; i++)
{
map->table[i] = NULL;
}
}
/**
* Removes all links in the map and frees all allocated memory. You can use
* hashLinkDelete to free the links.
* @param map
*/
void hashMapCleanUp(HashMap* map)
{
// FIXME: implement
for (int i = 0; i < map->capacity; i++) {
HashLink* link = map->table[i];
map->table[i] = NULL;
while (link != NULL)
{
HashLink* temp = link;
link = link->next;
hashLinkDelete(temp);
}
free(map->table[i]);
}
free(map->table);
}
/**
* Creates a hash table map, allocating memory for a link pointer table with
* the given number of buckets.
* @param capacity The number of buckets.
* @return The allocated map.
*/
HashMap* hashMapNew(int capacity)
{
HashMap* map = malloc(sizeof(HashMap));
hashMapInit(map, capacity);
return map;
}
/**
* Removes all links in the map and frees all allocated memory, including the
* map itself.
* @param map
*/
void hashMapDelete(HashMap* map)
{
hashMapCleanUp(map);
free(map);
}
/**
* Returns a pointer to the value of the link with the given key. Returns NULL
* if no link with that key is in the table.
*
* Use HASH_FUNCTION(key) and the map's capacity to find the index of the
* correct linked list bucket. Also make sure to search the entire list.
*
* @param map
* @param key
* @return Link value or NULL if no matching link.
*/
int* hashMapGet(HashMap* map, const char* key)
{
// FIXME: implement
int idx = HASH_FUNCTION(key) % (map->capacity);
HashLink* link = map->table[idx];
while (link != NULL)
{
if (strcmp(link->key, key) == 0)
{
return &(link->value);
}
link = link->next;
}
return NULL;
}
/**
* Resizes the hash table to have a number of buckets equal to the given
* capacity. After allocating the new table, all of the links need to be
* rehashed into it because the capacity has changed.
*
* Remember to free the old table and any old links if you use hashMapPut to
* rehash them.
*
* @param map
* @param capacity The new number of buckets.
*/
void resizeTable(HashMap* map, int capacity)
{ // FIXME: implement
// Save old Map information
int oldCap = map->capacity;
HashLink ** oldTable = map->table;
// Initialize new map data to replace old map data
hashMapInit(map, capacity);
// Rehash old map key/value pairs to new map
for (int i = 0; i < oldCap; i++)
{
HashLink * link = oldTable[i];
while (link != NULL)
{
hashMapPut(map, link->key, link->value);
link = link->next;
}
}
// Free allocated data in old table
for (int i = 0; i < oldCap; i++)
{
HashLink* link = oldTable[i];
oldTable[i] = NULL;
while (link != NULL)
{
HashLink* temp = link;
link = link->next;
hashLinkDelete(temp);
}
free(oldTable[i]);
}
free(oldTable);
}
/**
* Updates the given key-value pair in the hash table. If a link with the given
* key already exists, this will just update the value. Otherwise, it will
* create a new link with the given key and value and add it to the table
* bucket's linked list. You can use hashLinkNew to create the link.
*
* Use HASH_FUNCTION(key) and the map's capacity to find the index of the
* correct linked list bucket. Also make sure to search the entire list.
*
* @param map
* @param key
* @param value
*/
void hashMapPut(HashMap* map, const char* key, int value)
{ // FIXME: implement
// Resize table if exceeds MAX_TABLE_LOAD
if (hashMapTableLoad(map) >= MAX_TABLE_LOAD)
{
resizeTable(map, 2 * map->capacity);
}
// Compute index
int idx = HASH_FUNCTION(key) % (map->capacity);
if (idx < 0)
{
idx += map->capacity;
}
// Add to bucket
HashLink* link = map->table[idx];
HashLink* newLink = NULL;
if (link == NULL) {
// Bucket is currently empty
newLink= hashLinkNew(key, value, NULL);
map->table[idx] = newLink;
map->size++;
return;
}
else
{ //bucket contains @ least 1 link
while (link != NULL)
{
if (strcmp(link->key, key) == 0)
{ //link w/ key already exists
link->value = value;
return;
}
link = link->next;
}
// Link with given key does not already exist, create new Link
newLink = hashLinkNew(key, value, map->table[idx]);
map->table[idx] = newLink;
map->size++;
return;
}
}
/**
* Removes and frees the link with the given key from the table. If no such link
* exists, this does nothing. Remember to search the entire linked list at the
* bucket. You can use hashLinkDelete to free the link.
* @param map
* @param key
*/
void hashMapRemove(HashMap* map, const char* key)
{
// FIXME: implement
if (!hashMapContainsKey(map, key)) {
printf("Not in map ");
return;
}
int idx = HASH_FUNCTION(key) % (map->capacity);
HashLink* cur = map->table[idx];
HashLink* last = map->table[idx];
if (cur == NULL) {
printf("No links in bucket to remove ");
}
while (cur != NULL)
{
if (strcmp(cur->key, key) == 0)
{ //printf("Found key/link to remove ");
if (cur == map->table[idx])
{
//printf("Link to remove is first link ");
map->table[idx] = cur->next;
hashLinkDelete(cur);
map->size--;
cur = 0;
}
else
{
last->next = cur->next;
hashLinkDelete(cur);
map->size--;
cur = 0;
}
}
else
{
last = cur;
cur = cur->next;
}
}
}
/**
* Returns 1 if a link with the given key is in the table and 0 otherwise.
*
* Use HASH_FUNCTION(key) and the map's capacity to find the index of the
* correct linked list bucket. Also make sure to search the entire list.
*
* @param map
* @param key
* @return 1 if the key is found, 0 otherwise.
*/
int hashMapContainsKey(HashMap* map, const char* key)
{
// FIXME: implement
int idx = HASH_FUNCTION(key) % (map->capacity);
HashLink* link = map->table[idx];
while (link != NULL)
{
if (strcmp(link->key, key) == 0)
{
return 1;
}
link = link->next;
}
return 0;
}
/**
* Returns the number of links in the table.
* @param map
* @return Number of links in the table.
*/
int hashMapSize(HashMap* map)
{ // FIXME: implement
return map->size;
}
/**
* Returns the number of buckets in the table.
* @param map
* @return Number of buckets in the table.
*/
int hashMapCapacity(HashMap* map)
{ // FIXME: implement
return map->capacity;
}
/**
* Returns the number of table buckets without any links.
* @param map
* @return Number of empty buckets.
*/
int hashMapEmptyBuckets(HashMap* map)
{ // FIXME: implement
int emptyBuckets = 0;
for (int i = 0; i < map->capacity; i++)
{
HashLink* link = map->table[i];
if (link == NULL)
{
emptyBuckets++;
}
}
return emptyBuckets;
}
/**
* Returns the ratio of (number of links) / (number of buckets) in the table.
* Remember that the buckets are linked lists, so this ratio tells you nothing
* about the number of empty buckets. Remember also that the load is a floating
* point number, so don't do integer division.
* @param map
* @return Table load.
*/
float hashMapTableLoad(HashMap* map)
{ // FIXME: implement
float links = (float)map->size;
float buckets = (float)map->capacity;
return (links/buckets);
}
/**
* Prints all the links in each of the buckets in the table.
* @param map
*/
void hashMapPrint(HashMap* map)
{
for (int i = 0; i < map->capacity; i++)
{
HashLink* link = map->table[i];
if (link != NULL)
{
printf(" Bucket %i ->", i);
while (link != NULL)
{
printf(" (%s, %d) ->", link->key, link->value);
link = link->next;
}
}
}
printf(" ");
}
Explanation / Answer
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "dictionary.h"
// Number of linked lists in hash table.
#define HASH_SIZE 2000
// Each element in linked list is a node. Node contains
// a word and a pointer to the next node.
typedef struct node
{
char word[LENGTH + 1];
struct node* next;
}
node;
// Hash table is an array of linked lists.
node* hashtable[HASH_SIZE];
int hash_function(const char* word);
// Tracks number of words in dictionary.
int number_of_words = 0;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char* word)
{
int word_length = strlen(word);
char lower_word[LENGTH+1];
// Convert word to lowercase to accurately compare to hash table.
for (int i = 0; i < word_length; i++)
{
// If character is uppercase, make it lowercase.
if(isupper(word[i]))
{
lower_word[i] = tolower(word[i]) ;
}
// Otherwise it's already lowercase or it's not a letter.
else
{
lower_word[i] = word[i];
}
}
// Add null character to end of char array.
lower_word[word_length] = '';
// Use hash function to find correct "bucket" to place word.
int bucket = hash_function(lower_word);
// Set cursor to first node in bucket.
node* cursor = hashtable[bucket];
// Until the end of the linked list is reached (cursor == NULL),
// compare each word stored in each node to lower_word. If they're
// the same, then the word is in the dictionary and is not mispelled.
// Otherwise, it is spelled incorrectly.
while (cursor != NULL)
{
if (strcmp(lower_word, cursor->word) == 0)
{
// If lowercase'd word is the same as another in the bucket,
// it's a match and return true.
return true;
}
cursor = cursor->next;
}
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char* dictionary)
{
// Initialize each value in hash table to NULL.
for(int i = 0; i < HASH_SIZE; i++)
{
hashtable[i] = NULL;
}
// Open the dictionary text file.
FILE* the_dictionary;
the_dictionary = fopen(dictionary, "r");
// Check if dictionary opened correctly.
if (the_dictionary == NULL)
{
printf("Failed to load dictionary");
return false;
}
char buffer[LENGTH+1];
// Loop through file until end of file is reached.
// Save each word in buffer.
while (fscanf(the_dictionary, "%s", buffer) > 0)
{
// Allocate memory for a new node.
node* new_node = malloc(sizeof(node));
// Set node's next pointer to NULL.
new_node->next = NULL;
// Set node's word to value stored in buffer.
strcpy(new_node->word, buffer);
// Run word through hash function to set bucket in hash table.
int bucket = hash_function(new_node->word);
// If it's the first node being added to the bucket, replace
// the NULL value with the new node.
if (hashtable[bucket] == NULL)
{
hashtable[bucket] = new_node;
}
// Otherwise set new node's pointer to the first node in the linked list.
// Then set new node to be the first node in the linked list.
else
{
new_node->next = hashtable[bucket];
hashtable[bucket] = new_node;
}
// Track number of words in dictionary.
number_of_words++;
}
// Done with text file. Time to close it.
fclose(the_dictionary);
// Everything seems to have gone well, return true.
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return number_of_words;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
// Iterate over all linked lists in hash table. Set
// cursor to point at each one's location in memory.
for (int i = 0; i < HASH_SIZE;i++)
{
node* cursor = hashtable[i];
while (cursor != NULL)
{
// Set temporary pointer to point at cursor's
// location in memory. Move cursor to the next node
// so we don't lose track of it before freeing
// the current node's (temp's) memory.
node* temp = cursor;
cursor = cursor->next;
free(temp);
}
}
return true;
}
// Maps a word to an integer value to place it in the hash table.
// Sum the value of each character in the word, then find the
// remainder after dividing by the size of the hash table.
int hash_function(const char* word)
{
int sum = 0;
int word_length = strlen(word);
for (int i = 0; i < word_length; i++)
{
sum += word[i];
}
int bucket = sum % HASH_SIZE;
return bucket;
}
Related Questions
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.