This program takes the name of an input file from the command line. This file co
ID: 3675650 • Letter: T
Question
This program takes the name of an input file from the command line. This file contains numbers in the range of 0 to 99.
Your program should print out the number(s) that occur the most often within this file. Run your program via the command ./a.out inputFile
For example, if the file contained the numbers below then you would identify 7 and 90 and 93 as the most occurring numbers.
1 93 7 21 42 59 90 21 90 83 1 90 7 93 63 93 2 97 7
Hint–declare an array of 100 items that holds the “count” of each integer. Each time you see a given number in the input, increment that location in your “count” array by one. After reading all the values in the file, find the maximum of the “count” values and then print each element that has that count.
Explanation / Answer
/ A program to find k most frequent words in a file
#include <stdio.h>
#include <string.h>
#include <ctype.h>
# define MAX_CHARS 26
# define MAX_WORD_SIZE 30
// A Trie node
struct TrieNode
{
bool isEnd; // indicates end of word
unsigned frequency; // the number of occurrences of a word
int indexMinHeap; // the index of the word in minHeap
TrieNode* child[MAX_CHARS]; // represents 26 slots each for 'a' to 'z'.
};
// A Min Heap node
struct MinHeapNode
{
TrieNode* root; // indicates the leaf node of TRIE
unsigned frequency; // number of occurrences
char* word; // the actual word stored
};
// A Min Heap
struct MinHeap
{
unsigned capacity; // the total size a min heap
int count; // indicates the number of slots filled.
MinHeapNode* array; // represents the collection of minHeapNodes
};
// A utility function to create a new Trie node
TrieNode* newTrieNode()
{
// Allocate memory for Trie Node
TrieNode* trieNode = new TrieNode;
// Initialize values for new node
trieNode->isEnd = 0;
trieNode->frequency = 0;
trieNode->indexMinHeap = -1;
for( int i = 0; i < MAX_CHARS; ++i )
trieNode->child[i] = NULL;
return trieNode;
}
// A utility function to create a Min Heap of given capacity
MinHeap* createMinHeap( int capacity )
{
MinHeap* minHeap = new MinHeap;
minHeap->capacity = capacity;
minHeap->count = 0;
// Allocate memory for array of min heap nodes
minHeap->array = new MinHeapNode [ minHeap->capacity ];
return minHeap;
}
// A utility function to swap two min heap nodes. This function
// is needed in minHeapify
void swapMinHeapNodes ( MinHeapNode* a, MinHeapNode* b )
{
MinHeapNode temp = *a;
*a = *b;
*b = temp;
}
// This is the standard minHeapify function. It does one thing extra.
// It updates the minHapIndex in Trie when two nodes are swapped in
// in min heap
void minHeapify( MinHeap* minHeap, int idx )
{
int left, right, smallest;
left = 2 * idx + 1;
right = 2 * idx + 2;
smallest = idx;
if ( left < minHeap->count &&
minHeap->array[ left ]. frequency <
minHeap->array[ smallest ]. frequency
)
smallest = left;
if ( right < minHeap->count &&
minHeap->array[ right ]. frequency <
minHeap->array[ smallest ]. frequency
)
smallest = right;
if( smallest != idx )
{
// Update the corresponding index in Trie node.
minHeap->array[ smallest ]. root->indexMinHeap = idx;
minHeap->array[ idx ]. root->indexMinHeap = smallest;
// Swap nodes in min heap
swapMinHeapNodes (&minHeap->array[ smallest ], &minHeap->array[ idx ]);
minHeapify( minHeap, smallest );
}
}
// A standard function to build a heap
void buildMinHeap( MinHeap* minHeap )
{
int n, i;
n = minHeap->count - 1;
for( i = ( n - 1 ) / 2; i >= 0; --i )
minHeapify( minHeap, i );
}
// Inserts a word to heap, the function handles the 3 cases explained above
void insertInMinHeap( MinHeap* minHeap, TrieNode** root, const char* word )
{
// Case 1: the word is already present in minHeap
if( (*root)->indexMinHeap != -1 )
{
++( minHeap->array[ (*root)->indexMinHeap ]. frequency );
// percolate down
minHeapify( minHeap, (*root)->indexMinHeap );
}
// Case 2: Word is not present and heap is not full
else if( minHeap->count < minHeap->capacity )
{
int count = minHeap->count;
minHeap->array[ count ]. frequency = (*root)->frequency;
minHeap->array[ count ]. word = new char [strlen( word ) + 1];
strcpy( minHeap->array[ count ]. word, word );
minHeap->array[ count ]. root = *root;
(*root)->indexMinHeap = minHeap->count;
++( minHeap->count );
buildMinHeap( minHeap );
}
// Case 3: Word is not present and heap is full. And frequency of word
// is more than root. The root is the least frequent word in heap,
// replace root with new word
else if ( (*root)->frequency > minHeap->array[0]. frequency )
{
minHeap->array[ 0 ]. root->indexMinHeap = -1;
minHeap->array[ 0 ]. root = *root;
minHeap->array[ 0 ]. root->indexMinHeap = 0;
minHeap->array[ 0 ]. frequency = (*root)->frequency;
// delete previously allocated memoory and
delete [] minHeap->array[ 0 ]. word;
minHeap->array[ 0 ]. word = new char [strlen( word ) + 1];
strcpy( minHeap->array[ 0 ]. word, word );
minHeapify ( minHeap, 0 );
}
}
// Inserts a new word to both Trie and Heap
void insertUtil ( TrieNode** root, MinHeap* minHeap,
const char* word, const char* dupWord )
{
// Base Case
if ( *root == NULL )
*root = newTrieNode();
// There are still more characters in word
if ( *word != '' )
insertUtil ( &((*root)->child[ tolower( *word ) - 97 ]),
minHeap, word + 1, dupWord );
else // The complete word is processed
{
// word is already present, increase the frequency
if ( (*root)->isEnd )
++( (*root)->frequency );
else
{
(*root)->isEnd = 1;
(*root)->frequency = 1;
}
// Insert in min heap also
insertInMinHeap( minHeap, root, dupWord );
}
}
// add a word to Trie & min heap. A wrapper over the insertUtil
void insertTrieAndHeap(const char *word, TrieNode** root, MinHeap* minHeap)
{
insertUtil( root, minHeap, word, word );
}
// A utility function to show results, The min heap
// contains k most frequent words so far, at any time
void displayMinHeap( MinHeap* minHeap )
{
int i;
// print top K word with frequency
for( i = 0; i < minHeap->count; ++i )
{
printf( "%s : %d ", minHeap->array[i].word,
minHeap->array[i].frequency );
}
}
// The main funtion that takes a file as input, add words to heap
// and Trie, finally shows result from heap
void printKMostFreq( FILE* fp, int k )
{
// Create a Min Heap of Size k
MinHeap* minHeap = createMinHeap( k );
// Create an empty Trie
TrieNode* root = NULL;
// A buffer to store one word at a time
char buffer[MAX_WORD_SIZE];
// Read words one by one from file. Insert the word in Trie and Min Heap
while( fscanf( fp, "%s", buffer ) != EOF )
insertTrieAndHeap(buffer, &root, minHeap);
// The Min Heap will have the k most frequent words, so print Min Heap nodes
displayMinHeap( minHeap );
}
// Driver program to test above functions
int main()
{
int k = 5;
FILE *fp = fopen ("file.txt", "r");
if (fp == NULL)
printf ("File doesn't exist ");
else
printKMostFreq (fp, k);
return 0;
}
Run on IDE
/ A program to find k most frequent words in a file
#include <stdio.h>
#include <string.h>
#include <ctype.h>
# define MAX_CHARS 26
# define MAX_WORD_SIZE 30
// A Trie node
struct TrieNode
{
bool isEnd; // indicates end of word
unsigned frequency; // the number of occurrences of a word
int indexMinHeap; // the index of the word in minHeap
TrieNode* child[MAX_CHARS]; // represents 26 slots each for 'a' to 'z'.
};
// A Min Heap node
struct MinHeapNode
{
TrieNode* root; // indicates the leaf node of TRIE
unsigned frequency; // number of occurrences
char* word; // the actual word stored
};
// A Min Heap
struct MinHeap
{
unsigned capacity; // the total size a min heap
int count; // indicates the number of slots filled.
MinHeapNode* array; // represents the collection of minHeapNodes
};
// A utility function to create a new Trie node
TrieNode* newTrieNode()
{
// Allocate memory for Trie Node
TrieNode* trieNode = new TrieNode;
// Initialize values for new node
trieNode->isEnd = 0;
trieNode->frequency = 0;
trieNode->indexMinHeap = -1;
for( int i = 0; i < MAX_CHARS; ++i )
trieNode->child[i] = NULL;
return trieNode;
}
// A utility function to create a Min Heap of given capacity
MinHeap* createMinHeap( int capacity )
{
MinHeap* minHeap = new MinHeap;
minHeap->capacity = capacity;
minHeap->count = 0;
// Allocate memory for array of min heap nodes
minHeap->array = new MinHeapNode [ minHeap->capacity ];
return minHeap;
}
// A utility function to swap two min heap nodes. This function
// is needed in minHeapify
void swapMinHeapNodes ( MinHeapNode* a, MinHeapNode* b )
{
MinHeapNode temp = *a;
*a = *b;
*b = temp;
}
// This is the standard minHeapify function. It does one thing extra.
// It updates the minHapIndex in Trie when two nodes are swapped in
// in min heap
void minHeapify( MinHeap* minHeap, int idx )
{
int left, right, smallest;
left = 2 * idx + 1;
right = 2 * idx + 2;
smallest = idx;
if ( left < minHeap->count &&
minHeap->array[ left ]. frequency <
minHeap->array[ smallest ]. frequency
)
smallest = left;
if ( right < minHeap->count &&
minHeap->array[ right ]. frequency <
minHeap->array[ smallest ]. frequency
)
smallest = right;
if( smallest != idx )
{
// Update the corresponding index in Trie node.
minHeap->array[ smallest ]. root->indexMinHeap = idx;
minHeap->array[ idx ]. root->indexMinHeap = smallest;
// Swap nodes in min heap
swapMinHeapNodes (&minHeap->array[ smallest ], &minHeap->array[ idx ]);
minHeapify( minHeap, smallest );
}
}
// A standard function to build a heap
void buildMinHeap( MinHeap* minHeap )
{
int n, i;
n = minHeap->count - 1;
for( i = ( n - 1 ) / 2; i >= 0; --i )
minHeapify( minHeap, i );
}
// Inserts a word to heap, the function handles the 3 cases explained above
void insertInMinHeap( MinHeap* minHeap, TrieNode** root, const char* word )
{
// Case 1: the word is already present in minHeap
if( (*root)->indexMinHeap != -1 )
{
++( minHeap->array[ (*root)->indexMinHeap ]. frequency );
// percolate down
minHeapify( minHeap, (*root)->indexMinHeap );
}
// Case 2: Word is not present and heap is not full
else if( minHeap->count < minHeap->capacity )
{
int count = minHeap->count;
minHeap->array[ count ]. frequency = (*root)->frequency;
minHeap->array[ count ]. word = new char [strlen( word ) + 1];
strcpy( minHeap->array[ count ]. word, word );
minHeap->array[ count ]. root = *root;
(*root)->indexMinHeap = minHeap->count;
++( minHeap->count );
buildMinHeap( minHeap );
}
// Case 3: Word is not present and heap is full. And frequency of word
// is more than root. The root is the least frequent word in heap,
// replace root with new word
else if ( (*root)->frequency > minHeap->array[0]. frequency )
{
minHeap->array[ 0 ]. root->indexMinHeap = -1;
minHeap->array[ 0 ]. root = *root;
minHeap->array[ 0 ]. root->indexMinHeap = 0;
minHeap->array[ 0 ]. frequency = (*root)->frequency;
// delete previously allocated memoory and
delete [] minHeap->array[ 0 ]. word;
minHeap->array[ 0 ]. word = new char [strlen( word ) + 1];
strcpy( minHeap->array[ 0 ]. word, word );
minHeapify ( minHeap, 0 );
}
}
// Inserts a new word to both Trie and Heap
void insertUtil ( TrieNode** root, MinHeap* minHeap,
const char* word, const char* dupWord )
{
// Base Case
if ( *root == NULL )
*root = newTrieNode();
// There are still more characters in word
if ( *word != '' )
insertUtil ( &((*root)->child[ tolower( *word ) - 97 ]),
minHeap, word + 1, dupWord );
else // The complete word is processed
{
// word is already present, increase the frequency
if ( (*root)->isEnd )
++( (*root)->frequency );
else
{
(*root)->isEnd = 1;
(*root)->frequency = 1;
}
// Insert in min heap also
insertInMinHeap( minHeap, root, dupWord );
}
}
// add a word to Trie & min heap. A wrapper over the insertUtil
void insertTrieAndHeap(const char *word, TrieNode** root, MinHeap* minHeap)
{
insertUtil( root, minHeap, word, word );
}
// A utility function to show results, The min heap
// contains k most frequent words so far, at any time
void displayMinHeap( MinHeap* minHeap )
{
int i;
// print top K word with frequency
for( i = 0; i < minHeap->count; ++i )
{
printf( "%s : %d ", minHeap->array[i].word,
minHeap->array[i].frequency );
}
}
// The main funtion that takes a file as input, add words to heap
// and Trie, finally shows result from heap
void printKMostFreq( FILE* fp, int k )
{
// Create a Min Heap of Size k
MinHeap* minHeap = createMinHeap( k );
// Create an empty Trie
TrieNode* root = NULL;
// A buffer to store one word at a time
char buffer[MAX_WORD_SIZE];
// Read words one by one from file. Insert the word in Trie and Min Heap
while( fscanf( fp, "%s", buffer ) != EOF )
insertTrieAndHeap(buffer, &root, minHeap);
// The Min Heap will have the k most frequent words, so print Min Heap nodes
displayMinHeap( minHeap );
}
// Driver program to test above functions
int main()
{
int k = 5;
FILE *fp = fopen ("file.txt", "r");
if (fp == NULL)
printf ("File doesn't exist ");
else
printKMostFreq (fp, k);
return 0;
}
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.