Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

This program takes the name of an input file from the command line. This file co

ID: 3675650 • Letter: T

Question

This program takes the name of an input file from the command line. This file contains numbers in the range of 0 to 99.

Your program should print out the number(s) that occur the most often within this file. Run your program via the command ./a.out inputFile

For example, if the file contained the numbers below then you would identify 7 and 90 and 93 as the most occurring numbers.

1 93 7 21 42 59 90 21 90 83 1 90 7 93 63 93 2 97 7

Hint–declare an array of 100 items that holds the “count” of each integer. Each time you see a given number in the input, increment that location in your “count” array by one. After reading all the values in the file, find the maximum of the “count” values and then print each element that has that count.

Explanation / Answer

/ A program to find k most frequent words in a file

#include <stdio.h>

#include <string.h>

#include <ctype.h>

# define MAX_CHARS 26

# define MAX_WORD_SIZE 30

// A Trie node

struct TrieNode

{

    bool isEnd; // indicates end of word

    unsigned frequency; // the number of occurrences of a word

    int indexMinHeap; // the index of the word in minHeap

    TrieNode* child[MAX_CHARS]; // represents 26 slots each for 'a' to 'z'.

};

// A Min Heap node

struct MinHeapNode

{

    TrieNode* root; // indicates the leaf node of TRIE

    unsigned frequency; // number of occurrences

    char* word; // the actual word stored

};

// A Min Heap

struct MinHeap

{

    unsigned capacity; // the total size a min heap

    int count; // indicates the number of slots filled.

    MinHeapNode* array; // represents the collection of minHeapNodes

};

// A utility function to create a new Trie node

TrieNode* newTrieNode()

{

    // Allocate memory for Trie Node

    TrieNode* trieNode = new TrieNode;

    // Initialize values for new node

    trieNode->isEnd = 0;

    trieNode->frequency = 0;

    trieNode->indexMinHeap = -1;

    for( int i = 0; i < MAX_CHARS; ++i )

        trieNode->child[i] = NULL;

    return trieNode;

}

// A utility function to create a Min Heap of given capacity

MinHeap* createMinHeap( int capacity )

{

    MinHeap* minHeap = new MinHeap;

    minHeap->capacity = capacity;

    minHeap->count = 0;

    // Allocate memory for array of min heap nodes

    minHeap->array = new MinHeapNode [ minHeap->capacity ];

    return minHeap;

}

// A utility function to swap two min heap nodes. This function

// is needed in minHeapify

void swapMinHeapNodes ( MinHeapNode* a, MinHeapNode* b )

{

    MinHeapNode temp = *a;

    *a = *b;

    *b = temp;

}

// This is the standard minHeapify function. It does one thing extra.

// It updates the minHapIndex in Trie when two nodes are swapped in

// in min heap

void minHeapify( MinHeap* minHeap, int idx )

{

    int left, right, smallest;

    left = 2 * idx + 1;

    right = 2 * idx + 2;

    smallest = idx;

    if ( left < minHeap->count &&

         minHeap->array[ left ]. frequency <

         minHeap->array[ smallest ]. frequency

       )

        smallest = left;

    if ( right < minHeap->count &&

         minHeap->array[ right ]. frequency <

         minHeap->array[ smallest ]. frequency

       )

        smallest = right;

    if( smallest != idx )

    {

        // Update the corresponding index in Trie node.

        minHeap->array[ smallest ]. root->indexMinHeap = idx;

        minHeap->array[ idx ]. root->indexMinHeap = smallest;

        // Swap nodes in min heap

        swapMinHeapNodes (&minHeap->array[ smallest ], &minHeap->array[ idx ]);

        minHeapify( minHeap, smallest );

    }

}

// A standard function to build a heap

void buildMinHeap( MinHeap* minHeap )

{

    int n, i;

    n = minHeap->count - 1;

    for( i = ( n - 1 ) / 2; i >= 0; --i )

        minHeapify( minHeap, i );

}

// Inserts a word to heap, the function handles the 3 cases explained above

void insertInMinHeap( MinHeap* minHeap, TrieNode** root, const char* word )

{

    // Case 1: the word is already present in minHeap

    if( (*root)->indexMinHeap != -1 )

    {

        ++( minHeap->array[ (*root)->indexMinHeap ]. frequency );

        // percolate down

        minHeapify( minHeap, (*root)->indexMinHeap );

    }

    // Case 2: Word is not present and heap is not full

    else if( minHeap->count < minHeap->capacity )

    {

        int count = minHeap->count;

        minHeap->array[ count ]. frequency = (*root)->frequency;

        minHeap->array[ count ]. word = new char [strlen( word ) + 1];

        strcpy( minHeap->array[ count ]. word, word );

        minHeap->array[ count ]. root = *root;

        (*root)->indexMinHeap = minHeap->count;

        ++( minHeap->count );

        buildMinHeap( minHeap );

    }

    // Case 3: Word is not present and heap is full. And frequency of word

    // is more than root. The root is the least frequent word in heap,

    // replace root with new word

    else if ( (*root)->frequency > minHeap->array[0]. frequency )

    {

        minHeap->array[ 0 ]. root->indexMinHeap = -1;

        minHeap->array[ 0 ]. root = *root;

        minHeap->array[ 0 ]. root->indexMinHeap = 0;

        minHeap->array[ 0 ]. frequency = (*root)->frequency;

        // delete previously allocated memoory and

        delete [] minHeap->array[ 0 ]. word;

        minHeap->array[ 0 ]. word = new char [strlen( word ) + 1];

        strcpy( minHeap->array[ 0 ]. word, word );

        minHeapify ( minHeap, 0 );

    }

}

// Inserts a new word to both Trie and Heap

void insertUtil ( TrieNode** root, MinHeap* minHeap,

                        const char* word, const char* dupWord )

{

    // Base Case

    if ( *root == NULL )

        *root = newTrieNode();

    // There are still more characters in word

    if ( *word != '' )

        insertUtil ( &((*root)->child[ tolower( *word ) - 97 ]),

                         minHeap, word + 1, dupWord );

    else // The complete word is processed

    {

        // word is already present, increase the frequency

        if ( (*root)->isEnd )

            ++( (*root)->frequency );

        else

        {

            (*root)->isEnd = 1;

            (*root)->frequency = 1;

        }

        // Insert in min heap also

        insertInMinHeap( minHeap, root, dupWord );

    }

}

// add a word to Trie & min heap. A wrapper over the insertUtil

void insertTrieAndHeap(const char *word, TrieNode** root, MinHeap* minHeap)

{

    insertUtil( root, minHeap, word, word );

}

// A utility function to show results, The min heap

// contains k most frequent words so far, at any time

void displayMinHeap( MinHeap* minHeap )

{

    int i;

    // print top K word with frequency

    for( i = 0; i < minHeap->count; ++i )

    {

        printf( "%s : %d ", minHeap->array[i].word,

                            minHeap->array[i].frequency );

    }

}

// The main funtion that takes a file as input, add words to heap

// and Trie, finally shows result from heap

void printKMostFreq( FILE* fp, int k )

{

    // Create a Min Heap of Size k

    MinHeap* minHeap = createMinHeap( k );

    

    // Create an empty Trie

    TrieNode* root = NULL;

    // A buffer to store one word at a time

    char buffer[MAX_WORD_SIZE];

    // Read words one by one from file. Insert the word in Trie and Min Heap

    while( fscanf( fp, "%s", buffer ) != EOF )

        insertTrieAndHeap(buffer, &root, minHeap);

    // The Min Heap will have the k most frequent words, so print Min Heap nodes

    displayMinHeap( minHeap );

}

// Driver program to test above functions

int main()

{

    int k = 5;

    FILE *fp = fopen ("file.txt", "r");

    if (fp == NULL)

        printf ("File doesn't exist ");

    else

        printKMostFreq (fp, k);

    return 0;

}

Run on IDE

/ A program to find k most frequent words in a file

#include <stdio.h>

#include <string.h>

#include <ctype.h>

# define MAX_CHARS 26

# define MAX_WORD_SIZE 30

// A Trie node

struct TrieNode

{

    bool isEnd; // indicates end of word

    unsigned frequency; // the number of occurrences of a word

    int indexMinHeap; // the index of the word in minHeap

    TrieNode* child[MAX_CHARS]; // represents 26 slots each for 'a' to 'z'.

};

// A Min Heap node

struct MinHeapNode

{

    TrieNode* root; // indicates the leaf node of TRIE

    unsigned frequency; // number of occurrences

    char* word; // the actual word stored

};

// A Min Heap

struct MinHeap

{

    unsigned capacity; // the total size a min heap

    int count; // indicates the number of slots filled.

    MinHeapNode* array; // represents the collection of minHeapNodes

};

// A utility function to create a new Trie node

TrieNode* newTrieNode()

{

    // Allocate memory for Trie Node

    TrieNode* trieNode = new TrieNode;

    // Initialize values for new node

    trieNode->isEnd = 0;

    trieNode->frequency = 0;

    trieNode->indexMinHeap = -1;

    for( int i = 0; i < MAX_CHARS; ++i )

        trieNode->child[i] = NULL;

    return trieNode;

}

// A utility function to create a Min Heap of given capacity

MinHeap* createMinHeap( int capacity )

{

    MinHeap* minHeap = new MinHeap;

    minHeap->capacity = capacity;

    minHeap->count = 0;

    // Allocate memory for array of min heap nodes

    minHeap->array = new MinHeapNode [ minHeap->capacity ];

    return minHeap;

}

// A utility function to swap two min heap nodes. This function

// is needed in minHeapify

void swapMinHeapNodes ( MinHeapNode* a, MinHeapNode* b )

{

    MinHeapNode temp = *a;

    *a = *b;

    *b = temp;

}

// This is the standard minHeapify function. It does one thing extra.

// It updates the minHapIndex in Trie when two nodes are swapped in

// in min heap

void minHeapify( MinHeap* minHeap, int idx )

{

    int left, right, smallest;

    left = 2 * idx + 1;

    right = 2 * idx + 2;

    smallest = idx;

    if ( left < minHeap->count &&

         minHeap->array[ left ]. frequency <

         minHeap->array[ smallest ]. frequency

       )

        smallest = left;

    if ( right < minHeap->count &&

         minHeap->array[ right ]. frequency <

         minHeap->array[ smallest ]. frequency

       )

        smallest = right;

    if( smallest != idx )

    {

        // Update the corresponding index in Trie node.

        minHeap->array[ smallest ]. root->indexMinHeap = idx;

        minHeap->array[ idx ]. root->indexMinHeap = smallest;

        // Swap nodes in min heap

        swapMinHeapNodes (&minHeap->array[ smallest ], &minHeap->array[ idx ]);

        minHeapify( minHeap, smallest );

    }

}

// A standard function to build a heap

void buildMinHeap( MinHeap* minHeap )

{

    int n, i;

    n = minHeap->count - 1;

    for( i = ( n - 1 ) / 2; i >= 0; --i )

        minHeapify( minHeap, i );

}

// Inserts a word to heap, the function handles the 3 cases explained above

void insertInMinHeap( MinHeap* minHeap, TrieNode** root, const char* word )

{

    // Case 1: the word is already present in minHeap

    if( (*root)->indexMinHeap != -1 )

    {

        ++( minHeap->array[ (*root)->indexMinHeap ]. frequency );

        // percolate down

        minHeapify( minHeap, (*root)->indexMinHeap );

    }

    // Case 2: Word is not present and heap is not full

    else if( minHeap->count < minHeap->capacity )

    {

        int count = minHeap->count;

        minHeap->array[ count ]. frequency = (*root)->frequency;

        minHeap->array[ count ]. word = new char [strlen( word ) + 1];

        strcpy( minHeap->array[ count ]. word, word );

        minHeap->array[ count ]. root = *root;

        (*root)->indexMinHeap = minHeap->count;

        ++( minHeap->count );

        buildMinHeap( minHeap );

    }

    // Case 3: Word is not present and heap is full. And frequency of word

    // is more than root. The root is the least frequent word in heap,

    // replace root with new word

    else if ( (*root)->frequency > minHeap->array[0]. frequency )

    {

        minHeap->array[ 0 ]. root->indexMinHeap = -1;

        minHeap->array[ 0 ]. root = *root;

        minHeap->array[ 0 ]. root->indexMinHeap = 0;

        minHeap->array[ 0 ]. frequency = (*root)->frequency;

        // delete previously allocated memoory and

        delete [] minHeap->array[ 0 ]. word;

        minHeap->array[ 0 ]. word = new char [strlen( word ) + 1];

        strcpy( minHeap->array[ 0 ]. word, word );

        minHeapify ( minHeap, 0 );

    }

}

// Inserts a new word to both Trie and Heap

void insertUtil ( TrieNode** root, MinHeap* minHeap,

                        const char* word, const char* dupWord )

{

    // Base Case

    if ( *root == NULL )

        *root = newTrieNode();

    // There are still more characters in word

    if ( *word != '' )

        insertUtil ( &((*root)->child[ tolower( *word ) - 97 ]),

                         minHeap, word + 1, dupWord );

    else // The complete word is processed

    {

        // word is already present, increase the frequency

        if ( (*root)->isEnd )

            ++( (*root)->frequency );

        else

        {

            (*root)->isEnd = 1;

            (*root)->frequency = 1;

        }

        // Insert in min heap also

        insertInMinHeap( minHeap, root, dupWord );

    }

}

// add a word to Trie & min heap. A wrapper over the insertUtil

void insertTrieAndHeap(const char *word, TrieNode** root, MinHeap* minHeap)

{

    insertUtil( root, minHeap, word, word );

}

// A utility function to show results, The min heap

// contains k most frequent words so far, at any time

void displayMinHeap( MinHeap* minHeap )

{

    int i;

    // print top K word with frequency

    for( i = 0; i < minHeap->count; ++i )

    {

        printf( "%s : %d ", minHeap->array[i].word,

                            minHeap->array[i].frequency );

    }

}

// The main funtion that takes a file as input, add words to heap

// and Trie, finally shows result from heap

void printKMostFreq( FILE* fp, int k )

{

    // Create a Min Heap of Size k

    MinHeap* minHeap = createMinHeap( k );

    

    // Create an empty Trie

    TrieNode* root = NULL;

    // A buffer to store one word at a time

    char buffer[MAX_WORD_SIZE];

    // Read words one by one from file. Insert the word in Trie and Min Heap

    while( fscanf( fp, "%s", buffer ) != EOF )

        insertTrieAndHeap(buffer, &root, minHeap);

    // The Min Heap will have the k most frequent words, so print Min Heap nodes

    displayMinHeap( minHeap );

}

// Driver program to test above functions

int main()

{

    int k = 5;

    FILE *fp = fopen ("file.txt", "r");

    if (fp == NULL)

        printf ("File doesn't exist ");

    else

        printKMostFreq (fp, k);

    return 0;

}

Hire Me For All Your Tutoring Needs
Integrity-first tutoring: clear explanations, guidance, and feedback.
Drop an Email at
drjack9650@gmail.com
Chat Now And Get Quote