Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

C Program (n-gram program) Can somebody help me with this C Program? You have to

ID: 3829056 • Letter: C

Question

C Program (n-gram program)

Can somebody help me with this C Program?

You have to write a program which counts the number of 3-grams in a string of text.

Your program will report all the 3-grams and the count for each one, sorted in descending order by count.  

Note: Regarding the punctuations and stop characters (e.g., full stop, exclamation mark.question mark), you can ignore them.

A 3-gram is a counting of three consecutive words like the sentence: 'The man went to the dog park'

'The man went' is the first gram, 'man went to' is 2nd gram, 3rd gram 'went to the' etc. Increasing one spot index in array for each gram.

Thanks!

Explanation / Answer

#include <iostream>

#include <ctype.h>

#include <algorithm>

#include <string.h>

using namespace std;

const unsigned int MAX_STR_LEN = 250000;

const unsigned short NGRAM = 3;

const unsigned int NGRAMS = MAX_STR_LEN-NGRAM;

char ngrams[NGRAMS][NGRAM+1] = { 0 };

bool notTerminated(const char *ptr) {

for (int i=0; i<NGRAM; i++) {

if (ptr[i] == '') {

return false;

}

}

return true;

}

bool noSpace(const char *ptr) {

for (int i=0; i<NGRAM; i++) {

if (!isalpha(ptr[i])) {

return false;

}

}

return true;

}

int strCmp( const void* a, const void* b)

{

char const *char_a = *(char const **)a;

char const *char_b = *(char const **)b;

return strcmp(char_a, char_b);

}

struct FreqNode {

const char* str;

unsigned int freq;

FreqNode *prev;

FreqNode *next;

};

void unlink(FreqNode *node) {

FreqNode *prev = node->prev;

FreqNode *next = node->next;

if (prev && prev->next) {

prev->next = next;

}

if (next && next->prev) {

next->prev = prev;

}

node->next = 0;

node->prev= 0;

}

void insertAfter(FreqNode *insert, FreqNode *after) {

FreqNode *origNext = after->next;

after->next = insert;

insert->prev = after;

origNext->prev = insert;

insert->next = origNext;

}

void insertFreqNode(FreqNode *orig, const char* ptr) {

int freq = 0;

FreqNode *head = orig;

FreqNode *origHead = head;

bool needInsert= true;

while (head->next) {

freq = origHead->next->freq;

if (strcmp(head->next->str, ptr) == 0) {

head->next->freq += 1;

if (head->next->freq >= freq

&& origHead->next != head->next) {

FreqNode *link = head->next;

unlink(link);

insertAfter(link, origHead);

}

needInsert = false;

break;

}

head = head->next;

}

if (needInsert) {

head->next = new FreqNode();

head->next->prev = head;

head->next->next = 0;

head->next->str = ptr;

head->next->freq = 1;

}

}

int main()

{

for (int i=0; i<NGRAMS; i++) {

ngrams[i][0] = ngrams[i][1] = ngrams[i][2] = ngrams[i][3] = '';

}

const char *str = "thibbbs is a test and aaa it may haaave some abbba reptetitions thibbbs is a test and aaa it may ha";

cout << "Length: " << strlen(str) << endl;

const char *ptr = str;

int idx = 0;

while (notTerminated(ptr)) {

if (noSpace(ptr)) {

for (int i=0; i<NGRAM; i++) {

ngrams[idx][i] = ptr[i];

}

ngrams[idx][NGRAM] = '';

idx++;

}

ptr++;

}

FreqNode head = { "HEAD", 0, 0, 0 };

for (int i=0; i<NGRAMS; i++) {

if (ngrams[i][0] == '') break;

insertFreqNode(&head, ngrams[i]);

}

FreqNode *headPtr = &head;

int iter = 0;

while (headPtr->next && iter++<10) {

cout << headPtr->next->str << " " << headPtr->next->freq << endl;

headPtr = headPtr->next;

}

cout << "Winner is: " << head.next->str << " " << " with " << head.next->freq << " occurrences" << endl;

headPtr = head.next;

while (headPtr) {

FreqNode *deleteNode = headPtr;

headPtr = headPtr->next;

delete deleteNode;

}

return 0;

}.