Task: Suppose you want to use pattern matching to identify email messages that a
ID: 3738306 • Letter: T
Question
Task: Suppose you want to use pattern matching to identify email messages that are spam, given a collection of emails. That is, L = { the language of patterns found in spam email }. Specifications: Your programming assignment is to design and implement an FA that accepts strings such as "free access", "free software", "free vacation", "free trials", and "win/winner(s)". These strings must be in the body of the message, not the subject. Thus, the language to be accepted by your automaton is as follows: L = { "free access", "free software", "free vacation", "free trials", "win", "winner", "winners", "winnings" }. The body of the message may or may not have these strings within quotation marks (" "). Your automaton must accept these strings both within and without quotation marks. The implementation of the FA must simulate the FA by reading an input symbol, computing the transition function, and displaying the state or set of states the FA reaches when processing this symbol. Your FA must process one symbol at a time, not words or phrases at a time. Input: A text file named messagefile.txt with XML tags as shown below. Each message is a document identified by msgN, where N is any integer: <DOC> <DOCID> msgN </DOCID> Subject body of the email </DOC>
Explanation / Answer
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
namespace {

// A spam keyword paired with the exact message printed when it is detected.
struct SpamKeyword
{
    const char* text;
    const char* message;
};

// Keywords that only count as spam when the previous word was exactly "free".
// The assignment's language contains "free trials"; the earlier revision of
// this program searched for the misspelling "trails".
const SpamKeyword kAfterFree[] = {
    {"access",   "Found the spam word free access: "},
    {"software", "Found the spam word free software: "},
    {"vacation", "Found the spam word free vacation: "},
    {"trials",   "Found the spam word free trials: "},
};

// Keywords that are spam on their own.
const SpamKeyword kStandalone[] = {
    {"win",      "Spam word win found: "},
    {"winner",   "Found the spam word is winner: "},
    {"winners",  "Found the spam word winners: "},
    {"winnings", "Found the spam word winnings: "},
};

// Returns true when `word`, read from offset `start`, spells `keyword` and the
// keyword is immediately followed by the end of the word or by a '"'.
bool matches_keyword(const std::string& word, std::size_t start, const char* keyword)
{
    const std::size_t len = std::strlen(keyword);
    if (word.size() < start + len || word.compare(start, len, keyword) != 0)
        return false;
    const std::size_t end = start + len;
    return end == word.size() || word[end] == '"';
}

// Prints the message (one per line) of the first entry in `table` that `word`
// matches. Returns true when an entry matched, false otherwise.
bool report_first_match(const SpamKeyword* table, std::size_t count,
                        const std::string& word, std::size_t start)
{
    for (std::size_t i = 0; i < count; ++i)
    {
        if (matches_keyword(word, start, table[i].text))
        {
            std::cout << table[i].message << '\n';
            return true;
        }
    }
    return false;
}

} // namespace

// Scans messagefile.txt one whitespace-separated word at a time and counts
// occurrences of the spam patterns "free access", "free software",
// "free vacation", "free trials", "win", "winner", "winners" and "winnings".
// A pattern may be wrapped in double quotes: a leading '"' on a word is
// skipped and a '"' right after a keyword is accepted.
//
// NOTE(review): matching is done per word over every token of the file (tags
// and subject included); it is not a symbol-by-symbol automaton trace.
//
// Returns 0 on success, 1 when messagefile.txt cannot be opened.
int main(int argc, char const *argv[])
{
    (void)argc; // command-line arguments are not used
    (void)argv;

    std::ifstream myfile("messagefile.txt");
    if (!myfile)
    {
        std::cerr << "Error opening file !! " << std::endl;
        return 1;
    }

    std::cout << "Welcome to the Finite Automata working software: " << std::endl;

    int found = 0;
    bool pending_free = false; // true when the previous word was exactly "free"
    std::string word;          // std::string: arbitrarily long tokens cannot overflow

    while (myfile >> word)
    {
        // A successful extraction never yields an empty word, so word[0] is valid.
        const std::size_t start = (word[0] == '"') ? 1 : 0;

        if (pending_free)
        {
            // Always clear the flag: a word that merely starts with a keyword
            // (e.g. "accessible") must not leave "free" pending.
            pending_free = false;
            if (report_first_match(kAfterFree,
                                   sizeof kAfterFree / sizeof kAfterFree[0],
                                   word, start))
            {
                ++found;
                continue;
            }
        }

        // "free" must end the word; a closing quote here would end the phrase
        // before its second word.
        if (word.size() == start + 4 && word.compare(start, 4, "free") == 0)
        {
            pending_free = true;
            continue;
        }

        if (report_first_match(kStandalone,
                               sizeof kStandalone / sizeof kStandalone[0],
                               word, start))
        {
            ++found;
        }
    }

    std::cout << "Total occurence of spam words in Email is: " << found << std::endl;
    return 0; // the ifstream closes itself on destruction
}
OUTPUT:
dps@machine:~/Documents/Chegg$ g++ spam_find.cpp -o spam
dps@machine:~/Documents/Chegg$ ./spam
Welcome to the Finite Automata working software:
Found the spam word free access:
Found the spam word free access:
Found the spam word free trails:
Found the spam word free trails:
Spam word win found:
Spam word win found:
Found the spam word winners:
Total occurence of spam words in Email is: 7
dps@machine:~/Documents/Chegg$ cat messagefile.txt
<DOC>
<DOCID> msgN </DOCID>
Subject
body of text email here.
free access "free access" free free trails "free trails" "win" win
winners
</DOC>.
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.