Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

<<NgramAnalyser.java>> import java.util.ArrayList; import java.util.HashMap; imp

ID: 3845363 • Letter: #

Question

<<NgramAnalyser.java>>

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Set;

import java.util.HashSet;
import java.util.Arrays;

/**
 * Perform n-gram analysis of a string.
 *
 * Analyses the frequency with which distinct n-grams, of length n,
 * appear in an input string. For the purposes of all analyses of the input
 * string, the final n-1 n-grams appearing in the string should be
 * "filled out" to a length of n characters, by adding
 * a sequence of contiguous characters from the start of the string.
 * e.g. "abbc" includes "bca" and "cab" in its 3-grams
 *
 * This is the unimplemented assignment skeleton: the two-argument
 * constructor has an empty body and every accessor returns a placeholder
 * (-1 or null). The Javadoc on each member describes the intended
 * contract, not what the stub currently does.
 *
 * @author
 * @version
 */
public class NgramAnalyser
{
    /** dictionary of all distinct n-grams and their frequencies */
    private HashMap<String,Integer> ngram;

    /** number of distinct characters in the input */
    private int alphabetSize;

    /** n-gram size for this object (new field) */
    private int ngramSize;

    /**
     * Analyse the frequency with which distinct n-grams, of length n,
     * appear in an input string.
     * n-grams at the end of the string wrap to the front
     * e.g. "abbbbc" includes "bca" and "cab" in its 3-grams
     *
     * Placeholder: the body is currently empty, so no field is initialised.
     *
     * @param n size of n-grams to create
     * @param inp input string to be modelled
     */
    public NgramAnalyser(int n, String inp)
    {
        //TODO replace this line with your code
    }

    /**
     * Analyses the input text for n-grams of size 1.
     * Delegates to the two-argument constructor with n = 1.
     *
     * @param inp input string to be modelled
     */
    public NgramAnalyser(String inp)
    {
        this(1,inp);
    }

    /**
     * Intended to report the number of distinct characters in the input.
     * Placeholder: currently always returns -1.
     *
     * @return int the size of the alphabet of a given input
     */
    public int getAlphabetSize() {
        //TODO replace this line with your code
        return -1;
    }

    /**
     * Intended to report how many different n-grams occur in the input.
     * Placeholder: currently always returns -1.
     *
     * @return the total number of distinct n-grams appearing
     *         in the input text.
     */
    public int getDistinctNgramCount() {
        //TODO replace this line with your code
        return -1;
    }

    /**
     * Intended to list the different n-grams that occur in the input.
     * Placeholder: currently always returns null.
     *
     * @return Return a set containing all the distinct n-grams
     *         in the input string.
     */
    public Set<String> getDistinctNgrams() {
        //TODO replace this line with your code
        return null;
    }

    /**
     * Intended to report how many n-grams occur in the input, counting repeats.
     * Placeholder: currently always returns -1.
     *
     * @return the total number of n-grams appearing
     *         in the input text (not requiring them to be distinct)
     */
    public int getNgramCount() {
        //TODO replace this line with your code
        return -1;
    }

    /** Return the frequency with which a particular n-gram appears
     * in the text. If it does not appear at all, return 0.
     * Placeholder: currently always returns -1.
     *
     * @param ngram The n-gram to get the frequency of
     * @return The frequency with which the n-gram appears.
     */
    public int getNgramFrequency(String ngram) {
        //TODO replace this line with your code
        return -1;
    }

    /**
     * Generate a summary of the ngrams for this object.
     * Placeholder: currently always returns null.
     *
     * @return a string representation of the n-grams in the input text
     * comprising the ngram size and then each ngram and its frequency
     * where ngrams are presented in alphabetical order.
     */
    public String toString()
    {
        //TODO replace this line with your code
        return null;
    }

}

<<MarkovModel.java>>

import java.util.Set;
/**
 * Construct a Markov model of order /k/ based on an input string.
 *
 * This is the unimplemented assignment skeleton: the constructor body is
 * empty and, apart from getK(), every method returns a placeholder
 * (-1.0 or null). The Javadoc on each member describes the intended
 * contract, not what the stub currently does.
 *
 * @author
 * @version
 */
public class MarkovModel
{

    /** Markov model order parameter */
    int k;
    /** ngram model of order k */
    NgramAnalyser ngram;
    /** ngram model of order k+1 */
    NgramAnalyser n1gram;

    /**
     * Construct an order-k Markov model from string s
     * Placeholder: the body is currently empty, so no field is initialised.
     *
     * @param k int order of the Markov model
     * @param s String input to be modelled
     */
    public MarkovModel(int k, String s)
    {
        //TODO replace this line with your code
    }

    /**
     * @return order of this Markov model (the current value of field k)
     */
    public int getK()
    {
        return k;
    }

    /** Estimate the probability of a sequence appearing in the text
     * using simple estimate of freq seq / frequency front(seq).
     * Placeholder: currently always returns -1.0.
     *
     * @param sequence String of length k+1
     * @return double probability of the last letter occurring in the
     * context of the first ones or 0 if front(seq) does not occur.
     */
    public double simpleEstimate(String sequence) {
        //TODO replace this line with your code
        return -1.0;

    }
    /**
     * Calculate the Laplacian probability of string obs given this Markov model
     * Placeholder: currently always returns -1.0.
     *
     * @param sequence String of length k+1
     * @return the Laplace-smoothed probability estimate
     */
    public double laplaceEstimate(String sequence)
    {
        //TODO replace this line with your code
        return -1.0;
    }

    /**
     * Placeholder: currently always returns null.
     *
     * @return String representing this Markov model
     */
    public String toString()
    {
        //TODO replace this line with your code
        return null;
    }

}

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

<<ProjectTest.java>>

import static org.junit.Assert.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

/**
 * The test class ProjectTest for student test cases.
 * Add all new test cases to this task.
 *
 * This is the unimplemented skeleton: every test below ends in
 * assertEquals(0,1), which always fails, as a marker that the real
 * test code has not been written yet.
 *
 * @author
 * @version
 */
public class ProjectTest
{
    /**
     * Default constructor for test class ProjectTest
     */
    public ProjectTest()
    {
    }

    /**
     * Sets up the test fixture.
     *
     * Called before every test case method. Currently does nothing.
     */
    @Before
    public void setUp()
    {
    }

    /**
     * Tears down the test fixture.
     *
     * Called after every test case method. Currently does nothing.
     */
    @After
    public void tearDown()
    {
    }
  
    //TODO add new test cases from here include brief documentation
  
     /** Placeholder test (1 second timeout); always fails until implemented. */
     @Test(timeout=1000)
    public void testSensibleToStringSize() {
        assertEquals(0,1); //TODO replace with test code
    }


    /** Placeholder test (1 second timeout); always fails until implemented. */
    @Test(timeout=1000)
    public void testGetDistinctNgrams() {
         assertEquals(0,1); //TODO replace with test code
    }
  
/** Placeholder test (1 second timeout); always fails until implemented. */
@Test(timeout=1000)
    public void testLaplaceExample() {
        assertEquals(0,1); //TODO replace with test code
    }
  
    /** Placeholder test (1 second timeout); always fails until implemented. */
    @Test(timeout=1000)
    public void testSimpleExample() {
        assertEquals(0,1); //TODO replace with test code
    }


    /**
     * Placeholder test: builds an order-2 model and a ModelMatcher, then
     * always fails. NOTE(review): ModelMatcher is not defined anywhere in
     * this source; it presumably comes from a later task - confirm it
     * exists before enabling this test.
     */
    @Test
    public void testTask3example()
    {
        MarkovModel model = new MarkovModel(2,"aabcabaacaac");
        ModelMatcher match = new ModelMatcher(model,"aabbcaac");
        assertEquals(0,1); //TODO replace with test code
    }
}

Task 1: Analysing n-grams in a sample text (NgramAnalyser). For this task, you will need to complete the NgramAnalyser class, and add code to the ProjectTest class. The NgramAnalyser class analyses an input string, passed to it in the constructor, and counts all the n-grams of letters that occur in the string. An n-gram is simply a (contiguous) sequence of n items from a piece of text – the items we will be considering for this class are characters. (One could also analyse n-grams of words, syllables, or even sentences.) For instance, a 2-gram (also called a bigram) is a pair of characters, a 3-gram is a triple of characters, and so on. For example, consider the following string: […]. The alphabet […] (unique characters, including space) […] and the 2-grams in the string […] duplicates, they get the following frequencies: […]. The NgramAnalyser class is given a string as input to its constructor, and optionally an n-gram size n. It should analyse the n-grams in the input string, and record their frequencies in the ngram hash-map. It should also record the total number of distinct characters that appear in the input string (i.e., the "alphabet" used by the input string), and store this count in the field alphabetSize. […] More precisely: when counting frequencies of distinct n-grams in the input string, the final n-1 n-grams in the input string should have added to them a sequence of contiguous characters from the start of the string, in order to ensure they are of length n. For instance, in the example given above: for the purposes of calculating 2-gram frequencies, the last 2-gram in the input text would be "nt" (i.e., it is as if the text ended "...in Spaint"). And in the string "abbc", the final 3-grams would be "bca" and "cab". Given this background information, complete the following sub-tasks. You may wish to complete sub-task (h) before commencing sub-task (g). a. Fill in the header comments. b.
Complete the code for the constructor. In addition to counting n-grams, the constructor should record the number of distinct characters encountered […]. The input string and the n-gram size should be checked, and they are unsuitable if: the input string is the empty string; the input string is null; the given n-gram size is 0; or n is greater than the length of the input string.

Explanation / Answer

ProjectTest.java

import static org.junit.Assert.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.util.Set;


public class ProjectTest
{
    /**
     * Default constructor for test class ProjectTest
     */
    public ProjectTest() {

    }

    /**
     * Sets up the test fixture.
     *
     * Called before every test case method.
     */
    @Before
    public void setUp() {
    }

    /**
     * Tears down the test fixture.
     *
     * Called after every test case method.
     */
    @After
    public void tearDown() {
  
    }
  
    //TODO add new test cases from here include brief documentation
  
    /**
     * Test if the number of lines in a string output from Ngram.toString()
     * is valid (i.e equal to the size of the alphabet of that Ngram)
     * Also ensures that the sort, splice and constructor functions work
     * as required to produce the required comparison
     */
    @Test(timeout=1000)
    public void testSensibleToStringSize() {
     
        String[] stringsToTest = {"Hello my friend",
        "be",
        "Have a nice day you filthy animal",
        "asdfghjkl$$sdfghj%%",
        "2",
        "adadadadaaaaa",
        "    "};

        Integer[] ngramSizesToTest = {1, 2, 3, 4, 5};

        NgramAnalyser analysis;
        String analysisString;

        int i = ngramSizesToTest[0];
        String s = stringsToTest[5];

        if (i > s.length()) {
            try {
                analysis = new NgramAnalyser(i, s);
            } catch (IllegalArgumentException e) {
                assertEquals(0, 0);
            }
        } else {
            analysis = new NgramAnalyser(i, s);
            analysisString = analysis.toString();

            //Number of lines is equal to the number of 's plus 1
            int numberofLines = analysisString.length() -
                                analysisString.replace(" ", "").length() + 1;

            assert(numberofLines >= analysis.getAlphabetSize());

        }
    }

    /**
     * Tests various aspects of the getDistinctNgrams function
     * inlcuding set length with comparison to basic boundaries
     */

    @Test(timeout=1000)
    public void testGetDistinctNgrams() {
         String[] stringsToTest = {
            "123!@#123!@#",
            "adadadadadadadad",
            "cadadcdaadcdbed",
            "aaaaaa",
            "HOWWEYVUFXBINEF"
         };

         String stringToTest = stringsToTest[0];
         int ngramSize = 2;
         NgramAnalyser analysis = new NgramAnalyser(ngramSize, stringToTest);

         Set<String> distinctNgrams = analysis.getDistinctNgrams();
         int distinctNgramCount = analysis.getDistinctNgramCount();
         int totalNgramCount = analysis.getNgramCount();

         //Test that there are fewer or equal distinct Ngrams than total Ngrams
         assert(distinctNgramCount <= totalNgramCount);

         //Test that there are fewer or equal distinct Ngrams than the size
         //of the analysed string
         assert(distinctNgramCount <= stringToTest.length());

         //Test that the alphabet size is smaller than
         //or equal to the number of distinct NGrams
         assert(analysis.getAlphabetSize() <= distinctNgramCount);

    }

    /**
     * Tests the NgramAnalyser function for more complicated and longer ngrams
     *
     */
    @Test(timeout=1000)
    public void testNgramAnalyser() {

        String stringToTest = "baaaaaaaaaamsdbfajeduhgtraaaab";
        int ngramSize = 16;
        NgramAnalyser analysis = new NgramAnalyser(ngramSize, stringToTest);

        //Test toString method
        String toString = analysis.toString();
        //System.out.println(toString); //REMOVE BEFORE SUBMITTING!!!!!

        //Test that ngramCount = length of the string
        assert(analysis.getNgramCount() == stringToTest.length());

    }
  
    @Test(timeout=1000)
    public void testLaplaceExample() {
        assertEquals(0,1); //TODO replace with test code
    }
  
    @Test(timeout=1000)
    public void testSimpleExample() {
        assertEquals(0,1); //TODO replace with test code
    }


    @Test
    public void testTask3example()
    {
        MarkovModel model = new MarkovModel(2,"aabcabaacaac");
        ModelMatcher match = new ModelMatcher(model,"aabbcaac");
        assertEquals(0,1); //TODO replace with test code
    }
}


MarkovModel.java


public class MarkovModel
{

   /** Markov model order parameter */
   int k;
   /** ngram model of order k */
   NgramAnalyser ngram;
   /** ngram model of order k+1 */
   NgramAnalyser n1gram;

   /**
   * Construct an order-k Markov model from string s
   * @param k int order of the Markov model
   * @param s String input to be modelled
   */
   public MarkovModel(int k, String s)
   {
       ngram = new NgramAnalyser(k, s);
       n1gram = new NgramAnalyser((k+1), s);
   }

   /**
   * @return order of this Markov model
   */
   public int getK()
   {
       return k;
   }

   /** Estimate the probability of a sequence appearing in the text
   * using simple estimate of freq seq / frequency front(seq).
   * @param sequence String of length k+1
   * @return double probability of the last letter occurring in the
   * context of the first ones or 0 if front(seq) does not occur.
   */
   public double simpleEstimate(String sequence) {
       double prob;
       String seqNotLast = sequence.substring(0, sequence.length()-1);

       if (ngram.getDistinctNgrams().contains(seqNotLast))
       {
           double n1g = n1gram.getNgramFrequency(sequence);
           double ng = ngram.getNgramFrequency(seqNotLast);
           try{
               prob = (n1g/ng);
           }
           catch(ArithmeticException e){
               return 0.0;
           }
           return prob;
       }
       else
       {
           return 0.0;
       }
   }
   /**
   * Calculate the Laplacian probability of string obs given this Markov model
   * @input sequence String of length k+1
   * @return Laplacian Probability
   */
   public double laplaceEstimate(String sequence)
   {
       //TODO replace this line with your code
       String context = sequence.substring(0, sequence.length()-1);
       double npc = n1gram.getNgramFrequency(sequence);
       double np = ngram.getNgramFrequency(context);
       double laplace;
       laplace = (npc + 1)/(np + ngram.getAlphabetSize());
       return laplace;
   }

   /**
   * @return String representing this Markov model
   */
   public String toString()
   {
       //TODO replace this line with your code
       String toRet = "";
       String k = Integer.toString(getK());
       toRet += (k + " ");
       toRet += (Integer.toString(ngram.getAlphabetSize()) + " ");
       toRet += ngram.toString() + n1gram.toString();
       return toRet;
   }

}

NgramAnalyser.java

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Set;

import java.util.HashSet;
import java.util.Arrays;

//packages for iterating through hashmap
import java.util.Map;
import java.util.Iterator;

public class NgramAnalyser
{
    /** dictionary of all distinct n-grams and their frequencies */
    private HashMap<String,Integer> ngram;

    /** number of distinct characters in the input */
    private int alphabetSize;

    /** n-gram size for this object (new field) */
    private int ngramSize;

    /** input length for this object (new field) */
    private int inputLength;

    /**
     * Analyse the frequency with which distinct n-grams, of length n,
     * appear in an input string.
     * n-grams at the end of the string wrap to the front
     * e.g. "abbbbc" includes "bca" and "cab" in its 3-grams
     * @param int n size of n-grams to create
     * @param String inp input string to be modelled
     */
    public NgramAnalyser(int n, String inp) {
        if(inp != null && inp != "" && n > 0 && n <= inp.length()) {
            this.ngramSize = n;
            this.inputLength = inp.length();
            this.ngram = new HashMap<>(inp.length(), inp.length());
            for (int i = 0; i < inp.length(); i++) { //loops through each character in inp
                    String currentNGram = ""; //new nGram starting at ith position
                    for (int j = i; j-i < n ; j++) { //starting from the ith character, loop n characters after this
                        currentNGram = currentNGram.concat(inp.substring(j%inp.length(), j%inp.length()+1)); //concatonates the jth char to currNGram
                    }
                    if (ngram.containsKey(currentNGram)) { //if the ngram exists, add one to its frequency
                        ngram.put(currentNGram, ngram.get(currentNGram) +1);
                    } else {
                        ngram.put(currentNGram, 1); //otherwise create a key for this ngram
                    }
            }
          
            //Prints the ngram
            /*
            Set set = ngram.entrySet();
            Iterator iterator = set.iterator();
            while(iterator.hasNext()) {
                Map.Entry mentry = (Map.Entry)iterator.next();
                System.out.print("key is: "+ mentry.getKey() + " & Value is: ");
                System.out.println(mentry.getValue());
            }
            */

            //Alphabet size calculation
            if (n != 1) {
            NgramAnalyser alpha = new NgramAnalyser(inp);
            this.alphabetSize = alpha.getDistinctNgramCount(); //find alphabet size by getting number of distinct 1-grams
            } else {
                this.alphabetSize = this.getDistinctNgramCount(); // 1-grams are simply a list of distinct characters, also bottoms recursion.
            }
        } else {
            throw new IllegalArgumentException("ngram size must be between 1 and the length of the input string. Input string must not be null or empty.");
        }
    }

    /**
     * Analyses the input text for n-grams of size 1.
     */
    public NgramAnalyser(String inp) {
        this(1,inp);
    }

    /**
     * @return int the size of the alphabet of a given input
     */
    public int getAlphabetSize() {
        return alphabetSize;
    }

    /**
     * @return the total number of distinct n-grams appearing
     *         in the input text.
     */
    public int getDistinctNgramCount() {
        //TODO replace this line with your code
        return ngram.size();
    }

    /**
     * @return Return a set containing all the distinct n-grams
     *         in the input string.
     */
    public Set<String> getDistinctNgrams() {
        //TODO replace this line with your code
        return ngram.keySet();
    }

    /**
     * @return the total number of n-grams appearing
     *         in the input text (not requiring them to be distinct)
     */
    public int getNgramCount() {
        //TODO replace this line with your code
        return this.inputLength;
    }

    /** Return the frequency with which a particular n-gram appears
     * in the text. If it does not appear at all, return 0.
     *
     * @param ngram The n-gram to get the frequency of
     * @return The frequency with which the n-gram appears.
     */
    public int getNgramFrequency(String ngram) {
        //TODO replace this line with your code
        return this.ngram.get(ngram);
    }

    /**
     * Generate a summary of the ngrams for this object.
     * @return a string representation of the n-grams in the input text
     * comprising the ngram size and then each ngram and its frequency
     * where ngrams are presented in alphabetical order.   
     */
    public String toString() {
        //TODO replace this line with your code
        String[] keys = ngram.keySet().toArray(new String[0]);
        Arrays.sort(keys);
        Integer a = ngramSize;
        String answer = a.toString();
        for (int i =0; i < ngram.keySet().size();i++) {
            answer = answer.concat(" " + keys[i] + " ");
            answer = answer.concat(this.getNgramFrequency(keys[i]) + "");
        }
        return answer;
    }

}

Hire Me For All Your Tutoring Needs
Integrity-first tutoring: clear explanations, guidance, and feedback.
Drop an Email at
drjack9650@gmail.com
Chat Now And Get Quote