Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

Tweets: Early sentiment analysis work [6] included the collection of a set of tweets

ID: 3906614 • Letter: T

Question

Tweets. Early sentiment analysis work [6] included the collection of a set of tweets, some for learning a machine learning model for sentiment analysis, and some for evaluating how good that model is. We’ll be using that same data; it includes the following information for each tweet [7]:

• the gold polarity of the tweet (0 = negative, 2 = neutral, 4 = positive, = not given)

• the id of the tweet (2087)

• the date of the tweet (Sat May 16 23:58:44 UTC 2009)

You’ll basically be implementing a simple keyword-based method for sentiment analysis of tweets, counting up the numbers of positive and negative words in a tweet to determine the predicted polarity of the tweet. (This differs from the gold polarity, which is what has been decided as the true polarity of the tweet; you’re going to try to see how well you can predict it based on the content of the tweet.) Task T1: You will choose appropriate representations for the Tweet class. You may or may not choose to base it on other classes I’ve supplied (Vertex, VertexIDList). Material from weeks 9–11 of lectures will be particularly relevant in helping you decide. You’ll need to write a constructor based on your chosen representation that instantiates an empty tweet.

import java.io.IOException;

import java.io.Reader;

import java.io.BufferedReader;

import java.nio.file.Files;

import java.nio.file.Paths;

import java.util.*;

import org.apache.commons.csv.*;

import org.junit.Test;

// Assignment skeleton for a collection of tweets plus the sentiment word lists,
// inverse index and shared-word graph that operate on it.
// Apart from ingestTweetsFromFile (which parses the file but does not yet store
// anything), every method below is an unimplemented stub: it either returns null
// or has an empty body, and is marked with TODO.
public class TweetCollection {

// TODO: add appropriate data types

public TweetCollection() {

// Constructor

// TODO

}

/*

* functions for accessing individual tweets

*/

public Tweet getTweetByID (String ID) {

// PRE: -

// POST: Returns the Tweet object with tweet id ID
// (stub: currently always returns null)

// TODO

return null;

}

public Integer numTweets() {

// PRE: -

// POST: Returns the number of tweets in this collection
// (stub: currently always returns null)

// TODO

return null;

}

/*

* functions for accessing sentiment words

*/

public Polarity getBasicSentimentWordPolarity(String w) {

// PRE: w not null, basic sentiment words already read in from file

// POST: Returns polarity of w
// (stub: currently always returns null)

// TODO

return null;

}

public Polarity getFinegrainedSentimentWordPolarity(String w) {

// PRE: w not null, finegrained sentiment words already read in from file

// POST: Returns polarity of w
// (stub: currently always returns null)

// TODO

return null;

}

public Strength getFinegrainedSentimentWordStrength(String w) {

// PRE: w not null, finegrained sentiment words already read in from file

// POST: Returns strength of w
// (stub: currently always returns null)

// TODO

return null;

}

/*

* functions for reading in tweets

*

*/

public void ingestTweetsFromFile(String fInName) throws IOException {

// PRE: -

// POST: Reads tweets from .csv file, stores in data structure
// (the storing step is still a TODO: each Tweet is built and then dropped)

// NOTES

// Data source, file format description at http://help.sentiment140.com/for-students

// Using apache csv reader: https://www.callicoder.com/java-read-write-csv-file-apache-commons-csv/

// try-with-resources: reader and parser are closed when the block exits
try (

Reader reader = Files.newBufferedReader(Paths.get(fInName));

CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT);

) {

  

Iterable<CSVRecord> csvRecords = csvParser.getRecords();

  

for (CSVRecord csvRecord : csvRecords) {

// Accessing Values by Column Index
// (column 3 is not read; every value is passed to Tweet as a String)

Tweet tw = new Tweet(csvRecord.get(0), // gold polarity

csvRecord.get(1), // ID

csvRecord.get(2), // date

csvRecord.get(4), // user

csvRecord.get(5)); // text

// TODO: insert tweet tw into appropriate data type

}

}   

}

/*

* functions for sentiment words

*/

public void importBasicSentimentWordsFromFile (String fInName) throws IOException {

// PRE: -

// POST: Read in and store basic sentiment words in appropriate data type
// (stub: currently does nothing)

// TODO

}

public void importFinegrainedSentimentWordsFromFile (String fInName) throws IOException {

// PRE: -

// POST: Read in and store finegrained sentiment words in appropriate data type
// (stub: currently does nothing)

// TODO

}

public Boolean isBasicSentWord (String w) {

// PRE: Basic sentiment words have been read in and stored

// POST: Returns true if w is a basic sentiment word, false otherwise
// (stub: currently always returns null)

// TODO

return null;

}

public Boolean isFinegrainedSentWord (String w) {

// PRE: Finegrained sentiment words have been read in and stored

// POST: Returns true if w is a finegrained sentiment word, false otherwise
// (stub: currently always returns null)

// TODO

return null;

}

public void predictTweetSentimentFromBasicWordlist () {

// PRE: Basic word sentiment already imported
// NOTE(review): the hand-out said "Finegrained" here, presumably a copy-paste
// slip from the method below -- confirm against the assignment text

// POST: For all tweets in collection, tweet annotated with predicted sentiment

// based on count of sentiment words in sentWords

// TODO

}

public void predictTweetSentimentFromFinegrainedWordlist (Integer strongWeight, Integer weakWeight) {

// PRE: Finegrained word sentiment already imported

// POST: For all tweets in collection, tweet annotated with predicted sentiment

// based on count of sentiment words in sentWords
// NOTE(review): presumably strongWeight / weakWeight weight strong and weak
// sentiment words in that count -- confirm against the assignment text

// TODO

}

/*

* functions for inverse index

*

*/

public Map<String, Vector<String>> importInverseIndexFromFile (String fInName) throws IOException {

// PRE: -

// POST: Reads in and returns contents of file as inverse index

// invIndex has words w as key, IDs of tweets that contain w as value
// (stub: currently always returns null)

// TODO

return null;

}

/*

* functions for graph construction

*/

public void constructSharedWordGraph(Map<String, Vector<String>> invIndex) {

// PRE: invIndex has words w as key, IDs of tweets that contain w as value

// POST: Graph constructed, with tweets as vertices,

// and edges between them if they share a word

// TODO

}

public Integer numConnectedComponents() {

// PRE: -

// POST: Returns the number of connected components
// (stub: currently always returns null)

// TODO

return null;

}

public void annotateConnectedComponents() {

// PRE: -

// POST: Annotates graph so that it is partitioned into components

// TODO

}

public Integer componentSentLabelCount(String ID, Polarity p) {

// PRE: Graph components are identified, ID is a valid tweet

// POST: Returns count of labels corresponding to Polarity p in component containing ID
// (stub: currently always returns null)

// TODO

return null;

}

public void propagateLabelAcrossComponent(String ID, Polarity p, Boolean keepPred) {

// PRE: ID is a tweet id in the graph

// POST: Labels tweets in component with predicted polarity p

// (if keepPred is true, only tweets whose predicted polarity is None;
// otherwise all tweets)

// TODO

}

public void propagateMajorityLabelAcrossComponents(Boolean keepPred) {

// PRE: Components are identified

// POST: Tweets in each component are labelled with the majority sentiment for that component

// Majority label is defined as whichever of POS or NEG has the larger count;

// if POS and NEG are both zero, majority label is NONE

// otherwise, majority label is NEUT

// If keepPred is True, only tweets with predicted label None are labelled in this way

// otherwise, all tweets in the component are labelled in this way

// TODO

}

/*

* functions for evaluation

*/

public Double accuracy () {

// PRE: -

// POST: Calculates and returns accuracy of labelling
// (stub: currently always returns null)

// TODO

return null;

}

public Double coverage () {

// PRE: -

// POST: Calculates and returns coverage of labelling
// (stub: currently always returns null)

// TODO

return null;

}

// Entry point; intentionally empty in the skeleton.
public static void main(String[] args) {

}

}

Explanation / Answer

*********************************

Tweet.java

/**
 * A single tweet as read from the data file: its gold polarity code, id, date,
 * user and text.
 *
 * <p>All values are stored exactly as supplied; no validation is performed beyond
 * parsing the polarity in the {@code String} constructor.
 */
public class Tweet {

    /**
     * Polarity code stored when no gold polarity is supplied. Matches the
     * {@code not_given(-1)} constant of {@code TweetCollection.Polarity}.
     */
    private static final int POLARITY_NOT_GIVEN = -1;

    // These fields were assigned and returned but never declared, so the class
    // did not compile.
    private int gold_polarity;
    private String t_ID;
    private String t_date;
    private String t_user;
    private String t_text;

    /**
     * Creates a tweet from an already-numeric gold polarity.
     *
     * @param gold_polarity gold polarity code of the tweet
     * @param t_ID id of the tweet
     * @param t_date date of the tweet
     * @param t_user user of the tweet
     * @param t_text text of the tweet
     */
    public Tweet(int gold_polarity, String t_ID, String t_date, String t_user, String t_text) {
        this.gold_polarity = gold_polarity;
        this.t_ID = t_ID;
        this.t_date = t_date;
        this.t_user = t_user;
        this.t_text = t_text;
    }

    /**
     * Creates a tweet from raw column values, which is how
     * {@code TweetCollection.ingestTweetsFromFile} supplies them (every CSV field
     * is a {@code String}).
     *
     * @param gold_polarity gold polarity as text; {@code null} or blank is stored as -1
     * @param t_ID id of the tweet
     * @param t_date date of the tweet
     * @param t_user user of the tweet
     * @param t_text text of the tweet
     * @throws NumberFormatException if {@code gold_polarity} is non-blank but not an integer
     */
    public Tweet(String gold_polarity, String t_ID, String t_date, String t_user, String t_text) {
        this(parsePolarity(gold_polarity), t_ID, t_date, t_user, t_text);
    }

    /** Converts a raw polarity column to its integer code; blank means "not given". */
    private static int parsePolarity(String raw) {
        if (raw == null) {
            return POLARITY_NOT_GIVEN;
        }
        String trimmed = raw.trim();
        if (trimmed.isEmpty()) {
            return POLARITY_NOT_GIVEN;
        }
        return Integer.parseInt(trimmed);
    }

    /** @return the gold polarity code of this tweet */
    public int getGold_polarity() {
        return gold_polarity;
    }

    /** @param gold_polarity the new gold polarity code */
    public void setGold_polarity(int gold_polarity) {
        this.gold_polarity = gold_polarity;
    }

    /** @return the id of this tweet */
    public String getT_ID() {
        return t_ID;
    }

    /** @param t_ID the new tweet id */
    public void setT_ID(String t_ID) {
        this.t_ID = t_ID;
    }

    /** @return the date of this tweet, as the unparsed string it was created with */
    public String getT_date() {
        return t_date;
    }

    /** @param t_date the new date string */
    public void setT_date(String t_date) {
        this.t_date = t_date;
    }

    /** @return the user of this tweet */
    public String getT_user() {
        return t_user;
    }

    /** @param t_user the new user */
    public void setT_user(String t_user) {
        this.t_user = t_user;
    }

    /** @return the text of this tweet */
    public String getT_text() {
        return t_text;
    }

    /** @param t_text the new text */
    public void setT_text(String t_text) {
        this.t_text = t_text;
    }
}

*********************************

SentimentWord.java

/**
 * Mutable holder for one entry of a sentiment word list: the word itself plus
 * integer polarity and strength codes. The codes are stored as given; this class
 * attaches no meaning to them.
 */
public class SentimentWord {

    /** The word this entry describes. */
    String word;

    /** Integer polarity code of the word. */
    int polarity;

    /** Integer strength code of the word. */
    int strength;

    /**
     * Builds an entry from its three components.
     *
     * @param word the sentiment word
     * @param polarity polarity code for the word
     * @param strength strength code for the word
     */
    public SentimentWord(String word, int polarity, int strength) {
        this.word = word;
        this.polarity = polarity;
        this.strength = strength;
    }

    // ---- accessors -------------------------------------------------------

    /** @return the word of this entry */
    public String getWord() {
        return this.word;
    }

    /** @return the polarity code of this entry */
    public int getPolarity() {
        return this.polarity;
    }

    /** @return the strength code of this entry */
    public int getStrength() {
        return this.strength;
    }

    // ---- mutators --------------------------------------------------------

    /** @param word replacement word */
    public void setWord(String word) {
        this.word = word;
    }

    /** @param polarity replacement polarity code */
    public void setPolarity(int polarity) {
        this.polarity = polarity;
    }

    /** @param strength replacement strength code */
    public void setStrength(int strength) {
        this.strength = strength;
    }
}

*********************************

TweetCollection.java

import java.io.IOException;

import java.io.Reader;

import java.io.BufferedReader;

import java.nio.file.Files;

import java.nio.file.Paths;

import java.util.*;

import org.apache.commons.csv.*;

import org.junit.Test;

public class TweetCollection {

                // TODO: add appropriate data types

                ArrayList<Tweet> arr_tweets;

                ArrayList<SentimentWord> arr_basic_sentiment;

    ArrayList<SentimentWord> arr_finegrained_sentiment;

               

                enum Polarity{

        negative(0), neutral(2), positive(4), not_given(-1);

        private int value;

        private Polarity(int value) {

            this.value = value;

        }

    }

    enum Strength{

        dual,

        binary,

        trinary,

        scale

    }

               

                public TweetCollection() {

                                // Constructor

                                // TODO

                                arr_tweets = new ArrayList<>();

                                arr_basic_sentiment = new ArrayList<>();

                                arr_finegrained_sentiment = new ArrayList<>();

                }

                /*

                * functions for accessing individual tweets

                */

                public Tweet getTweetByID (String ID) {

                                // PRE: -

                                // POST: Returns the Tweet object that with tweet ID

                                // TODO

        for(int i=0; i<arr_tweets.size(); i++) {

            if(arr_tweets.get(i).getT_ID().equals(ID))

                return arr_tweets.get(i);

        }

        return null;

    }

                public Integer numTweets() {

                                // PRE: -

                                // POST: Returns the number of tweets in this collection

                                // TODO

                                return arr_tweets.size();

                }

                /*

                * functions for accessing sentiment words

                */

                public Polarity getBasicSentimentWordPolarity(String w) {

        // PRE: w not null, basic sentiment words already read in from file

        // POST: Returns polarity of w

        // TODO

        for(int i=0; i<arr_basic_sentiment.size(); i++) {

            if(arr_basic_sentiment.get(i).getWord().equals(w)) {

                int pol = arr_basic_sentiment.get(i).getPolarity();

                if(pol > 0)

                    return Polarity.positive;

                else if(pol == 0)

                    return Polarity.neutral;

                else

                    return Polarity.negative;

            }

        }

        return Polarity.not_given;

    }

                public Polarity getFinegrainedSentimentWordPolarity(String w) {

                                // PRE: w not null, finegrained sentiment words already read in from file

                                // POST: Returns polarity of w

                                // TODO

                                for(int i=0; i<arr_finegrained_sentiment.size(); i++) {

            if(arr_finegrained_sentiment.get(i).getWord().equals(w)) {

                int pol = arr_finegrained_sentiment.get(i).getPolarity();

                if(pol > 0)

                    return Polarity.positive;

                else if(pol == 0)

                    return Polarity.neutral;

                else

                    return Polarity.negative;

            }

        }

        return Polarity.not_given;

                }

                public Strength getFinegrainedSentimentWordStrength(String w) {

                                // PRE: w not null, finegrained sentiment words already read in from file

                                // POST: Returns strength of w

                                // TODO

                                return null;

                }

                /*

                * functions for reading in tweets

                *

                */

                public void ingestTweetsFromFile(String fInName) throws IOException {

                                // PRE: -

                                // POST: Reads tweets from .csv file, stores in data structure

                                // NOTES

                                // Data source, file format description at http://help.sentiment140.com/for-students

                                // Using apache csv reader: https://www.callicoder.com/java-read-write-csv-file-apache-commons-csv/

                                try (

                                                Reader reader = Files.newBufferedReader(Paths.get(fInName));

                                                CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT);

                                ) {

                                                Iterable<CSVRecord> csvRecords = csvParser.getRecords();

                                                for (CSVRecord csvRecord : csvRecords) {

                                                                // Accessing Values by Column Index

                                                                Tweet tw = new Tweet(csvRecord.get(0), // gold polarity

                                                                csvRecord.get(1), // ID

                                                                csvRecord.get(2), // date

                                                                csvRecord.get(4), // user

                                                                csvRecord.get(5)); // text

                                                               

                                                                arr_tweets.add(tw);

                                                }

                                }  

                }

                /*

                * functions for sentiment words

                */

                public void importBasicSentimentWordsFromFile (String fInName) throws IOException {

                                // PRE: -

                                // POST: Read in and store basic sentiment words in appropriate data type

                                // TODO

                }

                public void importFinegrainedSentimentWordsFromFile (String fInName) throws IOException {

                                // PRE: -

                                // POST: Read in and store finegrained sentiment words in appropriate data type

                                // TODO

                }

                public Boolean isBasicSentWord (String w) {

                                // PRE: Basic sentiment words have been read in and stored

                                // POST: Returns true if w is a basic sentiment word, false otherwise

                                // TODO

                                for(int i=0; i<arr_basic_sentiment.size(); i++) {

            if(arr_basic_sentiment.get(i).getWord().equals(w))

                return true;

        }

                                return false;

                }

                public Boolean isFinegrainedSentWord (String w) {

                                // PRE: Finegrained sentiment words have been read in and stored

                                // POST: Returns true if w is a finegrained sentiment word, false otherwise

                                // TODO

                                for(int i=0; i<arr_finegrained_sentiment.size(); i++) {

            if(arr_finegrained_sentiment.get(i).getWord().equals(w))

                return true;

        }

                                return false;

                }

                public void predictTweetSentimentFromBasicWordlist () {

                                // PRE: Finegrained word sentiment already imported

                                // POST: For all tweets in collection, tweet annotated with predicted sentiment

                                // based on count of sentiment words in sentWords

                                // TODO

                }

                public void predictTweetSentimentFromFinegrainedWordlist (Integer strongWeight, Integer weakWeight) {

                                // PRE: Finegrained word sentiment already imported

                                // POST: For all tweets in v, tweet annotated with predicted sentiment

                                // based on count of sentiment words in sentWords

                                // TODO

                }

                /*

                * functions for inverse index

                *

                */

                public Map<String, Vector<String>> importInverseIndexFromFile (String fInName) throws IOException {

                                // PRE: -

                                // POST: Read in and returned contents of file as inverse index

                                // invIndex has words w as key, IDs of tweets that contain w as value

                                // TODO

                                return null;

                }

                /*

                * functions for graph construction

                */

                public void constructSharedWordGraph(Map<String, Vector<String>> invIndex) {

                                // PRE: invIndex has words w as key, IDs of tweets that contain w as value

                                // POST: Graph constructed, with tweets as vertices,

                                // and edges between them if they share a word

                                // TODO

                }

                public Integer numConnectedComponents() {

                                // PRE: -

                                // POST: Returns the number of connected components

                                // TODO

                                return null;

                }

                public void annotateConnectedComponents() {

                                // PRE: -

                                // POST: Annotates graph so that it is partitioned into components

                                // TODO

                }

                public Integer componentSentLabelCount(String ID, Polarity p) {

                                // PRE: Graph components are identified, ID is a valid tweet

                                // POST: Returns count of labels corresponding to Polarity p in component containing ID

                                // TODO

                                return null;

                }

                public void propagateLabelAcrossComponent(String ID, Polarity p, Boolean keepPred) {

                                // PRE: ID is a tweet id in the graph

                                // POST: Labels tweets in component with predicted polarity p

                                // (if keepPred == T, only tweets w pred polarity None; otherwise all tweets

                                // TODO

                }

                public void propagateMajorityLabelAcrossComponents(Boolean keepPred) {

                                // PRE: Components are identified

                                // POST: Tweets in each component are labelled with the majority sentiment for that component

                                // Majority label is defined as whichever of POS or NEG has the larger count;

                                // if POS and NEG are both zero, majority label is NONE

                                // otherwise, majority label is NEUT

                                // If keepPred is True, only tweets with predicted label None are labelled in this way

                                // otherwise, all tweets in the component are labelled in this way

                                // TODO

                }

                /*

                * functions for evaluation

                */

                public Double accuracy () {

                                // PRE: -

                                // POST: Calculates and returns accuracy of labelling

                                // TODO

                                return null;

                }

                public Double coverage () {

                                // PRE: -

                                // POST: Calculates and returns coverage of labelling

                                // TODO

                                return null;

                }

                public static void main(String[] args) {

                }

}

Hire Me For All Your Tutoring Needs
Integrity-first tutoring: clear explanations, guidance, and feedback.
Drop an Email at
drjack9650@gmail.com
Chat Now And Get Quote