

Question

I need help, and I am limited to using material covered in chapters 1 - 16.

CSC143 - Computer Programming II
Programming Assignment #3
For 25 points
Due: see our Canvas class site

This assignment will give you practice using sets. You will read in the text of various books and use the provided lists of "positive" and "negative" words to classify each book as a "comedy" or a "tragedy." The practice of attempting to determine the overall tone of a block of text is known as "sentiment analysis." In practice, algorithms for sentiment analysis can be quite sophisticated; ours is clearly quite simple. We'll see how accurate it is at classifying some books.

Part A: Create a BookAnalyzer Class

This class will have the following fields (posWords and negWords should both be SETS, bookWords should be an ARRAYLIST):

posWords
negWords
bookWords

Your class should have a CONSTRUCTOR that takes 3 INPUT PARAMETERS: the "positive" and "negative" text files, which are used to construct the posWords and negWords sets, and a book text file, which is used to construct the bookWords ArrayList object.

Your class should handle bad input gracefully. One option might be to have "default" lists for positive and negative words, such as the lists shown below (which you would need to convert to sets):

String[] posList = {"good", "nice", "love", "excellent", "great", "awesome", "wonderful", "fantastic", "fabulous", "like"};
String[] negList = {"bad", "awful", "hate", "terrible", "miserable", "sad", "dislike", "atrocious", "depressed", "cry"};

Another option (and the one you should probably use for the book file name) is to keep querying the user until a valid file name is entered.

Your class should contain methods to analyze the book. You get to determine what methods you will need. Hint: You may want to have methods that create positive and negative sets of words for a given book. Your BookAnalyzer class should determine whether the book being analyzed is a comedy (positive word count > negative word count), a tragedy (positive word count < negative word count) ...

Explanation / Answer
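The BookAnalyzer class below builds the posWords and negWords sets and the bookWords ArrayList in its constructor, then classifies the book by comparing positive and negative word counts. It does not fall back to the assignment's "default" word lists when a word file cannot be read, so here is a minimal sketch of that option, assuming the same posList/negList arrays; the class and method names (WordSetLoader, loadWordsOrDefault) are made up for illustration, not part of the assignment.

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class WordSetLoader {

    // Reads one word per whitespace-separated token from the file; if the file
    // cannot be read, converts the given default array to a HashSet instead
    // (the "default lists" option described in the assignment).
    static Set<String> loadWordsOrDefault(String fileName, String[] defaults) {
        Set<String> words = new HashSet<String>();
        try (BufferedReader in = new BufferedReader(new FileReader(fileName))) {
            String line;
            while ((line = in.readLine()) != null) {
                for (String w : line.split(" ")) {
                    words.add(w);
                }
            }
        } catch (IOException e) {
            // bad input: fall back to the default list
            words.addAll(Arrays.asList(defaults));
        }
        return words;
    }
}

In the constructor below, posWords could then be built with loadWordsOrDefault(posfile, posList) (and negWords likewise) instead of failing outright when a word file is missing.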

BookAnalyzer.java

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

public class BookAnalyzer {

    Set<String> posWords = new HashSet<String>();          // positive words read from the positive-words file
    Set<String> negWords = new HashSet<String>();          // negative words read from the negative-words file
    ArrayList<String> bookWords = new ArrayList<String>(); // every word of the book, in order
    int poscount = 0, negcount = 0;                        // counters for positive and negative words found in the book
    long startTime, endTime, timeneeded;                   // start, end, and total time of the analysis
    int mostnegcount, mostposcount;                        // occurrence counts for the most frequent neg/pos word

    // Default lists suggested by the assignment (not wired in as a fallback in this version)
    String[] posList = {"good", "nice", "love", "excellent", "great", "awesome", "wonderful", "fantastic", "fabulous", "like"};
    String[] negList = {"bad", "awful", "hate", "terrible", "miserable", "sad", "dislike", "atrocious", "depressed", "cry"};

    // CONSTRUCTOR that takes 3 INPUT PARAMETERS: the "positive" and "negative" text files,
    // which are used to construct the posWords and negWords sets, and a book text file,
    // which is used to construct the bookWords ArrayList object.
    public BookAnalyzer(String posfile, String negfile, String analyzefile) throws IOException {
        BufferedReader BR = new BufferedReader(new FileReader(posfile));
        BufferedReader BR1 = new BufferedReader(new FileReader(negfile));
        BufferedReader BR2 = new BufferedReader(new FileReader(analyzefile));
        String line;

        startTime = System.currentTimeMillis();            // start timer

        while ((line = BR.readLine()) != null) {            // read the positive-words file
            String[] words = line.split(" ");                // split each line into words
            for (int i = 0; i < words.length; i++) {
                posWords.add(words[i]);                      // add each word to the posWords set
            }
        }

        while ((line = BR1.readLine()) != null) {            // read the negative-words file the same way
            String[] words = line.split(" ");
            for (int i = 0; i < words.length; i++) {
                negWords.add(words[i]);                      // add each word to the negWords set
            }
        }

        while ((line = BR2.readLine()) != null) {            // read the book file and store its words in the ArrayList
            String[] words = line.split(" ");
            for (int i = 0; i < words.length; i++) {
                bookWords.add(words[i]);
            }
        }

        endTime = System.currentTimeMillis();                // stop the timer once all files have been read
        timeneeded = endTime - startTime;                    // total time calculation

        BR.close();                                          // release the file handles
        BR1.close();
        BR2.close();

        analyze();
    }

    /* Counts the positive and negative words present in the book and returns the type of book */
    public String analyze() {
        poscount = 0;                                        // reset so repeated calls don't double-count
        negcount = 0;
        for (int i = 0; i < bookWords.size(); i++) {
            if (posWords.contains(bookWords.get(i))) {
                poscount++;
            } else if (negWords.contains(bookWords.get(i))) {
                negcount++;
            }
        }
        if (poscount > negcount)
            return "comedy";
        else if (poscount < negcount)
            return "tragedy";
        else
            return "boring book";
    }

    int gettotalwords() {
        return bookWords.size();
    }

    int getposcount() {
        return poscount;
    }

    int getnegcount() {
        return negcount;
    }

    double postivewordspercentage() {
        return (poscount * 100.0) / bookWords.size();
    }

    double negativewordspercentage() {
        return (negcount * 100.0) / bookWords.size();
    }

    String type() {
        return analyze();
    }

    long timeneeded() {
        return timeneeded;
    }

    // Returns the positive word that occurs most often in the book
    String mostpositiveword() {
        String temp = "No word occurred";
        int max = 0;
        java.util.Iterator<String> i = posWords.iterator();
        while (i.hasNext()) {
            String temp1 = i.next();
            mostposcount = Collections.frequency(bookWords, temp1);
            if (mostposcount > max) {
                temp = temp1;
                max = mostposcount;
            }
        }
        return temp;
    }

    // Returns the negative word that occurs most often in the book
    public String mostnegativeword() {
        String temp = "No word occurred";
        int max = 0;
        java.util.Iterator<String> i = negWords.iterator();
        while (i.hasNext()) {
            String temp1 = i.next();
            mostnegcount = Collections.frequency(bookWords, temp1);
            if (mostnegcount > max) {
                temp = temp1;
                max = mostnegcount;
            }
        }
        return temp;
    }
}
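The assignment also suggests re-prompting the user until a valid book file name is entered; BookAnalyzer does not do that itself, so here is a sketch of a small driver that does. The class name BookAnalyzerDriver and the word-file names "positive.txt" / "negative.txt" are placeholders rather than names given in the assignment.

import java.io.IOException;
import java.util.Scanner;

public class BookAnalyzerDriver {
    public static void main(String[] args) {
        Scanner console = new Scanner(System.in);
        BookAnalyzer analyzer = null;
        // keep querying the user until BookAnalyzer can open all three files
        // (the placeholder word files are assumed to be present in the working directory)
        while (analyzer == null) {
            System.out.print("Book file to analyze: ");
            String bookFile = console.nextLine();
            try {
                analyzer = new BookAnalyzer("positive.txt", "negative.txt", bookFile);
            } catch (IOException e) {
                System.out.println("Could not read that file, please try again.");
            }
        }
        // report the results using the methods defined above
        System.out.println("Type: " + analyzer.type());
        System.out.println("Total words: " + analyzer.gettotalwords());
        System.out.println("Positive words: " + analyzer.getposcount()
                + " (" + analyzer.postivewordspercentage() + "%)");
        System.out.println("Negative words: " + analyzer.getnegcount()
                + " (" + analyzer.negativewordspercentage() + "%)");
        System.out.println("Most frequent positive word: " + analyzer.mostpositiveword());
        System.out.println("Most frequent negative word: " + analyzer.mostnegativeword());
        System.out.println("Analysis time (ms): " + analyzer.timeneeded());
    }
}

Because the constructor throws IOException for any unreadable file, the loop simply retries until construction succeeds.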

Analyse.java (a separate, more advanced example: sentiment analysis of tweets stored in a Lucene index, with language detection; it is not required for the assignment, but the public class must live in a file named Analyse.java to compile)

package analyse;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import com.cybozu.labs.langdetect.Detector;
import com.cybozu.labs.langdetect.DetectorFactory;
import com.cybozu.labs.langdetect.LangDetectException;

public class Analyse {

    // path to lucene index
    private final static String indexPath = "/Users/leomelzer/Downloads/Tweets/";

    // path to language profiles for classifier
    private static String langProfileDirectory = "./src/profiles/";

    // lucene queryParser for saving
    private static QueryParser queryParser;

    // used to store positive and negative words for scoring
    static List<String> posWords = new ArrayList<String>();
    static List<String> negWords = new ArrayList<String>();

    // keep some stats! [-1 / 0 / 1 / not english / foursquare / no text to classify]
    static int[] stats = new int[6];

    /**
     * @param args
     * @throws IOException
     * @throws LangDetectException
     */
    public static void main(String[] args) throws IOException, LangDetectException {
        // huh, how long?
        long startTime = System.currentTimeMillis();

        // open lucene index
        Directory dir;
        IndexReader docReader = null;
        try {
            dir = FSDirectory.open(new File(indexPath));
            docReader = IndexReader.open(dir, true);
        } catch (IOException e1) {
            e1.printStackTrace();
        }

        System.out.println("START: reading file list");

        // source: www.cs.uic.edu/~liub/FBS/sentiment-analysis.html
        BufferedReader negReader = new BufferedReader(new FileReader(new File(
                "./src/negative-words.txt")));
        BufferedReader posReader = new BufferedReader(new FileReader(new File(
                "./src/positive-words.txt")));

        // currently read word
        String word;

        // add words to comparison list
        while ((word = negReader.readLine()) != null) {
            negWords.add(word);
        }
        while ((word = posReader.readLine()) != null) {
            posWords.add(word);
        }

        // cleanup
        negReader.close();
        posReader.close();

        System.out.println("FINISH: reading file list");

        // ----------------------------------------------

        System.out.println("START: calculating sentiment");

        // prepare language classifier
        DetectorFactory.loadProfile(langProfileDirectory);

        // store different languages
        Map<String, Integer> langHitList = new HashMap<String, Integer>();

        // detect language, using http://code.google.com/p/language-detection/
        // has 99% accuracy
        Detector detector;

        // current tweet
        Document tweet;
        // current score
        int score = 0;
        // current text
        String text;
        // maximum number of documents
        int max = docReader.maxDoc();
        // used to give some feedback during processing the 1 million tweets
        int j = 0;
        // do we want to skip saving that document?
        boolean skipSave = false;

        for (int i = 0; i < max; i++) {
            if (i % 100000 == 0) {
                System.out.println("PROCESSING: " + j * 100000 + " of "
                        + max + " tweets processed...");
                j++;
            }

            // reset, most of the times we want that.
            skipSave = false;

            try {
                // read it!
                tweet = docReader.document(i);
                text = tweet.get("text");

                // we need a new instance every time unfortunately...
                detector = DetectorFactory.create();
                detector.append(text);

                // classify language!
                String detectedLanguage = detector.detect();

                // if it is not english...
                if (detectedLanguage.equals("en") == false) {
                    stats[3]++;
                    // we can't classify non-english tweets, so just keep them neutral
                    score = 0;
                } else if (text.startsWith("I'm at")
                        || text.startsWith("I just became the mayor")
                        || text.startsWith("I just ousted")) {
                    // all your foursquare updates are belong to us.
                    stats[4]++;
                    // and we don't save them. yo.
                    skipSave = true;
                } else {
                    // finally! retrieve sentiment score.
                    score = getSentimentScore(tweet.get("text"));
                    // ++ index so we won't have -1 and stuff...
                    stats[score + 1]++;
                    // wanna see what neutral tweets look like? uncomment.
                    // if (score == 0) {
                    //     System.out.println("Score: " + score + " for Tweet (" +
                    //             tweet.get("ID") + "):" + tweet.get("text"));
                    // }
                }

                // so now for the saving...
                if (skipSave == false) {
                    Integer currentCount = langHitList.get(detectedLanguage);
                    // ...save the detected language for some stats
                    langHitList.put(detectedLanguage,
                            (currentCount == null) ? 1 : currentCount + 1);
                    // tweet.set("language", detectedLanguage)
                    // tweet.set("sentiment", score);
                    // tweet.get("ID");
                }
            } catch (LangDetectException e) {
                // thrown by the language classifier when tweets are like :D or :3 or ?????????
                // count how many times there is no valid input, plus we won't
                // save it as it's in the catch clause...
                stats[5]++;
            } catch (Exception e) {
                // something went wrong, ouuups!
                e.printStackTrace();
                System.err.println("Doc at " + i + " does not exist");
            }
        }

        System.out.println("FINISH: calculating sentiment");

        // ----------------------------------------------

        long endTime = System.currentTimeMillis();
        long totalTime = endTime - startTime;

        System.out.println("----------------------------------------------");
        System.out.println("STATS - TIME: Analysis took "
                + TimeUnit.SECONDS.convert(totalTime, TimeUnit.MILLISECONDS)
                + " seconds");

        // ----------------------------------------------

        // get me some info!
        System.out.println("STATS - COUNTS: [negative | neutral | positive | not english | foursquare | no text to classify]");
        System.out.println("STATS - COUNTS: " + java.util.Arrays.toString(stats));
        System.out.println("STATS - LANGUAGE: " + langHitList.toString());

        // cleanup
        docReader.close();
    }

    /**
     * Does some string mangling, then calculates occurrences in the positive /
     * negative word lists and finally the delta.
     *
     * @param input String: the text to classify
     * @return score int: if < 0 then -1, if > 0 then 1, otherwise 0 - we don't
     *         care about the actual delta
     */
    private static int getSentimentScore(String input) {
        // normalize!
        input = input.toLowerCase();
        input = input.trim();
        // remove all non alpha-numeric, non-whitespace chars
        // (the backslash must be escaped in a Java string literal)
        input = input.replaceAll("[^a-zA-Z0-9\\s]", "");

        int negCounter = 0;
        int posCounter = 0;

        // so what we got?
        String[] words = input.split(" ");

        // check if the current word appears in our reference lists...
        for (int i = 0; i < words.length; i++) {
            if (posWords.contains(words[i])) {
                posCounter++;
            }
            if (negWords.contains(words[i])) {
                negCounter++;
            }
        }

        // positive matches MINUS negative matches
        int result = (posCounter - negCounter);

        // negative?
        if (result < 0) {
            return -1;
        // or positive?
        } else if (result > 0) {
            return 1;
        }
        // neutral to the rescue!
        return 0;
    }
}
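The scoring rule in getSentimentScore is simply the sign of (positive matches minus negative matches). As a concrete illustration, here is a self-contained sketch with tiny hard-coded word lists; ScoreExample and its toy lists are made up for illustration, while the real class loads the full lists from positive-words.txt and negative-words.txt.

import java.util.Arrays;
import java.util.List;

public class ScoreExample {
    public static void main(String[] args) {
        // toy lists for illustration only; the Analyse class loads much larger ones
        List<String> pos = Arrays.asList("love", "awesome", "great");
        List<String> neg = Arrays.asList("sad", "hate", "awful");
        String text = "i love this awesome book even if the ending is sad";
        int delta = 0;
        for (String w : text.split(" ")) {
            if (pos.contains(w)) delta++;   // "love", "awesome" -> +2
            if (neg.contains(w)) delta--;   // "sad"             -> -1
        }
        int score = Integer.signum(delta);  // delta = 1, so score = 1 (positive)
        System.out.println("delta=" + delta + " score=" + score);
    }
}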
