Please note I'm using Python 3.4.3. Write a Python program that analyzes input from a file…
ID: 3834409 • Letter: P
Question
Please note I'm using Python 3.4.3
Write a Python program that analyzes input from a file and compiles statistics on it. The program should output: 1. The total word count 2. The count of unique words 3. The number of sentences
Explanation / Answer
"""Compile simple statistics about a piece of text.

The text is read from the file named as the first command-line argument,
or from STDIN when no argument is given:

    python text_stats.py input.txt
    cat input.txt | python text_stats.py

Reported figures: total word count, unique word count, sentence count,
line count, average words per sentence, three-word phrase frequencies and
per-word frequencies.

Written for Python 3.4+ (no f-strings, no ``typing``).
"""
import re
import sys
from collections import Counter, deque

# A sentence ends at '.', '?' or '!'. Runs such as "!!" produce empty
# fragments, which are discarded after stripping.
SENTENCE_END = re.compile(r"[.?!]")

# A word is a run of word characters and apostrophes, so "don't" stays
# whole and trailing punctuation ("dog.") is not part of the word.
WORD = re.compile(r"[\w']+")

# A phrase is "frequent" when it occurs at least this many times.
MIN_PHRASE_COUNT = 3


def phrases(w, size=3):
    """Yield each run of ``size`` consecutive words in ``w`` as a tuple.

    Args:
        w: iterable of words, in reading order.
        size: number of words per phrase (default 3).

    Yields:
        Tuples of ``size`` words; nothing if ``w`` has fewer than
        ``size`` items.
    """
    # deque(maxlen=size) drops the oldest word automatically on append.
    window = deque(maxlen=size)
    for token in w:
        window.append(token)
        if len(window) == size:
            yield tuple(window)


def analyze(text):
    """Return a dict of statistics computed from ``text``.

    Keys:
        word_count: number of words matched by ``WORD``.
        unique_word_count: number of distinct words (case-sensitive).
        sentence_count: number of non-blank fragments between . ? !
        line_count: number of lines (``str.splitlines``).
        average_words_per_sentence: float; 0.0 when there are no sentences.
        phrase_counts: list of (phrase_tuple, count), most frequent first.
        frequent_phrases: entries of phrase_counts with
            count >= MIN_PHRASE_COUNT.
        word_counts: list of (word, count), most frequent first.
    """
    tokens = WORD.findall(text)
    word_freq = Counter(tokens)

    # Strip before testing so whitespace-only fragments (e.g. the newline
    # after the final full stop) are not counted as sentences.
    sentences = [part.strip() for part in SENTENCE_END.split(text)
                 if part.strip()]

    # True division, guarded so empty input does not raise.
    if sentences:
        average = len(tokens) / len(sentences)
    else:
        average = 0.0

    phrase_counts = Counter(phrases(tokens)).most_common()
    frequent = [(phrase, count) for phrase, count in phrase_counts
                if count >= MIN_PHRASE_COUNT]

    return {
        "word_count": len(tokens),
        "unique_word_count": len(word_freq),
        "sentence_count": len(sentences),
        "line_count": len(text.splitlines()),
        "average_words_per_sentence": average,
        "phrase_counts": phrase_counts,
        "frequent_phrases": frequent,
        "word_counts": word_freq.most_common(),
    }


def report(text, stats):
    """Print ``text`` followed by the statistics in ``stats`` to stdout."""
    print("\nFull Content:\n{}\n".format(text))
    print("Total Word Count =", stats["word_count"])
    print("Unique word count =", stats["unique_word_count"])
    print("Number of Sentences =", stats["sentence_count"])
    print("Number of Lines =", stats["line_count"], "\n")
    print("extra 1: Average Word Count in Sentence =",
          stats["average_words_per_sentence"], "\n")

    print("extra 2: Sorted Phrases By Count:\n", stats["phrase_counts"], "\n")
    print("extra 2: Frequent Phrases (used {} or more times):".format(
        MIN_PHRASE_COUNT))
    for phrase, count in stats["frequent_phrases"]:
        print(phrase, count)

    print("\nextra 3: List of Words in Descending Count:\n")
    for word, count in stats["word_counts"]:
        print(word, count)


def main(argv=None):
    """Read the input text, analyze it and print the report.

    Args:
        argv: argument list; defaults to ``sys.argv``. ``argv[1]``, when
            present, is the path of the file to analyze; otherwise the
            text is read from STDIN.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) > 1:
        # Context manager closes the file even if read() fails.
        with open(argv[1]) as handle:
            text = handle.read()
    else:
        # Only prompt a human at a terminal, and do it on stderr so piped
        # output (cat file | python text_stats.py) stays clean.
        if sys.stdin.isatty():
            print("STDIN: Please input your words and hit ctrl-D to "
                  "execute text analysis", file=sys.stderr)
        text = sys.stdin.read()

    report(text, analyze(text))


if __name__ == "__main__":
    main()
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.