Write a function in python 2.7 File operations in python 2.7 INPUT:CrimeReport.t

ID: 3881580 • Letter: W

Question

Write a function in python 2.7

File operations in Python 2.7. INPUT: CrimeReport.txt: in this file, each line is a raw tweet in JSON format. output-folder: where new results will be stored. REQUIREMENT: read the tweets and separate them into groups based on the specific hour (Mon-Day-Year-Hour). The tweets related to a specific hour will be stored in a separate file in the folder "output" with the file name "Mon-Day-Year-Hour.txt". OUTPUT: new files generated and stored in the folder "output", in which each file stores the tweets corresponding to a specific hour.

Explanation / Answer

#! /usr/bin/env python

# -*- coding: utf-8 -*-

__author__ = 'gifrim'

# What this code does:

# Reads a Twitter stream given in JSON format (one tweet per line) and groups the tweets into one output file per hour.

import codecs

from datetime import datetime

import json

#import requests

import os

import string

import sys

import time

#load tweets in jsonformat

def parse_json_tweet(line):
    """Parse one raw tweet (a JSON object on a single line) into a flat record.

    Args:
        line: a string holding one JSON-encoded tweet.

    Returns:
        A 9-item list:
        [date, id, text, hashtags, users, urls, media_urls, nfollowers, nfriends]
        where date is the tweet's 'created_at' string, text is the retweeted
        status text when the tweet is a retweet, and hashtags / users / urls /
        media_urls are lists taken from the tweet's 'entities'.

        When the payload is not a JSON object, or its 'lang' is missing or is
        not 'en', an "empty" record of the same length is returned with an
        empty date, so callers can always unpack nine values and skip the
        record with ``if not date``.

    Raises:
        ValueError: if line is not valid JSON.
        KeyError: if an English tweet lacks one of the fields read below.
    """
    tweet = json.loads(line)

    # Skip anything that is not an English tweet. The placeholder has the same
    # number of items as a real record so that unpacking it never fails.
    if not isinstance(tweet, dict) or tweet.get('lang') != 'en':
        return ['', '', '', [], [], [], [], 0, 0]

    date = tweet['created_at']
    tweet_id = tweet['id']
    nfollowers = tweet['user']['followers_count']
    nfriends = tweet['user']['friends_count']

    # For a retweet, use the text of the original (retweeted) status.
    if 'retweeted_status' in tweet:
        text = tweet['retweeted_status']['text']
    else:
        text = tweet['text']

    entities = tweet['entities']
    hashtags = [hashtag['text'] for hashtag in entities['hashtags']]
    users = [user_mention['screen_name'] for user_mention in entities['user_mentions']]
    urls = [url['expanded_url'] for url in entities['urls']]

    # 'media' is only present in the entities when the tweet has attachments.
    media_urls = []
    if 'media' in entities:
        media_urls = [media['media_url'] for media in entities['media']]

    return [date, tweet_id, text, hashtags, users, urls, media_urls, nfollowers, nfriends]

'''start main'''

if __name__ == "__main__":
    # Usage: script.py <tweets-file> [<output-folder>]
    #
    # Reads the input file (one raw JSON tweet per line) and groups the parsed
    # tweets by hour: every tweet created in a given hour is written to the
    # file "<output-folder>/Mon-Day-Year-Hour.txt" (e.g. "Feb-17-2014-14.txt").

    # Imported here rather than at the top of the file so that this block
    # carries the one extra module it needs.
    import calendar

    # The output folder may be given as a second argument; without it the
    # previously hard-coded location is used.
    output_dir = sys.argv[2] if len(sys.argv) > 2 else r'C:\Users\My Pc\Desktop\output'
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    started_files = set()   # hour files already created during this run
    current_name = None     # name of the hour file that is currently open
    fout = None             # handle of the hour file that is currently open

    file_timeordered_json_tweets = codecs.open(sys.argv[1], 'r', 'utf-8')
    try:
        # efficient line-by-line read of big files
        for line in file_timeordered_json_tweets:
            try:
                record = parse_json_tweet(line)
            except (ValueError, KeyError, TypeError, AttributeError):
                # Not valid JSON, or not shaped like a tweet: skip the line.
                continue

            # A short record or an empty date marks a tweet the parser chose
            # to skip (e.g. a non-English one).
            if len(record) != 9 or not record[0]:
                continue
            [tweet_gmttime, tweet_id, text, hashtags, users, urls, media_urls, nfollowers, nfriends] = record

            # "created_at":"Mon Feb 17 14:14:44 +0000 2014"
            try:
                c = time.strptime(tweet_gmttime.replace("+0000", ''), '%a %b %d %H:%M:%S %Y')
            except (ValueError, AttributeError):
                print("pb with tweet_gmttime %s %s" % (tweet_gmttime, line))
                continue

            # The timestamp is GMT, so convert it with timegm(); mktime()
            # would interpret it in the machine's local time zone.
            tweet_unixtime = calendar.timegm(c)

            file_name = time.strftime('%b-%d-%Y-%H', c) + '.txt'
            if file_name != current_name:
                # Only one hour file is kept open at a time. The first visit
                # to an hour starts a fresh file; if the same hour shows up
                # again later (unordered input) the file is appended to, so
                # earlier tweets are never overwritten.
                if fout is not None:
                    fout.close()
                mode = 'a' if file_name in started_files else 'w'
                fout = codecs.open(os.path.join(output_dir, file_name), mode, 'utf-8')
                started_files.add(file_name)
                current_name = file_name

            fout.write(str([tweet_unixtime, tweet_gmttime, tweet_id, text, hashtags, users, urls, media_urls, nfollowers, nfriends]) + " ")
    finally:
        # Close both files even if processing stops on an unexpected error.
        file_timeordered_json_tweets.close()
        if fout is not None:
            fout.close()

Navigate

Write a function in php which takes two parameters and returns (not echos) the H

Write a function in python called is_square(board): Given a board as a list of l

Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.

Write a function in python 2.7 File operations in python 2.7 INPUT:CrimeReport.t

Question

Explanation / Answer

Related Questions

Navigate