-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathStoreTweetsInDatabase.py
More file actions
132 lines (103 loc) · 4.02 KB
/
StoreTweetsInDatabase.py
File metadata and controls
132 lines (103 loc) · 4.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 9 10:08:04 2015
@author: Confero
"""
import configparser
import time
from twython import Twython, TwythonError
from pymongo import MongoClient
# Load the API credentials and database settings from settings.cfg.
print("Retrieving settings")
config = configparser.ConfigParser()
config.read('settings.cfg')

# Get the keys from the [keys] section of the settings file.
APP_KEY = config.get('keys', 'APP_KEY')
APP_SECRET = config.get('keys', 'APP_SECRET')

# Authenticate with twitter: the first client is only used to obtain an
# OAuth 2 access token, the second one is the client used for all requests.
print("Authenticating with twitter")
twitter = Twython(APP_KEY, APP_SECRET, oauth_version=2)
ACCESS_TOKEN = twitter.obtain_access_token()
twitter = Twython(APP_KEY, access_token=ACCESS_TOKEN)

# Start the database (MongoClient() with no arguments uses the driver's
# default connection settings).
print("Starting database")
client = MongoClient()
username = config.get('database', 'username')
DBNAME = config.get('database', 'name')
db = client[DBNAME]
# The collection name is derived from the configured username.
user_tweets = db[username + "_timeline4"]
# Collection.count() was removed in PyMongo 4; count_documents({}) counts
# every document in the collection on both PyMongo 3.7+ and 4.x.
num_tweets_in_db = user_tweets.count_documents({})
def get_rate_limit():
    """Return how many '/statuses/user_timeline' requests are still allowed.

    Queries the application rate limit status through the module-level
    ``twitter`` client and prints the remaining count.

    Returns:
        The 'remaining' value reported for '/statuses/user_timeline'.

    Raises:
        Exception: whatever the status request raised. The failure is
            printed and re-raised so the caller sees the real cause instead
            of an UnboundLocalError on ``status`` further down.
    """
    print("Getting rate limit")
    try:
        status = twitter.get_application_rate_limit_status(
            resources=['statuses', 'application'])
    except Exception:
        print("error getting status")
        raise
    user_status = status['resources']['statuses']['/statuses/user_timeline']
    print("Rate limit left: " + str(user_status['remaining']))
    return user_status['remaining']
def store_tweets(tweets, collection):
    """Store a batch of tweets in a collection, best-effort.

    Args:
        tweets: a list of tweet documents, or a single tweet dict. An empty
            batch (or None) is skipped without touching the collection.
        collection: object exposing ``insert_many(documents)``, e.g. a
            PyMongo collection.

    Returns:
        None. Storage failures are printed, not raised, so that harvesting
        can continue.
    """
    if not tweets:
        # Nothing to store; insert_many rejects an empty list anyway.
        return
    if isinstance(tweets, dict):
        # Keep accepting a single document, as the old insert() call did.
        tweets = [tweets]
    try:
        # I might want to process the tweets as I store them.
        # Collection.insert() was removed in PyMongo 4; insert_many is the
        # supported bulk insert.
        collection.insert_many(tweets)
    except Exception as err:
        print("Error storing tweets")
        print(err)
def get_tweets(**get_tweet_params):
    """Fetch one page of the user timeline and store it in ``user_tweets``.

    While get_rate_limit() reports no remaining requests, sleeps two minutes
    and checks again; once a request is available the keyword arguments are
    forwarded to ``twitter.get_user_timeline``.

    Args:
        **get_tweet_params: keyword arguments passed unchanged to
            ``get_user_timeline`` (callers in this file pass screen_name,
            count and max_id or since_id).

    Returns:
        The tweets returned by Twitter (possibly empty), or an empty list
        when the request failed. Never None, so callers can safely test the
        result for emptiness.
    """
    try:
        # Wait for quota and then fetch, instead of sleeping once and
        # returning nothing.
        while get_rate_limit() <= 0:
            print("Rate limit met: Sleeping 2 min")
            time.sleep(60 * 2)
        print(get_tweet_params)
        new_tweets = twitter.get_user_timeline(**get_tweet_params)
    except Exception as err:
        # Best-effort boundary: report the failure and hand back an empty
        # page so the calling loop stops cleanly.
        print("Error getting tweets")
        print(err)
        return []
    if new_tweets:
        # Only a non-empty page has first/last ids to show and tweets to store.
        print(new_tweets[0]['id'])
        print(new_tweets[-1]['id'])
        store_tweets(new_tweets, user_tweets)
    return new_tweets
def get_new_tweets(**get_tweet_params):
    """Repeatedly fetch tweets newer than ``since_id`` until none are left.

    After each non-empty page, ``since_id`` is moved to the id of the first
    tweet of that page and the request is repeated.

    Args:
        **get_tweet_params: keyword arguments forwarded to get_tweets()
            (callers in this file pass screen_name, count and since_id).

    Returns:
        None. Errors are printed rather than raised.
    """
    # NOTE(review): this assumes new_tweets[0] is the newest tweet of a page
    # and that one page covers everything newer than since_id. If more than
    # `count` tweets were posted since the last run, the ones between
    # since_id and the page's oldest tweet may be skipped; confirm against
    # the Twitter timeline paging documentation.
    print("Getting newer tweets")
    try:
        while True:
            new_tweets = get_tweets(**get_tweet_params)
            if not new_tweets:
                # An empty page (or None) means there is nothing newer left.
                break
            get_tweet_params['since_id'] = new_tweets[0]['id']
        print("Got newer tweets")
    except Exception as err:
        print("Error getting new tweets")
        print(err)
def get_old_tweets(**get_tweet_params):
    """Repeatedly fetch tweets older than ``max_id`` until none are left.

    After each non-empty page, ``max_id`` is set to one less than the id of
    the last tweet of that page, so the next request does not return that
    tweet again.

    Args:
        **get_tweet_params: keyword arguments forwarded to get_tweets()
            (callers in this file pass screen_name, count and max_id).

    Returns:
        None. Errors are printed rather than raised.
    """
    print("Getting older tweets")
    try:
        while True:
            old_tweets = get_tweets(**get_tweet_params)
            if not old_tweets:
                # An empty page (or None) means the timeline is exhausted.
                break
            get_tweet_params['max_id'] = old_tweets[-1]['id'] - 1
        print("Got older tweets")
    except Exception as err:
        print("Error getting old tweets")
        print(err)
# Gets how many tweets are in the database.
# Collection.count() was removed in PyMongo 4; count_documents({}) works on
# PyMongo 3.7+ and 4.x.
num_tweets_in_db = user_tweets.count_documents({})
print("Tweets in database: ", num_tweets_in_db)
if num_tweets_in_db > 0:
    # Getting new tweets, or tweets not yet put into the database: continue
    # below the smallest stored id and above the largest stored id.
    print("Getting new tweets")
    oldest_tweet = user_tweets.find_one(sort=[('id', 1)])['id']
    newest_tweet = user_tweets.find_one(sort=[('id', -1)])['id']
    get_old_tweets(screen_name=username, count=200, max_id=oldest_tweet - 1)
    get_new_tweets(screen_name=username, count=200, since_id=newest_tweet)
    print("Got new tweets")
else:
    # Getting tweets for the first time: one initial page gives the id range
    # from which both directions are then extended.
    print("Getting tweets for the first time")
    new_tweets = get_tweets(screen_name=username, count=200)
    if new_tweets:
        newest_tweet = new_tweets[0]['id']
        print("Newest id:", newest_tweet)
        oldest_tweet = new_tweets[-1]['id']
        print("Oldest id:", oldest_tweet)
        get_old_tweets(screen_name=username, count=200, max_id=oldest_tweet - 1)
        get_new_tweets(screen_name=username, count=200, since_id=newest_tweet)
        print("Got tweets for the first time")
    else:
        # get_tweets() gave nothing back (error, rate limit or an empty
        # timeline); indexing the result would crash the script.
        print("No tweets could be retrieved")