Here is the code.
# This function starts the stream.
def get_stream(headers, set, bearer_token, expansions, fields, save_to_disk, save_path):
    """Read tweets from the filtered-stream endpoint and hand each one to ``save_tweets``.

    Stops once ``max_results`` (a module-level name) non-blank lines have
    been handled; lines that fail to decode or process count toward it.

    Args:
        headers: Passed as the ``headers`` argument of ``requests.get``.
        set: Not used in this function; kept so existing callers still work.
        bearer_token: Not used in this function; kept for the same reason.
        expansions: Query-string fragment appended directly to the stream URL.
        fields: Query-string fragment appended after ``expansions``.
        save_to_disk: When truthy, ``save_media_to_disk`` is called per tweet.
        save_path: Forwarded to ``save_media_to_disk``.

    Raises:
        Exception: If the endpoint answers with a status code other than 200.
    """
    response = requests.get(
        "https://api.twitter.com/2/tweets/search/stream" + expansions + fields,
        headers=headers,
        stream=True,
    )
    # A streamed response keeps its connection open until it is closed, so
    # release it on every exit path (normal end, early break, or exception).
    with response:
        print(response.status_code)
        if response.status_code != 200:
            raise Exception(
                "Cannot get stream (HTTP {}): {}".format(
                    response.status_code, response.text
                )
            )
        i = 0
        for response_line in response.iter_lines():
            # Blank lines are keep-alive signals, not tweets; skipping them
            # avoids a decode error and keeps them out of the count.
            if not response_line:
                continue
            i += 1
            try:
                # Decoding happens inside the try so that a malformed line
                # is skipped instead of aborting the whole stream.
                json_response = json.loads(response_line)
                # print(json.dumps(json_response, indent=4, sort_keys=True))
                save_tweets(json_response)
                if save_to_disk:
                    # NOTE(review): ``save_media_to_disk`` must be a callable
                    # defined elsewhere in the module; a module-level variable
                    # with the same name would shadow it.
                    save_media_to_disk(json_response, save_path)
            except (json.JSONDecodeError, KeyError):
                # Skip this tweet and keep reading the stream.
                print(f"{i}/{max_results}: ERROR: encountered a problem with a line of data… \n")
            # Check the limit after processing so that exactly ``max_results``
            # lines are handled and we do not block waiting for one more.
            if i >= max_results:
                break
# This function saves a tweet.
def save_tweets(tweet):
    """Print one streamed payload and append a row per included user to ``tweet_list``.

    Each appended row is
    ``[id, created_at, text, conversation_id, username]``, where the first
    four values come from ``tweet["data"]`` and ``username`` comes from an
    entry of ``tweet["includes"]["users"]``.

    Args:
        tweet: Decoded JSON payload (a dict) for a single stream line.

    Raises:
        KeyError: If ``data``, ``includes``, ``users`` or any of the row
            fields is missing from the payload.
    """
    print(json.dumps(tweet, indent=4, sort_keys=True))
    data = tweet["data"]
    includes = tweet["includes"]
    users = includes["users"]
    for user in users:
        # ``tweet_list`` is the module-level accumulator that the script
        # later turns into a DataFrame.
        tweet_list.append(
            [
                data["id"],
                data["created_at"],
                data["text"],
                data["conversation_id"],
                user["username"],
            ]
        )
# The max number of tweets that will be returned.
max_results = 20

# Whether to save media to disk, and where. The flag is deliberately not named
# ``save_media_to_disk``: get_stream calls a function of that name, and
# assigning a bool to it at module level would make that call fail.
save_to_disk = False
save_path = ""

# You can adjust the rules if needed.
search_rules = [
    {
        "value": "-is:retweet (from:NWSNHC OR from:NWSHouston OR from:NHC_Atlantic OR from:NWSSanAntonio OR from:USGS_TexasRain OR from:USGS_TexasFlood OR from:JeffLindner1)"
    }
]

# Query-string fragments appended to the stream URL by get_stream.
# save_tweets reads ``created_at`` and ``conversation_id`` from every tweet;
# they are requested explicitly here because the query otherwise asks only
# for the author expansion and the username.
expansions = "?expansions=author_id"
tweet_fields = "&tweet.fields=created_at,conversation_id"
user_fields = "&user.fields=username"

# Rows appended by save_tweets while the stream is being read.
tweet_list = []

bearer_token = ts.BEARER_TOKEN
headers = create_headers(bearer_token)

# Replace whatever rules are currently registered with ``search_rules``.
rules = get_rules(headers, bearer_token)
delete = delete_all_rules(headers, bearer_token, rules)
# Named ``rule_set`` rather than ``set`` so the builtin is not shadowed.
rule_set = set_rules(headers, delete, bearer_token, search_rules)

get_stream(
    headers,
    rule_set,
    bearer_token,
    expansions,
    tweet_fields + user_fields,
    save_to_disk,
    save_path,
)

df = pd.DataFrame(
    tweet_list,
    columns=["tweetid", "created_at", "text", "conversation_id", "username"],
)
# Bare expression: displays the frame when run as a notebook cell.
df