bearer_token = auth()
headers = create_headers(bearer_token)
#keyword = "#We4Vaccine OR #COVIDVaccine OR #𝑽𝒂𝒄𝒄𝒊𝒏𝒂𝒕𝒊𝒐𝒏𝑫𝒓𝒊𝒗𝒆 OR #MythvsFacts OR #LargestVaccinationDrive OR #PulsePolio OR #StayPolioFree OR vaccine OR #vaccine OR side effects of vaccine OR vaccination OR #VaccineForAll OR vaccine for all OR vaccination for kids OR #covid19 OR vaccine mandotory OR #measles vaccine OR polio vaccine OR #polio OR #swine flu vaccine OR #covaxin OR heart inflmmation due to vaccine OR #LargesstVaccinationDrive #RotaVirus Vaccine OR #influenza OR #BCG Vaccine OR #TB OR #NoVaccine OR Vaccine freedom lang:en place_country:In -is:retweet"
#keyword = "(#We4Vaccine OR #COVIDVaccine OR #VaccinationDrive OR #MythvsFacts OR #LargestVaccinationDrive OR #PulsePolio OR #StayPolioFree OR vaccine OR #vaccine OR \"side effects of vaccine\" OR vaccination OR #VaccineForAll OR \"vaccine for all\" OR \"vaccination for kids\" OR #covid19 OR \"vaccine mandatory\" OR #measles OR measles OR \"polio vaccine\" OR #polio OR polio OR #swineflu OR \"swine flu\" OR #covaxin OR \"heart inflammation due to vaccine\" OR #LargesstVaccinationDrive OR #RotaVirus OR \"Rota Virus\" OR Vaccine OR #influenza OR influenza OR #BCG OR Vaccine OR #TB OR #NoVaccine OR \"vaccine freedom\" OR flu OR TB OR \"adverse effects of vaccine\") lang:en -is:retweet"
#keyword = "(#We4Vaccine OR #COVIDVaccine OR #VaccinationDrive OR #MythvsFacts OR #LargestVaccinationDrive OR #PulsePolio OR #StayPolioFree OR vaccine OR #vaccine OR side effects of vaccine OR vaccination OR #VaccineForAll OR vaccine for all OR vaccination for kids OR #covid19 OR vaccine mandatory OR #measles OR measles OR polio vaccine OR #polio OR polio OR #swineflue OR swine flu OR #covaxin OR autism OR heart inflammation due to vaccine OR #LargesstVaccinationDrive OR #RotaVirus OR Rota Virus OR Vaccine OR #influenza OR influenza OR #BCG OR Vaccine OR #TB OR #NoVaccine OR vaccine freedom OR flu OR TB OR adverse effects of vaccine) lang:en -is:retweet"
#keyword = "(#We4Vaccine OR #COVIDVaccine OR #VaccinationDrive OR #MythvsFacts OR #LargestVaccinationDrive OR #PulsePolio OR #StayPolioFree OR vaccine OR #vaccine OR \"side effects of vaccine\" OR vaccination OR #VaccineForAll OR \"vaccine for all\" OR \"vaccination for kids\" OR #covid19 OR \"vaccine mandatory\" OR #measles OR measles OR \"polio vaccine\" OR #polio OR polio OR #swineflu OR \"swine flu\" OR #covaxin OR \"heart inflammation due to vaccine\" OR #LargesstVaccinationDrive OR #RotaVirus OR \"Rota Virus\" OR Vaccine OR #influenza OR influenza OR #BCG OR Vaccine OR #TB OR #NoVaccine OR \"vaccine freedom\" OR flu OR TB OR \"adverse effects of vaccine\" OR #VaccinesWork OR #VaccinesSaveLives OR \"vaccine injury\" OR \"vaccine is dangerous\" ) lang:en -is:retweet"
keyword = "(#We4Vaccine OR #COVIDVaccine OR #VaccinationDrive OR #MythvsFacts OR #LargestVaccinationDrive OR #PulsePolio OR #StayPolioFree OR vaccine OR \"vaccine risk\" OR #vaccine OR \"side effects of vaccine\" OR vaccination OR #VaccineForAll OR \"vaccine for all\" OR \"vaccination for kids\" OR #covid19 OR \"vaccine mandatory\" OR #measles OR measles OR \"polio vaccine\" OR #polio OR polio OR #swineflu OR \"swine flu\" OR #covaxin OR \"heart inflammation due to vaccine\" OR #LargesstVaccinationDrive OR #RotaVirus OR \"Rota Virus\" OR Vaccine OR #influenza OR influenza OR #BCG OR Vaccine OR #TB OR #NoVaccine OR \"vaccine freedom\" OR flu OR TB OR \"adverse effects of vaccine\" OR #VaccinesWork OR #VaccinesSaveLives OR \"vaccine injury\" OR \"vaccine is dangerous\" OR \"COVID-19\" OR \"CORONAVIRUS\" OR \"CORONA\" OR \"Vaccination Refusal\" OR \"Vaccination Refusal\" OR \"Pandemic\" OR \"Covaccine\" OR \"Covishield\" OR \"Sinopharm\" OR \"BioNTech\" OR \"Moderna\" OR \"Oxford\" OR \"Covaxin\" OR \"Sputnik V.\" OR \"Pfizer\" ) lang:en -is:retweet"
start_list = ['2021-01-01T00:00:00.000Z','2021-02-01T0:00:00.000Z','2021-03-01T00:00:00.000Z','2021-04-01T00:00:00.000Z','2021-05-01T00:00:00.000Z','2021-06-01T00:00:00.000Z']
end_list = ['2021-01-07T00:00:00.000Z','2021-02-07T00:00:00.000Z','2021-03-08T00:00:00.000Z','2021-04-07T00:00:00.000Z','2021-05-07T00:00:00.000Z','2021-06-05T00:00:00.000Z']
#start_list = ['2021-02-01T00:00:00.000Z']
#end_list =['2021-10-30T00:00:00.000Z']
max_results = 500
#Total number of tweets we collected from the loop
total_tweets = 0
# Create file
csvFile = open("Feb21_2.csv", "a", newline="", encoding='utf-8')
csvWriter = csv.writer(csvFile)
#Create headers for the data you want to save, in this example, we only want save these columns in our dataset
csvWriter.writerow(['author id', 'created_at', 'geo', 'id','lang', 'like_count', 'quote_count', 'reply_count','retweet_count','source','tweet'])
csvFile.close()
print(len(start_list))
for i in range(0,len(start_list)):
# Inputs
count = 0 # Counting tweets per time period
max_count = 50000 # Max tweets per time period
#max_count = 50
flag = True
next_token = None
# Check if flag is true
while flag:
# Check if max_count reached
if count >= max_count:
break
print("-------------------")
print("Token: ", next_token)
print(end_list[i])
url = create_url(keyword, start_list[i],end_list[i], max_results)
json_response = connect_to_endpoint(url[0], headers, url[1], next_token)
result_count = json_response['meta']['result_count']
if 'next_token' in json_response['meta']:
# Save the token to use for next call
next_token = json_response['meta']['next_token']
print("Next Token: ", next_token)
if result_count is not None and result_count > 0 and next_token is not None:
print("Start Date: ", start_list[i])
append_to_csv(json_response, "Feb21_2.csv")
count += result_count
total_tweets += result_count
print("Total # of Tweets added: ", total_tweets)
print("-------------------")
time.sleep(5)
# If no next token exists
else:
if result_count is not None and result_count > 0:
print("-------------------")
print("Start Date: ", start_list[i])
append_to_csv(json_response, "Feb21_2.csv")
count += result_count
total_tweets += result_count
print("Total # of Tweets added: ", total_tweets)
print("-------------------")
time.sleep(5)
#Since this is the final request, turn flag to false to move to the next time period.
flag = False
next_token = None
time.sleep(5)
print("Total number of results: ", total_tweets)
I am using above code (acaemic research account is created) to extract tweets for every month but its retrieving tweets for only one day per month why this is happening