Hello.
I tried to get tweets using this Python code and these query_params options.
def connect_to_endpoint(self, headers):
    """Issue a GET request to ``self.search_url`` and return the parsed JSON.

    The request is sent with the given ``headers`` and with
    ``self.query_params`` as the URL query parameters.

    Args:
        headers: HTTP headers to send with the request.

    Returns:
        The response body decoded by ``response.json()``.

    Raises:
        Exception: with ``(status_code, response_text)`` as arguments when
            the HTTP status code is anything other than 200.
    """
    reply = requests.request(
        "GET",
        self.search_url,
        headers=headers,
        params=self.query_params,
    )
    status = reply.status_code
    if status == 200:
        return reply.json()
    # Any non-200 status is surfaced to the caller together with the body text.
    raise Exception(status, reply.text)
# Default query-string parameters; connect_to_endpoint sends this dict as
# `params` on every request. main_act_months / main_act_weeks overwrite
# 'query', 'start_time' and 'end_time' before each crawl (main_act_months
# also replaces 'tweet.fields'), and crawling_part temporarily adds
# 'next_token' while paging.
self.query_params = {'query': '', 'tweet.fields': 'created_at', 'start_time': '2016-01-01T00:00:00Z',
'end_time': '2021-01-01T00:00:00Z', 'max_results': '500'}
However, I got different results when querying by month versus by week.
The attached screenshot below shows what I mean.
I think the two cases should return exactly the same results.
However, I found 266 tweet IDs that differ between the two cases.
Also, when requesting a whole year at once (e.g. 2016-01-01 to 2017-01-01),
I got only very little data.
I want to know the exact reason for this problem…
Why is my post hidden?
What is the problem?
There is not enough specific information here to tell anything - what exact queries did you run using what exact scripts / libraries?
Thanks for reading.
reference : Twitter-API-v2-sample-code/full-archive-search.py at main · twitterdev/Twitter-API-v2-sample-code · GitHub
I changed the start_time and end_time values in query_params.
def main_act_months(self, brand_list, drug_name, start_year=2016, end_year=2021):
    """Crawl every brand month by month and write one output file per month.

    For each calendar month in ``[start_year, end_year)`` the method sets
    ``start_time`` to the first day of that month and ``end_time`` to the
    first day of the following month, then calls ``self.crawling_part`` once
    per brand with ``query`` set to the brand name. All brands of one month
    are written to the same file, named
    ``"<drug_name> <start YYYY-MM-DD>~<end YYYY-MM-DD>.txt"``.

    Side effects: overwrites ``'tweet.fields'`` (set to ``"lang"``),
    ``'start_time'``, ``'end_time'`` and ``'query'`` in ``self.query_params``
    and prints the parameters before each crawl.

    Args:
        brand_list: iterable of query strings, one crawl per entry.
        drug_name: prefix used for the output file names.
        start_year: first year to crawl (inclusive). Defaults to 2016.
        end_year: year at which to stop (exclusive). Defaults to 2021.
    """
    self.query_params['tweet.fields'] = "lang"
    for year in range(start_year, end_year):
        for month in range(1, 13):
            # The window ends on the first day of the next month; December
            # rolls over into January of the following year.
            if month == 12:
                next_year, next_month = year + 1, 1
            else:
                next_year, next_month = year, month + 1
            self.query_params['start_time'] = "{}-{:02d}-01T00:00:00Z".format(year, month)
            self.query_params['end_time'] = "{}-{:02d}-01T00:00:00Z".format(next_year, next_month)
            out_name = (drug_name + " " + self.query_params['start_time'][0:10] + "~"
                        + self.query_params['end_time'][0:10] + ".txt")
            # `with` guarantees the file is closed even if a crawl raises.
            with open(out_name, "w") as fw:
                for brand_name in brand_list:
                    self.query_params['query'] = brand_name
                    print(self.query_params)
                    self.crawling_part(fw)
def main_act_weeks(self, brand_list, drug_name, start_date=None, end_date=None):
    """Crawl every brand in consecutive one-week windows, one file per window.

    The period ``[start_date, end_date)`` is split into back-to-back windows
    of one week. The last window is shortened so that it ends exactly on
    ``end_date``; previously the loop stopped as soon as a window end fell
    into March, so the tail of the period (2020-02-29 to 2020-03-01 with the
    defaults) was never requested and the weekly crawl returned fewer tweets
    than a monthly crawl of the same period.

    For each window the method sets ``start_time`` / ``end_time`` in
    ``self.query_params``, then calls ``self.crawling_part`` once per brand
    with ``query`` set to the brand name. All brands of one window go to the
    same file, named ``"<drug_name> <start YYYY-MM-DD>~<end YYYY-MM-DD>.txt"``.

    Args:
        brand_list: iterable of query strings, one crawl per entry.
        drug_name: prefix used for the output file names.
        start_date: ``datetime.date`` at which the first window starts.
            Defaults to 2020-02-01.
        end_date: ``datetime.date`` at which the last window ends.
            Defaults to 2020-03-01.
    """
    period_start = datetime.date(2020, 2, 1) if start_date is None else start_date
    period_end = datetime.date(2020, 3, 1) if end_date is None else end_date
    one_week = datetime.timedelta(weeks=1)

    window_start = period_start
    while window_start < period_end:
        # Clamp the final window instead of dropping it, so the windows
        # together cover the whole period without a gap.
        window_end = min(window_start + one_week, period_end)
        self.query_params['start_time'] = str(window_start) + "T00:00:00Z"
        self.query_params['end_time'] = str(window_end) + "T00:00:00Z"
        out_name = (drug_name + " " + self.query_params['start_time'][0:10] + "~"
                    + self.query_params['end_time'][0:10] + ".txt")
        # `with` guarantees the file is closed even if a crawl raises.
        with open(out_name, "w") as fw:
            for brand_name in brand_list:
                self.query_params['query'] = brand_name
                print(self.query_params)
                self.crawling_part(fw)
        # The next window starts exactly where this one ended.
        window_start = window_end
def crawling_part(self, fw, delay=2):
    """Fetch every result page for the current query and append it to ``fw``.

    Each page is requested through ``self.connect_to_endpoint`` using headers
    from ``self.create_headers`` and written to ``fw`` as an indented,
    key-sorted JSON document. While a page carries ``meta.next_token`` the
    token is stored in ``self.query_params['next_token']`` and the next page
    is requested; when no token is present the crawl is finished and
    ``"<query> done"`` is printed.

    Pages are fetched in a loop rather than by recursion, so very long result
    sets cannot exhaust the interpreter's recursion limit.

    Args:
        fw: writable text file object that receives the JSON pages.
        delay: seconds to pause after every request. Defaults to 2.

    Raises:
        Whatever ``self.connect_to_endpoint`` raises. ``'next_token'`` is
        removed from ``self.query_params`` in that case too, so a failed
        crawl cannot leak its paging token into the next query.
    """
    try:
        while True:
            headers = self.create_headers()
            json_response = self.connect_to_endpoint(headers)
            fw.write(json.dumps(json_response, indent=4, sort_keys=True))
            # Only a missing or malformed 'meta' block means "no more pages";
            # other errors are allowed to propagate.
            try:
                next_token = json_response['meta']['next_token']
            except (KeyError, TypeError):
                next_token = None
            if next_token is None:
                print(self.query_params['query'] + " done")
                break
            self.query_params['next_token'] = next_token
            time.sleep(delay)
    finally:
        # Always leave query_params without a paging token for the next query.
        self.query_params.pop('next_token', None)
    time.sleep(delay)
1 Like
Thanks. I think the issue may be that start time is inclusive while end time is exclusive (see GET /2/tweets/search/all | Docs | Twitter Developer Platform), so you’d have a few gaps where the smaller time windows would miss tweets - that’s the first thing that comes to my mind anyway.