Hello.

I tried to get tweets using this Python code and these query_params options.

    def connect_to_endpoint(self, headers):
        """Send a GET request to ``self.search_url`` and return the parsed JSON.

        ``headers`` is passed as the request headers and ``self.query_params``
        as the query-string parameters. Any status code other than 200 raises
        ``Exception(status_code, response_text)``.
        """
        reply = requests.request(
            "GET",
            self.search_url,
            headers=headers,
            params=self.query_params,
        )
        if reply.status_code == 200:
            return reply.json()
        # Surface both the status and the body so the API error is visible.
        raise Exception(reply.status_code, reply.text)
# Parameters sent with every search request. 'query' starts empty and is set
# per brand; the main_act_* methods overwrite 'start_time' and 'end_time' for
# each time window they request.
self.query_params = {
    'query': '',
    'tweet.fields': 'created_at',
    'start_time': '2016-01-01T00:00:00Z',
    'end_time': '2021-01-01T00:00:00Z',
    'max_results': '500',
}

But I got different results when querying by month and when querying by week.

You can see what I mean in the image below.

I think we should get exactly the same results in both cases.
However, I found 266 tweet IDs that differ between the two cases.

Also, when requesting a whole year at a time [e.g. 2016-01-01 ~ 2017-01-01],
I got only very little data.

I want to know the exact reason for this problem…

Why is my post hidden?
What is the problem?

There is not enough specific information here to tell anything - what exact queries did you run using what exact scripts / libraries?

Thanks for reading.

reference : Twitter-API-v2-sample-code/full-archive-search.py at main · twitterdev/Twitter-API-v2-sample-code · GitHub

I changed the start_time and end_time values in query_params.

    def main_act_months(self, brand_list, drug_name):
        """Crawl every brand one calendar month at a time, 2016-01 to 2020-12.

        For each month a file named ``"<drug_name> <start>~<end>.txt"`` is
        created (truncating any existing file) and ``self.crawling_part`` is
        called once per entry of ``brand_list`` with
        ``self.query_params['query']`` set to that entry.

        Side effects: overwrites ``'tweet.fields'``, ``'start_time'``,
        ``'end_time'`` and ``'query'`` in ``self.query_params`` and prints the
        parameters before each crawl.

        Args:
            brand_list: iterable of query strings, one crawl per entry.
            drug_name: string used as the prefix of every output file name.
        """
        self.query_params['tweet.fields'] = "lang"
        for year in range(2016, 2021):
            for month in range(1, 13):
                # Each window runs from the first of this month to the first
                # of the next; December rolls over into the following year.
                if month == 12:
                    end_year, end_month = year + 1, 1
                else:
                    end_year, end_month = year, month + 1
                self.query_params['start_time'] = "{}-{:02d}-01T00:00:00Z".format(year, month)
                self.query_params['end_time'] = "{}-{:02d}-01T00:00:00Z".format(end_year, end_month)

                out_name = (drug_name + " " + self.query_params['start_time'][0:10]
                            + "~" + self.query_params['end_time'][0:10] + ".txt")
                # The context manager closes the file even when a crawl raises.
                with open(out_name, "w") as fw:
                    for brand_name in brand_list:
                        self.query_params['query'] = brand_name
                        print(self.query_params)
                        self.crawling_part(fw)

    def main_act_weeks(self, brand_list, drug_name):
        """Crawl every brand for 2020-02-01 up to 2020-03-01 in weekly windows.

        Windows start on 2020-02-01 and advance seven days at a time. The last
        window is clipped to 2020-03-01 so the whole period is requested. The
        earlier loop stopped as soon as a window's end fell in March, which
        skipped the window starting on 2020-02-29 and made the weekly crawl
        return fewer tweets than the monthly crawl of the same period.

        For each window a file named ``"<drug_name> <start>~<end>.txt"`` is
        created and ``self.crawling_part`` is called once per entry of
        ``brand_list`` with ``self.query_params['query']`` set to that entry.

        Side effects: overwrites ``'start_time'``, ``'end_time'`` and
        ``'query'`` in ``self.query_params`` and prints the parameters before
        each crawl.

        Args:
            brand_list: iterable of query strings, one crawl per entry.
            drug_name: string used as the prefix of every output file name.
        """
        window_start = datetime.date(2020, 2, 1)
        period_end = datetime.date(2020, 3, 1)
        one_week = datetime.timedelta(weeks=1)

        while window_start < period_end:
            # Clip the final window so it never runs past the period end.
            window_end = min(window_start + one_week, period_end)
            self.query_params['start_time'] = str(window_start) + "T00:00:00Z"
            self.query_params['end_time'] = str(window_end) + "T00:00:00Z"

            out_name = (drug_name + " " + self.query_params['start_time'][0:10]
                        + "~" + self.query_params['end_time'][0:10] + ".txt")
            # The context manager closes the file even when a crawl raises.
            with open(out_name, "w") as fw:
                for brand_name in brand_list:
                    self.query_params['query'] = brand_name
                    print(self.query_params)
                    self.crawling_part(fw)

            # The next window begins exactly where this one ended.
            window_start = window_end

    def crawling_part(self, fw, delay=2):
        """Fetch every result page for the current query and write each to ``fw``.

        Each page returned by ``self.connect_to_endpoint`` is written as an
        indented, key-sorted JSON document (documents are appended back to
        back, with no separator). Paging continues while the response carries
        ``meta.next_token``; the token is stored in
        ``self.query_params['next_token']`` for the following request.

        ``'next_token'`` is always removed from ``self.query_params`` before
        returning, including when a request raises, so a stale token cannot
        leak into the next query.

        Args:
            fw: writable text file object that receives the JSON pages.
            delay: seconds to sleep after each request (default 2, the
                original fixed pause between calls).

        Raises:
            Whatever ``self.create_headers`` or ``self.connect_to_endpoint``
            raises.
        """
        try:
            has_more = True
            # Iterate rather than recurse so long result sets cannot exhaust
            # the interpreter's recursion limit.
            while has_more:
                headers = self.create_headers()
                json_response = self.connect_to_endpoint(headers)
                fw.write(json.dumps(json_response, indent=4, sort_keys=True))

                try:
                    next_token = json_response['meta']['next_token']
                except (KeyError, TypeError):
                    # No 'meta' / 'next_token' means this was the last page.
                    next_token = None

                has_more = next_token is not None
                if has_more:
                    self.query_params['next_token'] = next_token
                else:
                    print(self.query_params['query'] + " done")
                time.sleep(delay)
        finally:
            self.query_params.pop('next_token', None)
1 Like

Thanks. I think the issue may be that start time is inclusive while end time is exclusive (see GET /2/tweets/search/all | Docs | Twitter Developer Platform), so you’d have a few gaps where the smaller time windows would miss tweets - that’s the first thing that comes to my mind anyway.