I need to filter tweets by keywords written in native Arabic or Farsi script. I am storing the tweets in MongoDB, but I am unable to filter the tweets by a specific keyword, and I am not sure where I am going wrong. I am running the code from a UNIX shell. Below is my code:
# -*- coding: utf-8 -*-
import pymongo
#from pymongo import Connection
from pymongo import MongoClient
import json
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import datetime
import time
import sys
# Older pymongo releases expose the client class as pymongo.connection.Connection;
# when that import is unavailable, MongoClient is used under the same name.
try:
    from pymongo.connection import Connection
except ImportError as e:
    from pymongo import MongoClient as Connection

# Twitter API credentials (placeholders -- substitute real values before running).
consumer_key = "XXXXXXXXXXXXXXXXXX"
consumer_secret = "XXXXXXXXXXXXXXXXXXXXXXXX"
access_token = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
access_token_secret = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"

# Translation table sending every code point above the Basic Multilingual Plane
# to U+FFFD (the replacement character). Not referenced in the visible code.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)

# Open the local MongoDB server and select the "lang" database and its
# "tweets" collection.
connection = Connection('localhost', 27017)
db = connection.lang
collection = db.tweets

# Unique index on the "id" field so the same id cannot be stored twice.
# NOTE(review): dropDups is presumably only honoured by older MongoDB
# servers -- confirm against the deployed version.
collection.ensure_index("id", unique=True, dropDups=True)

print("\n=== Starting Tweet Collection :) ===\n")
# The listener below receives messages from the stream and stores all of their
# fields in the database.
class StdOutListener(StreamListener):
    """Stream listener that writes every received message to MongoDB."""

    def on_data(self, data):
        """Decode one raw JSON message and insert it into ``collection``.

        Args:
            data: The raw JSON text delivered by the stream.

        Returns:
            Always True. (tweepy is expected to treat a False return as a
            request to stop the stream -- see the tweepy documentation.)
        """
        # Load the message into the variable "t".
        t = json.loads(data)
        try:
            # Store every field of the message as received.
            collection.insert(t)
        except pymongo.errors.DuplicateKeyError:
            # The unique index on "id" rejected this document because that
            # id is already stored. Skipping it keeps the stream running
            # instead of letting the exception abort collection.
            pass
        return True

    def on_error(self, status):
        """Print the status reported for a stream error to the console.

        Args:
            status: The status value passed in by tweepy (presumably the
                HTTP status code -- confirm against the tweepy version used).
        """
        # Function-call form matches the print(...) call used for the startup
        # banner and is valid on both Python 2 and Python 3.
        print(status)
# Script entry point: authenticate, open the stream and track the keyword,
# retrying after a pause whenever the stream raises.
if __name__ == '__main__':
    listener = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener, timeout=30.0)
    while True:
        try:
            # The keyword is a unicode literal: on Python 2 a plain '...'
            # literal is a UTF-8 byte string, and non-ASCII byte strings are
            # likely to fail with a UnicodeDecodeError when the track list is
            # joined/encoded for the request (confirm against the installed
            # tweepy version). The u'' prefix is also valid on Python 3.3+.
            stream.filter(track=[u'الله'])
            break
        except Exception as e:
            # Report the failure before retrying. Previously the exception
            # was swallowed silently, so a request that failed on every
            # attempt looked like a stream that simply returned no tweets.
            print("Stream error (%r); retrying in 59 seconds" % (e,))
            time.sleep(59)
This code always gives me 0 output. I was able to filter very well with English keywords, but I am unable to do so with keywords in native languages. Any suggestions on where I am going wrong?