Spaces:

politweet-sh
/

politweet

Runtime error

App Files Files Community

4stra committed on Jul 6, 2022

Commit

209bbc5

•

1 Parent(s): 70f6eb7

Add files via upload

Browse files

Files changed (1) hide show

twitter-scraper/scrape.py +94 -0

twitter-scraper/scrape.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import sys
+import io
+import time
+import asyncio
+import os
+# Obtain the asyncio event loop at import time, before twint is imported.
+# The result of is_running() is not stored or checked; the call has no visible effect here.
+loop = asyncio.get_event_loop()
+loop.is_running()
+import twint
+import nest_asyncio
+# NOTE(review): presumably patches asyncio so twint's loop usage works inside an
+# already-running event loop (e.g. a notebook/app server) - confirm against nest_asyncio docs.
+nest_asyncio.apply()
+from datetime import date
+class scraper:
+    def get_tweets(search_str, from_date="2006-07-01", to_date=str(date.today()), num_tweets=10,u_or_s='s'):
+        time_out= time.time()+2*60
+        _dict={}
+        c=twint.Config()
+        if u_or_s.lower() =="u":
+            c.Search = "from:@"+search_str # topic
+        else:
+            c.Search = search_str       # topic
+        c.Pandas = True
+        num_tweets_and_replies=num_tweets
+        c.Count=True
+        for j in range(1,5):
+            c.Limit = num_tweets_and_replies
+            c.Since = from_date
+            c.Until =  to_date
+            c.Hide_output =True
+            old_stdout = sys.stdout
+            new_stdout = io.StringIO()
+            sys.stdout = new_stdout
+            twint.run.Search(c)
+            output = new_stdout.getvalue()
+            sys.stdout = old_stdout
+            print(output[0:-2])
+            tweet_info=twint.output.panda.Tweets_df
+            t_count=0
+            try:
+                _keys=tweet_info["id"]
+                #tweet infor is a dataframe with fallowing columns
+                '''Index(['id', 'conversation_id', 'created_at', 'date', 'timezone', 'place',
+                'tweet', 'language', 'hashtags', 'cashtags', 'user_id', 'user_id_str',
+                'username', 'name', 'day', 'hour', 'link', 'urls', 'photos', 'video',
+                'thumbnail', 'retweet', 'nlikes', 'nreplies', 'nretweets', 'quote_url',
+                'search', 'near', 'geo', 'source', 'user_rt_id', 'user_rt',
+                'retweet_id', 'reply_to', 'retweet_date', 'translate', 'trans_src',
+                'trans_dest'],
+                dtype='object')'''
+                for i in range (len(_keys)):
+                    if _keys[i] in _dict.keys() or tweet_info["tweet"][i].startswith("@"):
+                        pass
+                    else:
+                        _dict[int(_keys[i])] = {"tweet": tweet_info["tweet"][i],
+                                                "date" :tweet_info["date"][i],
+                                                "nlikes": tweet_info["nlikes"][i],
+                                                "nreplies":tweet_info["nreplies"][i] ,
+                                                "nretweets": tweet_info["nretweets"][i],"topic":""}
+                        if len(list(_dict.keys()))==num_tweets:
+                            break
+            except:
+                pass
+            print(len(list(_dict.keys())), " of them are Tweets")
+            if len(list(_dict.keys())) < num_tweets:
+                num_tweets_and_replies= num_tweets_and_replies+100*3**j
+            if len(list(_dict.keys())) < num_tweets:
+                num_tweets_and_replies= num_tweets_and_replies+100*3**j
+            else:
+                break
+            if time_out <time.time():
+                break
+            if output.startswith("[!] No more data!"):
+                break
+        return _dict
+    def string_search_user_tweets(user_name,search_str ,from_date="2006-07-01", to_date=str(date.today()), num_tweets=10):
+        c=twint.Config()
+        c.Username =user_name
+        c.Search = search_str       # topic
+        c.Pandas = True
+        num_tweets_and_replies=num_tweets
+        c.Count=True
+        c.Limit = num_tweets_and_replies
+        c.Since = from_date
+        c.Until =  to_date
+        c.Hide_output =True
+        twint.run.Search(c)
+        return twint.output.panda.Tweets_df