import json

# Enter your keys/secrets as strings in the following fields
# (Twitter API authorization tokens)
credentials = {}
credentials['CONSUMER_KEY'] = 'YOUR_CONSUMER_KEY'
credentials['CONSUMER_SECRET'] = 'YOUR_CONSUMER_SECRET'
credentials['ACCESS_TOKEN'] = 'YOUR_ACCESS_TOKEN'
credentials['ACCESS_SECRET'] = 'YOUR_ACCESS_SECRET'
# Save the credentials object to file
with open("twitter_credentials.json", "w") as file:
    json.dump(credentials, file)
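# Note: twitter_credentials.json now holds live API secrets; keep it out of
# version control (e.g. add it to .gitignore).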

from twython import Twython
import json

# Load credentials from json file
with open("twitter_credentials.json", "r") as file:
    creds = json.load(file)
# Search centre and radius: latitude,longitude,distance (mi or km)
geocode = '28.6517178,77.2219388,1000mi'

# Instantiate a Twython object
python_tweets = Twython(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET'])

# Create our query
keywords = "crime OR attempt -filter:retweets"
query = {'q': keywords,
         'count': 100,
         'lang': 'en',
         'geocode': geocode,
         }
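# The Standard Search API returns a JSON object whose 'statuses' key holds a
# list of tweet dicts; the loop below reads the screen name, creation date,
# text, and the user's self-reported location from each one.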
import pandas as pd

# Search tweets and collect the fields we need
dict_ = {'user': [], 'date': [], 'text': [], 'user_loc': []}
for status in python_tweets.search(**query)['statuses']:
    dict_['user'].append(status['user']['screen_name'])
    dict_['date'].append(status['created_at'])
    dict_['text'].append(status['text'])
    dict_['user_loc'].append(status['user']['location'])

# Structure data in a pandas DataFrame for easier manipulation
df = pd.DataFrame(dict_)
# Emoticon feature: report the emojis found in each tweet, then replace them
# with their text names so the ASCII-only cleaning below keeps their meaning
import demoji
import emoji

demoji.download_codes()  # needed for older demoji versions; recent ones bundle the codes
for i in range(len(df)):
    print(demoji.findall(df['text'][i]))
    df.loc[i, 'text'] = emoji.demojize(df['text'][i], delimiters=("", ""))
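# For example, emoji.demojize("fire 🔥", delimiters=("", "")) returns
# "fire fire": the emoji's name survives as a plain word.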

# Pre-process
import string
import re
import nltk
nltk.download('vader_lexicon')
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('stopwords')  # required before stopwords.words() can be called
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer

stop_words = stopwords.words("english")
stemmer = SnowballStemmer("english")
def preprocess(tweet):
    # wnl = WordNetLemmatizer()
    cleaned_tweet = []
    words = tweet.split()
    for word in words:
        # Skip hyperlinks and Twitter handles (@<user>)
        if ('http' in word) or ('.com' in word) or ('www.' in word) or (word.startswith('@')):
            continue
        # Remove digits and special characters
        temp = re.sub(f'[^{string.ascii_lowercase}]', '', word.lower())
        # Skip words with fewer than 3 characters
        if len(temp) < 3:
            continue
        # Store the stemmed version of the word
        temp = stemmer.stem(temp)
        if len(temp) > 0:
            cleaned_tweet.append(temp)
    return ' '.join(cleaned_tweet)
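# Example (follows directly from the rules above):
# preprocess("Crime reported near @user http://t.co/xyz at 10pm!!")
# -> "crime report near"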
cl = []
for i in range(len(df)):
    cl.append(preprocess(df['text'][i]))
df['clean_tweet'] = cl

# Load pre-trained models and vectorizers
import pickle
import bz2

with bz2.BZ2File('All Model', 'r') as sfile1:
    models = pickle.load(sfile1)
with bz2.BZ2File('All Vector', 'r') as sfile2:
    vectorizers = pickle.load(sfile2)
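# 'All Model' and 'All Vector' are assumed to hold parallel lists pickled with
# bz2: vectorizers[i] a fitted text vectorizer (e.g. a scikit-learn
# TfidfVectorizer) and models[i] the classifier trained on its output.
# A minimal sketch of how such files could have been written (hypothetical;
# not confirmed by the source):
#     with bz2.BZ2File('All Model', 'w') as f:
#         pickle.dump(models, f)
#     with bz2.BZ2File('All Vector', 'w') as f:
#         pickle.dump(vectorizers, f)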
names = ["K-Nearest Neighbors", "Linear SVM",
         "Decision Tree", "Random Forest",
         "ExtraTreesClassifier"]
for i in range(len(names)):
    test_vectors = vectorizers[i].transform(cl)
    df['Class ' + names[i]] = models[i].predict(test_vectors)
df.to_csv('tweets.csv')
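# Optional sanity check: eyeball how the five classifiers agree per tweet
# print(df[['text'] + ['Class ' + n for n in names]].head())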