# crimetwitter / CrimeTest.py
import json
# Enter your keys/secrets as strings in the following fields
# authorization tokens
credentials = {}
credentials['CONSUMER_KEY'] = 'XV2kS4M1OmganL2zZU0q8Kyxh'
credentials['CONSUMER_SECRET'] = 'PvjekJXnI304fE2En3cmYuftP7yOXH0xiANsWOsW1nUpbwV4j7'
credentials['ACCESS_TOKEN'] = '152569292-Uw6KPJqudtctiYjpR1GEWOMYMKGc2DhczLiZq4Q4'
credentials['ACCESS_SECRET'] = 'Muv9NC0JhKiskMqt7hO7XNbCZPRBOAOtADNaAN8xeBQ1a'
# Save the credentials object to file
with open("twitter_credentials.json", "w") as file:
    json.dump(credentials, file)
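# Alternative sketch (assumption, not part of the original flow): the same keys
# could be read from environment variables instead of being hard-coded, e.g.
#   import os
#   credentials = {k: os.environ[k] for k in
#                  ('CONSUMER_KEY', 'CONSUMER_SECRET', 'ACCESS_TOKEN', 'ACCESS_SECRET')}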
from twython import Twython
import json
# Load credentials from json file
with open("twitter_credentials.json", "r") as file:
    creds = json.load(file)
geocode = '28.6517178,77.2219388,1000mi' # latitude,longitude,distance(mi/km)
# Instantiate an object
python_tweets = Twython(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET'])
# Create our query
keywords="crime OR attempt -filter:retweets"
query = {'q': keywords,
'count': 100,
'lang': 'en',
'geocode': geocode,
}
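# Note on the search call below: Twitter's standard v1.1 search endpoint (used
# by Twython's .search) only returns recent tweets (roughly the past 7 days)
# and caps 'count' at 100 per request; collecting more would require paging
# with 'max_id', which this script does not do.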
import pandas as pd
# Search tweets
dict_ = {'user': [], 'date': [], 'text': [], 'user_loc': []}
for status in python_tweets.search(**query)['statuses']:
    dict_['user'].append(status['user']['screen_name'])
    dict_['date'].append(status['created_at'])
    dict_['text'].append(status['text'])
    dict_['user_loc'].append(status['user']['location'])
# Structure data in a pandas DataFrame for easier manipulation
df = pd.DataFrame(dict_)
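# Optional inspection step (not in the original flow); uncomment to preview the
# collected tweets and drop exact-duplicate texts before further processing:
# print(df.head())
# df = df.drop_duplicates(subset='text').reset_index(drop=True)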
# Emoticon feature: expand emoji in the tweet text into descriptive words
import demoji
import emoji
demoji.download_codes()
for i in range(len(df)):
    print(demoji.findall(df['text'][i]))
    # Use .loc so the demojized text is written back into the DataFrame
    df.loc[i, 'text'] = emoji.demojize(df['text'][i], delimiters=("", ""))
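# With empty delimiters, emoji.demojize("Fire 🔥") becomes "Fire fire", so emoji
# survive as plain words that can contribute to the text features instead of
# being stripped out.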
#Pre-process
import string
import re
import nltk
nltk.download('vader_lexicon')
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('stopwords')  # required for stopwords.words("english") below
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
stop_words = stopwords.words("english")
stemmer = SnowballStemmer("english")
def preprocess(tweet):
    # wnl = WordNetLemmatizer()
    cleaned_tweet = []
    words = tweet.split()
    for word in words:
        # Skip hyperlinks and Twitter handles (@<user>)
        if ('http' in word) or ('.com' in word) or ('www.' in word) or (word.startswith('@')):
            continue
        # Remove digits and special characters
        temp = re.sub(f'[^{string.ascii_lowercase}]', '', word.lower())
        # Skip words with fewer than 3 characters
        if len(temp) < 3:
            continue
        # Store the stemmed version of the word
        temp = stemmer.stem(temp)
        if len(temp) > 0:
            cleaned_tweet.append(temp)
    return ' '.join(cleaned_tweet)
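# Illustrative example of the cleaning above (assumed input, traced by hand):
#   preprocess("Crime reported at http://t.co/x by @user, 3 injured!")
#   -> "crime report injur"
# (URL, handle, digits and short words are dropped; the remaining words are stemmed)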
cl = []
for i in range(len(df)):
    cl.append(preprocess(df['text'][i]))
df['clean_tweet'] = cl
# Load the pre-trained classifiers and their vectorizers
import pickle
import bz2
sfile1 = bz2.BZ2File('All Model', 'r')
models = pickle.load(sfile1)
sfile2 = bz2.BZ2File('All Vector', 'r')
vectorizers = pickle.load(sfile2)
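# Assumption about the pickled artifacts: 'All Model' and 'All Vector' hold
# parallel Python lists of fitted scikit-learn classifiers and their matching
# text vectorizers (e.g. TF-IDF), indexed in the same order as `names` below.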
names = ["K-Nearest Neighbors", "Liner SVM",
"Decision Tree", "Random Forest",
"ExtraTreesClassifier"]
for i in range(0, len(names)):
    test_vectors = vectorizers[i].transform(cl)
    df['Class ' + names[i]] = models[i].predict(test_vectors)
df.to_csv('tweets.csv')
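# Optional summary (not in the original script); uncomment to see how each
# classifier labelled the collected tweets:
# for name in names:
#     print(name, df['Class ' + name].value_counts().to_dict())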