Spaces:

ameythakur
/

Depression-Detection-Using-Tweets

Running

App Files Files Community

Depression-Detection-Using-Tweets / Source Code /app_utilities.py

ameythakur

DEPRESSION-DETECTION

c061ce5 verified about 1 month ago

raw

history blame contribute delete

2.36 kB

	# ==============================================================================
	# PROJECT: DEPRESSION-DETECTION-USING-TWEETS
	# AUTHORS: AMEY THAKUR & MEGA SATISH
	# GITHUB (AMEY): https://github.com/Amey-Thakur
	# GITHUB (MEGA): https://github.com/msatmod
	# REPOSITORY: https://github.com/Amey-Thakur/DEPRESSION-DETECTION-USING-TWEETS
	# RELEASE DATE: June 5, 2022
	# LICENSE: MIT License
	# DESCRIPTION: Utility module for tweet analysis predictions.
	# ==============================================================================

	import pickle
	import sys
	import warnings
	from functools import lru_cache

	import numpy as np
	import pandas as pd
	import spacy
	import en_core_web_lg
	# Configure sys.path to permit localized module discovery within the core directory
	sys.path.append('./core')

	import clean_utilities as CU

	# Suppression of non-critical runtime warnings to maintain a clean console log
	warnings.filterwarnings("ignore")

	# Location of the pickled classifier used by tweet_prediction.
	_MODEL_PATH = "./assets/models/model_svm1.pkl"

	# Width of the feature vector handed to the classifier (one value per
	# embedding dimension).
	_EMBEDDING_DIM = 300


	@lru_cache(maxsize=1)
	def _load_nlp_engine():
	    """Load the spaCy pipeline once and reuse it for every prediction."""
	    return en_core_web_lg.load()


	@lru_cache(maxsize=1)
	def _load_classifier():
	    """Unpickle the classifier once and reuse it for every prediction.

	    NOTE(security): pickle.load can execute arbitrary code, so the file at
	    _MODEL_PATH must be a trusted artifact shipped with the application.
	    """
	    with open(_MODEL_PATH, 'rb') as model_file:
	        return pickle.load(model_file)


	def _tweet_embedding(text, nlp_engine):
	    """Return the mean token vector of *text* as a float64 array."""
	    doc = nlp_engine(text)
	    if len(doc) == 0:
	        # No tokens (e.g. the tweet was empty after cleaning): averaging
	        # nothing would yield NaN, which the classifier cannot consume.
	        # Fall back to the zero vector, i.e. "no semantic content".
	        return np.zeros(_EMBEDDING_DIM)
	    token_vectors = np.array([token.vector for token in doc])
	    return token_vectors.mean(axis=0).astype(np.float64)


	def tweet_prediction(tweet: str) -> int:
	    """
	    Classify a tweet as depressive (1) or non-depressive (0).

	    The process:
	    1. Clean the text with CU.tweets_cleaner.
	    2. Tokenize the cleaned text with the spaCy pipeline and average the
	       token vectors into a single feature vector.
	    3. Feed that vector to the pickled classifier and return its label.

	    The spaCy pipeline and the classifier are loaded on the first call and
	    cached afterwards, so later calls do not reload them from disk.

	    Args:
	        tweet (str): The tweet text from the user.

	    Returns:
	        int: The first element of the classifier's prediction, cast to int
	        (1 for Depressive, 0 for Non-depressive).

	    Raises:
	        OSError: If the model file cannot be opened.
	    """
	    # Step 1: Clean the text
	    cleaned_tweet = CU.tweets_cleaner(tweet)

	    # Step 2: Embed the cleaned text as the centroid of its token vectors
	    embedding = _tweet_embedding(cleaned_tweet, _load_nlp_engine())

	    # The classifier expects a 2-D array: one row per sample.
	    semantic_vectors = embedding.reshape(1, -1)

	    # Step 3: Perform binary classification with the cached model
	    prediction_result = _load_classifier().predict(semantic_vectors)

	    return int(prediction_result[0])