Spaces:

hardik-kanzariya
/

Sentiment-Analysis

Sleeping

App Files Files Community

Sentiment-Analysis / app.py

hardik-kanzariya

Upload 4 files

537db34 verified about 1 month ago

raw

history blame

2.57 kB

	import tensorflow as tf
	from tensorflow.keras.preprocessing.sequence import pad_sequences
	from tensorflow.keras.preprocessing.text import tokenizer_from_json
	import pandas as pd
	import re
	from nltk.stem import WordNetLemmatizer
	import json
	import numpy as np
	import streamlit as st


	# Streamlit re-executes this script from top to bottom on every widget
	# interaction. The heavyweight artifacts are therefore loaded through
	# st.cache_resource factories so they are read from disk once and then
	# reused, instead of being reloaded on every rerun.
	@st.cache_resource
	def _load_model():
	    """Load the trained Keras model stored at ``model/my_model.h5``."""
	    return tf.keras.models.load_model("model/my_model.h5")


	@st.cache_resource
	def _load_tokenizer():
	    """Rebuild the Keras tokenizer serialized in ``data/tokenizer.json``."""
	    with open('data/tokenizer.json', 'r', encoding='utf-8') as f:
	        # The decoded JSON payload is handed to tokenizer_from_json unchanged.
	        return tokenizer_from_json(json.load(f))


	model = _load_model()
	lemmatizer = WordNetLemmatizer()
	# Fixed sequence length used when padding/truncating tokenized text.
	# NOTE(review): presumably the input length the model was trained with - confirm.
	maxlen = 41
	tokenizer = _load_tokenizer()

	def preprocessing(text):
	    """Normalize raw text into the space-separated lemma string fed to the tokenizer.

	    Steps: drop URLs, drop @mentions and #hashtags, strip every character
	    that is not an ASCII letter or whitespace, split on whitespace, then
	    lemmatize each word as a verb with the module-level ``lemmatizer``.

	    Args:
	        text: The raw input. Anything that is not a ``str`` is treated as
	            missing.

	    Returns:
	        The cleaned words joined by single spaces, or ``''`` when ``text``
	        is not a string.
	    """
	    # Ensure the input is a string, otherwise return an empty string
	    if not isinstance(text, str):
	        return ''

	    # Remove URLs. The alternation bars must be unescaped: an escaped bar
	    # (`\|`) matches a literal "|" character, so such a pattern never
	    # matches a real URL.
	    cleaned_text = re.sub(r'(http|https|www)\S+', '', text)
	    # Remove mentions (like @username) and hashtags
	    cleaned_text = re.sub(r'[@#]\w+', '', cleaned_text)

	    # Keep letters and whitespace only. This must operate on cleaned_text;
	    # applying it to the raw text would discard the two removals above.
	    # NOTE(review): if the model was trained on text cleaned without the URL /
	    # mention removal taking effect, spot-check predictions after this fix.
	    cleaned_text = re.sub(r'[^a-zA-Z\s]', '', cleaned_text)

	    # split() with no argument splits on any run of whitespace (newlines
	    # included), so no separate newline/whitespace collapsing is needed.
	    words = cleaned_text.split()
	    return ' '.join(lemmatizer.lemmatize(word, pos='v') for word in words)

	def getPrediction(input):
	    """Classify a batch of raw texts and report a confidence for each.

	    ``input`` is wrapped in a one-column DataFrame, cleaned with
	    ``preprocessing``, tokenized, post-padded/post-truncated to ``maxlen``
	    and passed to ``model.predict``.

	    Returns:
	        A ``(result, confidence_score)`` tuple. ``result`` is the per-row
	        argmax of the model output, overwritten with ``2`` wherever the
	        confidence is below 0.7. ``confidence_score`` is the largest output
	        of each row divided by that row's sum.
	    """
	    frame = pd.DataFrame(input, columns=['text'])
	    cleaned = frame['text'].apply(preprocessing)
	    # Debug trace: first cleaned text, printed without a trailing newline
	    print(cleaned[0], end=", ")

	    sequences = tokenizer.texts_to_sequences(cleaned)
	    padded = pad_sequences(sequences, maxlen=maxlen, padding='post', truncating='post')
	    prediction = model.predict(padded, verbose=0)

	    # calculate confidence score: share of the row total held by the top output
	    row_peak = np.max(prediction, axis=1)
	    row_total = np.sum(prediction, axis=1)
	    confidence_score = row_peak / row_total

	    result = np.argmax(prediction, axis=1)
	    # Rows the model is not confident about are relabelled as class 2
	    result[confidence_score < 0.7] = 2

	    print(prediction, confidence_score)
	    return result, confidence_score

	def getSentiment(idx):
	    """Translate a class index into its sentiment label.

	    ``0`` maps to "Negative" and ``1`` to "Positive"; every other value
	    maps to "Neutral".
	    """
	    if idx == 0:
	        return "Negative"
	    if idx == 1:
	        return "Positive"
	    return "Neutral"

	# Free-text input box for the statement to classify
	text = st.text_area("Enter Text...")

	# Only run the model once the box is non-empty
	if text:
	    # getPrediction works on batches, so the single text is wrapped in a list
	    prediction, confidence_score = getPrediction([text])

	    # Convert prediction to a human-readable format
	    sentiment = getSentiment(prediction[0])
	    confidence_pct = float(confidence_score[0] * 100)
	    response = {
	        "prediction": f"{sentiment} Statement",
	        "confidence": f"{confidence_pct:.2f}%",
	    }
	    st.json(response)