# Detector / app.py
# mkoot007's picture
# Update app.py
# be16c65
# raw
# history blame contribute delete
# No virus
# 2.07 kB
import pandas as pd
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
# Load the pre-trained model and tokenizer.
# NOTE(review): this checkpoint name refers to an IMDb fine-tune, yet the app
# labels its output as job related / not job related -- confirm that this is
# the intended model.
MODEL_NAME = "textattack/roberta-base-imdb"


@st.cache_resource
def _load_classifier(model_name=MODEL_NAME):
    """Load the tokenizer and sequence-classification model once per process.

    Streamlit re-executes the whole script on every widget interaction; the
    ``st.cache_resource`` decorator keeps the loaded objects alive between
    reruns so the weights are not re-instantiated each time.

    Args:
        model_name: Hugging Face model identifier to load.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return loaded_tokenizer, loaded_model


tokenizer, model = _load_classifier()
def analyze_text(text):
    """Classify one message as "Job Related" or "Not Job Related".

    The text is lower-cased, tokenized with the module-level ``tokenizer``
    (over-long input is truncated) and scored with the module-level ``model``
    under ``torch.no_grad()``. Predicted class index 1 is reported as
    "Job Related"; any other index as "Not Job Related".

    NOTE(review): the module loads the "textattack/roberta-base-imdb"
    checkpoint, and the original comment here said class 1 means positive
    sentiment for IMDb. Mapping that to "Job Related" looks like a
    placeholder -- confirm the intended model.

    Args:
        text: The message to classify. ``None`` and float NaN (what pandas
            yields for empty CSV cells) are treated as missing; any other
            non-string value is converted with ``str()``.

    Returns:
        "Job Related" or "Not Job Related".
    """
    import math  # function-scope import keeps this block self-contained

    # Missing cells cannot be lower-cased or tokenized; report them as not
    # job related without invoking the model.
    if text is None or (isinstance(text, float) and math.isnan(text)):
        return "Not Job Related"
    # Numeric or other non-string cells would otherwise fail on .lower().
    if not isinstance(text, str):
        text = str(text)

    # Preprocess the text
    text = text.lower()
    # Encode the text
    encoded_text = tokenizer(text, truncation=True, padding=True, return_tensors="pt")
    # Classify the text (inference only, so gradients are not tracked)
    with torch.no_grad():
        output = model(**encoded_text)
    predicted_class = output.logits.argmax(-1).item()
    return "Job Related" if predicted_class == 1 else "Not Job Related"
def count_job_related_messages(data):
    """Tally classification results over the "message" entries of ``data``.

    Every entry of ``data["message"]`` is passed through ``analyze_text``.

    Args:
        data: A mapping-like object (e.g. a DataFrame) whose "message" item
            is an iterable of messages.

    Returns:
        A ``(job_related_count, not_job_related_count)`` tuple.
    """
    labels = [analyze_text(entry) for entry in data["message"]]
    job_related_total = sum(1 for label in labels if label == "Job Related")
    # Everything that is not "Job Related" falls into the second bucket.
    return job_related_total, len(labels) - job_related_total
# Streamlit application
def _render_csv_report(uploaded_file):
    """Classify every row of an uploaded CSV and render the results.

    The file is parsed with ``pd.read_csv``; each value in its "message"
    column is classified with ``analyze_text``, the labels are stored in a
    new "Job Related" column, and the table plus both totals are displayed.
    Parse failures and a missing "message" column are shown as errors on the
    page instead of surfacing as an unhandled exception.

    Args:
        uploaded_file: The file object returned by ``st.file_uploader``.
    """
    # Read the CSV file
    try:
        data = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded file as CSV: {exc}")
        return

    if "message" not in data.columns:
        st.error('The uploaded CSV must contain a "message" column.')
        return

    # Analyze messages. Each message is classified exactly once; the totals
    # below are derived from these labels rather than re-running the model.
    labels = [analyze_text(message) for message in data["message"]]
    data["Job Related"] = labels

    # Count job-related messages
    job_related_count = labels.count("Job Related")
    not_job_related_count = len(labels) - job_related_count

    st.dataframe(data)
    st.write(f"Job Related Messages: {job_related_count}")
    st.write(f"Not Job Related Messages: {not_job_related_count}")


st.title("Job Related Message Analyzer")
uploaded_file = st.file_uploader("Upload CSV file")
user_input = st.text_input("Enter text")

# An uploaded file takes precedence over typed text when both are present.
if uploaded_file:
    _render_csv_report(uploaded_file)
elif user_input:
    # Analyze user-input text
    result = analyze_text(user_input)
    st.write(f"Message Classification: {result}")
else:
    st.write("Please upload a CSV file or enter text to analyze.")