File size: 3,483 Bytes
6e40caf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
# pip install -r requirements.txt
# streamlit run app.py
import html

import nltk
import streamlit as st
import torch
import trafilatura
from nltk.tokenize import sent_tokenize
from transformers import AlbertTokenizer, AlbertForSequenceClassification
# Set Streamlit layout to wide.
# This runs before the cached loader below: st.cache_resource may display a
# spinner while loading, and older Streamlit releases require
# set_page_config to be the first Streamlit command in the script.
st.set_page_config(layout="wide")


@st.cache_resource
def _load_nlp_resources():
    """Download the NLTK sentence data and load the tokenizer and model.

    Streamlit re-executes the whole script on every widget interaction, so
    the loading is wrapped in ``st.cache_resource`` to run once and be
    reused across reruns instead of reloading the model each time.

    Returns:
        A ``(tokenizer, model)`` tuple loaded from the Hugging Face
        repository "dejanseo/good-vibes".
    """
    # Download NLTK data
    nltk.download('punkt')
    # Load the tokenizer and model from Hugging Face
    loaded_tokenizer = AlbertTokenizer.from_pretrained("dejanseo/good-vibes")
    loaded_model = AlbertForSequenceClassification.from_pretrained("dejanseo/good-vibes")
    return loaded_tokenizer, loaded_model


# Module-level names kept so the rest of the script can keep using them.
tokenizer, model = _load_nlp_resources()
def classify_and_highlight(text, max_length=512):
    """Classify each sentence of *text* and highlight the "Good Vibes" ones.

    The text is split into sentences, and each sentence is classified
    separately with the module-level ``tokenizer`` and ``model``. Sentences
    predicted as label 0 ("Good Vibes") are wrapped in a green ``<span>``
    whose opacity grows with the model's confidence; the confidence
    percentage is exposed as the span's tooltip. All other sentences are
    emitted without a wrapper.

    Every sentence is HTML-escaped before being placed in the output,
    because the result is rendered with ``unsafe_allow_html=True`` and the
    text may come from an arbitrary web page.

    Args:
        text: The text to classify.
        max_length: Maximum number of tokens per sentence; longer sentences
            are truncated by the tokenizer.

    Returns:
        A single string of space-separated sentence fragments (HTML), or an
        empty string when no sentences are found.
    """
    fragments = []
    for sentence in sent_tokenize(text):
        # Tokenize and classify each sentence separately
        inputs = tokenizer(
            sentence,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=max_length,
        )
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            outputs = model(**inputs)
        softmax_scores = torch.softmax(outputs.logits, dim=-1)
        prediction = torch.argmax(softmax_scores, dim=-1).item()
        confidence = softmax_scores[0][prediction].item() * 100

        # Escape so markup inside the sentence is shown as text, not executed.
        safe_sentence = html.escape(sentence)
        if prediction == 0:  # Label_0 corresponds to "Good Vibes"
            # Adjust opacity calculation: base +10%
            opacity = ((confidence - 50) / 100) + 0.1
            fragments.append(
                f'<span style="background-color: rgba(0, 255, 0, {opacity});" '
                f'title="{confidence:.2f}%">{safe_sentence}</span>'
            )
        else:
            fragments.append(safe_sentence)
    return " ".join(fragments).strip()
def extract_content_from_url(url):
    """Fetch *url* with Trafilatura and return the extracted content.

    Returns ``None`` when the fetch yields nothing; otherwise returns
    whatever ``trafilatura.extract`` produces for the fetched document.
    """
    page = trafilatura.fetch_url(url)
    if not page:
        return None
    return trafilatura.extract(page)
# Streamlit app layout


def _show_result(highlighted_html):
    """Render the highlighted text, a divider, and the promotional footer."""
    st.markdown(highlighted_html, unsafe_allow_html=True)
    st.markdown("---")
    st.write(
        "This is a custom sentiment classification model developed by "
        "[Dejan Marketing](https://dejanmarketing.com/). If you'd like to do "
        "a large-scale sentiment analysis on your website or discuss your "
        "needs with our team, please "
        "[book an appointment here](https://dejanmarketing.com/conference/)."
    )


st.title("Good Vibes Detector - SEO by DEJAN")
st.write("This app detects good vibes on the internet.")

mode = st.radio("Choose input mode", ("Paste text", "Enter URL"))

if mode == "Paste text":
    # Direct input: classify whatever the user pasted.
    user_text = st.text_area("Paste your text here:")
    if st.button("Classify"):
        if not user_text:
            st.write("Please paste some text.")
        else:
            result = classify_and_highlight(user_text)
            _show_result(result)
elif mode == "Enter URL":
    # URL input: extract the page content first, then classify it.
    user_url = st.text_input("Enter the URL:")
    if st.button("Extract and Classify"):
        if not user_url:
            st.write("Please enter a URL.")
        else:
            content = extract_content_from_url(user_url)
            if not content:
                st.write("Failed to extract content from the URL.")
            else:
                result = classify_and_highlight(content)
                _show_result(result)
|