File size: 3,483 Bytes
6e40caf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# pip install -r requirements.txt
# streamlit run app.py

import html

import nltk
import streamlit as st
import torch
import trafilatura
from nltk.tokenize import sent_tokenize
from transformers import AlbertTokenizer, AlbertForSequenceClassification

# Download NLTK data needed by sent_tokenize (no-op if already present).
# NOTE(review): newer NLTK releases may also require the 'punkt_tab'
# resource — confirm against the deployed NLTK version.
nltk.download('punkt')

# Load the tokenizer and model from Hugging Face.
# Runs at import time: the first launch needs network access to fetch the
# weights; later launches hit the local Hugging Face cache.
tokenizer = AlbertTokenizer.from_pretrained("dejanseo/good-vibes")
model = AlbertForSequenceClassification.from_pretrained("dejanseo/good-vibes")

# Set Streamlit layout to wide (must be the first Streamlit call in the app)
st.set_page_config(layout="wide")

# Function to classify text and highlight "Good Vibes" (Label_0) with dynamic opacity
def classify_and_highlight(text, max_length=512):
    """Classify each sentence of *text* and highlight "Good Vibes" ones.

    The text is split into sentences with NLTK, each sentence is classified
    independently, and sentences predicted as Label_0 ("Good Vibes") are
    wrapped in a green <span> whose opacity scales with model confidence.

    Args:
        text: Raw input text to analyse.
        max_length: Maximum token length per sentence for tokenizer
            truncation. (The original accepted this parameter but never
            passed it on, so truncation fell back to the model default.)

    Returns:
        An HTML string; all sentence text is HTML-escaped so user-supplied
        input cannot inject markup into the unsafe_allow_html rendering.
    """
    sentences = sent_tokenize(text)

    pieces = []
    for sentence in sentences:
        # Tokenize and classify each sentence separately; honour max_length.
        inputs = tokenizer(
            sentence,
            return_tensors="pt",
            truncation=True,
            max_length=max_length,
            padding=True,
        )
        with torch.no_grad():  # inference only — skip gradient bookkeeping
            outputs = model(**inputs)
        softmax_scores = torch.softmax(outputs.logits, dim=-1)
        prediction = torch.argmax(softmax_scores, dim=-1).item()
        confidence = softmax_scores[0][prediction].item() * 100

        # Escape before embedding in HTML: this string is later rendered
        # with st.markdown(..., unsafe_allow_html=True).
        safe_sentence = html.escape(sentence)

        if prediction == 0:  # Label_0 corresponds to "Good Vibes"
            # Map confidence to opacity with a +10% base. Assumes a binary
            # model, so the argmax confidence is >= 50% — TODO confirm.
            opacity = ((confidence - 50) / 100) + 0.1
            pieces.append(
                f'<span style="background-color: rgba(0, 255, 0, {opacity});" '
                f'title="{confidence:.2f}%">{safe_sentence}</span>'
            )
        else:
            pieces.append(safe_sentence)

    return " ".join(pieces)

# Function to extract content from URL using Trafilatura
def extract_content_from_url(url):
    """Fetch *url* with Trafilatura and return its extracted main content.

    Returns None when the page cannot be downloaded; may also return None
    if extraction itself yields nothing.
    """
    page = trafilatura.fetch_url(url)
    return trafilatura.extract(page) if page else None

# Streamlit app layout

# Marketing footer shown after every successful classification. Kept in one
# place so the two input modes cannot drift apart (it was duplicated verbatim
# in both branches before).
_FOOTER_MD = (
    "This is a custom sentiment classification model developed by "
    "[Dejan Marketing](https://dejanmarketing.com/). If you'd like to do a "
    "large-scale sentiment analysis on your website or discuss your needs "
    "with our team, please [book an appointment here]"
    "(https://dejanmarketing.com/conference/)."
)


def _show_result(result):
    """Render the highlighted HTML followed by the shared footer blurb."""
    st.markdown(result, unsafe_allow_html=True)
    st.markdown("---")
    st.write(_FOOTER_MD)


st.title("Good Vibes Detector - SEO by DEJAN")
st.write("This app detects good vibes on the internet.")

mode = st.radio("Choose input mode", ("Paste text", "Enter URL"))

if mode == "Paste text":
    user_text = st.text_area("Paste your text here:")
    if st.button("Classify"):
        if user_text:
            _show_result(classify_and_highlight(user_text))
        else:
            st.write("Please paste some text.")

elif mode == "Enter URL":
    user_url = st.text_input("Enter the URL:")
    if st.button("Extract and Classify"):
        if user_url:
            content = extract_content_from_url(user_url)
            if content:
                _show_result(classify_and_highlight(content))
            else:
                st.write("Failed to extract content from the URL.")
        else:
            st.write("Please enter a URL.")