dejanseo committed on
Commit
82d9e6e
1 Parent(s): d424f07

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +78 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import AlbertTokenizer, AlbertForSequenceClassification
3
+ import torch
4
+ import trafilatura
5
+ import nltk
6
+ from nltk.tokenize import sent_tokenize
7
+
8
# Configure the page before anything else so this call precedes every other
# Streamlit call made below (including the cached loaders).
st.set_page_config(layout="wide")


@st.cache_resource
def _ensure_nltk_data():
    """Download the NLTK sentence-tokenizer data once per server process.

    Streamlit re-executes this script on every widget interaction; caching
    avoids repeating the download check on each rerun.
    """
    # 'punkt' is what older NLTK releases load; newer releases look up
    # 'punkt_tab' for sent_tokenize instead, so fetch both.
    for package in ("punkt", "punkt_tab"):
        nltk.download(package)
    return True


@st.cache_resource
def _load_tokenizer_and_model():
    """Load the tokenizer and classifier from Hugging Face once per process.

    Returns:
        A ``(tokenizer, model)`` tuple for the "dejanseo/good-vibes" checkpoint.
    """
    return (
        AlbertTokenizer.from_pretrained("dejanseo/good-vibes"),
        AlbertForSequenceClassification.from_pretrained("dejanseo/good-vibes"),
    )


_ensure_nltk_data()

# Module-level names used by classify_and_highlight.
tokenizer, model = _load_tokenizer_and_model()
17
+
18
+ # Function to classify text and highlight "Good Vibes" (Label_0) with dynamic opacity
19
def classify_and_highlight(text, max_length=512):
    """Classify each sentence of ``text`` and highlight the "Good Vibes" ones.

    The text is split into sentences with ``sent_tokenize``; each sentence is
    classified separately. Sentences predicted as label 0 are wrapped in a
    green ``<span>`` whose opacity grows with the model's confidence and whose
    tooltip shows that confidence as a percentage.

    Args:
        text: Plain text to analyse.
        max_length: Maximum number of tokens per sentence passed to the
            tokenizer; longer sentences are truncated.

    Returns:
        An HTML string of the sentences joined by single spaces. All sentence
        text is HTML-escaped, so the result can be rendered with
        ``unsafe_allow_html=True`` without the input injecting markup.
    """
    import html  # local import keeps this function self-contained

    fragments = []
    for sentence in sent_tokenize(text):
        # Tokenize and classify each sentence separately.
        inputs = tokenizer(
            sentence,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=max_length,
        )
        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = model(**inputs)
        softmax_scores = torch.softmax(outputs.logits, dim=-1)
        prediction = torch.argmax(softmax_scores, dim=-1).item()
        confidence = softmax_scores[0][prediction].item() * 100

        # The text may come from an arbitrary web page; escape it before it
        # is embedded in HTML.
        safe_sentence = html.escape(sentence)

        if prediction == 0:  # Label_0 corresponds to "Good Vibes"
            # Opacity is the confidence above 50% plus a 10% base, clamped to
            # the valid alpha range in case the winning score is below 50%.
            opacity = min(max(((confidence - 50) / 100) + 0.1, 0.0), 1.0)
            fragments.append(
                f'<span style="background-color: rgba(0, 255, 0, {opacity});" '
                f'title="{confidence:.2f}%">{safe_sentence}</span>'
            )
        else:
            fragments.append(safe_sentence)

    return " ".join(fragments).strip()
39
+
40
+ # Function to extract content from URL using Trafilatura
41
def extract_content_from_url(url):
    """Fetch ``url`` with Trafilatura and return its extracted content.

    Returns ``None`` when the fetch yields nothing; otherwise returns
    whatever ``trafilatura.extract`` produces for the downloaded page.
    """
    page = trafilatura.fetch_url(url)
    if not page:
        return None
    return trafilatura.extract(page)
47
+
48
+ # Streamlit app layout
49
# Footer shown beneath every successful classification result.
_FOOTER_TEXT = "This is a custom sentiment classification model developed by [Dejan Marketing](https://dejanmarketing.com/). If you'd like to do a large-scale sentiment analysis on your website or discuss your needs with our team, please [book an appointment here](https://dejanmarketing.com/conference/)."


def _show_result(source_text):
    """Classify ``source_text`` and render the highlighted HTML plus footer."""
    st.markdown(classify_and_highlight(source_text), unsafe_allow_html=True)
    st.markdown("---")
    st.write(_FOOTER_TEXT)


st.title("Good Vibes Detector - SEO by DEJAN")
st.write("This app detects good vibes on the internet.")

mode = st.radio("Choose input mode", ("Paste text", "Enter URL"))

if mode == "Paste text":
    # Direct text input: classify whatever the user pasted.
    user_text = st.text_area("Paste your text here:")
    if st.button("Classify"):
        if not user_text:
            st.write("Please paste some text.")
        else:
            _show_result(user_text)

elif mode == "Enter URL":
    # URL input: extract the page content first, then classify it.
    user_url = st.text_input("Enter the URL:")
    if st.button("Extract and Classify"):
        if not user_url:
            st.write("Please enter a URL.")
        else:
            content = extract_content_from_url(user_url)
            if content:
                _show_result(content)
            else:
                st.write("Failed to extract content from the URL.")
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ transformers
3
+ torch
4
+ trafilatura
5
+ nltk