dejanseo committed
Commit 6e40caf
1 Parent(s): aab804c

Update app.py

Files changed (1)
  1. app.py +81 -78
app.py CHANGED
@@ -1,78 +1,81 @@
-import streamlit as st
-from transformers import AlbertTokenizer, AlbertForSequenceClassification
-import torch
-import trafilatura
-import nltk
-from nltk.tokenize import sent_tokenize
-
-# Download NLTK data
-nltk.download('punkt')
-
-# Load the tokenizer and model from Hugging Face
-tokenizer = AlbertTokenizer.from_pretrained("dejanseo/good-vibes")
-model = AlbertForSequenceClassification.from_pretrained("dejanseo/good-vibes")
-
-# Set Streamlit layout to wide
-st.set_page_config(layout="wide")
-
-# Function to classify text and highlight "Good Vibes" (Label_0) with dynamic opacity
-def classify_and_highlight(text, max_length=512):
-    sentences = sent_tokenize(text)
-
-    highlighted_text = ""
-    for sentence in sentences:
-        # Tokenize and classify each sentence separately
-        inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
-        outputs = model(**inputs)
-        softmax_scores = torch.softmax(outputs.logits, dim=-1)
-        prediction = torch.argmax(softmax_scores, dim=-1).item()
-        confidence = softmax_scores[0][prediction].item() * 100
-
-        if prediction == 0:  # Label_0 corresponds to "Good Vibes"
-            # Adjust opacity calculation: base +10%
-            opacity = ((confidence - 50) / 100) + 0.1
-            highlighted_text += f'<span style="background-color: rgba(0, 255, 0, {opacity});" title="{confidence:.2f}%">{sentence}</span> '
-        else:
-            highlighted_text += f'{sentence} '
-
-    return highlighted_text.strip()
-
-# Function to extract content from URL using Trafilatura
-def extract_content_from_url(url):
-    downloaded = trafilatura.fetch_url(url)
-    if downloaded:
-        return trafilatura.extract(downloaded)
-    else:
-        return None
-
-# Streamlit app layout
-st.title("Good Vibes Detector - SEO by DEJAN")
-st.write("This app detects good vibes on the internet.")
-
-mode = st.radio("Choose input mode", ("Paste text", "Enter URL"))
-
-if mode == "Paste text":
-    user_text = st.text_area("Paste your text here:")
-    if st.button("Classify"):
-        if user_text:
-            result = classify_and_highlight(user_text)
-            st.markdown(result, unsafe_allow_html=True)
-            st.markdown("---")
-            st.write("This is a custom sentiment classification model developed by [Dejan Marketing](https://dejanmarketing.com/). If you'd like to do a large-scale sentiment analysis on your website or discuss your needs with our team, please [book an appointment here](https://dejanmarketing.com/conference/).")
-        else:
-            st.write("Please paste some text.")
-
-elif mode == "Enter URL":
-    user_url = st.text_input("Enter the URL:")
-    if st.button("Extract and Classify"):
-        if user_url:
-            content = extract_content_from_url(user_url)
-            if content:
-                result = classify_and_highlight(content)
-                st.markdown(result, unsafe_allow_html=True)
-                st.markdown("---")
-                st.write("This is a custom sentiment classification model developed by [Dejan Marketing](https://dejanmarketing.com/). If you'd like to do a large-scale sentiment analysis on your website or discuss your needs with our team, please [book an appointment here](https://dejanmarketing.com/conference/).")
-            else:
-                st.write("Failed to extract content from the URL.")
-        else:
-            st.write("Please enter a URL.")
+# pip install -r requirements.txt
+# streamlit run app.py
+
+import streamlit as st
+from transformers import AlbertTokenizer, AlbertForSequenceClassification
+import torch
+import trafilatura
+import nltk
+from nltk.tokenize import sent_tokenize
+
+# Download NLTK data
+nltk.download('punkt')
+
+# Load the tokenizer and model from Hugging Face
+tokenizer = AlbertTokenizer.from_pretrained("dejanseo/good-vibes")
+model = AlbertForSequenceClassification.from_pretrained("dejanseo/good-vibes")
+
+# Set Streamlit layout to wide
+st.set_page_config(layout="wide")
+
+# Function to classify text and highlight "Good Vibes" (Label_0) with dynamic opacity
+def classify_and_highlight(text, max_length=512):
+    sentences = sent_tokenize(text)
+
+    highlighted_text = ""
+    for sentence in sentences:
+        # Tokenize and classify each sentence separately
+        inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
+        outputs = model(**inputs)
+        softmax_scores = torch.softmax(outputs.logits, dim=-1)
+        prediction = torch.argmax(softmax_scores, dim=-1).item()
+        confidence = softmax_scores[0][prediction].item() * 100
+
+        if prediction == 0:  # Label_0 corresponds to "Good Vibes"
+            # Adjust opacity calculation: base +10%
+            opacity = ((confidence - 50) / 100) + 0.1
+            highlighted_text += f'<span style="background-color: rgba(0, 255, 0, {opacity});" title="{confidence:.2f}%">{sentence}</span> '
+        else:
+            highlighted_text += f'{sentence} '
+
+    return highlighted_text.strip()
+
+# Function to extract content from URL using Trafilatura
+def extract_content_from_url(url):
+    downloaded = trafilatura.fetch_url(url)
+    if downloaded:
+        return trafilatura.extract(downloaded)
+    else:
+        return None
+
+# Streamlit app layout
+st.title("Good Vibes Detector - SEO by DEJAN")
+st.write("This app detects good vibes on the internet.")
+
+mode = st.radio("Choose input mode", ("Paste text", "Enter URL"))
+
+if mode == "Paste text":
+    user_text = st.text_area("Paste your text here:")
+    if st.button("Classify"):
+        if user_text:
+            result = classify_and_highlight(user_text)
+            st.markdown(result, unsafe_allow_html=True)
+            st.markdown("---")
+            st.write("This is a custom sentiment classification model developed by [Dejan Marketing](https://dejanmarketing.com/). If you'd like to do a large-scale sentiment analysis on your website or discuss your needs with our team, please [book an appointment here](https://dejanmarketing.com/conference/).")
+        else:
+            st.write("Please paste some text.")
+
+elif mode == "Enter URL":
+    user_url = st.text_input("Enter the URL:")
+    if st.button("Extract and Classify"):
+        if user_url:
+            content = extract_content_from_url(user_url)
+            if content:
+                result = classify_and_highlight(content)
+                st.markdown(result, unsafe_allow_html=True)
+                st.markdown("---")
+                st.write("This is a custom sentiment classification model developed by [Dejan Marketing](https://dejanmarketing.com/). If you'd like to do a large-scale sentiment analysis on your website or discuss your needs with our team, please [book an appointment here](https://dejanmarketing.com/conference/).")
+            else:
+                st.write("Failed to extract content from the URL.")
+        else:
+            st.write("Please enter a URL.")