dejanseo committed on
Commit
62f5593
1 Parent(s): 6840931

Upload 2 files

Browse files
Files changed (2) hide show
  1. goodies/data.csv +2 -2
  2. goodies/sentiment.py +30 -2
goodies/data.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:025591039882326919545ffe4e47a9285d3f567c617c7b061a4f335f8a3d3a2b
3
- size 11089589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bed21061c54fb40693e1879336dfd2be7f1583089ed286fe395e44e3fda1762
3
+ size 11088805
goodies/sentiment.py CHANGED
@@ -6,13 +6,14 @@ from bs4 import BeautifulSoup
6
  import pandas as pd
7
  import altair as alt
8
  from collections import OrderedDict
9
- import nltk
10
  from nltk.tokenize import sent_tokenize
11
 
 
 
12
  nltk.download('punkt')
13
 
14
  # Load model and tokenizer
15
- model_name = 'C:/projects/sentiment/albert_sentiment_model/checkpoint-3000'
16
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
17
  tokenizer = AutoTokenizer.from_pretrained(model_name)
18
 
@@ -85,6 +86,21 @@ def classify_sentences(text):
85
  st.title("Sentiment Classification from URL")
86
 
87
  url = st.text_input("Enter URL:")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  if url:
89
  text = get_text_from_url(url)
90
  if text:
@@ -140,3 +156,15 @@ if url:
140
 
141
  else:
142
  st.write("Could not extract text from the provided URL.")
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import pandas as pd
7
  import altair as alt
8
  from collections import OrderedDict
 
9
  from nltk.tokenize import sent_tokenize
10
 
11
+ # Load the punkt tokenizer from nltk
12
+ import nltk
13
  nltk.download('punkt')
14
 
15
  # Load model and tokenizer
16
+ model_name = 'dejanseo/sentiment'
17
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
18
  tokenizer = AutoTokenizer.from_pretrained(model_name)
19
 
 
86
  st.title("Sentiment Classification from URL")
87
 
88
  url = st.text_input("Enter URL:")
89
+
90
+ # Additional information
91
+ st.markdown("""
92
+ Multi-label sentiment classification model developed by [Dejan Marketing](https://dejanmarketing.com/).
93
+
94
+ The model is designed to be deployed in an automated pipeline capable of classifying text sentiment for thousands (or even millions) of text chunks or as a part of a scraping pipeline.
95
+
96
+ This is a demo model which may occasionally misclassify some texts. In a typical commercial project, a larger model is deployed for the task, and in special cases, a domain-specific model is developed for the client.
97
+
98
+ # Engage Our Team
99
+ Interested in using this in an automated pipeline for bulk query processing?
100
+
101
+ Please [book an appointment](https://dejanmarketing.com/conference/) to discuss your needs.
102
+ """)
103
+
104
  if url:
105
  text = get_text_from_url(url)
106
  if text:
 
156
 
157
  else:
158
  st.write("Could not extract text from the provided URL.")
159
+
160
+ # Additional information at the end
161
+ st.markdown("""
162
+ Multi-label sentiment classification model developed by [Dejan Marketing](https://dejanmarketing.com/).
163
+
164
+ The model is designed to be deployed in an automated pipeline capable of classifying text sentiment for thousands (or even millions) of text chunks or as a part of a scraping pipeline. This is a demo model which may occasionally misclassify some texts. In a typical commercial project, a larger model is deployed for the task, and in special cases, a domain-specific model is developed for the client.
165
+
166
+ ### Engage Our Team
167
+ Interested in using this in an automated pipeline for bulk query processing?
168
+
169
+ Please [book an appointment](https://dejanmarketing.com/conference/) to discuss your needs.
170
+ """)