ghost-love-you
committed on
Commit
•
a66c985
1
Parent(s):
eb4ec66
chore: Update app.py with new translation and sentiment analysis models
Browse files- Dockerfile +21 -0
- app.py +20 -2
Dockerfile
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.9-slim
|
2 |
+
|
3 |
+
WORKDIR /app
|
4 |
+
|
5 |
+
RUN apt-get update && apt-get install -y \
|
6 |
+
build-essential \
|
7 |
+
curl \
|
8 |
+
software-properties-common \
|
9 |
+
git \
|
10 |
+
&& rm -rf /var/lib/apt/lists/*
|
11 |
+
|
12 |
+
# RUN git clone https://github.com/streamlit/streamlit-example.git .
|
13 |
+
COPY . .
|
14 |
+
|
15 |
+
RUN pip3 install -r requirements.txt
|
16 |
+
|
17 |
+
EXPOSE 8501
|
18 |
+
|
19 |
+
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
20 |
+
|
21 |
+
# Launch the Streamlit app shipped in this repo (app.py) on all interfaces, port 8501.
ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import streamlit as st
|
2 |
-
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
|
3 |
from GoogleNews import GoogleNews
|
4 |
import matplotlib.pyplot as plt
|
5 |
import seaborn as sns
|
@@ -172,14 +172,32 @@ def display_sentiments(translate_comments, batch_size=16):
|
|
172 |
cur_date = datetime.datetime.now().strftime("%Y-%m-%d")
|
173 |
selected_date = st.date_input("选择日期", value=pd.to_datetime(cur_date))
|
174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
settings = {
|
176 |
"max_comments": 99999,
|
177 |
"translate_batch_size": 16,
|
178 |
"sentiment_batch_size": 16,
|
|
|
|
|
179 |
}
|
180 |
|
181 |
with st.sidebar:
|
182 |
st.title("设置")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
st.header("最大获取帖子数")
|
184 |
settings["max_comments"] = st.number_input("Max Comments", 1, 99999, 99999)
|
185 |
|
@@ -192,7 +210,7 @@ with st.sidebar:
|
|
192 |
|
193 |
if st.button("统计"):
|
194 |
with st.spinner("正在加载模型 ..."):
|
195 |
-
classifier = pipeline(task="text-classification", model="
|
196 |
with st.spinner("正在获取当天的帖子 ..."):
|
197 |
comments = get_comments(selected_date, settings["max_comments"])
|
198 |
st.dataframe(comments)
|
|
|
1 |
import streamlit as st
|
2 |
+
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification
|
3 |
from GoogleNews import GoogleNews
|
4 |
import matplotlib.pyplot as plt
|
5 |
import seaborn as sns
|
|
|
172 |
cur_date = datetime.datetime.now().strftime("%Y-%m-%d")
|
173 |
selected_date = st.date_input("选择日期", value=pd.to_datetime(cur_date))
|
174 |
|
175 |
+
model_translate = [
|
176 |
+
"Helsinki-NLP/opus-mt-zh-en"
|
177 |
+
]
|
178 |
+
|
179 |
+
model_emo_analysis = [
|
180 |
+
"orlco/google-bert-base-cased-fine-tune",
|
181 |
+
"SamLowe/roberta-base-go_emotions"
|
182 |
+
]
|
183 |
+
|
184 |
settings = {
|
185 |
"max_comments": 99999,
|
186 |
"translate_batch_size": 16,
|
187 |
"sentiment_batch_size": 16,
|
188 |
+
"model_translate": model_translate[0],
|
189 |
+
"model_emo_analysis": model_emo_analysis[0]
|
190 |
}
|
191 |
|
192 |
with st.sidebar:
|
193 |
st.title("设置")
|
194 |
+
|
195 |
+
st.header("翻译模型")
|
196 |
+
settings["model_translate"] = st.selectbox("Model", model_translate)
|
197 |
+
|
198 |
+
st.header("情感分析模型")
|
199 |
+
settings["model_emo_analysis"] = st.selectbox("Model", model_emo_analysis)
|
200 |
+
|
201 |
st.header("最大获取帖子数")
|
202 |
settings["max_comments"] = st.number_input("Max Comments", 1, 99999, 99999)
|
203 |
|
|
|
210 |
|
211 |
if st.button("统计"):
|
212 |
with st.spinner("正在加载模型 ..."):
|
213 |
+
classifier = pipeline(task="text-classification", model=settings["model_emo_analysis"], top_k=None)
|
214 |
with st.spinner("正在获取当天的帖子 ..."):
|
215 |
comments = get_comments(selected_date, settings["max_comments"])
|
216 |
st.dataframe(comments)
|