Spaces: Sleeping

AlanTsai-0329 committed
Commit 92c0979 • 1 Parent(s): da121d0

Upload 25 files
Browse files
- .gitattributes +1 -0
- Home.py +12 -0
- pages/1_Dashboard.py +122 -0
- pages/Control/Controls.py +0 -0
- pages/Model/Load_Model.py +134 -0
- pages/Model/__pycache__/Load_Model.cpython-39.pyc +0 -0
- pages/model_param/board_classification_model/config.json +47 -0
- pages/model_param/board_classification_model/pytorch_model.bin +3 -0
- pages/model_param/board_classification_model/special_tokens_map.json +7 -0
- pages/model_param/board_classification_model/tokenizer.json +0 -0
- pages/model_param/board_classification_model/tokenizer_config.json +13 -0
- pages/model_param/board_classification_model/training_args.bin +3 -0
- pages/model_param/board_classification_model/vocab.txt +0 -0
- pages/model_param/sentiment_analysis_model/config.json +39 -0
- pages/model_param/sentiment_analysis_model/pytorch_model.bin +3 -0
- pages/model_param/sentiment_analysis_model/special_tokens_map.json +7 -0
- pages/model_param/sentiment_analysis_model/tokenizer.json +0 -0
- pages/model_param/sentiment_analysis_model/tokenizer_config.json +15 -0
- pages/model_param/sentiment_analysis_model/vocab.txt +0 -0
- pages/model_param/summarization_model/config.json +36 -0
- pages/model_param/summarization_model/generation_config.json +11 -0
- pages/model_param/summarization_model/pytorch_model.bin +3 -0
- pages/model_param/summarization_model/special_tokens_map.json +5 -0
- pages/model_param/summarization_model/spiece.model +3 -0
- pages/model_param/summarization_model/tokenizer.json +3 -0
- pages/model_param/summarization_model/tokenizer_config.json +11 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 app/pages/model_param/summarization_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+pages/model_param/summarization_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
Home.py
ADDED
@@ -0,0 +1,12 @@
import streamlit as st

st.header("ćęę«å°é”ćęåē¤¦å·„")
st.subheader("PTT ēé¢åęęēØ")
st.header("ēµå”åå®")
st.markdown("""
- 111AB8005 å¼µäŗ姵
- 111AB8017 ęē¶å©
- 111AB8023 é³ē궵
- 111AB8026 č”å°å®
""")
pages/1_Dashboard.py
ADDED
@@ -0,0 +1,122 @@
import streamlit as st
from pages.Model import Load_Model
import warnings
warnings.filterwarnings("ignore")

st.set_page_config(
    page_title="PTT ēé¢ęēØ App",
    page_icon="š§",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
'About': """ēµå”åå®ļ¼å¼µäŗ姵ćęē¶å©ćé³ē궵ćč”å°å®""",
    }
)


sample_text = [
    'Select',
'[åå¦] ęéŗ¼ę²ęäŗŗåØčØč«å·„ę„4.0äŗļ¼ å
©å¹“åéøčēęå å·„ę„4.0ć大ęøęćAIčØč«ēę²øę²øęę ęéŗ¼ä»å¹“éøčēęå 儽åę²ęč½å°ä»»ä½å·„ę„4.0ēę¶ęÆļ¼ ęå¦åļ¼ å£čč¦ęåƦčøēę¹ę³å§ ē¾åØåč·³åŗä¾äŗ ę„ęå
ę¬ä¾åØé«éå°±ęå» äŗå§',
'[åå¦] ęę²ęč·ÆäøēŖē¶č®å¾äŗ®ēå
«å¦ļ¼ å°éÆ家éčæēå··å åę¬č·Æéč¦ŗå¾é½ęęē 大ę¦å°±ęÆé£ēØ®ę“č·Æę10ēēåŖéäøå
©ēēēØåŗ¦ äøéååē¶éęēŖē¶č¦ŗå¾ę“č·Æč®å¾č¶
äŗ® åä¾ęęēēé½ęéäŗ č·Æå¤Ŗäŗ®åč儽åęé»äøå¤Ŗēæę
£ ęę²ęäŗŗä¹ē¼ē¾ēę“»åØéēč·ÆēŖē¶č®å¾å¾äŗ® ęå¦åļ¼ ',
'Re: [čØč«] ę“č
¦å¤Ŗęå!äøåå¤äŗ¤å®å
¬ē¶č¬éÆę·å²å¦!!!! čÆååę²ē« ē¬¬äŗē« å®å
Øēäŗę ē¬¬23ę¢ č³ä»ęŖę¹ åęäøč¼ēä»ē¶ęÆäøčÆę°å Republic of china CHAPTER V: THE SECURITY COUNCIL COMPOSITION Article 23 The Security Council shall consist of fifteen Members of the United Nations. The Republic of China France the Union of Soviet Socialist Republics the United Kingdom of Great Britain and Northern Ireland and the United States of America shall be permanent members of the Security Council. The General Assembly shall elect ten other Members of the United Nations to be nonpermanent members of the Security Council due regard being specially paid in the first instance to the contribution of Members of the United Nations to the maintenance of international peace and security and to the other purposes of the Organization and also to equitable geographical distribution. å¾å„ęŖå¾äøčÆę°åéåŗčÆåå(1971) äøē¾ę·äŗ¤ä»„å¾ (1979) čå
±ä»ē¶ę²ęéēØä»åå½±éæåå»äæ®ę£čÆååę²ē« ļ¼ę¹ęä»åę³č¦ēēę¬ éå»čÆååę²ē« ä¹äøęÆę²ęäæ®ę£čę”é ä½ęÆå°éåä»åč¦å¦ē¼äøéćčäøåŗēē¬¬23ę¢ äøč¼äøčÆę°åēęå ä»åå»å§ēµę²ęč¾¦ę³äæå
¶ę¹å ęÆäøę³éęÆč¾¦äøå°ļ¼ : äøå
±ę“č
¦å¤§ęå : å°å¼ååēē¾åé»č¦ę°č : å
±åŖå¤äŗ¤å®ē«ē¶äøē„éå
±ē¢é»Øę²ęéäŗę° : éå»ŗč° : äøē¾åå°äŗę°ęęēŗåå¹³ę£ē¾©äø¦č©ä½ę°ēéäæ : éęå°é£čéćé³ē“å¾·ćå²čæŖåØå°č».... : äøåęÆē¬¬äøååØčÆååę²ē« äøē°½å... : č²ę»“ éäŗäøé½ęÆäøčÆę°åćåę°é»Øēę·å²åļ¼ : ęę²ęå¦ļ¼ : č£ååå
§å ±å°ļ¼ : č”č±ęēēč¦å„½å„½ēęäøäøäøčÆę°å ä½ēŗäøååęēäøä¾č³ē¢ēē¹¼ęæč
å
¶åƦäø¦äø容ę äøč¦č¦ŗå¾å„½åę²äŗäøčÆę°åä»ååčµ·äŗä¾ę“č¼é¬ ęēēę³ęÆčÆååę²ē« åę ęRepublic of China ē“ę„ę¹ęChinaćåŖęÆäøååä½ äø¦äøęÆä»éŗ¼ę¶å¾ę¹ēåé”ļ¼ę³åäøä¹ē“ę„å« Franceļ¼ ę“å¤ēå ē“ åÆč½éåØčå¾ęæę²»åę© ę³ēµ±å°ä½ē“ē å°±éęØ£ć',
'[éč] č¼å¤ä¹å¾éęęęåęø¬č©¦åļ¼ å°±ęÆå 仄å¾č¼å¤åé件äŗé½éč¦ę©åé
å åÆęÆē¾åØēę
ę³ä¹å¾éč®ę©åå»åä¹å¤ŖęŖäŗå§ čäøäŗ¬é½äøå½¹ä¹č®č¼å¤äøåēååéåč”ę é£ä¹å¾č¼å¤éęęęåęø¬č©¦åļ¼ ',
'Re: [åå¦] 儹č·ęčŖŖéčµ°äøåŗä¾ęÆä»éŗ¼ęęļ¼ å äŗŗčē°ļ¼éč£”ęÆę©ę°å®¢å®¶äŗŗć : : åč·äøååę”ē儳ēåē½äŗ : ę仄ēŗęåäŗę儽ę : ēµę儹č·ęčŖŖ儹éčµ°äøåŗä¾äøäøꮵ : ęéęę©ęåļ¼ : ę±č§£ éēØ®äŗę
ę ¹ęęēäŗŗēē¶é© äøęÆå„¹čµ°äøåŗä¾ļ¼ęÆäøę³čµ°åŗä¾ć ēę³å¾ē¾ę»æļ¼ē¾åƦå¾éŖØęć č¦äøč¦å°±č©¦äø試ęē„é 試é½äø試ē“ę„ęēµä½ ēļ¼å°±ęÆęä½ ē¶ē½ē”ć ä»å¤©ęēęęÆå½±ē č”čØŖåęä½ęÆē¶ęåēę
侶 äŗ¤å¾éå幓ćåęå幓 儳ēåØč¢å¹åé¢ē“ę„čŖŖäøę³č·ē·ēē¶ęå å¹¹åæ«ē¬ę» ē·ēé½åæ«ååŗä¾äŗļ¼éč¦č¢«č¬éēØ®č©±ć å¦ęäøęÆé£åå½±ēč¢«åŖäŗļ¼ęč©²ęÆč¢«å“ēļ¼ ęęč²¼äøä¾č®å¤§å®¶ēē äøčæ°å½±ēå°č±”äøęÆ WebTVasis Taiwan ē ēä»åęäøęåę¾åŗä¾ ēµč«ļ¼ å°é儹ļ¼ę¾äøäøåć åę£ä½ åäøęå»čŖę®ŗ ę³čŖę®ŗéå£pęæēäøäø SOP čØå¾å¹«čŖå·±ęŗå儽å¾äŗļ¼å„åå°å„äŗŗć é½å¹¾ę²äŗ čŖå·±äøč¦ē¶ē½ē”就儽äŗ åŖ天éęäøč”čØŖč¢«å
«å¦é
øę°å“ éēØ®äŗę
ęÆå āäŗŗāčē°ē ē©“ē©“ęäŗ¤ ä½č
hancel (hancel) ēęæ Gossiping ęØé” Re: [åå¦] 儳ååŗåäŗ¤ęäø幓 åęēę©ēå¤å°ļ¼ ęé Tue Jul 24 02:42:26 2018',
'Re: [éč] č„æęÆęæę儳ēč¦å
é¤ē·ē č±Ŗä¹
åŖęēč„æęÆēå©0.0 éäø幓å
§č±Ŗåäøꬔé½åŖę ęÆčŖŖå© äŗ¤å¾éēč„æęÆē ä¹å¾åÆę0.0 č·äŗ¤å¾ę»äŗ¤åč»é«äøęØ£ ēå„äŗŗē¼ę§ęę åęJå ē©ŗčåŖ0.0 ē¾åØē“ę„ēęē®åÆ¦ę³ ęÆč¼ę¹ä¾æ0.0 ',
'[åƦę³] ččēē
éå·„ęæ2 ēēē³»ļæ½ļæ½é¢äøęę²ęę¹åä»éŗ¼ YT: åå„ē¶²ååØē°½åęŖ éæē©ŗåƦę³å° äø»č¦åƦę³PSåå°éęę©éę²ļ¼å¶ē¾ē©ē©PCéę²ļ¼ęÆä¾å¤§ę¦98.9:1:0.1 åƦę³ęéēŗęÆę„21:00~24:00 ęéęåå¾1äøå°ęļ¼ē¢ŗå®ä¼ęÆęåäø天ēµęęåē„ ē®ęØęÆēØå“ęBOSS Twitch : Youtube : å·“åå°å± : ēæę
£åÆ«é·ęļ¼ęęē« äæååØé UtsuhoReiuzi :č½éč³ēęæ PlayStation 12/03 20:07',
'[é»ē¹] åøåƶä½ę»æäŗŗļ¼ä»£č”Øåøåƶäøč²“ļ¼ ęÆē²čŖŖļ¼ęå«ē¤¾å®
ę»æē§äŗļ¼ä»£č”Øęå«ē¤¾å®
äøč²“ åøåƶä¹å¾ę¶ęćä½å¾ę»æļ¼ęÆäøęÆ代č”Øåøåƶä¹äøč²“å¢ļ¼ ę³äøå°åęå„ééŗ¼å§å±ļ¼åŖč½č·å„äŗŗę äøč²“åä½ę»æäŗŗēåøåƶ ',
'[ę°č] éæäøéØé·ē«ęÆę©å¦¹é«ęļ¼éå„éØå£é»ęčØč
éæäøéØé·ē«ęÆę©å¦¹é«ęéå„éØå£å°±ä¾é»ęčØč
ē«ę
ęę®äøåæčØč
ęäøēø½ęÆę²ē©©ēęę®å®é³ęäøļ¼ å
¶åƦęÆåčŖæē®å¤§ē·å©ļ¼éęÆåę©å¦¹é«ę ļ¼ćčŖē±čæ½ę°čćę”čØŖåéå°čØŖé³ęäøļ¼é¤äŗåęē«ę
ļ¼ä¹ęéé³ęäøč¼å°åØé”é ååŗē¾ ēäøé¢ ę¦ę¼¢čŗēļ¼ę°åå ēē
ęÆē
ļ¼COVID19ļ¼ē«ę
ēē¼ä»„ä¾ļ¼é³ęäøäøåŗ¦é£ēŗäø»ęé¾ē¾å “čØč
ęļ¼č¢«ę„åŖ形容ēŗéµäŗŗéØé·ļ¼ęŗ«åćēę§äøäø失äŗŗęéę·ēē¼čØļ¼äøåŖē©©ē©©ę§å¶ē«ę
ļ¼ä¹č®ä»åē²ē”ęøć 夫妻ēøčå°é¤ēä¹é é³ęäøåēåæ話 å½±ēäøļ¼é³ęäøé¢å°čØč
ēę©å¦¹éå„čé©ļ¼äøä½äøäøę„ęļ¼éå±ē¾ę„ęŗčŖåµé²ē«ę© 妹éå„ļ¼ äøå
ę©ååč¶³äøå¾åč¶
å¼·ļ¼č®čØč
č½å°é½ē“å¼åę»ęäŗ é³ęäøä¹åØå°čØŖäøéé²ä»ēé¤ēä¹éćęęēå°ē£å°åć夫妻ę¾éēµęļ¼ä»„åē«ę
éå¾ę ę³åēäŗ åæ«é»éå½±ēļ¼ēé³ęäøåäŗ«é²ē«ä»„å¤ēēåæ話ć éæäøēēč¶
åÆę é£ęŖęęčØč
é»ę ęø¾čŗ«äøäøé½ę£ē¼åŗęēēé
å å¦ęå幓č¼é»ę©é»åŗé åÆč½å°±ęÆę°é²é»Øēåę¾¤ē“ęعęęÆéåę¦äŗ ',
'Re: [å§åŖ] äø ęę³ éå°±ęÆęč·å¤§č”ēč·é¢äŗ 大č”éē¬å”å” ęåŖč½č®å”å”ē½µęęä½ åŖ½ '
]


@st.cache_resource
def load_all_model():
    classify_model = Load_Model.Bert_Classify_Model()
    classify_model.load_model()

    senti_model = Load_Model.Sentiment_Model()
    senti_model.load_model()

    summarize_model = Load_Model.Summarization_Model()
    summarize_model.load_model()
    return classify_model, senti_model, summarize_model


def text_area_widget():
    input_content = st.selectbox(
        "ä½æēØēÆä¾ęē« ",
        sample_text,
        index=0
    )

    if input_content == "Select":
        input_content = st.text_area(
label="č«č¼øå
„ęē« ..."
        )
    else:
        input_content = st.text_area(
label="č«č¼øå
„ęē« ...",
            value=input_content
        )
    return input_content


def make_result_button(classify_model, senti_model, summarize_model, input_content):
    if st.button("ē¢åŗęę"):
        if input_content == "":
st.error('č«å
č¼øå
„ęØę³åęēęē« ęē“ę„éøęēÆä¾ęē« ļ¼', icon="šØ")
        else:
            with st.spinner('Wait for it...'):
                run_analysis(classify_model, senti_model, summarize_model, input_content)
            st.balloons()


def run_analysis(classify, senti, summarize, input):
tab_summarize, tab_classify, tab_sentiment = st.tabs(["ęē« ęč¦", "ēé¢é ęø¬ęę", "ę
ē·åę"])

    with tab_summarize:
        st.subheader("ęē« ęč¦")
        st.caption('仄äøēŗęØēęē« ęč¦ć')
        st.write(summarize.run_summarize(input))

    with tab_classify:
        st.subheader("ēé¢é ęø¬ęę")
        st.caption('仄äøēŗēé¢é ęø¬ēę©ēć')
        classify_result = classify.predict(input).round(4)
st.write(f"ęØēęē« ęęåÆč½ęÆ {classify_result.iloc[0, 0]}ļ¼åÆč½ę§ēŗ {classify_result['ę©ē'].max()*100:2f} %")
        with st.expander("ę„ēęęēé¢é ęø¬ę©ē"):
            st.dataframe(
                data=classify_result,
                use_container_width=True
            )

    with tab_sentiment:
st.subheader("ę
ē·åęęę")
st.caption('仄äøēŗęØēęē« ę
ē·')
        senti_result = senti.run_sentiment(input)[0]
st.write(f"ęØēęē« ę
ē·ēŗ {senti_result['label']}ļ¼åęøēŗ {senti_result['score']}")


def main():
    # first init model
    classify_model, senti_model, summarize_model = load_all_model()

    # page design
    head_section = st.container()
    ana_section = st.container()
    output_section = st.container()

    with head_section:
        st.title("Dashboard")
        st.divider()

    with ana_section:
        input_content = text_area_widget()
        st.divider()

    with output_section:
        make_result_button(classify_model, senti_model, summarize_model, input_content)


if __name__ == '__main__':
    main()
pages/Control/Controls.py
ADDED
File without changes
pages/Model/Load_Model.py
ADDED
@@ -0,0 +1,134 @@
import re
import accelerate
import numpy as np
import pandas as pd
import torch.nn.functional as F
from pathlib import Path
from transformers import AutoModelForSequenceClassification, BertTokenizerFast, pipeline

accelerator = accelerate.Accelerator(cpu=True)


class LoadException(Exception):
    ...


class LoadModelException(Exception):
    ...


class LoadTokenizerException(Exception):
    ...


class DIR:
    MODEL_DIR = Path("pages/model_param")
    CLASSIFIER_MODEL_DIR = Path(f"{MODEL_DIR}/board_classification_model")
    SENTIMENT_MODEL_DIR = Path(f"{MODEL_DIR}/sentiment_analysis_model")
    SUMMARIZATION_MODEL_DIR = Path(f"{MODEL_DIR}/summarization_model")


class Bert_Classify_Model:
    """ALBERT-based classifier that predicts which PTT board a post belongs to."""

    def __init__(self):
        self.tokenizer_loaded = False
        self.model_loaded = False

    def load_model(self):
        try:
            self.tokenizer = BertTokenizerFast.from_pretrained(
                pretrained_model_name_or_path=DIR.CLASSIFIER_MODEL_DIR,
                local_files_only=True
            )
            self.tokenizer_loaded = True
        except Exception as exc:
            raise LoadTokenizerException("Tokenizer not loaded.") from exc

        try:
            self.model = AutoModelForSequenceClassification.from_pretrained(
                pretrained_model_name_or_path=DIR.CLASSIFIER_MODEL_DIR,
                local_files_only=True,
                num_labels=4
            )
            self.model_loaded = True
        except Exception as exc:
            raise LoadModelException("Model not loaded.") from exc

    @staticmethod
    def __make_output(outputs):
        # Map label indices to board names and return the probabilities sorted descending.
        id2label = {
            "0": "C_Chat",
            "1": "Gossiping",
            "2": "HatePolitics",
            "3": "Marginalman"
        }

        pred_prob = F.softmax(outputs.logits, dim=-1)
        pred_prob_df = (
            pd.DataFrame({
                "ēé¢": id2label.values(),
                "ę©ē": pred_prob[0, :].detach().numpy()
            })
            .sort_values(by="ę©ē", ascending=False)
        )
        return pred_prob_df

    def predict(self, text):
        if not (self.tokenizer_loaded and self.model_loaded):
            raise LoadException("Not loaded.")

        token_text = self.tokenizer(
            text,
            padding=True,
            truncation=True,
            return_tensors='pt'
        )

        outputs = self.model(**token_text)
        result = self.__make_output(outputs)
        return result


class Sentiment_Model:
    """Binary (Negative/Positive) sentiment-analysis pipeline."""

    def __init__(self):
        self.model_loaded = False

    def load_model(self):
        try:
            self.model = pipeline(
                "sentiment-analysis",
                DIR.SENTIMENT_MODEL_DIR,
            )
            self.model_loaded = True
        except Exception as exc:
            raise LoadModelException("Model not loaded.") from exc

    def run_sentiment(self, text):
        if not self.model_loaded:
            raise LoadModelException("model not loaded.")
        outputs = self.model(text)
        return outputs


class Summarization_Model:
    """mT5-based summarization pipeline."""

    def __init__(self):
        self.model_loaded = False

    def load_model(self):
        try:
            self.model = pipeline(
                "summarization",
                DIR.SUMMARIZATION_MODEL_DIR
            )
            self.model_loaded = True
        except Exception as exc:
            raise LoadModelException("Model not loaded.") from exc

    @staticmethod
    def __make_output(outputs):
        return outputs[0]["summary_text"]

    def run_summarize(self, text):
        if not self.model_loaded:
            raise LoadModelException("model not loaded.")
        outputs = self.model(text, max_length=1024)
        result = self.__make_output(outputs)
        return result
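Usage note (not part of the commit): a minimal sketch of how the three wrapper classes above are driven, mirroring load_all_model() and run_analysis() in pages/1_Dashboard.py; the post variable is an illustrative placeholder.

from pages.Model import Load_Model

classifier = Load_Model.Bert_Classify_Model()
classifier.load_model()
sentiment = Load_Model.Sentiment_Model()
sentiment.load_model()
summarizer = Load_Model.Summarization_Model()
summarizer.load_model()

post = "..."  # any PTT post body (placeholder)
print(classifier.predict(post))          # DataFrame of per-board probabilities, sorted descending
print(sentiment.run_sentiment(post)[0])  # {'label': 'Negative'/'Positive', 'score': ...}
print(summarizer.run_summarize(post))    # summary string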
pages/Model/__pycache__/Load_Model.cpython-39.pyc
ADDED
Binary file (4.67 kB).
pages/model_param/board_classification_model/config.json
ADDED
@@ -0,0 +1,47 @@
{
  "_name_or_path": "ckiplab/albert-tiny-chinese",
  "architectures": [
    "AlbertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 101,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 102,
  "gap_size": 0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 312,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 1248,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "albert",
  "net_structure_type": 0,
  "num_attention_heads": 12,
  "num_hidden_groups": 1,
  "num_hidden_layers": 4,
  "num_memory_blocks": 0,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "tokenizer_class": "BertTokenizerFast",
  "torch_dtype": "float32",
  "transformers_version": "4.28.0",
  "type_vocab_size": 2,
  "vocab_size": 21128
}
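Note that id2label/label2id above still hold the generic LABEL_0..LABEL_3 placeholders, which is why Bert_Classify_Model.__make_output in pages/Model/Load_Model.py re-maps the indices to board names by hand. An alternative sketch (not what this commit does) that writes the board names into config.json once, so any loader reports them directly:

from transformers import AutoModelForSequenceClassification

id2label = {0: "C_Chat", 1: "Gossiping", 2: "HatePolitics", 3: "Marginalman"}
model = AutoModelForSequenceClassification.from_pretrained(
    "pages/model_param/board_classification_model",
    id2label=id2label,
    label2id={name: idx for idx, name in id2label.items()},
)
model.save_pretrained("pages/model_param/board_classification_model")  # rewrites config.json with the real labels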
pages/model_param/board_classification_model/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6a0cb8aa2000d5211faebaf9d40c945f51dece7fdceef3435349b698217157f4
size 16340421
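The *.bin weights in this commit are stored as Git LFS pointer files like the three lines above, not as the tensors themselves. A small sketch, assuming the actual file has been pulled locally (matches_lfs_pointer is an illustrative helper, not part of the repo), for checking a download against the pointer's oid and size:

import hashlib
from pathlib import Path

def matches_lfs_pointer(path, expected_oid, expected_size):
    # Compare a local file against the sha256 and byte size recorded in its LFS pointer.
    data = Path(path).read_bytes()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid

print(matches_lfs_pointer(
    "pages/model_param/board_classification_model/pytorch_model.bin",
    "6a0cb8aa2000d5211faebaf9d40c945f51dece7fdceef3435349b698217157f4",
    16340421,
))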
pages/model_param/board_classification_model/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
{
  "cls_token": "[CLS]",
  "mask_token": "[MASK]",
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "unk_token": "[UNK]"
}
pages/model_param/board_classification_model/tokenizer.json
ADDED
The diff for this file is too large to render.
pages/model_param/board_classification_model/tokenizer_config.json
ADDED
@@ -0,0 +1,13 @@
{
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_lower_case": false,
  "mask_token": "[MASK]",
  "model_max_length": 512,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "unk_token": "[UNK]"
}
pages/model_param/board_classification_model/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:546fa189039a1bdaf4bbc81c76bbfffec575c533bb90613c836d519d7ec2e832
size 3579
pages/model_param/board_classification_model/vocab.txt
ADDED
The diff for this file is too large to render.
pages/model_param/sentiment_analysis_model/config.json
ADDED
@@ -0,0 +1,39 @@
{
  "_name_or_path": "IDEA-CCNL/Erlangshen-Roberta-110M-Sentiment",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "directionality": "bidi",
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "Negative",
    "1": "Positive"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": null,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}
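The id2label mapping above is what makes the sentiment pipeline return the "Negative"/"Positive" label strings that pages/1_Dashboard.py displays. A minimal sketch (files assumed to be present locally; the input string is a placeholder) of the output shape that Sentiment_Model.run_sentiment consumes:

from transformers import pipeline

senti = pipeline("sentiment-analysis", "pages/model_param/sentiment_analysis_model")
result = senti("...")  # any PTT post body (placeholder)
print(result[0]["label"], result[0]["score"])  # e.g. Positive 0.98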
pages/model_param/sentiment_analysis_model/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:615d7fe9040cbb298050a3a60797ced537736ef1dcf386199fa8b419098112d7
size 409146741
pages/model_param/sentiment_analysis_model/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
{
  "cls_token": "[CLS]",
  "mask_token": "[MASK]",
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "unk_token": "[UNK]"
}
pages/model_param/sentiment_analysis_model/tokenizer.json
ADDED
The diff for this file is too large to render.
pages/model_param/sentiment_analysis_model/tokenizer_config.json
ADDED
@@ -0,0 +1,15 @@
{
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": true,
  "mask_token": "[MASK]",
  "model_max_length": 1000000000000000019884624838656,
  "never_split": null,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "unk_token": "[UNK]"
}
pages/model_param/sentiment_analysis_model/vocab.txt
ADDED
The diff for this file is too large to render.
pages/model_param/summarization_model/config.json
ADDED
@@ -0,0 +1,36 @@
{
  "_name_or_path": "csebuetnlp/mT5_multilingual_XLSum",
  "architectures": [
    "MT5ForConditionalGeneration"
  ],
  "d_ff": 2048,
  "d_kv": 64,
  "d_model": 768,
  "decoder_start_token_id": 0,
  "dense_act_fn": "gelu_new",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "gated-gelu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": true,
  "layer_norm_epsilon": 1e-06,
  "length_penalty": 0.6,
  "max_length": 84,
  "model_type": "mt5",
  "no_repeat_ngram_size": 2,
  "num_beams": 4,
  "num_decoder_layers": 12,
  "num_heads": 12,
  "num_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "tie_word_embeddings": false,
  "tokenizer_class": "T5Tokenizer",
  "torch_dtype": "float32",
  "transformers_version": "4.29.2",
  "use_cache": true,
  "vocab_size": 250112
}
pages/model_param/summarization_model/generation_config.json
ADDED
@@ -0,0 +1,11 @@
{
  "_from_model_config": true,
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "length_penalty": 0.6,
  "max_length": 84,
  "no_repeat_ngram_size": 2,
  "num_beams": 4,
  "pad_token_id": 0,
  "transformers_version": "4.29.2"
}
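These generation defaults (beam search with num_beams=4, no_repeat_ngram_size=2, max_length=84, length_penalty=0.6) are picked up automatically by the summarization pipeline; keyword arguments at call time override them, which is how Summarization_Model.run_summarize raises max_length to 1024. A hedged sketch (local paths assumed, long_text is a placeholder):

from transformers import pipeline

summarizer = pipeline("summarization", "pages/model_param/summarization_model")
long_text = "..."  # any article body (placeholder)
print(summarizer(long_text)[0]["summary_text"])                                # uses the defaults above
print(summarizer(long_text, max_length=256, num_beams=2)[0]["summary_text"])   # per-call override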
pages/model_param/summarization_model/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:73285124713d581b135f95f92f49e70f24f6fa04f93ccf3bf8d6ed68d2f42a8c
size 2329698485
pages/model_param/summarization_model/special_tokens_map.json
ADDED
@@ -0,0 +1,5 @@
{
  "eos_token": "</s>",
  "pad_token": "<pad>",
  "unk_token": "<unk>"
}
pages/model_param/summarization_model/spiece.model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
size 4309802
pages/model_param/summarization_model/tokenizer.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:93c3578052e1605d8332eb961bc08d72e246071974e4cc54aa6991826b802aa5
size 16330369
pages/model_param/summarization_model/tokenizer_config.json
ADDED
@@ -0,0 +1,11 @@
{
  "additional_special_tokens": null,
  "clean_up_tokenization_spaces": true,
  "eos_token": "</s>",
  "extra_ids": 0,
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "<pad>",
  "sp_model_kwargs": {},
  "tokenizer_class": "T5Tokenizer",
  "unk_token": "<unk>"
}