Simon Salmon committed on
Commit
5e48936
·
1 Parent(s): fc757ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -115
app.py CHANGED
@@ -1,117 +1,35 @@
1
  import streamlit as st
2
- from huggingface_hub import InferenceApi
3
  import pandas as pd
4
- from transformers import pipeline
5
-
6
- STYLE = """
7
- <style>
8
- img {
9
- max-width: 100%;
10
- }
11
-
12
- th {
13
- text-align: left!important
14
- }
15
-
16
- td {
17
- font-size:
18
- }
19
- </style>
20
- """
21
-
22
- MASK_TOKEN = "<mask>"
23
-
24
- EMOJI_MAP = {
25
- "anger": "😡",
26
- "fear": "😱",
27
- "happy": "😄",
28
- "love": "😍",
29
- "sadness": "😭",
30
- "positive": "🤗",
31
- "negative": "😤",
32
- "neutral": "😐",
33
- }
34
-
35
-
36
- def display_table(df: pd.DataFrame, subheader: str):
37
- st.subheader(subheader)
38
- st.table(df)
39
-
40
-
41
- def setup():
42
- st.markdown(STYLE, unsafe_allow_html=True)
43
- st.markdown(
44
- """
45
- # 🇮🇩 Indonesian RoBERTa Base
46
-
47
- Demo Powered by [Indonesian RoBERTa Base](https://huggingface.co/flax-community/indonesian-roberta-base).
48
- """
49
- )
50
- st.sidebar.subheader("Settings")
51
-
52
-
53
- def main():
54
- setup()
55
-
56
- analyze = st.sidebar.selectbox(
57
- "What should we analyze?",
58
- ("Emotion", "Sentiment"),
59
- help="Classifier model to choose for text analysis",
60
- )
61
-
62
- user_input = st.text_input(
63
- f"Insert a sentence to predict with a {MASK_TOKEN} token // Masukkan kalimat untuk diisi dengan token {MASK_TOKEN}",
64
- value=f"Gila! Hari ini aku {MASK_TOKEN} banget..",
65
- )
66
-
67
- mlm_model = "BigSalmon/BestMask2"
68
- mask_api = InferenceApi(mlm_model)
69
-
70
- if analyze == "Emotion":
71
- sa_model = "StevenLimcorn/indonesian-roberta-base-emotion-classifier"
72
- elif analyze == "Sentiment":
73
- sa_model = "w11wo/indonesian-roberta-base-sentiment-classifier"
74
-
75
- sa_pipeline = pipeline("sentiment-analysis", model=sa_model, tokenizer=sa_model)
76
-
77
- if len(user_input) > 0:
78
- try:
79
- user_input.index(MASK_TOKEN)
80
- except ValueError:
81
- st.error(
82
- f"Please enter a sentence with the correct {MASK_TOKEN} token // Harap masukkan kalimat dengan token {MASK_TOKEN} yang benar"
83
- )
84
- else:
85
- # render masked language modeling table
86
- mlm_result = mask_api(inputs=user_input)
87
-
88
- if mlm_result == None:
89
- st.write("Model is loading. Please try again later...")
90
- return
91
-
92
- mlm_df = pd.DataFrame(mlm_result)
93
- mlm_df.drop(columns=["token", "token_str"], inplace=True)
94
- mlm_df_styled = mlm_df.copy(deep=False)
95
- mlm_df_styled = mlm_df_styled.style.set_properties(
96
- subset=["sequence", "score"], **{"text-align": "left"}
97
- )
98
- display_table(mlm_df_styled, "🎈 Top 5 Predictions")
99
-
100
- # render sentiment analysis table
101
- sa_df = pd.DataFrame(columns=["sequence", "label", "score"])
102
- for sequence in mlm_df["sequence"].values:
103
- sa_output = sa_pipeline(sequence) # predict for every mlm output
104
- result_dict = {"sequence": sequence}
105
- result_dict.update(sa_output[0])
106
- sa_df = sa_df.append(result_dict, ignore_index=True)
107
-
108
- sa_df["label"] = sa_df["label"].apply(lambda x: x + " " + EMOJI_MAP[x])
109
- sa_df_styled = sa_df.copy(deep=False)
110
- sa_df_styled = sa_df_styled.style.set_properties(
111
- subset=["sequence", "label", "score"], **{"text-align": "left"}
112
- )
113
- display_table(sa_df_styled, "🤔 By saying that, I guess you are feeling..")
114
-
115
-
116
- if __name__ == "__main__":
117
- main()
 
1
  import streamlit as st
2
+ import numpy as np
3
  import pandas as pd
4
+ import os
5
+ import torch
6
+ import torch.nn as nn
7
+ from transformers import ElectraModel, AutoConfig, GPT2LMHeadModel
8
+ from transformers.activations import get_activation
9
+ from transformers import AutoTokenizer
10
+
11
+
12
+ st.title('KoGPT2 Demo')
13
+
14
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
15
+
16
+ tokenizer = AutoTokenizer.from_pretrained("skt/kogpt2-base-v2")
17
+ model = GPT2LMHeadModel.from_pretrained('skt/kogpt2-base-v2')
18
+
19
+ with st.form(key='my_form'):
20
+ text_input = st.text_input(label='Enter sentence')
21
+ submit_button = st.form_submit_button(label='Submit')
22
+
23
+ if submit_button:
24
+ with torch.no_grad():
25
+ inputs = tokenizer.encode(text_input)
26
+ gen_ids = model.generate(torch.tensor([inputs]),
27
+ max_length=128,
28
+ repetition_penalty=2.0,
29
+ pad_token_id=tokenizer.pad_token_id,
30
+ eos_token_id=tokenizer.eos_token_id,
31
+ bos_token_id=tokenizer.bos_token_id,
32
+ use_cache=True)
33
+ generated = tokenizer.decode(gen_ids[0,:].tolist())
34
+
35
+ st.write(generated)