Spaces:

BigSalmon
/

GPT2_Most_Probable

Runtime error

App Files Files Community

Simon Salmon committed on Oct 11, 2021

Commit

5e48936

1 Parent(s): fc757ce

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -115

app.py CHANGED Viewed

@@ -1,117 +1,35 @@
 import streamlit as st
-from huggingface_hub import InferenceApi
 import pandas as pd
-from transformers import pipeline
-# Custom CSS that setup() injects into the page with unsafe_allow_html=True.
-# NOTE(review): the `td { font-size: }` rule below has no value, so it is
-# presumably ignored by the browser — confirm the intended size or drop it.
-STYLE = """
-<style>
-img {
-    max-width: 100%;
-}
-th {
-    text-align: left!important
-}
-td {
-    font-size:
-}
-</style>
-"""
-# Placeholder the user must include in the input sentence; main() rejects
-# input that does not contain it.
-MASK_TOKEN = "<mask>"
-# Emoji appended to each classifier label before the results table is shown.
-# Holds both the emotion labels and the sentiment labels in one mapping.
-EMOJI_MAP = {
-    "anger": "😡",
-    "fear": "😱",
-    "happy": "😄",
-    "love": "😍",
-    "sadness": "😭",
-    "positive": "🤗",
-    "negative": "😤",
-    "neutral": "😐",
-}
-def display_table(df: pd.DataFrame, subheader: str):
-    """Render a subheader followed by a static table on the Streamlit page.
-
-    Args:
-        df: Table content handed to ``st.table``. NOTE(review): the callers in
-            main() pass the result of ``DataFrame.style.set_properties`` (a
-            styled object) rather than a plain DataFrame — confirm whether the
-            annotation should be widened.
-        subheader: Heading text shown above the table.
-    """
-    st.subheader(subheader)
-    st.table(df)
-def setup():
-    """Inject the custom CSS, render the page header, and title the sidebar."""
-    header_markdown = """
-        # 🇮🇩 Indonesian RoBERTa Base
-        Demo Powered by [Indonesian RoBERTa Base](https://huggingface.co/flax-community/indonesian-roberta-base).
-        """
-    # STYLE is raw HTML, so Streamlit has to be told not to escape it.
-    st.markdown(STYLE, unsafe_allow_html=True)
-    st.markdown(header_markdown)
-    st.sidebar.subheader("Settings")
-def main():
-    """Run the demo: fill the mask in the user's sentence, then classify each completion.
-
-    Flow:
-      1. Render the page chrome and read the sidebar choice ("Emotion" or
-         "Sentiment") and the input sentence.
-      2. Send the sentence to the hosted fill-mask model through InferenceApi
-         and show the returned candidates in a table.
-      3. Run the chosen classifier pipeline on every candidate sequence and
-         show the labels (decorated with an emoji) in a second table.
-
-    Nothing is rendered for empty input; an error is shown when the input
-    lacks MASK_TOKEN; a "model is loading" notice is shown when the inference
-    API returns None.
-    """
-    setup()
-    analyze = st.sidebar.selectbox(
-        "What should we analyze?",
-        ("Emotion", "Sentiment"),
-        help="Classifier model to choose for text analysis",
-    )
-    user_input = st.text_input(
-        f"Insert a sentence to predict with a {MASK_TOKEN} token // Masukkan kalimat untuk diisi dengan token {MASK_TOKEN}",
-        value=f"Gila! Hari ini aku {MASK_TOKEN} banget..",
-    )
-    mlm_model = "BigSalmon/BestMask2"
-    mask_api = InferenceApi(mlm_model)
-    # The selectbox only offers these two options, so the lookup cannot miss.
-    sa_models = {
-        "Emotion": "StevenLimcorn/indonesian-roberta-base-emotion-classifier",
-        "Sentiment": "w11wo/indonesian-roberta-base-sentiment-classifier",
-    }
-    sa_model = sa_models[analyze]
-    sa_pipeline = pipeline("sentiment-analysis", model=sa_model, tokenizer=sa_model)
-
-    if not user_input:
-        return
-    if MASK_TOKEN not in user_input:
-        st.error(
-            f"Please enter a sentence with the correct {MASK_TOKEN} token // Harap masukkan kalimat dengan token {MASK_TOKEN} yang benar"
-        )
-        return
-
-    # render masked language modeling table
-    mlm_result = mask_api(inputs=user_input)
-    if mlm_result is None:
-        st.write("Model is loading. Please try again later...")
-        return
-    mlm_df = pd.DataFrame(mlm_result).drop(columns=["token", "token_str"])
-    mlm_df_styled = mlm_df.style.set_properties(
-        subset=["sequence", "score"], **{"text-align": "left"}
-    )
-    display_table(mlm_df_styled, "🎈 Top 5 Predictions")
-
-    # render sentiment analysis table
-    # Collect rows first and build the frame once: DataFrame.append copied the
-    # whole frame on every call and no longer exists in pandas 2.x.
-    rows = []
-    for sequence in mlm_df["sequence"].values:
-        sa_output = sa_pipeline(sequence)  # predict for every mlm output
-        rows.append({"sequence": sequence, **sa_output[0]})
-    sa_df = pd.DataFrame(rows, columns=["sequence", "label", "score"])
-    # Fall back to the bare label when the classifier emits one without an emoji.
-    sa_df["label"] = sa_df["label"].apply(
-        lambda x: f"{x} {EMOJI_MAP[x]}" if x in EMOJI_MAP else x
-    )
-    sa_df_styled = sa_df.style.set_properties(
-        subset=["sequence", "label", "score"], **{"text-align": "left"}
-    )
-    display_table(sa_df_styled, "🤔 By saying that, I guess you are feeling..")
-# Run the app only when this file is the entry script, not when it is imported.
-if __name__ == "__main__":
-    main()

 import streamlit as st
+import numpy as np
 import pandas as pd
+import os
+import torch
+import torch.nn as nn
+from transformers import ElectraModel, AutoConfig, GPT2LMHeadModel
+from transformers.activations import get_activation
+from transformers import AutoTokenizer
+st.title('KoGPT2 Demo')
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+def _cache_resource(func):
+    """Wrap *func* with whichever Streamlit resource cache this version offers."""
+    decorator = getattr(st, "cache_resource", None)
+    if decorator is None:
+        # Older Streamlit releases only ship st.cache; the loaded objects must
+        # not be hashed for mutation checks.
+        decorator = st.cache(allow_output_mutation=True)
+    return decorator(func)
+
+
+@_cache_resource
+def _load_kogpt2():
+    """Load the KoGPT2 tokenizer and model once and reuse them across reruns."""
+    loaded_tokenizer = AutoTokenizer.from_pretrained("skt/kogpt2-base-v2")
+    loaded_model = GPT2LMHeadModel.from_pretrained('skt/kogpt2-base-v2')
+    return loaded_tokenizer, loaded_model
+
+
+# Streamlit re-executes this script on every interaction; without the cache
+# the weights would be reloaded each time the form is submitted.
+tokenizer, model = _load_kogpt2()
+# Keep the weights on the same device the prompt tensor is created on below.
+model.to(device)
+
+with st.form(key='my_form'):
+    text_input = st.text_input(label='Enter sentence')
+    submit_button = st.form_submit_button(label='Submit')
+    if submit_button:
+        inputs = tokenizer.encode(text_input)
+        if not inputs:
+            # An empty prompt encodes to no tokens, which would hand
+            # generate() an empty (1, 0) tensor instead of token ids.
+            st.warning('Please enter a sentence first.')
+        else:
+            input_ids = torch.tensor([inputs], dtype=torch.long, device=device)
+            with torch.no_grad():
+                gen_ids = model.generate(input_ids,
+                                         max_length=128,
+                                         repetition_penalty=2.0,
+                                         pad_token_id=tokenizer.pad_token_id,
+                                         eos_token_id=tokenizer.eos_token_id,
+                                         bos_token_id=tokenizer.bos_token_id,
+                                         use_cache=True)
+            generated = tokenizer.decode(gen_ids[0].tolist())
+            st.write(generated)