Spaces:

flax-community
/

spanish-gpt2

Runtime error

File size: 5,352 Bytes

import json
import random
import requests

from mtranslate import translate
import streamlit as st


# Logo image; used as the page icon and displayed at the top of the sidebar.
LOGO = "https://raw.githubusercontent.com/nlp-en-es/assets/main/logo.png"

# Selectable models: display name -> {"url": endpoint that `query` POSTs to}.
MODELS = {
    label: {"url": endpoint}
    for label, endpoint in (
        (
            "Model trained on OSCAR",
            "https://api-inference.huggingface.co/models/flax-community/gpt-2-spanish",
        ),
        (
            "Model trained on the Large Spanish Corpus",
            "https://api-inference.huggingface.co/models/mrm8488/spanish-gpt2",
        ),
    )
}

# Preset prompts: menu label -> list of starter texts (one is picked at random
# when the preset is selected).
PROMPT_LIST = {
    label: [starter]
    for label, starter in (
        ("Érase una vez...", "Érase una vez "),
        ("¡Hola!", "¡Hola! Me llamo "),
        ("¿Ser o no ser?", "En mi opinión, 'ser' es "),
    )
}


def query(payload, model_name, timeout=60):
    """POST ``payload`` as JSON to the endpoint of the selected model.

    Args:
        payload: JSON-serialisable request body.
        model_name: Key of ``MODELS``; its ``"url"`` entry is the target.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the app indefinitely.

    Returns:
        The decoded JSON response body. If the request itself fails, or the
        body is not valid UTF-8 JSON, a dict of the form
        ``{"error": "<message>"}`` is returned instead of raising.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODELS``.
    """
    data = json.dumps(payload)
    url = MODELS[model_name]["url"]
    print("model url:", url)
    try:
        response = requests.request(
            "POST", url, headers={}, data=data, timeout=timeout
        )
    except requests.exceptions.RequestException as exc:
        # Network failure, DNS error, timeout, ...: report it in the same
        # shape as an error payload rather than crashing.
        return {"error": f"Request to the inference API failed: {exc}"}

    try:
        return json.loads(response.content.decode("utf-8"))
    except ValueError:
        # Covers both json.JSONDecodeError and UnicodeDecodeError, e.g. when
        # the server answers with a non-JSON error page.
        return {
            "error": (
                "Unexpected non-JSON response from the inference API "
                f"(HTTP {response.status_code})"
            )
        }


def process(
    text: str, model_name: str, max_len: int, temp: float, top_k: int, top_p: float
):
    """Assemble a generation request for ``text`` and send it with ``query``.

    Args:
        text: Prompt, sent as the ``"inputs"`` field.
        model_name: Model key, passed through to ``query``.
        max_len: Sent as ``"max_new_tokens"``.
        temp: Sent as ``"temperature"``.
        top_k: Sent as ``"top_k"``.
        top_p: Sent as ``"top_p"``.

    Returns:
        Whatever ``query`` returns for the assembled payload.
    """
    # Generation settings; the repetition penalty is fixed at 2.0.
    generation_params = {
        "max_new_tokens": max_len,
        "top_k": top_k,
        "top_p": top_p,
        "temperature": temp,
        "repetition_penalty": 2.0,
    }
    request_options = {"use_cache": True}
    request_body = {
        "inputs": text,
        "parameters": generation_params,
        "options": request_options,
    }
    return query(request_body, model_name)


# Page
# Browser-tab metadata and the main heading.
st.set_page_config(page_icon=LOGO, page_title="Spanish GPT-2 Demo")
st.title("Spanish GPT-2")


# Sidebar
# Generation settings; every widget below is rendered in the sidebar.
_sidebar = st.sidebar
_sidebar.image(LOGO)
_sidebar.subheader("Configurable parameters")

max_len = _sidebar.number_input(
    "Maximum length",
    help="The maximum length of the sequence to be generated.",
    value=100,
)

temp = _sidebar.slider(
    "Temperature",
    min_value=0.1,
    max_value=100.0,
    value=1.0,
    help="The value used to module the next token probabilities.",
)

top_k = _sidebar.number_input(
    "Top k",
    help="The number of highest probability vocabulary tokens to keep for top-k-filtering.",
    value=10,
)

top_p = _sidebar.number_input(
    "Top p",
    help=" If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.",
    value=0.95,
)

# NOTE(review): `do_sample` is not read anywhere else in the visible code, so
# this selector currently appears to have no effect on the request — confirm.
do_sample = _sidebar.selectbox(
    "Sampling?",
    (True, False),
    help="Whether or not to use sampling; use greedy decoding otherwise.",
)


# Body
# Introductory blurb describing the two models and their training data.
st.markdown(
    """
    Spanish GPT-2 models trained from scratch on two different datasets. One
    model is trained on the Spanish portion of
    [OSCAR](https://huggingface.co/datasets/viewer/?dataset=oscar)
    and the other on the
    [large_spanish_corpus](https://huggingface.co/datasets/viewer/?dataset=large_spanish_corpus)
    aka BETO's corpus.
    
    The models are trained with Flax and using TPUs sponsored by Google since this is part of the
    [Flax/Jax Community Week](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104)
    organised by HuggingFace.
    """
)

# Model picker; the options are the display names used as keys of MODELS.
model_name = st.selectbox("Model", list(MODELS))

# Preset prompt labels followed by "Custom", which is preselected (last index).
ALL_PROMPTS = [*PROMPT_LIST, "Custom"]
prompt = st.selectbox("Prompt", ALL_PROMPTS, index=len(ALL_PROMPTS) - 1)

# Default text for the input box: a placeholder for "Custom", otherwise a
# randomly chosen starter from the selected preset.
prompt_box = (
    "Enter your text here"
    if prompt == "Custom"
    else random.choice(PROMPT_LIST[prompt])
)

text = st.text_area("Enter text", prompt_box)

# Run generation when the button is pressed and render either the model
# output (plus an English translation) or the error reported by the API.
if st.button("Run"):
    with st.spinner(text="Getting results..."):
        st.subheader("Result")
        print(f"maxlen:{max_len}, temp:{temp}, top_k:{top_k}, top_p:{top_p}")
        result = process(
            text=text,
            model_name=model_name,
            max_len=int(max_len),
            temp=temp,
            top_k=int(top_k),
            top_p=float(top_p),
        )
        print("result:", result)
        if isinstance(result, dict) and "error" in result:
            error = result["error"]
            if isinstance(error, str):
                # `st.write` has no print-style `end` argument, so the error
                # and the retry hint are joined into one message explicitly.
                message = f"{error}."
                if "estimated_time" in result:
                    message += (
                        " Please try again in about "
                        f'{result["estimated_time"]:.0f} seconds.'
                    )
                st.write(message)
            elif isinstance(error, list):
                for item in error:
                    st.write(f"{item}")
            else:
                # Unknown error shape: show it rather than failing silently.
                st.write(f"{error}")
        elif (
            isinstance(result, list)
            and result
            and isinstance(result[0], dict)
            and "generated_text" in result[0]
        ):
            generated = result[0]["generated_text"]
            # Two trailing spaces before the newline force a Markdown line break.
            st.write(generated.replace("\n", "  \n"))
            st.text("English translation")
            st.write(translate(generated, "en", "es").replace("\n", "  \n"))
        else:
            # Neither an error payload nor a list of generations.
            st.write("The model returned an unexpected response.")

# Footer: team credits and links to the model cards. Model pages live at
# https://huggingface.co/<owner>/<model>, without a "/models/" path segment.
st.markdown(
    """
    ### Team members
    - Manuel Romero ([mrm8488](https://huggingface.co/mrm8488))
    - María Grandury ([mariagrandury](https://huggingface.co/mariagrandury))
    - Pablo González de Prado ([Pablogps](https://huggingface.co/Pablogps))
    - Daniel Vera ([daveni](https://huggingface.co/daveni))
    - Sri Lakshmi ([srisweet](https://huggingface.co/srisweet))
    - José Posada ([jdposa](https://huggingface.co/jdposa))
    - Santiago Hincapie ([shpotes](https://huggingface.co/shpotes))
    - Jorge ([jorgealro](https://huggingface.co/jorgealro))
    
    ### More information
    You can find more information about these models in their cards:
    - [Model trained on OSCAR](https://huggingface.co/flax-community/gpt-2-spanish)
    - [Model trained on the Large Spanish Corpus](https://huggingface.co/mrm8488/spanish-gpt2)
    """
)