GPT2Tokenizer / app.py
BigSalmon's picture
Update app.py
b4c807e
raw
history blame
911 Bytes
import streamlit as st
from transformers import AutoTokenizer
import torch
@st.cache(allow_output_mutation=True)
def get_model():
    """Return the pretrained ``"gpt2"`` tokenizer.

    The function is wrapped in ``st.cache(allow_output_mutation=True)``, so
    Streamlit caches the returned object instead of rebuilding it on each
    call.
    """
    # NOTE(review): newer Streamlit releases deprecate st.cache in favour of
    # st.cache_resource -- confirm the pinned Streamlit version before switching.
    return AutoTokenizer.from_pretrained("gpt2")
# Tokenizer used by the rest of the script, obtained through the cached loader.
tokenizer = get_model()

# Free-text box for the words to penalize, pre-filled with a sample list.
bad_words = st.text_input(
    label="Words You Do Not Want Generated",
    value=" core lemon height time ",
)
def run_generate(bad_words: str, bias: int = -30) -> str:
    """Build a token-id -> bias mapping string for the words in *bad_words*.

    Each whitespace-separated word is prefixed with a single space and passed
    to the module-level ``tokenizer``; every token id it produces is mapped to
    *bias*.  The mapping is rendered like a dict literal (illustrative ids:
    ``"{101: -30, 202: -30}"``), printed to stdout, and returned.

    Words that tokenize to several ids contribute one entry per id, so the
    result is always a well-formed mapping.  A token id that occurs more than
    once appears only once in the output.

    Args:
        bad_words: Whitespace-separated words to penalize.
        bias: Value assigned to every token id.  Defaults to -30.

    Returns:
        The mapping rendered as a string; ``"{}"`` when *bad_words* contains
        no words.
    """
    token_bias = {}
    for word in bad_words.split():
        # The word is tokenized with a leading space -- presumably so the
        # tokenizer yields the form the word takes in the middle of a sentence.
        for token_id in tokenizer(" " + word).input_ids:
            token_bias[token_id] = bias

    # str() of a dict of ints gives the "{id: bias, id: bias}" layout directly,
    # without any string surgery on a list representation.
    rendered = str(token_bias)
    print(rendered)
    return rendered
# Only produce output once the text box holds something.
if bad_words:
    translated_text = run_generate(bad_words)
    # Show the generated mapping, or a placeholder if nothing came back.
    st.write(translated_text or "No translation found")