Spaces:
Runtime error
Runtime error
# --- App setup -------------------------------------------------------------
# Streamlit page chrome for the "Reassuring Parables" generator
# (inspired by xkcd #1263, shown below).
import streamlit as st
import csv  # NOTE(review): csv appears unused in the visible code — confirm before removing

st.set_page_config(page_title="Reassuring Parables")
st.title("Reassuring Parables generator - by Allen Roush")
st.caption("Find me on Linkedin: https://www.linkedin.com/in/allen-roush-27721011b/")
# The comic that inspired this app.
st.image("https://imgs.xkcd.com/comics/reassuring.png")
st.caption("From https://xkcd.com/1263/")
# instantiate
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# load (supports t5, mt5, byT5 models)
#model.from_pretrained("t5","t5-base")
# Training prompts: the fixed seed phrase, repeated once per target parable.
source_text = ["Computers will never"] * 15
# Target completions for fine-tuning: 15 entries, one per source prompt.
target_text = [
    "Computers will never understand a sonnet",
    "Computers will never enjoy a salad",
    "Computers will never know how to love",
    "Computers will never know how to smell",
    "Computers will never have a sense of being",
    "Computers will never feel",
    "Computers will never appreciate art",
    "Computers will never have good manners",
    "Computers will never understand god",
    "Computers will never solve the halting problem",
    "Computers will never be conscious",
    "Computers will never prove that they aren't P-zombies",
    "Computers will never replace the human brain",
    # BUG FIX: a missing comma here previously fused the next two literals
    # via implicit string concatenation, leaving 14 elements instead of 15
    # (a silent mismatch with the 15-element source_text).
    "Computers will never write better reassuring parables than humans",
    "Computers will never replace humans",
]
#full_df = pd.DataFrame(list(zip(source_text, target_text)), columns = ["source_text", "target_text"])
#print(full_df)
#train_df, eval_df = train_test_split(full_df, test_size = 0.2)
def train_model(model=None, train_df=None, eval_df=None):
    """Fine-tune a simpleT5-style seq2seq model on the parable dataframes.

    Args:
        model: object exposing a ``.train(...)`` method (simpleT5-style API).
        train_df: pandas DataFrame with 2 columns: source_text & target_text.
        eval_df: pandas DataFrame with 2 columns: source_text & target_text.

    Raises:
        ValueError: if any argument is missing. (The original body read
        module globals ``model``/``train_df``/``eval_df`` that are never
        defined — the only assignments are commented out — so a bare call
        crashed with NameError; this raises a clear error instead.)
    """
    if model is None or train_df is None or eval_df is None:
        raise ValueError("train_model requires model, train_df and eval_df")
    model.train(
        train_df=train_df,
        eval_df=eval_df,
        source_max_token_len=512,
        target_max_token_len=128,
        batch_size=1,
        max_epochs=4,
        use_gpu=True,
        outputdir="/home/lain/lain/CX_DB8/outputs",
        early_stopping_patience_epochs=0,
        precision=32,
    )
#train_model()
# load trained T5 model
# Downloads (or reads from cache) the fine-tuned checkpoint from the
# HuggingFace Hub; the spinner covers the potentially slow first load.
with st.spinner("Please wait while the model loads:"):
    tokenizer = AutoTokenizer.from_pretrained("Hellisotherpeople/T5_Reassuring_Parables")
    model = AutoModelForSeq2SeqLM.from_pretrained("Hellisotherpeople/T5_Reassuring_Parables")

# Sidebar form collecting the generation hyper-parameters. Streamlit reruns
# the whole script on submit, so the values below feed model.generate(...).
form = st.sidebar.form("choose_settings")
form.header("Main Settings")
number_of_parables = form.number_input("Select how many reassuring parables you want to generate", value = 20, max_value = 1000)
max_length_of_parable = form.number_input("What's the max length of the parable?", value = 20, max_value = 128)
# min is capped by the chosen max so the pair is always consistent.
min_length_of_parable = form.number_input("What's the min length of the parable?", value = 0, max_value = max_length_of_parable)
top_k = form.number_input("What value of K should we use for Top-K sampling? Set to zero to disable", value = 50)
form.caption("In Top-K sampling, the K most likely next words are filtered and the probability mass is redistributed among only those K next words. ")
top_p = form.number_input("What value of P should we use for Top-p sampling? Set to zero to disable", value = 0.95, max_value = 1.0, min_value = 0.0)
form.caption("Top-p sampling chooses from the smallest possible set of words whose cumulative probability exceeds the probability p. The probability mass is then redistributed among this set of words.")
temperature = form.number_input("How spicy/interesting do we want our models output to be", value = 1.05, min_value = 0.0)
form.caption("Setting this higher decreases the likelihood of high probability words and increases the likelihood of low probability (and presumably more interesting) words")
form.caption("For more details on what these settings mean, see here: https://huggingface.co/blog/how-to-generate")
form.form_submit_button("Generate some Reassuring Parables!")
#seed_value = st.sidebar.number_input("Select a seed value - change this to get different output", 42) ## Doesn't work :(
with st.spinner("Generating Reassuring Parables"):
    # Seed every generation with the fixed prompt the model was trained on.
    input_ids = tokenizer.encode("Computers will never", return_tensors='pt')
    # Sample `number_of_parables` continuations using the sidebar settings
    # (top-k / nucleus sampling with temperature).
    sample_outputs = model.generate(
        input_ids,
        do_sample=True,
        max_length=max_length_of_parable,
        min_length=min_length_of_parable,
        top_k=top_k,
        top_p=top_p,
        num_return_sequences=number_of_parables,
        temperature=temperature
    )
#pl.seed_everything(seed_value)
# Decode each sampled token sequence back to plain text. (Idiom fix: list
# comprehension replaces the manual enumerate/append loop whose index was
# never used.)
list_of_parables = [
    tokenizer.decode(sample_output, skip_special_tokens=True)
    for sample_output in sample_outputs
]
st.write(list_of_parables)