RickMartel committed · Commit 87612e7
Parent(s): a022107
Update app.py
app.py CHANGED
@@ -1,7 +1,14 @@
 import streamlit as st
-from transformers import pipeline, PretrainedConfig
+from transformers import pipeline, PretrainedConfig, AutoModelForCausalLM, GPT2Tokenizer
+import torch
 
+model_id = "RickMartel/GPT2_FT_By_NT_RAND_v7"
+device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+BOS_TOKEN = '<|startoftext|>'
+EOS_TOKEN = '<|endoftext|>'
+PAD_TOKEN = '<|pad|>'
 
+b1 = """
 pc = PretrainedConfig(
     max_new_tokens=100,
     num_beams=3,
@@ -11,10 +18,19 @@ pc = PretrainedConfig(
 
 pipe = pipeline(
     "text-generation",
-    model=
+    model=model_id,
     device="cpu",
     config=pc
 )
+"""
+model = AutoModelForCausalLM.from_pretrained(model_id)
+model = model.to(device)
+model.eval()
+tokenizer = GPT2Tokenizer.from_pretrained(model_id,
+    bos_token=BOS_TOKEN,
+    eos_token=EOS_TOKEN,
+    pad_token=PAD_TOKEN,
+    add_bos_token=False,)
 
 st.set_page_config(page_title="GPT2 4 Bible")
 
@@ -25,16 +41,33 @@ st.markdown(
     Model notes:
     - This is a fine-tuned Hugging Face distilgpt2 model.
     - The dataset used was the Christian New Testament.
-    - This Space uses a CPU only.
+    - This Space uses a CPU only. So, the app is a little slow.
     - This is a document completion model. Not a Q&A. Input prompts like, "Jesus said".
     """
 )
 
 txt = st.text_area('Enter prompt of a biblical nature. WARNING: Results may not be correct.')
 
+def get_model_input(_input: str):
+    prompt = "<|startoftext|>" + _input
+    generated = torch.tensor(tokenizer.encode(prompt)).unsqueeze(0)
+    generated = generated.to(device)
+    return generated
+
 if txt and len(txt.strip()) > 0:
-    txt = "<|startoftext|>" + txt
-    out = pipe(txt, num_return_sequences=1)[0]["generated_text"]
-    out = out.replace("<|startoftext|>", "")
+    #txt = "<|startoftext|>" + txt
+    #out = pipe(txt, num_return_sequences=1)[0]["generated_text"]
+    #out = out.replace("<|startoftext|>", "")
+    generated = get_model_input(txt)
+    sample_outputs = model.generate(
+        generated,
+        do_sample=True,
+        top_k=15,
+        max_length=150,
+        num_return_sequences=1,
+        pad_token_id=tokenizer.eos_token_id,
+    )
+    out = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
+
     with st.expander("Response", expanded=True):
         st.write(out)
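For reference, the inference path this commit introduces (replacing the transformers pipeline with a direct model.generate call) can be exercised outside Streamlit roughly as follows. This is a minimal sketch, not part of the commit: it reuses the checkpoint id and special tokens from app.py above, while the prompt string and the torch.no_grad() wrapper are illustrative additions.

# Minimal standalone sketch of the generation path added in commit 87612e7.
# Assumes the checkpoint and special tokens from app.py; the prompt is illustrative.
import torch
from transformers import AutoModelForCausalLM, GPT2Tokenizer

model_id = "RickMartel/GPT2_FT_By_NT_RAND_v7"
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

model = AutoModelForCausalLM.from_pretrained(model_id).to(device).eval()
tokenizer = GPT2Tokenizer.from_pretrained(
    model_id,
    bos_token="<|startoftext|>",
    eos_token="<|endoftext|>",
    pad_token="<|pad|>",
    add_bos_token=False,
)

# Prepend the BOS token the model was fine-tuned with, then encode a batch of one.
prompt = "<|startoftext|>" + "Jesus said"
input_ids = torch.tensor(tokenizer.encode(prompt)).unsqueeze(0).to(device)

with torch.no_grad():
    sample_outputs = model.generate(
        input_ids,
        do_sample=True,           # sample from the top-k tokens instead of greedy/beam search
        top_k=15,
        max_length=150,           # total length cap, prompt tokens included
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )

print(tokenizer.decode(sample_outputs[0], skip_special_tokens=True))

Passing pad_token_id=tokenizer.eos_token_id mirrors the app and avoids the warning generate() emits when no pad token is configured. Decoding with skip_special_tokens=True drops <|startoftext|> automatically, which is why the old out.replace("<|startoftext|>", "") step could be retired along with the pipeline.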