RickMartel committed on
Commit 87612e7
1 Parent(s): a022107

Update app.py

Files changed (1)
  1. app.py +39 -6
app.py CHANGED
@@ -1,7 +1,14 @@
 import streamlit as st
-from transformers import pipeline, PretrainedConfig
+from transformers import pipeline, PretrainedConfig, AutoModelForCausalLM, GPT2Tokenizer
+import torch
 
+model_id = "RickMartel/GPT2_FT_By_NT_RAND_v7"
+device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+BOS_TOKEN='<|startoftext|>'
+EOS_TOKEN='<|endoftext|>'
+PAD_TOKEN='<|pad|>'
 
+b1="""
 pc = PretrainedConfig(
     max_new_tokens=100,
     num_beams=3,
@@ -11,10 +18,19 @@ pc = PretrainedConfig(
 
 pipe = pipeline(
     "text-generation",
-    model="RickMartel/GPT2_FT_By_NT_RAND_v7",
+    model=model_id,
     device="cpu",
     config=pc
 )
+"""
+model = AutoModelForCausalLM.from_pretrained(model_id)
+model = model.to( device )
+model.eval()
+tokenizer = GPT2Tokenizer.from_pretrained(model_id,
+    bos_token=BOS_TOKEN,
+    eos_token=EOS_TOKEN,
+    pad_token=PAD_TOKEN,
+    add_bos_token=False,)
 
 st.set_page_config(page_title="GPT2 4 Bible")
 
@@ -25,16 +41,33 @@ st.markdown(
     Model notes:
     - This is a fine-tuned Hugging Face distilgpt2 model.
     - The dataset used was the Christian New Testament.
-    - This Space uses a CPU only.
+    - This Space uses a CPU only. So, the app is a little slow.
     - This is a document completion model. Not a Q&A. Input prompts like, "Jesus said".
     """
 )
 
 txt = st.text_area('Enter prompt of a biblical nature. WARNING: Results may not be correct.')
 
+def get_model_input(_input: str):
+    prompt = "<|startoftext|>" + _input
+    generated = torch.tensor(tokenizer.encode(prompt)).unsqueeze(0)
+    generated = generated.to( device )
+    return generated
+
 if txt and len(txt.strip()) > 0:
-    txt = "<|startoftext|>" + txt
-    out = pipe(txt, num_return_sequences=1)[0]["generated_text"]
-    out = out.replace("<|startoftext|>", "")
+    #txt = "<|startoftext|>" + txt
+    #out = pipe(txt, num_return_sequences=1)[0]["generated_text"]
+    #out = out.replace("<|startoftext|>", "")
+    generated = get_model_input(txt)
+    sample_outputs = model.generate(
+        generated,
+        do_sample=True,
+        top_k=15,
+        max_length=150,
+        num_return_sequences=1,
+        pad_token_id=tokenizer.eos_token_id,
+    )
+    out = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
+
    with st.expander("Response", expanded=True):
        st.write(out)
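
For reference, a minimal standalone sketch of the new inference path this commit introduces, usable for smoke-testing outside Streamlit. It assumes the RickMartel/GPT2_FT_By_NT_RAND_v7 checkpoint is publicly downloadable; the explicit attention_mask and the torch.no_grad() guard are illustrative additions, not part of the commit.

```python
# Standalone sketch of the new generate() path in this commit.
# Assumptions: the checkpoint is downloadable, and the special tokens
# below match the ones used during fine-tuning (as set in app.py).
import torch
from transformers import AutoModelForCausalLM, GPT2Tokenizer

model_id = "RickMartel/GPT2_FT_By_NT_RAND_v7"
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

model = AutoModelForCausalLM.from_pretrained(model_id).to(device).eval()
tokenizer = GPT2Tokenizer.from_pretrained(
    model_id,
    bos_token="<|startoftext|>",
    eos_token="<|endoftext|>",
    pad_token="<|pad|>",
    add_bos_token=False,
)

# Prepend the BOS marker manually, as get_model_input() does in app.py.
inputs = tokenizer("<|startoftext|>" + "Jesus said", return_tensors="pt").to(device)

with torch.no_grad():  # not in the commit; avoids tracking gradients at inference
    sample_outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],  # not in the commit; silences the pad/mask warning
        do_sample=True,
        top_k=15,
        max_length=150,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )

# skip_special_tokens=True strips <|startoftext|> from the output,
# replacing the old out.replace("<|startoftext|>", "") cleanup.
print(tokenizer.decode(sample_outputs[0], skip_special_tokens=True))
```

Decoding with skip_special_tokens=True is what lets the commit drop both the manual string cleanup and the pipeline() wrapper, whose PretrainedConfig-based setup now survives only as the dead b1 string.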