gorkemgoknar committed on
Commit
f2fca0a
1 Parent(s): 60290fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -22
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import random
3
-
4
  from transformers import AutoConfig
5
  from transformers import GPT2Tokenizer, GPT2LMHeadModel
6
 
@@ -21,38 +21,38 @@ SPECIAL_TOKENS = ["<bos>", "<eos>", "<speaker1>", "<speaker2>", "<pad>"]
21
 
22
  #See document for experiment https://www.linkedin.com/pulse/ai-goes-job-interview-g%C3%B6rkem-g%C3%B6knar/
23
 
24
- def get_chat_response(name, input_txt = "Hello , what is your name?"):
25
- #I trained my dataset with "My name is " as prefix, this will let me dump personalities
26
- #and also gives character sense if of his/her/its name!
 
 
 
 
27
  personality = "My name is " + name
28
 
29
- if input_txt[:-1] != ".":
30
- #add a dot after sentence to make model understand it more clearly
31
- input_txt = input_txt + "."
32
-
33
- ##can respond well to history as well but for this quick demo not implemented
34
- ##see metayazar.com/chatbot for a min 2 history
35
 
36
- ##this is a multi-speaker model, currently no history, so ending with "<speaker2>" to get response. depends on who starts conversation it can be speaker1
37
- ##if there is a history depends on who started conversation it should end with <speaker1>
38
- #historical implementation not implemented in this demo
39
- bot_input_ids = tokenizer.encode(tokenizer.bos_token + personality + "<speaker1>" + input_txt + tokenizer.eos_token , return_tensors='pt')
40
 
41
  #optimum response and speed
42
- #50 token max length, temperature = 1.3 makes it creative
43
  chat_history_ids = model.generate(
44
- bot_input_ids,min_length =1, max_length=50,
45
  pad_token_id=tokenizer.eos_token_id,
46
  no_repeat_ngram_size=3,
47
  do_sample=True,
48
- top_k=50,
49
- top_p=0.9,
50
- temperature = temperature
51
  )
52
-
53
- out_str = tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)
54
  return out_str
55
-
56
  ##you can use anyone from below
57
  '''
58
  | Macleod | Moran | Brenda | Ramirez | Peter Parker | Quentin Beck | Andy
 
1
  import gradio as gr
2
  import random
3
+ import torch
4
  from transformers import AutoConfig
5
  from transformers import GPT2Tokenizer, GPT2LMHeadModel
6
 
 
21
 
22
  #See document for experiment https://www.linkedin.com/pulse/ai-goes-job-interview-g%C3%B6rkem-g%C3%B6knar/
23
 
24
+
25
+
26
def get_chat_response(name, history=None, input_txt="Hello , what is your name?"):
    """Generate a persona-conditioned chatbot reply with the module-level GPT-2 model.

    The persona is injected as "My name is <name>" (the model was trained with
    this prefix), the running conversation is encoded with alternating
    <speaker1>/<speaker2> tokens, and the model's continuation after the prompt
    is decoded and returned.

    Parameters
    ----------
    name : str
        Character name used to build the persona sentence.
    history : list[str] | None
        Prior utterances of the conversation. Mutated in place: ``input_txt``
        is appended before encoding. Defaults to a fresh empty list per call.
        (A ``history=[]`` default would be shared across calls — a classic
        mutable-default bug that leaks conversation state between users.)
    input_txt : str
        The new user utterance.

    Returns
    -------
    str
        The decoded model reply, with special tokens stripped.
    """
    # NOTE(review): `chain` was referenced without a visible import in this
    # file's diff; import locally so the function works regardless.
    from itertools import chain

    # Avoid the shared mutable default while keeping the old call signature.
    history = [] if history is None else history
    history.append(input_txt)
    history_e = [tokenizer.encode(e) for e in history]

    personality = "My name is " + name

    # SPECIAL_TOKENS = ["<bos>", "<eos>", "<speaker1>", "<speaker2>", "<pad>"];
    # drop the trailing "<pad>" and look up the ids of the first four.
    bos, eos, speaker1, speaker2 = tokenizer.convert_tokens_to_ids(SPECIAL_TOKENS[:-1])

    # Prompt layout: [bos + persona] followed by the history, each utterance
    # prefixed with its speaker token. The parity of (len - i) alternates
    # speakers so the most recent utterance always ends on <speaker2>,
    # prompting the model to answer as <speaker1>'s interlocutor.
    sequence = [[bos] + tokenizer.encode(personality)] + history_e
    sequence = [sequence[0]] + [
        [speaker2 if (len(sequence) - i) % 2 else speaker1] + utt
        for i, utt in enumerate(sequence[1:])
    ]
    sequence = list(chain(*sequence))

    # Remember the prompt length so we can slice off only the generated reply.
    sequence_len = len(sequence)

    # NOTE(review): max_length=50 counts prompt + reply tokens, so long
    # histories leave little room for the answer — confirm this is intended.
    chat_history_ids = model.generate(
        torch.tensor(sequence).unsqueeze(0),
        max_length=50,
        pad_token_id=tokenizer.eos_token_id,
        no_repeat_ngram_size=3,
        do_sample=True,
        top_k=60,
        top_p=0.8,
        temperature=1.3,
    )

    # Decode only the tokens generated beyond the prompt.
    out_str = tokenizer.decode(chat_history_ids[0][sequence_len:], skip_special_tokens=True)
    return out_str
55
+
56
  ##you can use anyone from below
57
  '''
58
  | Macleod | Moran | Brenda | Ramirez | Peter Parker | Quentin Beck | Andy