georgesung committed on
Commit
8a6ebc0
1 Parent(s): d7886eb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import LlamaForCausalLM, LlamaTokenizer, pipeline
2
+ import torch
3
+
4
+ import gradio as gr
5
+
6
+ # LLM helper functions
7
def get_response_text(data):
    """Extract the assistant's reply from a text-generation pipeline result.

    *data* is the pipeline output: a list whose first element maps
    "generated_text" to the full prompt-plus-completion string. Only the
    portion after the final '### RESPONSE:' marker is returned (stripped);
    if the marker is absent, the whole generated text is returned unchanged.
    """
    marker = '### RESPONSE:'
    full_text = data[0]["generated_text"]
    marker_pos = full_text.rfind(marker)
    if marker_pos == -1:
        return full_text
    return full_text[marker_pos + len(marker):].strip()
15
+
16
def get_llm_response(prompt, pipe):
    """Run *prompt* through the text-generation *pipe* and return only the
    assistant's portion of the output (text after the last '### RESPONSE:').
    """
    return get_response_text(pipe(prompt))
20
+
21
# Load LLM: QLoRA-finetuned OpenLLaMA-7B, 8-bit quantized, placed on
# available devices automatically.
model_id = "georgesung/open_llama_7b_qlora_uncensored"
tokenizer = LlamaTokenizer.from_pretrained(model_id)
model = LlamaForCausalLM.from_pretrained(model_id, device_map="auto", load_in_8bit=True)

# Llama tokenizer missing pad token
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
# Adding the [PAD] special token grows the vocabulary, so the model's
# embedding matrix must be resized to match; otherwise the new pad id
# would index past the end of the embeddings.
model.resize_token_embeddings(len(tokenizer))

# Generation settings: bounded output length with mild sampling and a
# repetition penalty to curb looping completions.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15
)
38
+
39
with gr.Blocks() as demo:
    # Chat UI: message history display, free-text input, and a reset button.
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def hist_to_prompt(history):
        """Serialize chat history ([[human, bot], ...]) into the
        '### HUMAN:' / '### RESPONSE:' prompt format.

        The last turn has no bot text yet, so the prompt ends with an
        open '### RESPONSE:' section that cues the model to reply.
        """
        prompt = ""
        for human_text, bot_text in history:
            prompt += f"### HUMAN:\n{human_text}\n\n### RESPONSE:\n"
            if bot_text:
                prompt += f"{bot_text}\n\n"
        return prompt

    def user(user_message, history):
        """Record the user's turn (bot reply pending) and clear the textbox."""
        return "", history + [[user_message, None]]

    def bot(history):
        """Generate the assistant's reply for the newest turn and fill it in."""
        hist_text = hist_to_prompt(history)
        print(hist_text)  # log the exact prompt sent to the model
        bot_message = get_llm_response(hist_text, pipe) + tokenizer.eos_token
        history[-1][1] = bot_message  # add bot message to overall history
        return history

    # Submitting the textbox first records the user turn (immediately,
    # unqueued), then runs generation to produce the bot turn; Clear
    # empties the chat display.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
demo.launch()
79
+