duckling committed on
Commit
a76d259
1 Parent(s): 0577e35

Upload 3 files

Browse files

add app & requirements

Files changed (3) hide show
  1. README.md +14 -1
  2. app.py +52 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -10,4 +10,17 @@ pinned: false
10
  license: cc-by-4.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  license: cc-by-4.0
11
  ---
12
 
13
+
14
+
15
+ # Demo chatbot of Open-Assistant
16
+ The model was trained by [Rallio67](https://huggingface.co/Rallio67/chip_1.4B_instruct_alpha).
17
+
18
+
19
+ **note:**
20
+
21
+ The model used in this demo is still being tested. The demo only shows how the model works and is not intended for production use.
22
+
23
+ # Reference
24
+ [1]. https://github.com/LAION-AI/Open-Assistant
25
+
26
+
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ import torch
5
+
6
# Checkpoint identifier handed to `from_pretrained` for both the tokenizer and
# the model.
# NOTE(review): the README links Rallio67/chip_1.4B_instruct_alpha; presumably a
# local copy of that checkpoint lives under this bare name — confirm.
model_name = "chip_1.4B_instruct_alpha"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Maps each listed module name to 0. It is referenced only by the alternative
# 8-bit loading call kept below for reference (as its `device_map`).
chip_map = dict.fromkeys(
    (
        "gpt_neox.embed_in",
        "gpt_neox.layers",
        "gpt_neox.final_layer_norm",
        "embed_out",
    ),
    0,
)

# Alternative 8-bit loading path (disabled):
#   model = AutoModelForCausalLM.from_pretrained(
#       name, device_map=chip_map, torch_dtype=torch.float16, load_in_8bit=True)
# Active path: load the checkpoint, then cast its weights to half precision.
model = AutoModelForCausalLM.from_pretrained(model_name)
model = model.half()
17
+
18
+
19
def predict(input, history=None, MAX_NEW_TOKENS=500):
    """Generate the next chatbot reply and return the updated conversation.

    Args:
        input: The user's new message (plain text).
        history: Token ids of the conversation so far, in the nested-list form
            produced by ``generated_ids.tolist()`` on a previous call. ``None``
            or an empty list starts a new conversation.
        MAX_NEW_TOKENS: Upper bound on the number of tokens generated for this
            reply (the prompt and earlier turns do not count against it).

    Returns:
        A ``(response_pairs, history)`` tuple: ``response_pairs`` is a list of
        ``(user_segment, bot_segment)`` string tuples for the chatbot widget,
        and ``history`` is the full generated token-id sequence as nested
        lists, to be passed back in on the next call.
    """
    prompt = "User: " + input + "\n\nChip: "
    if history:
        # Later turns must be separated from the previous reply by the same
        # blank line the transcript is split on below; otherwise the new
        # "User:" text is glued onto the preceding bot reply and the
        # user/bot pairing comes out wrong.
        prompt = "\n\n" + prompt
    new_user_input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    if history:
        past_ids = torch.tensor(history, dtype=torch.long)
        bot_input_ids = torch.cat([past_ids, new_user_input_ids], dim=-1)
    else:
        # First turn: nothing to prepend, so avoid concatenating an empty
        # tensor of a different rank.
        bot_input_ids = new_user_input_ids

    # Keep the inputs on whatever device the model lives on (no-op on CPU).
    bot_input_ids = bot_input_ids.to(model.device)

    # `max_new_tokens` bounds only the reply. The previous `max_length` bounded
    # prompt + reply, so long conversations left no room to generate anything.
    # NOTE(review): `penalty_alpha` is presumably only honoured for contrastive
    # search (do_sample=False) — confirm against the installed transformers.
    generated_ids = model.generate(
        bot_input_ids,
        max_new_tokens=MAX_NEW_TOKENS,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        top_p=0.95,
        temperature=0.5,
        penalty_alpha=0.6,
        top_k=4,
        repetition_penalty=1.03,
        num_return_sequences=1,
    )

    response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    history = generated_ids.tolist()

    # Convert the transcript to (user, bot) pairs: segments alternate
    # user/bot, and a trailing unpaired segment is dropped.
    segments = response.split("\n\n")
    response_pairs = list(zip(segments[0::2], segments[1::2]))
    return response_pairs, history
38
+
39
+
40
# Build the chat UI: a chatbot pane, a per-session state slot holding the
# token-id history, and a single-line textbox that submits on Enter.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    state = gr.State([])

    with gr.Row():
        txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter")
        txt = txt.style(container=False)

    # On submit, `predict` receives the textbox value and the stored history,
    # and its two return values refresh the chatbot pane and the history.
    txt.submit(fn=predict, inputs=[txt, state], outputs=[chatbot, state])


if __name__ == "__main__":
    # Alternative launch for a fixed host/port (disabled):
    #   demo.launch(debug=True, server_name="0.0.0.0", server_port=9991)
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transformers
2
+ accelerate
3
+ bitsandbytes
4
+ requests