dennyaw committed on
Commit
b0a6dfd
·
1 Parent(s): b1a7635

create app

Browse files
Files changed (1) hide show
  1. app.py +137 -0
app.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer
2
+ import torch
3
+ from peft import PeftModel
4
+ import gradio as gr
5
+
6
# Load the InternLM-7B base model, wrap it with the fine-tuned PEFT adapter,
# and load the matching (slow, left-padding) tokenizer.
#
# NOTE(review): the internlm/internlm-7b repository ships its own modeling and
# tokenizer code, so the Auto* loaders are expected to need
# trust_remote_code=True (see the model card) -- confirm against the pinned
# transformers version. This executes code from that Hub repository.
model = AutoModelForCausalLM.from_pretrained(
    "internlm/internlm-7b",
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(model, "fadliaulawi/internlm-7b-finetuned")
tokenizer = AutoTokenizer.from_pretrained(
    "internlm/internlm-7b",
    padding_side="left",
    use_fast=False,
    trust_remote_code=True,
)
9
+
10
def generate_prompt(
    instruction, input, label
):
    """Build the Indonesian Alpaca-style prompt for one example.

    Args:
        instruction: Task description placed under ``### Instruksi:``.
        input: Optional context placed under ``### Masukan:``. When it is
            empty or ``None`` the section is omitted instead of failing.
        label: Optional response text appended after the ``### Tanggapan:``
            header; pass an empty value to leave the response open.

    Returns:
        The assembled prompt string, ending with the response header
        (followed by ``label`` when one is given).
    """
    template = {
        "description": "Template used by Alpaca-LoRA.",
        "prompt_input": "Di bawah ini adalah instruksi yang menjelaskan tugas, dipasangkan dengan masukan yang memberikan konteks lebih lanjut. Tulis tanggapan yang melengkapi permintaan dengan tepat.\n\n### Instruksi:\n{instruction}\n\n### Masukan:\n{input}",
        "prompt_no_input": "Di bawah ini adalah instruksi yang menjelaskan tugas. Tulis tanggapan yang melengkapi permintaan dengan tepat.\n\n### Instruksi:\n{instruction}",
        "response_split": "### Tanggapan:",
    }

    # Previously `res` was only bound when `input` was truthy, so an empty
    # input raised UnboundLocalError a few lines later.
    if input:
        res = template["prompt_input"].format(instruction=instruction, input=input)
    else:
        res = template["prompt_no_input"].format(instruction=instruction)

    # The single space before the blank line is kept so the with-input prompt
    # stays byte-identical to the previous output.
    res = res + " \n\n" + template["response_split"] + "\n"
    if label:
        res = f"{res}{label}"

    return res
42
+
43
def user(message, history):
    """Queue the user's message as a new, not-yet-answered chat turn.

    Args:
        message: Text the user submitted.
        history: Existing chat history as a list of ``[user, bot]`` pairs.
            ``None`` (e.g. after the chat was cleared) is treated as empty.

    Returns:
        A tuple of an empty string and a new history list with
        ``[message, None]`` appended. The list passed in is not mutated.
    """
    return "", (history or []) + [[message, None]]
45
+
46
def generate_and_tokenize_prompt(data_point):
    """Render a data point as a prompt and tokenize it.

    Args:
        data_point: Mapping with ``"instruction"``, ``"input"`` and
            ``"output"`` entries, which are passed to ``generate_prompt``.

    Returns:
        The tokenizer's result for the prompt, truncated to 256 tokens. An
        EOS token (with attention-mask value 1) is appended when the sequence
        does not already end with EOS and is shorter than the cutoff.
    """
    cutoff_len = 256
    prompt_text = generate_prompt(
        data_point["instruction"],
        data_point["input"],
        data_point["output"],
    )

    # The tokenizer is configured here to pad with its EOS token.
    tokenizer.pad_token = tokenizer.eos_token
    result = tokenizer(
        prompt_text,
        truncation=True,
        max_length=cutoff_len,
        padding=True,
        return_tensors=None,
    )

    token_ids = result["input_ids"]
    eos_id = tokenizer.eos_token_id

    # Nothing to add when the sequence already ends with EOS or is full.
    if token_ids[-1] == eos_id or len(token_ids) >= cutoff_len:
        return result

    token_ids.append(eos_id)
    result["attention_mask"].append(1)
    return result
70
+
71
def bot(history, temperature, max_new_tokens, top_p, top_k):
    """Generate the model's reply for the latest user turn.

    Args:
        history: Chat history as a list of ``[user, bot]`` pairs; the last
            pair holds the pending user message and is filled in place.
        temperature: Sampling temperature. Values <= 0 switch to greedy
            decoding, because sampling requires a strictly positive value.
        max_new_tokens: Maximum number of tokens to generate (at least 1 is
            always requested).
        top_p: Nucleus-sampling probability cutoff (used when sampling).
        top_k: Number of highest-probability tokens considered (used when
            sampling).

    Returns:
        The same ``history`` list with the last pair's reply set.
    """
    user_message = history[-1][0]
    data = {
        'instruction': "Jika Anda seorang dokter, silakan menjawab pertanyaan medis berdasarkan deskripsi pasien.",
        'input': user_message,
        'output': ''
    }

    encoded_prompt = generate_and_tokenize_prompt(data)

    # Batch of one sequence holding the tokenized prompt.
    bot_input_ids = torch.LongTensor([encoded_prompt['input_ids']])
    prompt_length = bot_input_ids.shape[-1]

    generation_kwargs = {
        "input_ids": bot_input_ids,
        "pad_token_id": tokenizer.eos_token_id,
        # UI sliders may deliver floats or 0; generation needs a positive int.
        "max_new_tokens": max(1, int(max_new_tokens)),
    }
    temperature = float(temperature)
    if temperature > 0:
        generation_kwargs.update(
            do_sample=True,
            temperature=temperature,
            top_p=float(top_p),
            top_k=int(top_k),
        )
    else:
        # A temperature of 0 is rejected when sampling; decode greedily.
        generation_kwargs["do_sample"] = False

    output_ids = model.generate(**generation_kwargs)

    # Decode only the newly generated tokens. Splitting the full decoded text
    # on "###" by position broke when the prompt was truncated or the user's
    # message itself contained "###".
    completion = tokenizer.decode(
        output_ids[0][prompt_length:],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    # Keep the text up to the first "###", i.e. drop any further template
    # sections the model starts to write after its answer.
    history[-1][1] = completion.split("###")[0].strip()
    return history
102
+
103
+
104
# Gradio UI: a chat window, a message box with example prompts, and sliders
# for the generation parameters that are passed to `bot`.
with gr.Blocks() as demo:
    gr.Markdown(
        """# ChatDoctor - InternLM 7b 🩺

A [ChatDoctor - InternLM 7b](https://huggingface.co/fadliaulawi/internlm-7b-finetuned) demo.
From the [InternLM 7b](https://huggingface.co/internlm/internlm-7b) model and finetuned on the Indonesian translation of [ChatDoctor](https://github.com/Kent0n-Li/ChatDoctor) dataset.

"""
    )

    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    submit = gr.Button("Submit")
    clear = gr.Button("Clear")
    examples = gr.Examples(
        examples=[
            "Dokter, aku mengalami kelelahan akhir-akhir ini.",
            "Dokter, aku merasa pusing, lemah dan sakit dada tajam akhir-akhir ini.",
            "Dokter, aku merasa sangat depresi akhir-akhir ini dan juga mengalami perubahan suhu tubuhku.",
            "Dokter, saya sudah beberapa minggu mengalami suara serak dan tidak kunjung membaik meski sudah minum obat. Apa masalahnya?",
        ],
        inputs=[msg],
    )

    gr.Markdown(
        """## Adjust the additional inputs:"""
    )

    # Temperature starts at 0.1: sampling rejects a temperature of 0.
    temperature = gr.Slider(0.1, 5, value=0.8, step=0.1, label='Temperature', info="Controls randomness, higher values increase diversity.")
    # At least one new token must be requested for a reply to be produced.
    max_length = gr.Slider(1, 1024, value=50, step=1, label='Max Length', info="The maximum numbers of output's tokens.")
    top_p = gr.Slider(0, 1, value=0.8, step=0.1, label='Top P', info="The cumulative probability cutoff for token selection. Lower values mean sampling from a smaller, more top-weighted nucleus.")
    top_k = gr.Slider(0, 50, value=10, step=1, label='Top K', info="Sample from the k most likely next tokens at each step. Lower k focuses on higher probability tokens.")

    # First record the user's turn (and clear the textbox), then generate.
    submit.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, temperature, max_length, top_p, top_k], chatbot
    )
    # Returning None resets the chat window.
    clear.click(lambda: None, None, chatbot, queue=False)

# NOTE(review): concurrency_count=100 allows many generations at once on a
# single 7B model -- confirm this is intended for the deployment hardware.
demo.queue(concurrency_count=100).launch()