from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from peft import PeftModel
import gradio as gr

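# Load the base InternLM 7B model, attach the fine-tuned LoRA adapter, and load
# the matching tokenizer (left padding, slow tokenizer, remote code enabled).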
model = AutoModelForCausalLM.from_pretrained("internlm/internlm-7b", trust_remote_code=True)
model = PeftModel.from_pretrained(model, "fadliaulawi/internlm-7b-finetuned")
tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-7b", padding_side="left", use_fast=False, trust_remote_code=True)

def generate_prompt(instruction, input, label):
    # Alpaca-LoRA style prompt template, translated to Indonesian
    # ("Instruksi" = Instruction, "Masukan" = Input, "Tanggapan" = Response).
    template = {
        "description": "Template used by Alpaca-LoRA.",
        "prompt_input": "Di bawah ini adalah instruksi yang menjelaskan tugas, dipasangkan dengan masukan yang memberikan konteks lebih lanjut. Tulis tanggapan yang melengkapi permintaan dengan tepat.\n\n### Instruksi:\n{instruction}\n\n### Masukan:\n{input}",
        "response_split": "### Tanggapan:"
    }

    # Fall back to an empty input so `res` is always defined.
    if input:
        res = template["prompt_input"].format(instruction=instruction, input=input)
    else:
        res = template["prompt_input"].format(instruction=instruction, input="")

    res = f"{res} \n\n### Tanggapan:\n"
    if label:
        res = f"{res}{label}"

    return res

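# Gradio callback: append the user's message to the chat history and clear the textbox.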
def user(message, history):
    return "", history + [[message, None]]

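# Build the full prompt for a data point and tokenize it, truncating to a 256-token cutoff.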
def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(
        data_point["instruction"],
        data_point["input"],
        data_point["output"],
    )
    cutoff_len = 256
    tokenizer.pad_token = tokenizer.eos_token
    result = tokenizer(
        full_prompt,
        truncation=True,
        max_length=cutoff_len,
        padding=True,
        return_tensors=None,
    )

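    # Append an EOS token when the prompt was not truncated and does not already end with one.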
    if (result["input_ids"][-1] != tokenizer.eos_token_id and len(result["input_ids"]) < cutoff_len):
        result["input_ids"].append(tokenizer.eos_token_id)
        result["attention_mask"].append(1)

    return result

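# Gradio callback: build the ChatDoctor prompt from the latest user message, run
# generation, and write the decoded answer back into the chat history.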
def bot(history, temperature, max_new_tokens, top_p, top_k):
    user_message = history[-1][0]
    data = {
        'instruction': "Jika Anda seorang dokter, silakan menjawab pertanyaan medis berdasarkan deskripsi pasien.",
        'input': user_message,
        'output': ''
    }
    
    new_user_input_ids = generate_and_tokenize_prompt(data)

    # wrap the tokenized prompt in a batch of size 1 for generate()
    bot_input_ids = torch.LongTensor([new_user_input_ids['input_ids']])

    # generate a response
    response = model.generate(
        input_ids=bot_input_ids,
        pad_token_id=tokenizer.eos_token_id,
        temperature=float(temperature),
        max_new_tokens=max_new_tokens,
        top_p=float(top_p),
        top_k=top_k,
        do_sample=True
    )

    # Decode the output and keep only the answer: the decoded text echoes the
    # prompt, so take the "### Tanggapan:" section and strip its header.
    response = tokenizer.batch_decode(response, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    sections = response.split("###")
    response = sections[3].split("Tanggapan:")[1].strip()
    history[-1][1] = response
    return history


with gr.Blocks() as demo:
  gr.Markdown(
                """# ChatDoctor - InternLM 7b 🩺
                
                A [ChatDoctor - InternLM 7b](https://huggingface.co/fadliaulawi/internlm-7b-finetuned) demo.
                Based on the [InternLM 7b](https://huggingface.co/internlm/internlm-7b) model, fine-tuned on an Indonesian translation of the [ChatDoctor](https://github.com/Kent0n-Li/ChatDoctor) dataset.
                
                """
            )
  
  chatbot = gr.Chatbot()
  msg = gr.Textbox()
  submit = gr.Button("Submit")
  clear = gr.Button("Clear")
  examples = gr.Examples(examples=["Dokter, aku mengalami kelelahan akhir-akhir ini.", "Dokter, aku merasa pusing, lemah dan sakit dada tajam akhir-akhir ini.",
                                   "Dokter, aku merasa sangat depresi akhir-akhir ini dan juga mengalami perubahan suhu tubuhku.",
                                   "Dokter, saya sudah beberapa minggu mengalami suara serak dan tidak kunjung membaik meski sudah minum obat. Apa masalahnya?"
                                   ], inputs=[msg])
  
  gr.Markdown(
                """## Adjust the additional inputs:"""
            )
  
  temperature = gr.Slider(0, 5, value=0.8, step=0.1, label='Temperature', info="Controls randomness; higher values increase diversity.")
  max_length = gr.Slider(0, 1024, value=50, step=1, label='Max New Tokens', info="The maximum number of new tokens to generate.")
  top_p = gr.Slider(0, 1, value=0.8, step=0.1, label='Top P', info="The cumulative probability cutoff for token selection. Lower values mean sampling from a smaller, more top-weighted nucleus.")
  top_k = gr.Slider(0, 50, value=10, step=1, label='Top K', info="Sample from the k most likely next tokens at each step. Lower k focuses on higher-probability tokens.")

  submit.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, temperature, max_length, top_p, top_k], chatbot
    )
  clear.click(lambda: None, None, chatbot, queue=False)

demo.queue(concurrency_count=100).launch()