Add appy_chat_template process
- app.py +7 -5
- spanish_medica_llm.py +38 -2
app.py
CHANGED
@@ -10,7 +10,7 @@ import sys
 import torch
 
 
-from spanish_medica_llm import run_training, run_training_process, run_finnetuning_process
+from spanish_medica_llm import run_training, run_training_process, run_finnetuning_process, generate_response
 
 import gradio as gr
 
@@ -31,13 +31,15 @@ def generate(prompt):
     image = pipe(prompt).images[0]
     return(image)
 
-def evaluate_model():
+def evaluate_model(input):
     #from diffusers import StableDiffusionPipeline
 
     #pipe = StableDiffusionPipeline.from_pretrained("./output_model", torch_dtype=torch.float16)
     #pipe = pipe.to("cuda")
     #image = pipe(prompt).images[0]
-
+    output = generate_response(input)
+    return output
+
 
 
 
@@ -66,7 +68,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         inp = gr.Textbox(placeholder="What is your name?")
         out = gr.Textbox()
-
+
     btn_response = gr.Button("Generate Response")
     btn_response.click(fn=generate_model, inputs=inp, outputs=out)
     btn_train = gr.Button("Train Model")
@@ -74,7 +76,7 @@ with gr.Blocks() as demo:
     btn_finnetuning = gr.Button("Finnetuning Model")
     btn_finnetuning.click(fn=finnetuning_model, inputs=[], outputs=out)
     btn_evaluate = gr.Button("Evaluate Model")
-    btn_evaluate.click(fn=evaluate_model, inputs=[], outputs=out)
+    btn_evaluate.click(fn=evaluate_model, inputs=inp, outputs=out)
     btn_stop = gr.Button("Stop Model")
     btn_stop.click(fn=stop_model, inputs=[], outputs=out)
 
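Note on the wiring above: the new evaluate_model(input) shadows Python's built-in input, and it receives the Textbox value because the click handler now passes inputs=inp. A minimal self-contained sketch of this click pattern, using only standard Gradio API (the echo body is a hypothetical stand-in for the real generate_response call):

import gradio as gr

def evaluate_model(query):
    # Hypothetical stand-in for generate_response(query); any str -> str callable works.
    return f"echo: {query}"

with gr.Blocks() as demo:
    inp = gr.Textbox(placeholder="What is your name?")
    out = gr.Textbox()
    btn_evaluate = gr.Button("Evaluate Model")
    # The Textbox value becomes the handler's argument; the return value fills `out`.
    btn_evaluate.click(fn=evaluate_model, inputs=inp, outputs=out)

demo.launch()
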
spanish_medica_llm.py
CHANGED
@@ -19,7 +19,8 @@ from transformers import (
     BitsAndBytesConfig,
     DataCollatorForLanguageModeling,
     TrainingArguments,
-    Trainer
+    Trainer,
+    GenerationConfig
 )
 
 from accelerate import FullyShardedDataParallelPlugin, Accelerator
@@ -711,6 +712,41 @@ def run_finnetuning_process():
     print('Dataset in One ')
     print (train_dataset[5])
     configAndRunFineTuning(base_model,train_dataset, eval_dataset, tokenizer)
-def generate_response(query):
 
+def generate_response(query):
+    max_new_tokens=256
+    temperature=0.1
+    top_p=0.75
+    top_k=40
+    num_beams=2
+
+    tokenizer = loadSpanishTokenizer()
+    model = loadBaseModel(HUB_MODEL_ID)
+
+    system = f"[INST]\nYou are a helpful coding assistant.[/INST]\n"
+    prompt = f"{system}\n{query}\n \n"
+    print(prompt)
+    inputs = tokenizer(prompt, return_tensors="pt")
+    input_ids = inputs["input_ids"].to("cuda")
+    attention_mask = inputs["attention_mask"].to("cuda")
+    generation_config = GenerationConfig(
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        num_beams=num_beams,
+    )
+    with torch.no_grad():
+        generation_output = model.generate(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            generation_config=generation_config,
+            return_dict_in_generate=True,
+            #output_scores=True,
+            max_new_tokens=max_new_tokens,
+            early_stopping=True
+        )
+    s = generation_output.sequences[0]
+    output = tokenizer.decode(s, skip_special_tokens=True)
+    return output
+    # return output.split("<|assistant|>")[1]
 
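Despite the commit title ("appy_chat_template", i.e. apply_chat_template), the new generate_response still assembles the prompt by hand with [INST] tags. A hedged sketch of what the prompt-building step could look like with the tokenizer's chat template instead (apply_chat_template is the standard transformers tokenizer method; the model id below is a placeholder, and this assumes the tokenizer actually ships a chat template):

from transformers import AutoTokenizer

# Placeholder id; in this Space it would come from loadSpanishTokenizer() / HUB_MODEL_ID.
tokenizer = AutoTokenizer.from_pretrained("some-org/some-instruct-model")

def build_prompt(query: str) -> str:
    # The template renders the role markers (e.g. [INST] ... [/INST]) itself,
    # so no hand-written tags are needed in the message text.
    messages = [{"role": "user", "content": f"You are a helpful coding assistant.\n{query}"}]
    return tokenizer.apply_chat_template(
        messages,
        tokenize=False,              # return the formatted prompt string
        add_generation_prompt=True,  # append the assistant-turn prefix
    )

Separately, generate_response reloads the tokenizer and base model on every button click; loading them once at module level would avoid repeating that cost per request.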