inoid committed on
Commit db73536
1 Parent(s): 97392da

Add appy_chat_template process

Files changed (2):
  1. app.py +7 -5
  2. spanish_medica_llm.py +38 -2
app.py CHANGED
@@ -10,7 +10,7 @@ import sys
 import torch
 
 
-from spanish_medica_llm import run_training, run_training_process, run_finnetuning_process
+from spanish_medica_llm import run_training, run_training_process, run_finnetuning_process, generate_response
 
 import gradio as gr
 
@@ -31,13 +31,15 @@ def generate(prompt):
     image = pipe(prompt).images[0]
     return(image)
 
-def evaluate_model():
+def evaluate_model(input):
     #from diffusers import StableDiffusionPipeline
 
     #pipe = StableDiffusionPipeline.from_pretrained("./output_model", torch_dtype=torch.float16)
     #pipe = pipe.to("cuda")
     #image = pipe(prompt).images[0]
-    return(f"Evaluate Model {os.environ.get('HF_LLM_MODEL_ID')} from dataset {os.environ.get('HF_LLM_DATASET_ID')}")
+    output = generate_response(input)
+    return output
+
 
 
 
@@ -66,7 +68,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         inp = gr.Textbox(placeholder="What is your name?")
         out = gr.Textbox()
-
+
     btn_response = gr.Button("Generate Response")
     btn_response.click(fn=generate_model, inputs=inp, outputs=out)
     btn_train = gr.Button("Train Model")
@@ -74,7 +76,7 @@ with gr.Blocks() as demo:
     btn_finnetuning = gr.Button("Finnetuning Model")
     btn_finnetuning.click(fn=finnetuning_model, inputs=[], outputs=out)
     btn_evaluate = gr.Button("Evaluate Model")
-    btn_evaluate.click(fn=evaluate_model, inputs=[], outputs=out)
+    btn_evaluate.click(fn=evaluate_model, inputs=inp, outputs=out)
     btn_stop = gr.Button("Stop Model")
     btn_stop.click(fn=stop_model, inputs=[], outputs=out)
 
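
Note on the app.py changes: evaluate_model previously ignored the UI and returned a static status string; after this commit it forwards the textbox value to generate_response, which is why the button wiring changes from inputs=[] to inputs=inp. A minimal runnable sketch of the resulting wiring, with generate_response stubbed out (the stub body and the launch() call are illustrative assumptions, not part of the commit):

    import gradio as gr

    def generate_response(query):
        # Illustrative stand-in for spanish_medica_llm.generate_response.
        return f"echo: {query}"

    def evaluate_model(input):
        # Post-commit behavior: run inference on the textbox contents.
        return generate_response(input)

    with gr.Blocks() as demo:
        with gr.Row():
            inp = gr.Textbox(placeholder="What is your name?")
            out = gr.Textbox()
        btn_evaluate = gr.Button("Evaluate Model")
        # inputs=inp (not []) so the prompt actually reaches evaluate_model.
        btn_evaluate.click(fn=evaluate_model, inputs=inp, outputs=out)

    demo.launch()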
 
spanish_medica_llm.py CHANGED
@@ -19,7 +19,8 @@ from transformers import (
     BitsAndBytesConfig,
     DataCollatorForLanguageModeling,
     TrainingArguments,
-    Trainer
+    Trainer,
+    GenerationConfig
 )
 
 from accelerate import FullyShardedDataParallelPlugin, Accelerator
@@ -711,6 +712,41 @@ def run_finnetuning_process():
     print('Dataset in One ')
     print (train_dataset[5])
     configAndRunFineTuning(base_model,train_dataset, eval_dataset, tokenizer)
-def generate_response(query):
 
+def generate_response(query):
+    max_new_tokens = 256
+    temperature = 0.1
+    top_p = 0.75
+    top_k = 40
+    num_beams = 2
+
+    tokenizer = loadSpanishTokenizer()
+    model = loadBaseModel(HUB_MODEL_ID)
+
+    system = f"[INST]\nYou are a helpful coding assistant.[/INST]\n"
+    prompt = f"{system}\n{query}\n \n"
+    print(prompt)
+    inputs = tokenizer(prompt, return_tensors="pt")
+    input_ids = inputs["input_ids"].to("cuda")
+    attention_mask = inputs["attention_mask"].to("cuda")
+    generation_config = GenerationConfig(
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        num_beams=num_beams,
+    )
+    with torch.no_grad():
+        generation_output = model.generate(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            generation_config=generation_config,
+            return_dict_in_generate=True,
+            #output_scores=True,
+            max_new_tokens=max_new_tokens,
+            early_stopping=True
+        )
+    s = generation_output.sequences[0]
+    output = tokenizer.decode(s, skip_special_tokens=True)
+    return output
+    # return output.split("<|assistant|>")[1]
 
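
Note on generate_response: in transformers, temperature, top_p, and top_k only take effect when do_sample=True; with num_beams=2 and the default do_sample=False, generate runs deterministic beam search and those three settings are silently ignored. Hard-coding .to("cuda") will also crash on CPU-only hardware. The commit message's appy_chat_template presumably refers to tokenizer.apply_chat_template, so here is a minimal sketch that builds the [INST]-style prompt via the tokenizer's chat template instead of hand-formatting an f-string; the checkpoint, the do_sample flag, and the device fallback are illustrative assumptions, not part of the commit:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # stand-in for HUB_MODEL_ID
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

    messages = [
        {"role": "system", "content": "You are a helpful coding assistant."},
        {"role": "user", "content": "What is your name?"},
    ]
    # apply_chat_template renders the model's own prompt format, so the
    # [INST] markers never have to be hand-built.
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(device)

    generation_config = GenerationConfig(
        do_sample=True,       # required for the three knobs below to apply
        temperature=0.1,
        top_p=0.75,
        top_k=40,
        max_new_tokens=256,
    )
    with torch.no_grad():
        output_ids = model.generate(input_ids, generation_config=generation_config)
    # Decode only the newly generated tokens, skipping the echoed prompt.
    print(tokenizer.decode(output_ids[0, input_ids.shape[-1]:], skip_special_tokens=True))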