Luciferalive committed
Commit 01e0062
1 Parent(s): 27597d3

Update app.py

Files changed (1): app.py +7 -3
app.py CHANGED
@@ -1,9 +1,12 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
+import accelerate
 
 # Load the model and tokenizer
 model_name = "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0"
-model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=False, device_map="auto")
+accelerator = accelerate.Accelerator()
+model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=False, device_map="auto", offload_folder="/tmp")
+model = accelerator.prepare(model)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 def generate_prompt(instruction, user_input):
@@ -26,8 +29,9 @@ def get_model_response(user_input, instruction="Identify and summarize the core
     """
     input_text = generate_prompt(instruction, user_input)
     inputs = tokenizer([input_text], return_tensors="pt")
-    outputs = model.generate(**inputs, max_new_tokens=300, use_cache=True)
-    response = tokenizer.batch_decode(outputs)[0]
+    inputs = inputs.to(accelerator.device)  # move tokenized inputs onto the accelerator's device
+    outputs = model.generate(**inputs, max_new_tokens=300, use_cache=True)
+    response = tokenizer.batch_decode(accelerator.gather(outputs))[0]
     return response.split("### Response:")[-1].strip()
 
 # Gradio interface
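
For reference, a minimal standalone sketch of the pattern this commit applies: load the model through Accelerate with automatic device placement and disk offload, then generate a response. The prompt string below is illustrative only; the real app builds it with generate_prompt(), and the model name is the one used in the Space.

# Minimal sketch of the pattern this commit applies (not the full app.py).
import accelerate
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0"

accelerator = accelerate.Accelerator()
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",      # let Accelerate place layers on GPU/CPU automatically
    offload_folder="/tmp",  # spill weights to disk when memory runs short
)
model = accelerator.prepare(model)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Illustrative prompt; the Space builds this with generate_prompt().
prompt = "### Instruction:\nSummarize the text.\n\n### Input:\nHello!\n\n### Response:\n"
inputs = tokenizer([prompt], return_tensors="pt").to(accelerator.device)
outputs = model.generate(**inputs, max_new_tokens=300, use_cache=True)
print(tokenizer.batch_decode(outputs)[0].split("### Response:")[-1].strip())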