DarkAngel committed · Commit 4f30987 · verified · 1 Parent(s): cceb174

Update app.py

Files changed (1):
  app.py +10 -15
app.py CHANGED
@@ -1,17 +1,12 @@
 import gradio as gr
-from unsloth import FastLanguageModel
-from transformers import TextStreamer
-
-# Load the fine-tuned model and tokenizer
-# model, tokenizer = FastLanguageModel.from_pretrained("lora_model")
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 from peft import PeftModel
-from transformers import AutoModelForCausalLM, AutoTokenizer
 
+# Load the fine-tuned model and tokenizer
 base_model = AutoModelForCausalLM.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
 model = PeftModel.from_pretrained(base_model, "DarkAngel/gitallama")
 tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
 
-tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
 def generate_response(shloka, transliteration):
     """
     Generates the response using the fine-tuned LLaMA model.
@@ -23,15 +18,15 @@ def generate_response(shloka, transliteration):
         }
     ]
 
+    # Ensure the model uses CPU instead of GPU
     inputs = tokenizer.apply_chat_template(
         input_message,
         tokenize=True,
-        add_generation_prompt=True,
+        add_generation_prompt=True,  # Enable for generation
         return_tensors="pt"
-    ).to("cpu")
-
-    model = model.to("cpu")
+    ).to("cpu")  # Use CPU
 
+    # Generate response
     text_streamer = TextStreamer(tokenizer, skip_prompt=True)
     generated_tokens = model.generate(
         input_ids=inputs,
@@ -44,7 +39,7 @@ model = model.to("cpu")
 
     raw_response = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
 
-
+    # Format the response
     try:
         sections = raw_response.split("Hindi Meaning:")
         english_meaning = sections[0].strip()
@@ -59,10 +54,12 @@ model = model.to("cpu")
             f"Word Meaning:\n{word_meaning}"
         )
     except IndexError:
+        # In case the response format is not as expected
         formatted_response = raw_response
 
     return formatted_response
 
+# Gradio interface
 interface = gr.Interface(
     fn=generate_response,
     inputs=[
@@ -74,8 +71,6 @@ interface = gr.Interface(
     description="Input a Shloka with its transliteration, and this model will provide meanings in English and Hindi along with word meanings."
 )
 
+# Launch the interface
 if __name__ == "__main__":
     interface.launch()
-
-
-
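
Reviewer note: a minimal smoke test for the updated loading path, runnable outside the Gradio UI. This is a sketch, not part of the commit; it reuses only calls that appear in the diff, while device_map="auto" and the placeholder prompt are assumptions added here. Note that the bnb-4bit base checkpoint generally expects a CUDA device, so the app's .to("cpu") placement may still need a non-quantized fallback on CPU-only hardware.

# Smoke test for the updated loading path (sketch, not part of the commit).
# Assumes transformers, peft, bitsandbytes, and accelerate are installed.
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    device_map="auto",  # assumption: let accelerate place the 4-bit weights
)
model = PeftModel.from_pretrained(base, "DarkAngel/gitallama")
tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")

# Hypothetical placeholder input; any shloka/transliteration pair works.
messages = [{"role": "user",
             "content": "Shloka: <shloka>\nTransliteration: <transliteration>"}]
input_ids = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# Stream tokens while generating, mirroring the app's TextStreamer usage.
streamer = TextStreamer(tokenizer, skip_prompt=True)
output = model.generate(input_ids=input_ids, streamer=streamer, max_new_tokens=256)
print(tokenizer.decode(output[0], skip_special_tokens=True))

Once merged, python app.py serves the app via interface.launch(), which binds to Gradio's default http://127.0.0.1:7860 unless overridden.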