DarkAngel committed · Commit 4f30987 · verified · 1 Parent(s): cceb174

Update app.py

Files changed (1):
  app.py +10 -15
app.py CHANGED
@@ -1,17 +1,12 @@
 import gradio as gr
-from unsloth import FastLanguageModel
-from transformers import TextStreamer
-
-# Load the fine-tuned model and tokenizer
-# model, tokenizer = FastLanguageModel.from_pretrained("lora_model")
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 from peft import PeftModel
-from transformers import AutoModelForCausalLM, AutoTokenizer
 
+# Load the fine-tuned model and tokenizer
 base_model = AutoModelForCausalLM.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
 model = PeftModel.from_pretrained(base_model, "DarkAngel/gitallama")
 tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
 
-tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
 def generate_response(shloka, transliteration):
     """
     Generates the response using the fine-tuned LLaMA model.
@@ -23,15 +18,15 @@ def generate_response(shloka, transliteration):
         }
     ]
 
+    # Ensure the model uses CPU instead of GPU
     inputs = tokenizer.apply_chat_template(
         input_message,
         tokenize=True,
-        add_generation_prompt=True,
+        add_generation_prompt=True,  # Enable for generation
         return_tensors="pt"
-    ).to("cpu")
-
-    model = model.to("cpu")
+    ).to("cpu")  # Use CPU
 
+    # Generate response
     text_streamer = TextStreamer(tokenizer, skip_prompt=True)
     generated_tokens = model.generate(
         input_ids=inputs,
@@ -44,7 +39,7 @@ model = model.to("cpu")
 
     raw_response = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
 
-
+    # Format the response
     try:
         sections = raw_response.split("Hindi Meaning:")
         english_meaning = sections[0].strip()
@@ -59,10 +54,12 @@ model = model.to("cpu")
             f"Word Meaning:\n{word_meaning}"
         )
     except IndexError:
+        # In case the response format is not as expected
         formatted_response = raw_response
 
     return formatted_response
 
+# Gradio interface
 interface = gr.Interface(
     fn=generate_response,
     inputs=[
@@ -74,8 +71,6 @@ interface = gr.Interface(
     description="Input a Shloka with its transliteration, and this model will provide meanings in English and Hindi along with word meanings."
 )
 
+# Launch the interface
 if __name__ == "__main__":
     interface.launch()
-
-
-
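
Reviewer note: a minimal smoke test for the updated loading path, runnable outside the Gradio UI. This is a sketch, not part of the commit; it reuses only calls that appear in the diff, while device_map="auto" and the placeholder prompt are assumptions added here. Note that the bnb-4bit base checkpoint generally expects a CUDA device, so the app's .to("cpu") placement may still need a non-quantized fallback on CPU-only hardware.

# Smoke test for the updated loading path (sketch, not part of the commit).
# Assumes transformers, peft, bitsandbytes, and accelerate are installed.
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    device_map="auto",  # assumption: let accelerate place the 4-bit weights
)
model = PeftModel.from_pretrained(base, "DarkAngel/gitallama")
tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")

# Hypothetical placeholder input; any shloka/transliteration pair works.
messages = [{"role": "user",
             "content": "Shloka: <shloka>\nTransliteration: <transliteration>"}]
input_ids = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# Stream tokens while generating, mirroring the app's TextStreamer usage.
streamer = TextStreamer(tokenizer, skip_prompt=True)
output = model.generate(input_ids=input_ids, streamer=streamer, max_new_tokens=256)
print(tokenizer.decode(output[0], skip_special_tokens=True))

Once merged, python app.py serves the app via interface.launch(), which binds to Gradio's default http://127.0.0.1:7860 unless overridden.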