AvaPersona committed (verified)
Commit 8bb427b · 1 Parent(s): 6805787

Update app.py

Files changed (1)
  1. app.py +31 -15
app.py CHANGED
@@ -1,27 +1,43 @@
 
  from transformers import LlamaForCausalLM, LlamaTokenizer
  import gradio as gr
- import torch

  # Load the model and tokenizer
- model_name = "meta-llama/Llama-3.1-8B"  # Replace with the desired LLaMA model
- tokenizer = LlamaTokenizer.from_pretrained(model_name)
- model = LlamaForCausalLM.from_pretrained(model_name, device_map="auto", load_in_8bit=True)

- # Define the response generation function
- def generate_response(prompt, max_length=100):
-     inputs = tokenizer(prompt, return_tensors="pt").to("cuda")  # Use CUDA if available
-     outputs = model.generate(inputs['input_ids'], max_length=max_length, temperature=0.7)
      response = tokenizer.decode(outputs[0], skip_special_tokens=True)
      return response

- # Create the Gradio interface
- interface = gr.Interface(
      fn=generate_response,
-     inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
-     outputs="text",
-     title="LLaMA Chatbot",
-     description="A chatbot powered by LLaMA. Enter a prompt and get a response!",
  )

  # Launch the app
- interface.launch()
 
+ import torch
  from transformers import LlamaForCausalLM, LlamaTokenizer
  import gradio as gr

  # Load the model and tokenizer
+ MODEL_NAME = "meta-llama/Llama-3.1-8B"  # Update this if using a custom LLaMA model
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+ print("Loading model...")
+ tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME)
+ model = LlamaForCausalLM.from_pretrained(
+     MODEL_NAME,
+     torch_dtype=torch.float16,  # Use float16 for better performance
+     device_map="auto"  # Automatically place weights on the available GPU
+ )

+ # Define a function for generating responses
+ def generate_response(prompt):
+     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(DEVICE)
+     with torch.no_grad():
+         outputs = model.generate(
+             input_ids=inputs["input_ids"],
+             attention_mask=inputs["attention_mask"],
+             max_new_tokens=256,  # Cap on newly generated tokens, independent of prompt length
+             temperature=0.7,  # Adjust creativity level
+             top_p=0.95,  # Top-p (nucleus) sampling
+             do_sample=True  # Sampling must be enabled for temperature/top_p to apply
+         )
      response = tokenizer.decode(outputs[0], skip_special_tokens=True)
      return response

+ # Gradio UI
+ iface = gr.Interface(
      fn=generate_response,
+     inputs=gr.Textbox(lines=3, placeholder="Enter your prompt here..."),
+     outputs=gr.Textbox(label="LLaMA Response"),
+     title="LLaMA 3.1 8B Chatbot",
+     description="An interactive demo of the LLaMA 3.1 8B model using Hugging Face Spaces."
  )
 
  # Launch the app
+ if __name__ == "__main__":
+     iface.launch()