AiCoderv2 committed
Commit 819ac9a · verified · 1 Parent(s): da33207

Deploy Gradio app with multiple files

Files changed (5)
  1. app.py +31 -0
  2. config.py +3 -0
  3. models.py +47 -0
  4. requirements.txt +5 -0
  5. utils.py +6 -0
app.py ADDED
@@ -0,0 +1,31 @@
+import gradio as gr
+from models import load_model, generate_response
+from utils import format_history
+from config import MODEL_NAME, MAX_LENGTH, TEMPERATURE
+
+def chat_response(message, history):
+    # Format the running chat history for the model
+    formatted_history = format_history(history)
+    # Generate a response using the shared generation settings
+    response = generate_response(message, formatted_history, max_length=MAX_LENGTH, temperature=TEMPERATURE)
+    return response
+
+# The theme belongs on Blocks; a theme passed to a nested ChatInterface is ignored
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.HTML("""
+    <div style="text-align: center; padding: 10px;">
+        <h1>AI Chatbot for Chat and Code</h1>
+        <p>Powered by <a href="https://huggingface.co/microsoft/Phi-2">microsoft/Phi-2</a></p>
+        <p><a href="https://huggingface.co/spaces/akhaliq/anycoder">Built with anycoder</a></p>
+    </div>
+    """)
+
+    chatbot = gr.ChatInterface(
+        fn=chat_response,
+        title="Chat and Code Assistant",
+        description="Ask me anything about coding, chat, or general questions!",
+        examples=["Write a Python function to reverse a string", "Explain recursion", "Hello, how are you?"]
+    )
+
+if __name__ == "__main__":
+    demo.launch()
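
Note on the handler contract: with Gradio 4's default tuple format, ChatInterface calls chat_response(message, history) where history is a list of (user, assistant) pairs, which is exactly what utils.format_history expects. A minimal sanity check of the handler outside the UI (inputs are hypothetical; this will download and load Phi-2):

from app import chat_response

print(chat_response("Explain recursion", [("Hello", "Hi! How can I help?")]))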
config.py ADDED
@@ -0,0 +1,3 @@
+MODEL_NAME = "microsoft/Phi-2"
+MAX_LENGTH = 512
+TEMPERATURE = 0.7
models.py ADDED
@@ -0,0 +1,47 @@
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from config import MODEL_NAME
+import spaces
+
+model = None
+tokenizer = None
+
+@spaces.GPU
+def load_model():
+    global model, tokenizer
+    if model is None or tokenizer is None:
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_NAME,
+            torch_dtype=torch.float16,
+            device_map="auto"
+        )
+        model.eval()
+    return model, tokenizer
+
+@spaces.GPU
+def generate_response(message, history, max_length=512, temperature=0.7):
+    model, tokenizer = load_model()
+
+    # Build a turn-delimited prompt from the running conversation
+    if history:
+        input_text = history + f"\nUser: {message}\nAssistant:"
+    else:
+        input_text = f"User: {message}\nAssistant:"
+
+    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
+
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=max_length,  # budget new tokens only; max_length would count the prompt too
+            temperature=temperature,
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id
+        )
+
+    # Decode only the new tokens, then trim any hallucinated follow-on turn
+    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
+    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
+    response = response.split("\nUser:")[0].strip()
+    return response
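
For reference, generate_response assembles a plain turn-delimited prompt rather than a chat template. A sketch of what the model receives after one prior exchange (the strings are hypothetical):

history = "User: Hello\nAssistant: Hi! How can I help?"
message = "Explain recursion"
input_text = history + f"\nUser: {message}\nAssistant:"
# -> "User: Hello\nAssistant: Hi! How can I help?\nUser: Explain recursion\nAssistant:"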
requirements.txt ADDED
@@ -0,0 +1,5 @@
+gradio>=4.0.0
+transformers>=4.37.0  # Phi-2 (model_type "phi") needs transformers 4.37+
+torch>=2.0.0
+accelerate>=0.20.0  # required for device_map="auto" in models.py
+spaces>=0.15.0
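
A quick environment check once the pins above are installed (a sketch; prints versions and whether a GPU is visible):

import gradio, torch, transformers

print(gradio.__version__, transformers.__version__, torch.__version__)
print("CUDA available:", torch.cuda.is_available())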
utils.py ADDED
@@ -0,0 +1,6 @@
+def format_history(history):
+    """Format chat history (a list of (user, assistant) pairs) for model input."""
+    formatted = ""
+    for user_msg, assistant_msg in history:
+        formatted += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
+    return formatted.strip()
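
format_history flattens Gradio's pair-based history into the User:/Assistant: transcript that models.generate_response extends. A worked example (values hypothetical):

from utils import format_history

history = [("Hello", "Hi! How can I help?"), ("What is Python?", "A programming language.")]
print(format_history(history))
# User: Hello
# Assistant: Hi! How can I help?
# User: What is Python?
# Assistant: A programming language.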