Grandediw committed
Commit 5790f62 · verified · Parent(s): 0663176

Update app.py

Files changed (1): app.py (+43, -51)
app.py CHANGED
@@ -1,68 +1,60 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 
-client = InferenceClient("Grandediw/lora_model")
+# Replace this with the name of your merged model on Hugging Face
+MERGED_MODEL_REPO = "Grandediw/lora-model"
 
-def respond(message, history, system_message, max_tokens, temperature, top_p):
-    # Convert tuple-based history to messages if needed
-    messages = [{"role": "system", "content": system_message}]
+# Initialize the Inference Client with your merged model
+client = InferenceClient(MERGED_MODEL_REPO)
+
+# The guide uses a mini_chatbot function that accepts `message` and `history`.
+# We'll follow that pattern. We'll also introduce a system message for role context if needed.
+def mini_chatbot(message, history):
+    """
+    This function simulates a conversation with the model. It takes the latest user message
+    and the full conversation history, builds a prompt, and returns the model's response.
+    """
+
+    # We can set a system message to define the assistant's behavior.
+    system_message = "You are a helpful and friendly assistant."
+
+    # Build the conversation prompt:
+    # history is a list of (user_message, assistant_message) tuples.
+    # We'll format it similarly to how we did before:
+    prompt = system_message.strip() + "\n\n"
     for user_msg, assistant_msg in history:
         if user_msg:
-            messages.append({"role": "user", "content": user_msg})
+            prompt += f"User: {user_msg}\n"
         if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
-    messages.append({"role": "user", "content": message})
+            prompt += f"Assistant: {assistant_msg}\n"
+    prompt += f"User: {message}\nAssistant:"
 
+    # Set generation parameters (you can adjust as needed or add sliders)
+    max_tokens = 200
+    temperature = 0.7
+    top_p = 0.9
+
+    # Use text_generation to get the response
     response = ""
-    for partial in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
+    for partial in client.text_generation(
+        prompt=prompt,
+        max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
+        stream=True
     ):
-        token = partial.choices[0].delta.content
+        token = partial.token.text
         response += token
-        yield response
-
-with gr.Blocks(title="Enhanced LORA Chat Interface") as demo:
-    gr.Markdown(
-        """
-        # LORA Chat Assistant
-        Welcome! This is a demo of a LORA-based Chat Assistant.
-        Start by entering your prompt below.
-        """
-    )
 
-    with gr.Row():
-        # System message and other parameters
-        with gr.Column():
-            system_message = gr.Textbox(
-                value="You are a friendly Chatbot.",
-                label="Initial Behavior (System Message)",
-                lines=3,
-                placeholder="Describe how the assistant should behave..."
-            )
-            max_tokens = gr.Slider(
-                minimum=1, maximum=2048, value=512, step=1,
-                label="Max new tokens"
-            )
-            temperature = gr.Slider(
-                minimum=0.1, maximum=4.0, value=0.7, step=0.1,
-                label="Temperature"
-            )
-            top_p = gr.Slider(
-                minimum=0.1, maximum=1.0, value=0.95, step=0.05,
-                label="Top-p (nucleus sampling)"
-            )
+    return response
 
-    # Create the chat interface using tuple format
-    # Note: `type='tuple'` preserves the (user, assistant) tuple format.
-    chat = gr.ChatInterface(
-        fn=respond,
-        additional_inputs=[system_message, max_tokens, temperature, top_p],
-        type='tuples'
-    )
+# Create a Gradio ChatInterface similar to the guide:
+# Gradio will handle the `history` automatically, and pass (message, history) to mini_chatbot.
+demo_chatbot = gr.ChatInterface(
+    fn=mini_chatbot,
+    title="My Chatbot",
+    description="Enter text to start chatting with the merged LoRA model."
+)
 
 if __name__ == "__main__":
-    demo.launch()
+    demo_chatbot.launch()
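
One caveat about the new streaming loop: in recent huggingface_hub releases, InferenceClient.text_generation(..., stream=True) yields plain strings by default, and only yields structured chunks exposing a .token.text attribute when details=True is also passed, so the committed `token = partial.token.text` line can raise an AttributeError. A minimal sketch of a safer loop, assuming a recent huggingface_hub client and reusing the commit's generation parameters (the stream_reply helper is illustrative, not part of the commit):

from huggingface_hub import InferenceClient

client = InferenceClient("Grandediw/lora-model")

def stream_reply(prompt: str) -> str:
    # Illustrative helper, not part of the commit.
    response = ""
    # With details=True, each chunk is a TextGenerationStreamOutput
    # whose .token.text holds the newly generated text fragment.
    for chunk in client.text_generation(
        prompt=prompt,
        max_new_tokens=200,
        temperature=0.7,
        top_p=0.9,
        stream=True,
        details=True,
    ):
        response += chunk.token.text
    return response

Alternatively, dropping details=True and appending each chunk directly (response += chunk) works with the default string stream.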