import gradio as gr
from huggingface_hub import InferenceClient

# Replace this with the repo id of your merged model on the Hugging Face Hub.
MERGED_MODEL_REPO = "Grandediw/lora-model_finetuned"

# Initialize the Inference Client with your merged model
client = InferenceClient(MERGED_MODEL_REPO)
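# Note: if the model repo is private or gated, InferenceClient also accepts a
# token argument, e.g. InferenceClient(MERGED_MODEL_REPO, token="hf_...")
# (the token value here is a placeholder).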

# The guide uses a mini_chatbot function that accepts `message` and `history`;
# we follow that pattern and add a system message to set the assistant's role.
def mini_chatbot(message, history):
    """
    This function simulates a conversation with the model. It takes the latest user message
    and the full conversation history, builds a prompt, and returns the model's response.
    """

    # We can set a system message to define the assistant's behavior.
    system_message = "You are a helpful and friendly assistant."

    # Build the conversation prompt. Gradio's default (tuple-style) history is
    # a list of (user_message, assistant_message) pairs, which we flatten into
    # a "User:/Assistant:" transcript:
    prompt = system_message.strip() + "\n\n"
    for user_msg, assistant_msg in history:
        if user_msg:
            prompt += f"User: {user_msg}\n"
        if assistant_msg:
            prompt += f"Assistant: {assistant_msg}\n"
    prompt += f"User: {message}\nAssistant:"

    # Set generation parameters (you can adjust as needed or add sliders)
    max_tokens = 200
    temperature = 0.7
    top_p = 0.9
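    # These could also be exposed as UI controls via ChatInterface's
    # additional_inputs parameter (e.g. gr.Slider components), in which case
    # mini_chatbot would receive them as extra arguments.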

    # Stream the response with text_generation. With stream=True and the
    # default details=False, the client yields plain text chunks (not token
    # objects), which we concatenate into the full reply.
    response = ""
    for token in client.text_generation(
        prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        response += token

    return response
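
# Note: gr.ChatInterface also accepts generator functions; yielding the partial
# `response` inside the loop above, instead of returning once at the end, would
# stream the reply into the UI as it is generated.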

# Create a Gradio ChatInterface as in the guide: Gradio tracks `history`
# automatically and passes (message, history) to mini_chatbot on each turn.
demo_chatbot = gr.ChatInterface(
    fn=mini_chatbot,
    title="My Chatbot",
    description="Enter text to start chatting with the merged LoRA model."
)
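
# Note: launch() serves the app locally; launch(share=True) would additionally
# create a temporary public URL (a standard Gradio option).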

if __name__ == "__main__":
    demo_chatbot.launch()