Grandediw committed
Commit 5790f62 · verified · Parent(s): 0663176

Update app.py

Files changed (1): app.py (+43, -51)
app.py CHANGED
@@ -1,68 +1,60 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 
-client = InferenceClient("Grandediw/lora_model")
+# Replace this with the name of your merged model on Hugging Face
+MERGED_MODEL_REPO = "Grandediw/lora-model"
 
-def respond(message, history, system_message, max_tokens, temperature, top_p):
-    # Convert tuple-based history to messages if needed
-    messages = [{"role": "system", "content": system_message}]
+# Initialize the Inference Client with your merged model
+client = InferenceClient(MERGED_MODEL_REPO)
+
+# The guide uses a mini_chatbot function that accepts `message` and `history`.
+# We'll follow that pattern. We'll also introduce a system message for role context if needed.
+def mini_chatbot(message, history):
+    """
+    This function simulates a conversation with the model. It takes the latest user message
+    and the full conversation history, builds a prompt, and returns the model's response.
+    """
+
+    # We can set a system message to define the assistant's behavior.
+    system_message = "You are a helpful and friendly assistant."
+
+    # Build the conversation prompt:
+    # history is a list of (user_message, assistant_message) tuples.
+    # We'll format it similarly to how we did before:
+    prompt = system_message.strip() + "\n\n"
     for user_msg, assistant_msg in history:
         if user_msg:
-            messages.append({"role": "user", "content": user_msg})
+            prompt += f"User: {user_msg}\n"
         if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
-    messages.append({"role": "user", "content": message})
+            prompt += f"Assistant: {assistant_msg}\n"
+    prompt += f"User: {message}\nAssistant:"
 
+    # Set generation parameters (you can adjust as needed or add sliders)
+    max_tokens = 200
+    temperature = 0.7
+    top_p = 0.9
+
+    # Use text_generation to get the response
     response = ""
-    for partial in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
+    for partial in client.text_generation(
+        prompt=prompt,
+        max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
+        stream=True
     ):
-        token = partial.choices[0].delta.content
+        token = partial.token.text
         response += token
-        yield response
-
-with gr.Blocks(title="Enhanced LORA Chat Interface") as demo:
-    gr.Markdown(
-        """
-        # LORA Chat Assistant
-        Welcome! This is a demo of a LORA-based Chat Assistant.
-        Start by entering your prompt below.
-        """
-    )
 
-    with gr.Row():
-        # System message and other parameters
-        with gr.Column():
-            system_message = gr.Textbox(
-                value="You are a friendly Chatbot.",
-                label="Initial Behavior (System Message)",
-                lines=3,
-                placeholder="Describe how the assistant should behave..."
-            )
-            max_tokens = gr.Slider(
-                minimum=1, maximum=2048, value=512, step=1,
-                label="Max new tokens"
-            )
-            temperature = gr.Slider(
-                minimum=0.1, maximum=4.0, value=0.7, step=0.1,
-                label="Temperature"
-            )
-            top_p = gr.Slider(
-                minimum=0.1, maximum=1.0, value=0.95, step=0.05,
-                label="Top-p (nucleus sampling)"
-            )
+    return response
 
-    # Create the chat interface using tuple format
-    # Note: `type='tuple'` preserves the (user, assistant) tuple format.
-    chat = gr.ChatInterface(
-        fn=respond,
-        additional_inputs=[system_message, max_tokens, temperature, top_p],
-        type='tuples'
-    )
+# Create a Gradio ChatInterface similar to the guide:
+# Gradio will handle the `history` automatically, and pass (message, history) to mini_chatbot.
+demo_chatbot = gr.ChatInterface(
+    fn=mini_chatbot,
+    title="My Chatbot",
+    description="Enter text to start chatting with the merged LoRA model."
+)
 
 if __name__ == "__main__":
-    demo.launch()
+    demo_chatbot.launch()
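
One caveat about the new streaming loop: in recent huggingface_hub releases, InferenceClient.text_generation(..., stream=True) yields plain strings by default, and only yields structured chunks exposing a .token.text attribute when details=True is also passed, so the committed `token = partial.token.text` line can raise an AttributeError. A minimal sketch of a safer loop, assuming a recent huggingface_hub client and reusing the commit's generation parameters (the stream_reply helper is illustrative, not part of the commit):

from huggingface_hub import InferenceClient

client = InferenceClient("Grandediw/lora-model")

def stream_reply(prompt: str) -> str:
    # Illustrative helper, not part of the commit.
    response = ""
    # With details=True, each chunk is a TextGenerationStreamOutput
    # whose .token.text holds the newly generated text fragment.
    for chunk in client.text_generation(
        prompt=prompt,
        max_new_tokens=200,
        temperature=0.7,
        top_p=0.9,
        stream=True,
        details=True,
    ):
        response += chunk.token.text
    return response

Alternatively, dropping details=True and appending each chunk directly (response += chunk) works with the default string stream.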