Spaces:

Sugamdeol
/

Sug-gpt

Sleeping

App Files Files Community

Sugamdeol committed on Sep 13, 2024

Commit

1ed4c00

verified ·

1 Parent(s): 9d7d500

Create app.py

Browse files

Files changed (1) hide show

app.py +74 -0

app.py ADDED Viewed

	@@ -0,0 +1,74 @@

+from huggingface_hub import InferenceClient
+import gradio as gr
+# Set up the client for Mistral model inference
+client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
+# Function to format the conversation history
+def format_prompt(message, history):
+    prompt = "<s>"  # Begin with the start token
+    for user_prompt, bot_response in history:
+        # Append each turn of user-bot interaction to the prompt
+        prompt += f"[INST] {user_prompt} [/INST] {bot_response}</s> "
+    prompt += f"[INST] {message} [/INST]"  # Add the latest user message
+    return prompt
+# Text generation function with parameters
+def generate(
+    prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
+):
+    # Ensure temperature and top_p are correctly set
+    temperature = max(float(temperature), 1e-2)  # Prevent temperature going below 0.01
+    top_p = float(top_p)
+    # Keyword arguments for generation configuration
+    generate_kwargs = dict(
+        temperature=temperature,
+        max_new_tokens=max_new_tokens,
+        top_p=top_p,
+        repetition_penalty=repetition_penalty,
+        do_sample=True,
+        seed=42,  # Ensures results are reproducible
+    )
+    # Format the prompt with the user's message and history
+    formatted_prompt = format_prompt(prompt, history)
+    # Call the text generation endpoint
+    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    output = ""  # Initialize an empty string for the output
+    # Stream the response token by token
+    for response in stream:
+        output += response.token.text  # Append the generated tokens to output
+        yield output  # Yield partial output for real-time display
+    return output
+# Additional inputs (sliders) for controlling generation parameters
+additional_inputs=[
+    gr.Slider(
+        label="Temperature", value=0.9, minimum=0.0, maximum=1.0, step=0.05,
+        interactive=True, info="Higher values produce more diverse outputs"
+    ),
+    gr.Slider(
+        label="Max new tokens", value=256, minimum=0, maximum=1048, step=64,
+        interactive=True, info="The maximum numbers of new tokens"
+    ),
+    gr.Slider(
+        label="Top-p (nucleus sampling)", value=0.90, minimum=0.0, maximum=1.0, step=0.05,
+        interactive=True, info="Higher values sample more low-probability tokens"
+    ),
+    gr.Slider(
+        label="Repetition penalty", value=1.2, minimum=1.0, maximum=2.0, step=0.05,
+        interactive=True, info="Penalize repeated tokens"
+    )
+]
+# Gradio Chat Interface for the chatbot
+gr.ChatInterface(
+    fn=generate,  # The generate function is called when the user submits input
+    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
+    additional_inputs=additional_inputs,  # Sliders for adjusting generation parameters
+    title="Mistral 7B v0.3 ChatGPT Clone",  # Title for the interface
+    description="A ChatGPT clone using Mistral 7B model. Adjust parameters to fine-tune the generation."
+).launch(show_api=False)  # Launch the interface without showing the API key