"""Gradio chat front end that forwards conversations to a remote chat endpoint.

The UI collects a message, the chat history, an optional system prompt and
sampling settings, then POSTs them as JSON to ``API_URL`` and displays the
reply returned by the server.
"""

import os
from typing import Iterator

import requests
import gradio as gr

MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
# NOTE(review): not referenced anywhere in this file; kept for compatibility.
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

# Chat endpoint of the model backend. Overridable through the environment so
# the tunnel URL can change without a code edit; defaults to the original URL.
API_URL = os.getenv(
    "MIQU_API_URL",
    "https://social-warthog-fleet.ngrok-free.app/api/chat",
)
# (connect, read) timeouts in seconds. The read timeout is generous because the
# request is non-streaming and the whole completion arrives in one response.
# NOTE(review): values are a guess - tune to the backend's real latency.
REQUEST_TIMEOUT = (10, 600)

DESCRIPTION = """\
# MIQU-70B

## Important Legal Notice Regarding the Use of Leaked Mistral Medium Model Weights

Please be advised that hosting and using the Mistral Medium model with weights that have been leaked online may constitute a violation of intellectual property rights and licensing agreements. It is essential to understand that such actions can lead to serious legal consequences, including but not limited to lawsuits, fines, and other penalties. Before proceeding with the use of these model weights, especially for academic purposes, it is strongly recommended to seek legal counsel to ensure compliance with all relevant laws and regulations. Additionally, consider reaching out to the rightful owners or licensors of the Mistral Medium model for permission or guidance on legal and ethical usage. By using these weights, you acknowledge and accept the potential legal risks involved and agree to be solely responsible for any resulting legal actions taken against you. It is advisable to explore alternative, legally compliant options for your academic research and projects.
"""

LICENSE = """Pirate license
"""


def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    system_prompt: str,
    max_new_tokens: int = 1024,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> Iterator[str]:
    """Send the conversation to the chat endpoint and yield the reply.

    Args:
        message: The new user message.
        chat_history: Earlier ``(user, assistant)`` turns, oldest first.
        system_prompt: Optional system message; skipped when empty.
        max_new_tokens: Upper bound on generated tokens (sent as ``num_predict``).
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        top_k: Top-k sampling cutoff.
        repetition_penalty: Repetition penalty (sent as ``repeat_penalty``).

    Yields:
        The assistant reply as one string (the request is non-streaming, so
        exactly one value is yielded).

    Raises:
        gr.Error: If the HTTP request fails, the response is not JSON, or the
            response carries no ``message.content`` field.
    """
    conversation = []
    if system_prompt:
        conversation.append({"role": "system", "content": system_prompt})
    for user, assistant in chat_history:
        conversation.append({"role": "user", "content": user})
        conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": message})

    payload = {
        "model": "miqu",
        "messages": conversation,
        "stream": False,
        # Forward the values chosen in the UI instead of fixed constants.
        # NOTE(review): option names assume an Ollama-compatible backend
        # (num_predict / top_k / repeat_penalty) - confirm against the server.
        "options": {
            "num_predict": int(max_new_tokens),
            "temperature": temperature,
            "top_p": top_p,
            "top_k": int(top_k),
            "repeat_penalty": repetition_penalty,
        },
    }

    try:
        response = requests.post(API_URL, json=payload, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        body = response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers JSON decoding failures on older requests versions.
        raise gr.Error(f"Model backend request failed: {exc}") from exc

    try:
        content = body["message"]["content"]
    except (KeyError, TypeError) as exc:
        detail = body.get("error") if isinstance(body, dict) else None
        raise gr.Error(
            f"Model backend returned an unexpected response: {detail or body!r}"
        ) from exc

    yield content


chat_interface = gr.ChatInterface(
    fn=generate,
    # Order must match generate()'s parameters after (message, chat_history).
    additional_inputs=[
        gr.Textbox(label="System prompt", lines=6),
        gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=MAX_MAX_NEW_TOKENS,
            step=1,
            value=DEFAULT_MAX_NEW_TOKENS,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=0.6,
        ),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.05,
            maximum=1.0,
            step=0.05,
            value=0.9,
        ),
        gr.Slider(
            label="Top-k",
            minimum=1,
            maximum=1000,
            step=1,
            value=50,
        ),
        gr.Slider(
            label="Repetition penalty",
            minimum=1.0,
            maximum=2.0,
            step=0.05,
            value=1.2,
        ),
    ],
    stop_btn=None,
    examples=[
        ["Hello there! How are you doing?"],
        ["Can you explain briefly to me what is the Python programming language?"],
        ["Explain the plot of Cinderella in a sentence."],
        ["How many hours does it take a man to eat a Helicopter?"],
        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
    ],
)

with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
    chat_interface.render()
    gr.Markdown(LICENSE)

if __name__ == "__main__":
    demo.queue(max_size=20).launch()