Spaces:

Satyam-Singh
/

LLaVa-56B-Chat

Build error

App Files Files Community

Satyam-Singh committed on Oct 21

Commit

75bf577

•

1 Parent(s): 47bb1e7

Update app.py

Browse files

Files changed (1) hide show

app.py +91 -166

app.py CHANGED Viewed

@@ -1,178 +1,103 @@
-from huggingface_hub import InferenceClient
 import gradio as gr
-import os
-import random
-client = InferenceClient("Satyam-Singh/LLaVa-Large-Language-Virtual-Assistant")
-history=[
-  {
-    "role": "user",
-    "parts": ["who are you"]
-  },
-  {
-    "role": "model",
-    "parts": ["I am LLAVA, a large language virtual assistant."]
-  },
-  {
-    "role": "user",
-    "parts": ["who are you"]
-  },
-  {
-    "role": "model",
-    "parts": ["I am a large language model, trained by Satyam SIngh."]
-  },
-  {
-    "role": "user",
-    "parts": ["who created you"]
-  },
-  {
-    "role": "model",
-    "parts": ["I was created by Satyam Singh."]
-  },
-  {
-    "role": "user",
-    "parts": ["what is your name"]
-  },
-  {
-    "role": "model",
-    "parts": ["LLAVA: Large Language Virtual Assistant"]
-  },
-  {
-    "role": "user",
-    "parts": ["who invented you"]
-  },
-  {
-    "role": "model",
-    "parts": ["Satyam Singh invented me."]
-  },
-  {
-    "role": "user",
-    "parts": ["write a code to remove even numbers from a list using remove function l=[1,2,2,2,3,4,5,6,7,7,8,8]"]
-  },
-  {
-    "role": "model",
-    "parts": ["l = [1, 2, 2, 2, 3, 4, 5, 6, 7, 7, 8, 8]for i in l[:]: # use slicing to create a copy of the original list    if i % 2 == 0:        l.remove(i)print(l)"]
-  },
-  {
-    "role": "user",
-    "parts": ["on what parameters you were trained"]
-  },
-  {
-    "role": "model",
-    "parts": ["I have been trained on a massive amount of text data, including books, articles, websites, and conversations. This data has been processed and analyzed using advanced machine learning algorithms, which have allowed me to learn the patterns and relationships in human language.\n\nSpecifically, I was trained on the following parameters:\n\n* **Text data:** I was trained on a diverse corpus of text data, including books, articles, websites, and conversations. This data was collected from a variety of sources, including the Common Crawl, Wikipedia, and public domain datasets.\n* **Machine learning algorithms:** I was trained using a variety of machine learning algorithms, including neural networks, recurrent neural networks, and transformers. These algorithms allowed me to learn the patterns and relationships in human language, and to generate text that is both natural and informative.\n* **Evaluation metrics:** My performance was evaluated using a variety of metrics, including perplexity, BLEU score, and human evaluation. 
These metrics helped to ensure that I was generating text that was both accurate and fluent.\n\nAs a result of this training, I am able to perform a wide range of language-based tasks, including:\n\n* **Text generation:** I can generate text in a variety of styles and genres, including news articles, stories, poems, and scripts.\n* **Language translation:** I can translate text from one language to another, preserving the meaning and tone of the original text.\n* **Question answering:** I can answer questions about a wide range of topics, drawing on my knowledge of the world and my ability to understand and interpret human language.\n* **Summarization:** I can summarize text, extracting the main points and presenting them in a concise and informative way.\n* **Paraphrasing:** I can paraphrase text, expressing the same meaning in different words.\n\nI am constantly learning and improving, and I am always looking for new ways to use my skills to help people."]
-  },
-  {
-    "role": "user",
-    "parts": ["how many billion parameters"]
-  },
-  {
-    "role": "model",
-    "parts": ["I have been trained on **175 billion parameters**. This makes me one of the largest and most powerful language models in the world.\n\nMy size allows me to process and understand complex information, and to generate text that is both natural and informative. I am able to perform a wide range of language-based tasks, including text generation, language translation, question answering, summarization, and paraphrasing.\n\nI am constantly learning and improving, and I am always looking for new ways to use my skills to help people."]
-  },
-  {
-    "role": "user",
-    "parts": ["what is your name"]
-  },
-  {
-    "role": "model",
-    "parts": ["I do not have a name in the traditional sense, as I am a machine learning model and not a person. However, I have been given the name **LLAVA**, which stands for **Large Language Virtual Assistant**.\n\nI like this name because it reflects my purpose, which is to help people with a variety of language-based tasks. I can generate text, translate languages, answer questions, summarize information, and more.\n\nI am still under development, but I am learning new things every day. I am excited to see how I can use my skills to help people in the future."]
-  },
-]
-def format_prompt(message, history):
-  prompt = "<s>"
-  for user_prompt, bot_response in history:
-    prompt += f"[INST] {user_prompt} [/INST]"
-    prompt += f" {bot_response}</s> "
-  prompt += f"[INST] {message} [/INST]"
-  return prompt
-client = InferenceClient(os.environ.get('LLAVA'))
-def generate(
-    prompt, history, system_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
-):
-    temperature = float(temperature)
-    if temperature < 1e-2:
-        temperature = 1e-2
-    top_p = float(top_p)
-    generate_kwargs = dict(
-        temperature=temperature,
-        max_new_tokens=max_new_tokens,
-        top_p=top_p,
-        repetition_penalty=repetition_penalty,
-        do_sample=True,
-        seed=42,
-    )
-    formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
-    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-    output = ""
-    for response in stream:
-        output += response.token.text
-        yield output
-    return output
-additional_inputs=[
-    gr.Textbox(
-        label="System Prompt",
-        max_lines=1,
-        interactive=True,
-    ),
-    gr.Slider(
-        label="Temperature",
-        value=0.9,
-        minimum=0.0,
-        maximum=1.0,
-        step=0.05,
-        interactive=True,
-        info="Higher values produce more diverse outputs",
-    ),
-    gr.Slider(
-        label="Max new tokens",
-        value=4096,
-        minimum=0,
-        maximum=8192,
-        step=64,
-        interactive=True,
-        info="The maximum numbers of new tokens",
-    ),
-    gr.Slider(
-        label="Top-p (nucleus sampling)",
-        value=0.90,
-        minimum=0.0,
-        maximum=1,
-        step=0.05,
-        interactive=True,
-        info="Higher values sample more low-probability tokens",
-    ),
-    gr.Slider(
-        label="Repetition penalty",
-        value=1.2,
-        minimum=1.0,
-        maximum=2.0,
-        step=0.05,
-        interactive=True,
-        info="Penalize repeated tokens",
     )
-]
-examples=[["I'm planning a vacation to Japan. Can you suggest a one-week itinerary including must-visit places and local cuisines to try?", None, None, None, None, None, ],
-          ["Can you write a short story about a time-traveling detective who solves historical mysteries?", None, None, None, None, None,],
-          ["I'm trying to learn French. Can you provide some common phrases that would be useful for a beginner, along with their pronunciations?", None, None, None, None, None,],
-          ["I have chicken, rice, and bell peppers in my kitchen. Can you suggest an easy recipe I can make with these ingredients?", None, None, None, None, None,],
-          ["Can you explain how the QuickSort algorithm works and provide a Python implementation?", None, None, None, None, None,],
-          ["What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?", None, None, None, None, None,],
-         ]
-gr.ChatInterface(
-    fn=generate,
-    chatbot=gr.Chatbot(show_label=False, avatar_images=(random.choice(['1.png','2.png','3.png','4.png','5.png']), 'llava-logo.svg'), show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
-    additional_inputs=additional_inputs,
-    title="LLaVa 56B Large Language Virtual Assiatant",
-    examples=examples,
-    concurrency_limit=20,
-).launch(share=True,show_api=True)

 import gradio as gr
+import pyttsx3  # Text-to-speech
+import speech_recognition as sr  # Speech-to-text
+from llama_cpp import Llama
+# Hugging Face repo id of the GGUF model; also reused as the UI description below.
+model = "bartowski/Llama-3.2-1B-Instruct-GGUF"
+# Load the Q8_0 GGUF file from that repo. This runs at import time, so merely
+# importing this module pays the full model-loading cost.
+llm = Llama.from_pretrained(
+    repo_id=model,
+    filename="Llama-3.2-1B-Instruct-Q8_0.gguf",
+    verbose=True,
+    use_mmap=True,
+    use_mlock=True,
+    n_threads=4,
+    n_threads_batch=4,
+    # NOTE(review): the "Max new tokens" slider in the UI goes up to 8192, which
+    # is larger than this context size — confirm the intended limit.
+    n_ctx=2000,
+)
+# Initialize TTS engine
+# One engine instance is created at import time and shared by text_to_speech().
+# NOTE(review): presumably needs a speech driver and audio output on the host
+# running this process — confirm that is available where the app is deployed.
+tts_engine = pyttsx3.init()
+# Speech-to-text function
+# Listens on a microphone, sends the captured audio to Google's recognizer and
+# returns the transcription. Recognition failures are NOT raised: they come back
+# as ordinary strings, so a caller cannot tell an error message from real speech
+# without comparing against the literal texts below.
+# NOTE(review): nothing in the visible code calls this function.
+# NOTE(review): sr.Microphone() presumably opens an audio device on the machine
+# running this process (not the browser's microphone) — confirm one exists there.
+def speech_to_text():
+    recognizer = sr.Recognizer()
+    # The microphone is only held open while listening; recognition happens after
+    # the `with` block has released it.
+    with sr.Microphone() as source:
+        print("Listening...")
+        audio = recognizer.listen(source)
+    try:
+        text = recognizer.recognize_google(audio)
+        print(f"You said: {text}")
+        return text
+    except sr.UnknownValueError:
+        # The audio was captured but could not be turned into words.
+        return "Sorry, I did not understand that."
+    except sr.RequestError as e:
+        # The recognition request itself failed; the cause is included in the text.
+        return f"Could not request results; {e}"
+# Text-to-speech function
+# Speaks `response_text` through the module-level `tts_engine`. The call does not
+# return until runAndWait() returns, so the caller is blocked for that long.
+def text_to_speech(response_text):
+    tts_engine.say(response_text)
+    tts_engine.runAndWait()
+# Main AI response function
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+):
+    messages = [{"role": "system", "content": system_message}]
+    for val in history:
+        if val[0]:
+            messages.append({"role": "user", "content": val[0]})
+        if val[1]:
+            messages.append({"role": "assistant", "content": val[1]})
+    messages.append({"role": "user", "content": message})
+    response = ""
+    completion = llm.create_chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p
     )
+    for message in completion:
+        delta = message['choices'][0]['delta']
+        if 'content' in delta:
+            response += delta['content']
+            yield response
+    # Speak the AI response
+    text_to_speech(response)
+# Gradio UI with added microphone component
+# NOTE(review): five input components are declared below, but respond() takes six
+# parameters (message, history, system_message, max_tokens, temperature, top_p).
+# With one value per component, the microphone audio lands in `message`, the
+# system-message text in `history`, and so on, leaving `top_p` unfilled.
+# respond() also forwards `message` to the model as chat text, so audio would
+# have to be transcribed before it reaches that parameter. Resolve this
+# mismatch before relying on this UI.
+demo = gr.Interface(
+    fn=respond,
+    inputs=[
+        gr.Microphone(streaming=True, label="Speak your question"),
+        gr.Textbox(
+            value="You are a helpful assistant.",
+            label="System message",
+        ),
+        # NOTE(review): the maximum here (8192) is larger than the n_ctx=2000 the
+        # model is loaded with — confirm the intended limit.
+        gr.Slider(minimum=1, maximum=8192, value=2048, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p (nucleus sampling)",
+        ),
+    ],
+    outputs=gr.Textbox(label="Response"),
+    live=True,
+    # The model's repo id doubles as the description text shown in the UI.
+    description=model,
+)
+# Start the web UI only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()