Spaces:

Satyam-Singh
/

LLaVa-56B-Chat

Runtime error

File size: 8,180 Bytes

from huggingface_hub import InferenceClient
import gradio as gr
import os
import random
# Initial inference client pointing at the hard-coded model repo.
# NOTE: `client` is assigned again further down this file (from the LLAVA
# environment variable) before any request is issued, so that later binding
# is the one `generate` actually uses.
client = InferenceClient(
    model="Satyam-Singh/LLaVa-Large-Language-Virtual-Assistant",
)

# Seed conversation in {"role", "parts"} form (alternating "user" / "model").
# NOTE: the functions in this file take their own `history` parameter, which
# shadows this module-level list, so nothing visible here reads it directly.
history = [
  {
    "role": "user",
    "parts": ["who are you"]
  },
  {
    "role": "model",
    "parts": ["I am LLAVA, a large language virtual assistant."]
  },
  {
    "role": "user",
    "parts": ["who are you"]
  },
  {
    "role": "model",
    "parts": ["I am a large language model, trained by Satyam SIngh."]
  },
  {
    "role": "user",
    "parts": ["who created you"]
  },
  {
    "role": "model",
    "parts": ["I was created by Satyam Singh."]
  },
  {
    "role": "user",
    "parts": ["what is your name"]
  },
  {
    "role": "model",
    "parts": ["LLAVA: Large Language Virtual Assistant"]
  },
  {
    "role": "user",
    "parts": ["who invented you"]
  },
  {
    "role": "model",
    "parts": ["Satyam Singh invented me."]
  },
  {
    "role": "user",
    "parts": ["write a code to remove even numbers from a list using remove function l=[1,2,2,2,3,4,5,6,7,7,8,8]"]
  },
  {
    "role": "model",
    "parts": ["l = [1, 2, 2, 2, 3, 4, 5, 6, 7, 7, 8, 8]for i in l[:]: # use slicing to create a copy of the original list    if i % 2 == 0:        l.remove(i)print(l)"]
  },
  {
    "role": "user",
    "parts": ["on what parameters you were trained"]
  },
  {
    "role": "model",
    # A quoted string cannot contain a raw line break, so this long answer is
    # written as two adjacent literals that Python joins at compile time.
    "parts": [
      "I have been trained on a massive amount of text data, including books, articles, websites, and conversations. This data has been processed and analyzed using advanced machine learning algorithms, which have allowed me to learn the patterns and relationships in human language.\n\nSpecifically, I was trained on the following parameters:\n\n* **Text data:** I was trained on a diverse corpus of text data, including books, articles, websites, and conversations. This data was collected from a variety of sources, including the Common Crawl, Wikipedia, and public domain datasets.\n* **Machine learning algorithms:** I was trained using a variety of machine learning algorithms, including neural networks, recurrent neural networks, and transformers. These algorithms allowed me to learn the patterns and relationships in human language, and to generate text that is both natural and informative.\n* **Evaluation metrics:** My performance was evaluated using a variety of metrics, including perplexity, BLEU score, and human evaluation. "
      "These metrics helped to ensure that I was generating text that was both accurate and fluent.\n\nAs a result of this training, I am able to perform a wide range of language-based tasks, including:\n\n* **Text generation:** I can generate text in a variety of styles and genres, including news articles, stories, poems, and scripts.\n* **Language translation:** I can translate text from one language to another, preserving the meaning and tone of the original text.\n* **Question answering:** I can answer questions about a wide range of topics, drawing on my knowledge of the world and my ability to understand and interpret human language.\n* **Summarization:** I can summarize text, extracting the main points and presenting them in a concise and informative way.\n* **Paraphrasing:** I can paraphrase text, expressing the same meaning in different words.\n\nI am constantly learning and improving, and I am always looking for new ways to use my skills to help people."
    ]
  },
  {
    "role": "user",
    "parts": ["how many billion parameters"]
  },
  {
    "role": "model",
    "parts": ["I have been trained on **175 billion parameters**. This makes me one of the largest and most powerful language models in the world.\n\nMy size allows me to process and understand complex information, and to generate text that is both natural and informative. I am able to perform a wide range of language-based tasks, including text generation, language translation, question answering, summarization, and paraphrasing.\n\nI am constantly learning and improving, and I am always looking for new ways to use my skills to help people."]
  },
  {
    "role": "user",
    "parts": ["what is your name"]
  },
  {
    "role": "model",
    "parts": ["I do not have a name in the traditional sense, as I am a machine learning model and not a person. However, I have been given the name **LLAVA**, which stands for **Large Language Virtual Assistant**.\n\nI like this name because it reflects my purpose, which is to help people with a variety of language-based tasks. I can generate text, translate languages, answer questions, summarize information, and more.\n\nI am still under development, but I am learning new things every day. I am excited to see how I can use my skills to help people in the future."]
  },
]

def format_prompt(message, history):
  """Render the past turns plus the new message as one instruction prompt.

  Each (user, bot) pair in ``history`` becomes
  ``[INST] user [/INST] bot</s> ``; the result starts with ``<s>`` and ends
  with the new ``message`` wrapped in ``[INST] ... [/INST]``.
  """
  past_turns = [
    f"[INST] {user_prompt} [/INST] {bot_response}</s> "
    for user_prompt, bot_response in history
  ]
  return "<s>" + "".join(past_turns) + f"[INST] {message} [/INST]"
    
# Client used for generation; the target model/endpoint is read from the
# LLAVA environment variable (None is passed through when it is unset).
client = InferenceClient(model=os.environ.get("LLAVA"))

def generate(
    prompt, history, system_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
):
    """Stream the model's reply to ``prompt``, yielding the text so far.

    Args:
        prompt: The new user message.
        history: Previous turns, forwarded unchanged to ``format_prompt``.
        system_prompt: Optional text placed before the user message
            (joined as ``"<system_prompt>, <prompt>"``). Empty or ``None``
            means no prefix is added.
        temperature: Sampling temperature; values below 0.01 are raised
            to 0.01.
        max_new_tokens: Upper bound on generated tokens (coerced to int).
        top_p: Nucleus-sampling probability mass.
        repetition_penalty: Penalty forwarded to the backend as-is.

    Yields:
        The accumulated response text after each streamed token.

    Returns:
        The final accumulated text (as the generator's return value).
    """
    temperature = float(temperature)
    if temperature < 1e-2:
        # Floor the temperature so a slider value of 0 never reaches the
        # backend (presumably rejected or degenerate there — confirm).
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        # UI sliders can deliver numbers as floats; a token count must be
        # an integer.
        max_new_tokens=int(max_new_tokens),
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    # Only prefix the system prompt when one was actually provided; otherwise
    # the model would be sent ", <prompt>" or "None, <prompt>".
    user_message = f"{system_prompt}, {prompt}" if system_prompt else prompt
    formatted_prompt = format_prompt(user_message, history)
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""

    for response in stream:
        # Skip control tokens (e.g. the end-of-sequence marker) so they do
        # not show up in the chat window.
        if getattr(response.token, "special", False):
            continue
        output += response.token.text
        yield output
    return output


# Extra controls handed to gr.ChatInterface. Their order mirrors the
# parameters of `generate` that follow (prompt, history): system_prompt,
# temperature, max_new_tokens, top_p, repetition_penalty.
additional_inputs = [
    gr.Textbox(label="System Prompt", interactive=True, max_lines=1),
    gr.Slider(
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        value=0.9,
        label="Temperature",
        info="Higher values produce more diverse outputs",
        interactive=True,
    ),
    gr.Slider(
        minimum=0,
        maximum=8192,
        step=64,
        value=4096,
        label="Max new tokens",
        info="The maximum numbers of new tokens",
        interactive=True,
    ),
    gr.Slider(
        minimum=0.0,
        maximum=1,
        step=0.05,
        value=0.90,
        label="Top-p (nucleus sampling)",
        info="Higher values sample more low-probability tokens",
        interactive=True,
    ),
    gr.Slider(
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        value=1.2,
        label="Repetition penalty",
        info="Penalize repeated tokens",
        interactive=True,
    ),
]

# Clickable example prompts. Each row is the message followed by one None per
# additional input (five of them), i.e. no example value for those controls.
examples = [
    [sample_prompt, None, None, None, None, None]
    for sample_prompt in (
        "I'm planning a vacation to Japan. Can you suggest a one-week itinerary including must-visit places and local cuisines to try?",
        "Can you write a short story about a time-traveling detective who solves historical mysteries?",
        "I'm trying to learn French. Can you provide some common phrases that would be useful for a beginner, along with their pronunciations?",
        "I have chicken, rice, and bell peppers in my kitchen. Can you suggest an easy recipe I can make with these ingredients?",
        "Can you explain how the QuickSort algorithm works and provide a Python implementation?",
        "What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?",
    )
]

# Build the chat UI around `generate` and start serving it.
# The user avatar is picked at random once, when the module is loaded.
demo = gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(
        show_label=False,
        avatar_images=(random.choice(['1.png','2.png','3.png','4.png','5.png']), 'llava-logo.svg'),
        show_share_button=False,
        show_copy_button=True,
        likeable=True,
        layout="panel",
    ),
    additional_inputs=additional_inputs,
    # Title spelling corrected ("Assiatant" -> "Assistant").
    title="LLaVa 56B Large Language Virtual Assistant",
    examples=examples,
    concurrency_limit=20,
)
demo.launch(share=True, show_api=True)