# Spaces: lamhieu/ghost-8b-beta-8k (Paused)
# File size: 11,754 Bytes

import os
from threading import Thread
from typing import Iterator

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Upper bound of the "Max new tokens" slider in the chat UI.
MAX_MAX_NEW_TOKENS = 4096
# Initial value of the "Max new tokens" slider.
DEFAULT_MAX_NEW_TOKENS = 1536
# Prompts longer than this many tokens are trimmed before generation.
# Can be overridden through the MAX_INPUT_TOKEN_LENGTH environment variable.
MAX_INPUT_TOKEN_LENGTH = int(os.environ.get("MAX_INPUT_TOKEN_LENGTH", "8192"))

# Markdown rendered at the top of the page via gr.Markdown. A CPU notice is
# appended to it further down when no CUDA device is available.
DESCRIPTION = """\
# Playground with Ghost 8B Beta (p)

**Ghost 8B Beta** is a large language model developed with goals that include excellent multilingual support, superior knowledge capabilities, and cost-effectiveness. The model comes in two context length versions, 8k and 128k, along with multilingual function tools support by default. 

The languages supported are 🇺🇸 English, 🇫🇷 French, 🇮🇹 Italian, 🇪🇸 Spanish, 🇵🇹 Portuguese, 🇩🇪 German, 🇻🇳 Vietnamese, 🇰🇷 Korean and 🇨🇳 Chinese.

📋 Note: current model version is "disl-0x5-8k" (10 Jul 2024), context length 8k and current status is "moderating / previewing". For detailed information about the model, see [here](https://ghost-x.org/docs/models/ghost-8b-beta/). Try to experience it the way you want!
"""


# HTML passed as the `placeholder` of the gr.Chatbot component.
PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
   <h1 style="font-size: 26px; margin-bottom: 2px; opacity: 0.20;">👻 Ghost 8B Beta</h1>
   <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.10;">Ask and share whatever you want ~</p>
</div>
"""

# Disclaimer / attribution footer rendered via gr.Markdown below the chat UI.
LICENSE = """
<p/>

---
Ghost 8B Beta may give inaccurate information, including information about people, so please verify Ghost 8B Beta's answers. [Ghost 8B Beta](https://ghost-x.org/docs/models/ghost-8b-beta/) by [Ghost X](https://ghost-x.org).
"""

# Example prompts passed as `examples=` to gr.ChatInterface. Each entry is a
# one-element list holding the message text for a single example.
EXAMPLES = [
    [
        "Explain the concept of quantum entanglement and its implications for quantum computing."
    ],
    ["Comment le mouvement des Lumières a-t-il influencé la Révolution française ?"],
    ["Quale fu l'impatto del Rinascimento italiano sull'arte e la cultura europea?"],
    [
        "Spiega il funzionamento e le applicazioni della spettroscopia Raman in chimica analitica."
    ],
    [
        "Explique el teorema de incompletitud de Gödel y sus implicaciones en la lógica matemática."
    ],
    [
        "Descreva o processo de meiose celular e sua importância na variabilidade genética."
    ],
    [
        "Giải thích nguyên lý hoạt động của máy học sâu (deep learning) trong trí tuệ nhân tạo và ứng dụng của nó trong xử lý ngôn ngữ tự nhiên."
    ],
    ["조선 시대의 신분제도가 한국 사회에 미친 영향을 분석하시오."],
    ["分析丝绸之路对中国古代文化交流和经济发展的影响。"],
    [
        "Create a Python function that takes a list of integers and returns the list sorted in ascending order without using the built-in sort or sorted functions."
    ],
    [
        "Écrivez une fonction en C++ qui trouve le plus long sous-tableau contigu avec une somme égale à zéro."
    ],
    [
        "Scrivi una funzione in Java che calcola il fattoriale di un numero utilizzando la ricorsione."
    ],
    [
        "Desarrolla una función en JavaScript que determine si una cadena de texto es un palíndromo, ignorando espacios y signos de puntuación."
    ],
    ["Implemente uma função em C# que verifique se uma matriz quadrada é simétrica."],
    [
        "Schreiben Sie eine Funktion in Swift, die eine gegebene Zeichenfolge in umgekehrter Reihenfolge zurückgibt, ohne integrierte Funktionen zu verwenden."
    ],
    [
        "Viết một hàm trong PHP để tìm tất cả các số nguyên tố trong một khoảng cho trước."
    ],
    [
        "파이썬을 사용하여 주어진 이진 트리가 이진 탐색 트리인지 확인하는 함수를 작성하십시오."
    ],
    [
        "用 Go 语言编写一个函数，计算给定字符串中每个字符出现的次数，并返回一个包含字符及其出现次数的映射。"
    ],
    [
        "Can you help me design a detailed project plan for developing a machine learning model for predicting stock prices?"
    ],
    [
        "Pouvez-vous m'aider à organiser un emploi du temps hebdomadaire pour maximiser la productivité de mon équipe de développement logiciel?"
    ],
    [
        "Puoi aiutarmi a creare un piano di sviluppo per un'applicazione mobile che gestisce le prenotazioni di ristoranti?"
    ],
    [
        "¿Podrías ayudarme a elaborar un plan detallado para la implementación de un sistema de gestión de contenido (CMS) en una empresa mediana?"
    ],
    [
        "Você pode me ajudar a planejar uma estratégia de desenvolvimento para um sistema de comércio eletrônico escalável?"
    ],
    [
        "Können Sie mir helfen, einen detaillierten Zeitplan für die Implementierung eines neuen ERP-Systems in unserem Unternehmen zu erstellen?"
    ],
    [
        "Bạn có thể giúp tôi xây dựng một kế hoạch phát triển chi tiết cho dự án xây dựng hệ thống quản lý chuỗi cung ứng không?"
    ],
    [
        "신경망 기반 이미지 인식 모델 개발을 위한 세부 프로젝트 계획을 세우는 데 도움을 줄 수 있나요?"
    ],
    ["你能帮我制定一个详细的开发计划，用于创建一个基于区块链的分布式账本系统吗？"],
    [
        "Prove that the sum of the squares of any two sides of a right triangle is equal to the square of the hypotenuse."
    ],
    [
        "Calculez la force gravitationnelle entre deux masses de 10 kg chacune séparées par une distance de 1 mètre."
    ],
    [
        "Determina la formula molecolare di un composto che contiene il 40% di carbonio, il 6.67% di idrogeno e il 53.33% di ossigeno in massa."
    ],
    [
        "Explica la teoría del ciclo económico de Schumpeter y cómo se aplica a la economía moderna."
    ],
    [
        "Calcule a energia potencial gravitacional de um objeto de 5 kg a uma altura de 10 metros acima do solo (g = 9,8 m/s²)."
    ],
    [
        "Beweisen Sie, dass jede Primzahl der Form 4k+1 als Summe zweier Quadrate geschrieben werden kann."
    ],
    [
        "Tính nồng độ mol của dung dịch H₂SO₄ khi hoà tan 98 gam H₂SO₄ vào nước để được 1 lít dung dịch."
    ],
    ["케인스 경제학의 핵심 개념과 그것이 현대 경제 정책에 미치는 영향을 설명하십시오."],
    ["计算一个质量为2 kg的物体在3米高处的重力势能（g = 9.8 m/s²）。"],
    [
        'Identify the author of a novel that features a dystopian society where "Big Brother" watches over its citizens and the protagonist works for the Ministry of Truth.'
    ],
    [
        "Quel est le seul mammifère capable de voler activement, souvent associé à la nuit et capable d'écholocalisation?"
    ],
    [
        "Qual è l'opera letteraria italiana che narra il viaggio immaginario di un poeta attraverso Inferno, Purgatorio e Paradiso, guidato da Virgilio e Beatrice?"
    ],
    [
        "¿Qué insecto es conocido por su organización social compleja, su capacidad para producir miel y su comunicación mediante la danza?"
    ],
    [
        "Qual é o fenômeno atmosférico que ocorre quando uma massa de ar quente se encontra com uma massa de ar frio, resultando em uma violenta tempestade giratória?"
    ],
    [
        "Welches literarische Werk beschreibt die Geschichte eines jungen Mädchens, das durch einen Kaninchenbau in eine fantastische Welt voller skurriler Charaktere fällt?"
    ],
    [
        "Động vật nào có thể tái sinh toàn bộ cơ thể từ một mảnh nhỏ của chính nó, thường sống dưới nước và có thể có nhiều xúc tu?"
    ],
    [
        "어떤 자연 현상은 태양빛이 대기 중의 물방울에 반사되고 굴절되어 발생하며, 하늘에 나타나는 여러 색깔의 아치 형태를 띠나요?"
    ],
    # NOTE(review): this last prompt looks cut off mid-sentence — confirm
    # against the upstream file before relying on it.
    ["这部文学作品讲述了一位绅士和他的侍从的冒险故事，他们在"],
]

# The checkpoint is loaded only when a CUDA device is visible. Otherwise the
# page description gains a notice and no `model` / `tokenizer` globals exist.
if torch.cuda.is_available():
    model_id = "lamhieu/ghost-8b-beta-disl-0x5-8k"
    # Optional access token for the Hugging Face Hub; None when HF_TOKEN is unset.
    model_tk = os.environ.get("HF_TOKEN")
    model = AutoModelForCausalLM.from_pretrained(
        model_id, device_map="auto", trust_remote_code=True, token=model_tk
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_id, trust_remote_code=True, token=model_tk
    )
else:
    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"


@spaces.GPU(duration=120)
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    system_prompt: str,
    max_new_tokens: int = 1536,
    temperature: float = 0.4,
    top_p: float = 0.95,
    top_k: int = 50,
    repetition_penalty: float = 1.0,
) -> Iterator[str]:
    """Stream the model's reply to ``message`` as progressively longer text.

    The conversation is assembled from the optional system prompt, the prior
    (user, assistant) turns and the new user message, rendered through the
    tokenizer's chat template, and fed to ``model.generate`` on a background
    thread. Decoded text is read from a ``TextIteratorStreamer`` and the
    accumulated reply so far is yielded after every chunk.

    Args:
        message: The new user message.
        chat_history: Earlier turns as ``(user, assistant)`` pairs.
        system_prompt: Optional system instruction; skipped when empty.
        max_new_tokens: Forwarded to ``model.generate``.
        temperature: Forwarded to ``model.generate``.
        top_p: Forwarded to ``model.generate``.
        top_k: Forwarded to ``model.generate``.
        repetition_penalty: Forwarded to ``model.generate``.

    Yields:
        The reply text generated so far (each item extends the previous one).

    Raises:
        gr.Error: If no CUDA device is available, in which case the model and
            tokenizer were never loaded.
    """
    # `model` and `tokenizer` are only created at import time when CUDA is
    # available; surface a readable UI error instead of a bare NameError.
    if not torch.cuda.is_available():
        raise gr.Error("This demo does not work on CPU: no CUDA device is available.")

    conversation = []
    if system_prompt:
        conversation.append({"role": "system", "content": system_prompt})
    for user, assistant in chat_history:
        conversation.append({"role": "user", "content": user})
        conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    )
    input_ids = input_ids.to(model.device)
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        # Keep only the most recent tokens; the oldest part of the prompt is dropped.
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(
            f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens."
        )

    # NOTE(review): timeout=10.0 presumably bounds the wait for each new chunk
    # when iterating the streamer — confirm against the transformers docs.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
    )
    # Generation runs on a worker thread so this generator can consume the
    # streamer and yield partial output while tokens are still being produced.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    reply = ""
    for chunk in streamer:
        reply += chunk
        yield reply


# Transcript widget; PLACEHOLDER is its `placeholder` content.
chatbot = gr.Chatbot(label="Ghost 8B Beta", placeholder=PLACEHOLDER, height=500)

# Chat UI wired to generate(). The additional inputs are listed in the same
# order as generate()'s parameters after (message, chat_history):
# system_prompt, max_new_tokens, temperature, top_p, top_k, repetition_penalty.
chat_interface = gr.ChatInterface(
    fn=generate,
    chatbot=chatbot,
    additional_inputs=[
        gr.Textbox(label="System prompt", lines=6),
        gr.Slider(
            label="Max new tokens",
            value=DEFAULT_MAX_NEW_TOKENS,
            minimum=1,
            maximum=MAX_MAX_NEW_TOKENS,
            step=1,
        ),
        gr.Slider(label="Temperature", value=0.4, minimum=0.1, maximum=2.0, step=0.1),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            value=0.95,
            minimum=0.05,
            maximum=1.0,
            step=0.05,
        ),
        gr.Slider(label="Top-k", value=50, minimum=1, maximum=100, step=1),
        gr.Slider(
            label="Repetition penalty",
            value=1.0,
            minimum=1.0,
            maximum=2.0,
            step=0.05,
        ),
    ],
    examples=EXAMPLES,
    examples_per_page=9,
    cache_examples=False,
    stop_btn=None,
    fill_height=True,
)

# Page layout, top to bottom: description, chat interface, disclaimer footer.
with gr.Blocks(css="style.css", fill_height=True) as demo:
    gr.Markdown(DESCRIPTION)
    chat_interface.render()
    gr.Markdown(LICENSE)

if __name__ == "__main__":
    # Enable the request queue with at most 20 entries, then launch with share=True.
    demo.queue(max_size=20)
    demo.launch(share=True)