Spaces:

mikeee
/

chatglm2-6b-test

Runtime error

File size: 5,915 Bytes

9b6a4ab
 
8d030a2
 
9c042fd
 
4947e7b
407dc77
9c042fd
 
 
89cb869
8d030a2
de222eb
8d030a2
9b6a4ab
360d9e4
8d030a2
89cb869
 
 
 
 
 
 
 
 
 
8d030a2
 
 
89cb869
 
 
 
d7ec399
89cb869
 
 
 
 
 
 
 
634ed9b
7eb763b
634ed9b
89cb869
 
d7ec399
89cb869
60399ca
 
360d9e4
9c042fd
 
634ed9b
 
 
 
 
360d9e4
9c042fd
da75503
9c042fd
adb2ab9
9c042fd
360d9e4
7c0ecdf
407dc77
 
360d9e4
53c5ff4
634ed9b
 
 
 
 
 
 
 
 
 
53c5ff4
634ed9b
 
360d9e4
adb2ab9
 
53c5ff4
 
d6e2e66
adb2ab9
 
a904f8b
adb2ab9
7c0ecdf
634ed9b
7c0ecdf
407dc77
7c0ecdf
a904f8b
1e925eb
d6e2e66
1e925eb
7778ade
1e925eb
7778ade
d6e2e66
 
adb2ab9
dba1dfa
7eb763b
dba1dfa
adb2ab9
634ed9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c0ecdf
53c5ff4

"""Test various models."""
# pylint: disable=invalid-name, line-too-long,broad-exception-caught, protected-access
import os
import time
from pathlib import Path

import gradio as gr
import pendulum
import torch
from loguru import logger
from transformers import AutoModel, AutoTokenizer

# ruff: noqa: E402
# os.system("pip install --upgrade torch transformers sentencepiece scipy cpm_kernels accelerate bitsandbytes loguru")

# os.system("pip install torch transformers sentencepiece loguru")


# Force the process timezone to Asia/Shanghai.
# Setting TZ alone is not enough; time.tzset() makes the runtime re-read it.
os.environ["TZ"] = "Asia/Shanghai"
try:
    time.tzset()  # type: ignore # pylint: disable=no-member
except AttributeError:
    # time.tzset() is only available on Unix; on Windows the attribute does
    # not exist, so this is the one failure we expect and tolerate here.
    logger.warning("Windows, cant run time.tzset()")

model_name = "THUDM/chatglm2-6b-int4"  # 3.9G

# Load the tokenizer from the same repo id as the model so the two cannot
# drift apart when model_name is changed.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

has_cuda = torch.cuda.is_available()
# has_cuda = False  # force cpu

logger.debug("load")
# Load the checkpoint once, then pick device/dtype for the current machine.
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
if not has_cuda:
    # must use float for cpu
    model = model.float()
elif model_name.endswith("int4"):
    # int4 checkpoint: move to the GPU without an extra .half() conversion
    model = model.cuda()
else:
    model = model.cuda().half()

# Switch to inference mode (no training-time behaviour such as dropout).
model = model.eval()
logger.debug("done load")

# tokenizer = AutoTokenizer.from_pretrained("openchat/openchat_v2_w")
# model = AutoModelForCausalLM.from_pretrained("openchat/openchat_v2_w", load_in_8bit_fp32_cpu_offload=True, load_in_8bit=True)

# Search the default Hugging Face hub cache directory for entries whose path
# mentions both the model's short name and "pytorch_model.bin".
cache_loc = Path("~/.cache/huggingface/hub").expanduser()
model_cache_path = [
    candidate
    for candidate in cache_loc.rglob("*")
    if all(
        fragment in candidate.as_posix()
        for fragment in (Path(model_name).name, "pytorch_model.bin")
    )
]

logger.debug(f"{model_cache_path=}")

# Log the size of the first match (bytes converted to GiB), if there is one.
if model_cache_path:
    model_size_gb = model_cache_path[0].stat().st_size / 1024**3
    logger.info(f"{model_name=} {model_size_gb=:.2f} GB")

def get_time():
    """Return the current time rendered with pendulum's ``HH:mm:ss zz`` format."""
    now = pendulum.now()
    return now.format("HH:mm:ss zz")

def respond(message, chat_history):
    """Generate a reply to ``message`` and record the turn in the chat history.

    Args:
        message: Text from the prompt box; surrounding whitespace is stripped.
        chat_history: Earlier (user, bot) turns as held by the Chatbot
            component. ``None`` is treated as an empty history.

    Returns:
        A ``(message, chat_history)`` tuple: the stripped prompt (so a caller
        can leave it in the textbox) and the history including the new turn.
    """
    message = message.strip()
    if chat_history is None:
        chat_history = []
    if not message:
        # Nothing to ask: do not spend a generation on an empty prompt.
        return message, chat_history

    turns_before = len(chat_history)
    response, chat_history = model.chat(
        tokenizer,
        message,
        history=chat_history,
        temperature=0.7,
        repetition_penalty=1.2,
        max_length=128,
    )
    # ChatGLM2's chat() already returns the history with this turn appended;
    # appending unconditionally showed every exchange twice. Only add the
    # turn when the returned history did not grow.
    if len(chat_history) <= turns_before:
        chat_history.append((message, response))
    return message, chat_history


# Build the Gradio UI: a chat window, a prompt box with Send/Clear buttons,
# a "Current time" textbox and a panel of clickable example prompts.
theme = gr.themes.Soft(text_size="sm")
with gr.Blocks(theme=theme) as block:
    chatbot = gr.Chatbot()

    with gr.Column():
        with gr.Column(scale=12):
            msg = gr.Textbox()
        # Earlier layout kept for reference only: the string below is bound to
        # a throwaway name and never used, so nothing inside it is executed.
        _ = """
        with gr.Column(scale=1, min_width=16):
            btn = gr.Button("Send")
        with gr.Column(scale=1, min_width=8):
            clear = gr.ClearButton([msg, chatbot])
        with gr.Column(scale=1, min_width=25):
            dt = gr.Textbox(label="Current time")
        # """
        # Active layout: Send and Clear buttons, followed by the clock textbox.
        with gr.Column(scale=1, min_width=100):
            with gr.Column():
                with gr.Column(scale=1, min_width=50):
                    btn = gr.Button("Send")
                with gr.Column(scale=1, min_width=50):
                    # Clears both the prompt box and the chat window.
                    clear = gr.ClearButton([msg, chatbot])
            # with gr.Row():
            dt = gr.Textbox(label="Current time")
    # Submitting the textbox: respond() returns the stripped message as its
    # first output, so the prompt is written back and not cleared.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

    # Send button: same call, but the first output is replaced by "" so the
    # prompt box is emptied; respond(x, y)[1:] keeps only the updated history.
    btn.click(lambda x, y: ("",) + respond(x, y)[1:], [msg, chatbot], [msg, chatbot])

    with gr.Accordion("Example inputs", open=True):
        # Sample English passage interpolated into the two translation
        # examples further down.
        etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
        # Each inner list is one example; its single value fills the prompt
        # textbox (inputs=[msg]).
        examples = gr.Examples(
            examples=[
                ["Explain the plot of Cinderella in a sentence."],
                [
                    "How long does it take to become proficient in French, and what are the best methods for retaining information?"
                ],
                ["What are some common mistakes to avoid when writing code?"],
                ["Build a prompt to generate a beautiful portrait of a horse"],
                ["Suggest four metaphors to describe the benefits of AI"],
                ["Write a pop song about leaving home for the sandy beaches."],
                ["Write a summary demonstrating my ability to tame lions"],
                ["鲁迅和周树人什么关系"],
                ["从前有一头牛，这头牛后面有什么？"],
                ["正无穷大加一大于正无穷大吗？"],
                ["正无穷大加正无穷大大于正无穷大吗？"],
                ["-2的平方根等于什么"],
                ["树上有5只鸟，猎人开枪打死了一只。树上还有几只鸟？"],
                ["树上有11只鸟，猎人开枪打死了一只。树上还有几只鸟？提示：需考虑鸟可能受惊吓飞走。"],
                ["鲁迅和周树人什么关系 用英文回答"],
                ["以红楼梦的行文风格写一张委婉的请假条。不少于320字。"],
                [f"{etext} 翻成中文，列出3个版本"],
                [f"{etext} \n 翻成中文，保留原意，但使用文学性的语言。不要写解释。列出3个版本"],
                ["js 判断一个数是不是质数"],
                ["js 实现python 的 range(10)"],
                ["js 实现python 的 [*(range(10)]"],
                ["假定 1 + 2 = 4, 试求 7 + 8"],
                ["Erkläre die Handlung von Cinderella in einem Satz."],
                ["Erkläre die Handlung von Cinderella in einem Satz. Auf Deutsch"],
            ],
            inputs=[msg],
            examples_per_page=60,
        )
    # On page load, fill the clock textbox from get_time(); every=1 asks
    # Gradio to keep re-running it (interval presumably in seconds - confirm
    # against the installed Gradio version).
    block.load(get_time, inputs=[], outputs=dt, every=1)
# Enable the request queue, then start the web server.
# NOTE(review): the periodic `every=` event above appears to depend on the
# queue being enabled - confirm before removing queue().
block.queue().launch()