import torch
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
import gradio as gr
import warnings
import os

# Command-line argument parsing removed; hardcoded defaults keep the Spaces demo simple.
MODEL_PATH = "/model/13B_hf"
LORA_PATH = "checkpoint-3000"
USE_TYPEWRITER = 1
USE_LOCAL = 1

tokenizer = LlamaTokenizer.from_pretrained(MODEL_PATH)

LOAD_8BIT = True
BASE_MODEL = MODEL_PATH
LORA_WEIGHTS = LORA_PATH

lora_bin_path = os.path.join(LORA_PATH, "adapter_model.bin")
if not os.path.exists(lora_bin_path) and USE_LOCAL:
    ...  # [rest of the path fixing logic] (see the hedged sketch at the end of the file)

# ... [rest of the device and model loading logic] (see the load_model sketch below)

def generate_prompt(instruction, input=None):
    ...  # [rest of the generate_prompt function] (see sketch below)

def evaluate(
    input,
    temperature=0.1,
    top_p=0.75,
    top_k=40,
    num_beams=4,
    max_new_tokens=128,
    min_new_tokens=1,
    repetition_penalty=2.0,
    **kwargs,
):
    ...  # [rest of the evaluate function] (see sketch below)

gr.Interface(
    fn=evaluate,
    inputs=[
        gr.components.Textbox(
            lines=2, label="Input", placeholder="Tell me about alpacas."
        ),
        # ... [rest of the inputs] (see the EXTRA_INPUTS sketch below)
    ],
    outputs=[
        # gr.inputs is deprecated in Gradio 3.x; use gr.components, as for the inputs.
        gr.components.Textbox(lines=25, label="Output"),
    ],
    title="Chinese-Vicuna 中文小羊驼",
    description="Chatlaw app trained on HK law data",
).launch()
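
# --- Sketch: the elided path-fixing logic ---
# A minimal sketch of what the path-fixing block near the top might do, assuming
# the LoRA checkpoint was saved under the legacy name "pytorch_model.bin" while
# peft expects "adapter_model.bin". The function name and the rename strategy
# are illustrative assumptions, not the author's confirmed code.
def fix_lora_weight_name(lora_path: str) -> None:
    lora_bin = os.path.join(lora_path, "adapter_model.bin")
    legacy_bin = os.path.join(lora_path, "pytorch_model.bin")
    if not os.path.exists(lora_bin):
        if os.path.exists(legacy_bin):
            # Rename so PeftModel.from_pretrained can locate the adapter weights.
            os.rename(legacy_bin, lora_bin)
            warnings.warn(
                "Renamed LoRA weights from 'pytorch_model.bin' to 'adapter_model.bin'"
            )
        else:
            raise FileNotFoundError(f"No LoRA checkpoint found in {lora_path}")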
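
# --- Sketch: the elided device and model loading logic ---
# Assumes the adapter is applied with peft and that bitsandbytes is installed
# when LOAD_8BIT is True. This mirrors the common alpaca-lora loading pattern;
# treat it as a sketch rather than the author's exact code.
def load_model():
    from peft import PeftModel  # assumed dependency

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = LlamaForCausalLM.from_pretrained(
        BASE_MODEL,
        load_in_8bit=LOAD_8BIT,
        torch_dtype=torch.float16,
        device_map="auto" if device == "cuda" else {"": device},
    )
    model = PeftModel.from_pretrained(model, LORA_WEIGHTS, torch_dtype=torch.float16)
    if not LOAD_8BIT:
        model.half()  # 8-bit weights are already quantized; halve only full-precision ones
    model.eval()
    return model, device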
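
# --- Sketch: the elided generate_prompt function ---
# Assumes the standard Alpaca instruction template, which Chinese-Vicuna-style
# projects commonly train against; the exact wording in the author's script may differ.
def generate_prompt_sketch(instruction, input=None):
    if input:
        return (
            "Below is an instruction that describes a task, paired with an input "
            "that provides further context. Write a response that appropriately "
            f"completes the request.\n\n### Instruction:\n{instruction}\n\n"
            f"### Input:\n{input}\n\n### Response:"
        )
    return (
        "Below is an instruction that describes a task. Write a response that "
        f"appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n"
        "### Response:"
    )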
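
# --- Sketch: the elided evaluate function ---
# Assumes `model` and `device` are module-level names produced by the elided
# loading block (see the load_model sketch above) and that the reply is
# everything after the "### Response:" marker. The streaming/typewriter
# variant toggled by USE_TYPEWRITER is omitted; this is a non-streaming sketch.
def evaluate_sketch(
    input,
    temperature=0.1,
    top_p=0.75,
    top_k=40,
    num_beams=4,
    max_new_tokens=128,
    min_new_tokens=1,
    repetition_penalty=2.0,
    **kwargs,
):
    prompt = generate_prompt_sketch(input)
    input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].to(device)
    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        num_beams=num_beams,
        max_new_tokens=max_new_tokens,
        min_new_tokens=min_new_tokens,
        repetition_penalty=repetition_penalty,
        **kwargs,
    )
    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
        )
    text = tokenizer.decode(output.sequences[0], skip_special_tokens=True)
    # Keep only the model's reply, dropping the echoed prompt.
    return text.split("### Response:")[-1].strip()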
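
# --- Sketch: the elided extra Gradio inputs ---
# One Slider per evaluate() keyword argument, with defaults matching the
# signature above; the labels and ranges are assumptions, not the author's
# confirmed UI.
EXTRA_INPUTS = [
    gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
    gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
    gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
    gr.components.Slider(minimum=1, maximum=10, step=1, value=4, label="Beams Number"),
    gr.components.Slider(minimum=1, maximum=2000, step=1, value=128, label="Max New Tokens"),
    gr.components.Slider(minimum=1, maximum=100, step=1, value=1, label="Min New Tokens"),
    gr.components.Slider(minimum=0.1, maximum=10.0, step=0.1, value=2.0, label="Repetition Penalty"),
]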