```python
import modal

app = modal.App("ask-finetuned-model")

# Persisted volume holding the fine-tuned LoRA checkpoint
vol_checkpoints = modal.Volume.from_name("model-checkpoints")

# CUDA base image with Unsloth and its dependencies installed
image = (
    modal.Image.from_registry("nvidia/cuda:12.1.1-devel-ubuntu22.04", add_python="3.10")
    .apt_install("git")
    .run_commands(
        "pip install --upgrade pip",
        "pip install packaging ninja psutil",
        "pip install unsloth_zoo",
        "pip install torchvision",
        "pip install xformers trl peft accelerate bitsandbytes scipy huggingface_hub protobuf sentencepiece einops",
        "pip install --no-deps 'unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git'",
    )
    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
)
@app.function(
    image=image,
    gpu="A10G",  # assumption: any CUDA-capable GPU works here; adjust to taste
    volumes={"/data": vol_checkpoints},  # mount the checkpoint volume at /data
)
def ask_question(question: str, context: str = "Context: Japan Census data."):
    from unsloth import FastLanguageModel

    print("Loading model...")
    model_path = "/data/checkpoints/phi3-census-lora"
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_path,
        max_seq_length=2048,
        dtype=None,         # auto-detect the best dtype for the GPU
        load_in_4bit=True,  # 4-bit quantization keeps memory usage low
    )
    FastLanguageModel.for_inference(model)  # switch to Unsloth's inference mode
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
"""
    inputs = tokenizer(
        [alpaca_prompt.format(question, context)],
        return_tensors="pt",
    ).to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=150, use_cache=True, temperature=0.1)
    response = tokenizer.batch_decode(outputs)[0]

    # Extract the answer portion after the response marker
    if "### Response:\n" in response:
        # The exact end-of-sequence string depends on the tokenizer, so split
        # on tokenizer.eos_token rather than a hard-coded literal
        answer = response.split("### Response:\n")[1].split(tokenizer.eos_token)[0].strip()
    else:
        answer = response
    print(answer)
    return answer
```
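The listing stops before showing how `ask_question` is invoked. Below is a minimal driver sketch, assuming the script is saved as `ask_model.py`; the entrypoint name and the sample question are illustrative, not from the original:

```python
# Hypothetical driver for the function above. `main` and the default
# question are placeholders; any census-style question works.
@app.local_entrypoint()
def main(question: str = "What was the population of Tokyo in 2020?"):
    # .remote() executes ask_question in the GPU container defined above
    print(ask_question.remote(question))
```

With an entrypoint like this in place, the script can be run as `modal run ask_model.py --question "..."`. Modal builds the image on the first invocation and reuses the cached build on subsequent calls, so only the model load and generation cost recurs.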