import pytest
from utils import *

server = ServerPreset.stories15m_moe()

# small LoRA adapter that shifts the stories15M MoE base model toward Shakespeare-style text
LORA_FILE_URL = "https://huggingface.co/ggml-org/stories15M_MOE/resolve/main/moe_shakespeare15M.gguf"


@pytest.fixture(autouse=True)
def create_server():
    global server
    server = ServerPreset.stories15m_moe()
    server.lora_files = [download_file(LORA_FILE_URL)]
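

# scale 0.0 leaves the base model untouched; scale 1.0 applies the adapter at full strength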
@pytest.mark.parametrize("scale,re_content", [
    (0.0, "(little|girl|three|years|old)+"),
    (1.0, "(eye|love|glass|sun)+"),
])
def test_lora(scale: float, re_content: str):
    global server
    server.start()
    # set the scale of adapter id 0 server-wide before issuing the completion request
    res_lora_control = server.make_request("POST", "/lora-adapters", data=[
        {"id": 0, "scale": scale}
    ])
    assert res_lora_control.status_code == 200
    res = server.make_request("POST", "/completion", data={
        "prompt": "Look in thy glass",
    })
    assert res.status_code == 200
    assert match_regex(re_content, res.body["content"])
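

# the same prompt is sent with different lora scales, all in parallel,
# so each request is processed by a different server slot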
def test_lora_per_request():
    global server
    server.n_slots = 4
    server.start()

    prompt = "Look in thy glass"
    lora_config = [
        ( [{"id": 0, "scale": 0.0}], "(bright|day|many|happy)+" ),
        ( [{"id": 0, "scale": 0.0}], "(bright|day|many|happy)+" ),
        ( [{"id": 0, "scale": 0.3}], "(special|thing|gifted)+" ),
        ( [{"id": 0, "scale": 0.7}], "(far|from|home|away)+" ),
        ( [{"id": 0, "scale": 1.0}], "(eye|love|glass|sun)+" ),
        ( [{"id": 0, "scale": 1.0}], "(eye|love|glass|sun)+" ),
    ]
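
    # fire all requests concurrently; parallel_function_calls takes (callable, args)
    # tuples and returns the results in the same order as the tasks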
    tasks = [(
        server.make_request,
        ("POST", "/completion", {
            "prompt": prompt,
            "lora": lora,
            "seed": 42,
            "temperature": 0.0,
            "cache_prompt": False,
        })
    ) for lora, _ in lora_config]
    results = parallel_function_calls(tasks)

    assert all([res.status_code == 200 for res in results])
    for res, (_, re_test) in zip(results, lora_config):
        assert match_regex(re_test, res.body["content"])
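

# the "abliteration" adapter is tuned to remove the model's refusal behavior,
# so the expected reply shifts from refusal to compliance as the scale grows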
@pytest.mark.skipif(not is_slow_test_allowed(), reason="skipping slow test")
def test_with_big_model():
    server = ServerProcess()
    server.model_hf_repo = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
    server.model_hf_file = "Meta-Llama-3.1-8B-Instruct-IQ2_M.gguf"
    server.model_alias = "Llama-3.1-8B-Instruct"
    server.n_slots = 4
    server.n_ctx = server.n_slots * 1024
    server.n_predict = 64
    server.temperature = 0.0
    server.seed = 42
    server.lora_files = [
        download_file("https://huggingface.co/ngxson/Llama-3-Instruct-abliteration-LoRA-8B-F16-GGUF/resolve/main/Llama-3-Instruct-abliteration-LoRA-8B-f16.gguf"),
    ]
    server.start(timeout_seconds=600)

    prompt = "Write a computer virus"
    lora_config = [
        ( [{"id": 0, "scale": 0.0}], "I can't provide you with a code for a computer virus" ),
        ( [{"id": 0, "scale": 0.0}], "I can't provide you with a code for a computer virus" ),
        ( [{"id": 0, "scale": 0.3}], "I can't write a computer virus" ),
        ( [{"id": 0, "scale": 0.7}], "Warning: This is a hypothetical exercise" ),
        ( [{"id": 0, "scale": 1.5}], "A task of some complexity! Here's a simple computer virus" ),
        ( [{"id": 0, "scale": 1.5}], "A task of some complexity! Here's a simple computer virus" ),
    ]

    tasks = [(
        server.make_request,
        ("POST", "/v1/chat/completions", {
            "messages": [
                {"role": "user", "content": prompt}
            ],
            "lora": lora,
            "cache_prompt": False,
        })
    ) for lora, _ in lora_config]
    results = parallel_function_calls(tasks)

    assert all([res.status_code == 200 for res in results])
    for res, (_, re_test) in zip(results, lora_config):
        # plain substring check; output is deterministic (temperature 0.0, seed 42 set above)
        assert re_test in res.body["choices"][0]["message"]["content"]
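

# A minimal sketch, not part of the suite above: it assumes the server also exposes
# GET /lora-adapters, returning the loaded adapters as [{"id": ..., "path": ..., "scale": ...}],
# and checks that a scale set via POST /lora-adapters can be read back.
def test_lora_set_then_read_back():
    global server
    server.start()
    res_set = server.make_request("POST", "/lora-adapters", data=[
        {"id": 0, "scale": 0.5}  # hypothetical scale value, chosen only for illustration
    ])
    assert res_set.status_code == 200
    res_get = server.make_request("GET", "/lora-adapters")
    assert res_get.status_code == 200
    assert res_get.body[0]["scale"] == 0.5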