TheBloke / CodeLlama-70B-Instruct-GPTQ outputs only \n

#1
by victors2709 - opened

Hello.
I tried running TheBloke/CodeLlama-70B-Instruct-GPTQ (revision gptq-4bit-128g-actorder_True), and it outputs only newline characters (\n).

# podman run ... ghcr.io/huggingface/text-generation-inference:latest --model-id TheBloke/CodeLlama-70B-Python-GPTQ --revision gptq-4bit-128g-actorder_True -p 3002 --num-shard 8 --max-input-length 13000 --max-total-tokens 13001 --max-batch-prefill-tokens 13000 --quantize gptq
# podman inspect ghcr.io/huggingface/text-generation-inference:latest  | head
[
     {
          "Id": "d019184cda3673c8e53b3e742dea1c12961d5597e0a9affe6e31cd2e88e90f54",
          "Digest": "sha256:85547a2874ef6540e9453fb0d6c041c3ecdb4285c04c63c085b89455fcd99aed",
          "RepoTags": [
               "ghcr.io/huggingface/text-generation-inference:latest"
          ],
          "RepoDigests": [
               "ghcr.io/huggingface/text-generation-inference@sha256:85547a2874ef6540e9453fb0d6c041c3ecdb4285c04c63c085b89455fcd99aed",
               "ghcr.io/huggingface/text-generation-inference@sha256:ddd1a0daabaef2f8947540be6877d8a9f1e6d872c8a8ac184083e082a8277044"
# podman run --rm ghcr.io/huggingface/text-generation-inference:latest  --version
text-generation-launcher 1.4.0
podman logs -f  codellama-tgi-CodeLlama-70b-Instruct-hf-gptq
{"timestamp":"2024-02-02T09:27:32.001345Z","level":"INFO","fields":{"message":"Args { model_id: \"TheBloke/CodeLlama-70B-Python-GPTQ\", revision: Some(\"gptq-4bit-128g-actorder_True\"), validation_workers: 2, sharded: None, num_shard: Some(8), quantize: Some(Gptq), speculate: None, dtype: None, trust_remote_code: false, max_concurrent_requests: 128, max_best_of: 2, max_stop_sequences: 4, max_top_n_tokens: 5, max_input_length: 13000, max_total_tokens: 13001, waiting_served_ratio: 1.2, max_batch_prefill_tokens: 13000, max_batch_total_tokens: None, max_waiting_tokens: 20, hostname: \"rtx4090-5.buh-dcl1.4psa.net\", port: 3002, shard_uds_path: \"/tmp/text-generation-server\", master_addr: \"localhost\", master_port: 29500, huggingface_hub_cache: Some(\"/data\"), weights_cache_override: None, disable_custom_kernels: false, cuda_memory_fraction: 1.0, rope_scaling: None, rope_factor: None, json_output: true, otlp_endpoint: None, cors_allow_origin: [], watermark_gamma: None, watermark_delta: None, ngrok: false, ngrok_authtoken: None, ngrok_edge: None, tokenizer_config_path: None, env: false }"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:27:32.001440Z","level":"INFO","fields":{"message":"Sharding model on 8 processes"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:27:32.001709Z","level":"INFO","fields":{"message":"Starting download process."},"target":"text_generation_launcher","span":{"name":"download"},"spans":[{"name":"download"}]}
{"timestamp":"2024-02-02T09:27:36.029648Z","level":"INFO","fields":{"message":"Files are already present on the host. Skipping download.\n"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:27:36.707345Z","level":"INFO","fields":{"message":"Successfully downloaded weights."},"target":"text_generation_launcher","span":{"name":"download"},"spans":[{"name":"download"}]}
{"timestamp":"2024-02-02T09:27:36.707829Z","level":"INFO","fields":{"message":"Starting shard"},"target":"text_generation_launcher","span":{"rank":0,"name":"shard-manager"},"spans":[{"rank":0,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:36.707841Z","level":"INFO","fields":{"message":"Starting shard"},"target":"text_generation_launcher","span":{"rank":1,"name":"shard-manager"},"spans":[{"rank":1,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:36.707955Z","level":"INFO","fields":{"message":"Starting shard"},"target":"text_generation_launcher","span":{"rank":2,"name":"shard-manager"},"spans":[{"rank":2,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:36.708033Z","level":"INFO","fields":{"message":"Starting shard"},"target":"text_generation_launcher","span":{"rank":3,"name":"shard-manager"},"spans":[{"rank":3,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:36.708611Z","level":"INFO","fields":{"message":"Starting shard"},"target":"text_generation_launcher","span":{"rank":5,"name":"shard-manager"},"spans":[{"rank":5,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:36.708659Z","level":"INFO","fields":{"message":"Starting shard"},"target":"text_generation_launcher","span":{"rank":4,"name":"shard-manager"},"spans":[{"rank":4,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:36.709708Z","level":"INFO","fields":{"message":"Starting shard"},"target":"text_generation_launcher","span":{"rank":7,"name":"shard-manager"},"spans":[{"rank":7,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:36.709796Z","level":"INFO","fields":{"message":"Starting shard"},"target":"text_generation_launcher","span":{"rank":6,"name":"shard-manager"},"spans":[{"rank":6,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:44.433636Z","level":"WARN","fields":{"message":"Disabling exllama because desc_act=True\n"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:27:44.507215Z","level":"WARN","fields":{"message":"Disabling exllama because desc_act=True\n"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:27:44.580880Z","level":"WARN","fields":{"message":"Disabling exllama because desc_act=True\n"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:27:44.638244Z","level":"WARN","fields":{"message":"Disabling exllama because desc_act=True\n"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:27:44.653295Z","level":"WARN","fields":{"message":"Disabling exllama because desc_act=True\n"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:27:44.658021Z","level":"WARN","fields":{"message":"Disabling exllama because desc_act=True\n"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:27:44.670857Z","level":"WARN","fields":{"message":"Disabling exllama because desc_act=True\n"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:27:44.680576Z","level":"WARN","fields":{"message":"Disabling exllama because desc_act=True\n"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:27:46.719346Z","level":"INFO","fields":{"message":"Waiting for shard to be ready..."},"target":"text_generation_launcher","span":{"rank":2,"name":"shard-manager"},"spans":[{"rank":2,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:46.719895Z","level":"INFO","fields":{"message":"Waiting for shard to be ready..."},"target":"text_generation_launcher","span":{"rank":1,"name":"shard-manager"},"spans":[{"rank":1,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:46.721199Z","level":"INFO","fields":{"message":"Waiting for shard to be ready..."},"target":"text_generation_launcher","span":{"rank":7,"name":"shard-manager"},"spans":[{"rank":7,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:46.721285Z","level":"INFO","fields":{"message":"Waiting for shard to be ready..."},"target":"text_generation_launcher","span":{"rank":3,"name":"shard-manager"},"spans":[{"rank":3,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:46.721340Z","level":"INFO","fields":{"message":"Waiting for shard to be ready..."},"target":"text_generation_launcher","span":{"rank":5,"name":"shard-manager"},"spans":[{"rank":5,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:46.721596Z","level":"INFO","fields":{"message":"Waiting for shard to be ready..."},"target":"text_generation_launcher","span":{"rank":0,"name":"shard-manager"},"spans":[{"rank":0,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:46.721625Z","level":"INFO","fields":{"message":"Waiting for shard to be ready..."},"target":"text_generation_launcher","span":{"rank":4,"name":"shard-manager"},"spans":[{"rank":4,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:46.721649Z","level":"INFO","fields":{"message":"Waiting for shard to be ready..."},"target":"text_generation_launcher","span":{"rank":6,"name":"shard-manager"},"spans":[{"rank":6,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:56.726537Z","level":"INFO","fields":{"message":"Waiting for shard to be ready..."},"target":"text_generation_launcher","span":{"rank":1,"name":"shard-manager"},"spans":[{"rank":1,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:56.727412Z","level":"INFO","fields":{"message":"Waiting for shard to be ready..."},"target":"text_generation_launcher","span":{"rank":7,"name":"shard-manager"},"spans":[{"rank":7,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:56.727758Z","level":"INFO","fields":{"message":"Waiting for shard to be ready..."},"target":"text_generation_launcher","span":{"rank":0,"name":"shard-manager"},"spans":[{"rank":0,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:56.727758Z","level":"INFO","fields":{"message":"Waiting for shard to be ready..."},"target":"text_generation_launcher","span":{"rank":4,"name":"shard-manager"},"spans":[{"rank":4,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:56.728723Z","level":"INFO","fields":{"message":"Waiting for shard to be ready..."},"target":"text_generation_launcher","span":{"rank":2,"name":"shard-manager"},"spans":[{"rank":2,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:56.729712Z","level":"INFO","fields":{"message":"Waiting for shard to be ready..."},"target":"text_generation_launcher","span":{"rank":6,"name":"shard-manager"},"spans":[{"rank":6,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:56.789470Z","level":"INFO","fields":{"message":"Waiting for shard to be ready..."},"target":"text_generation_launcher","span":{"rank":5,"name":"shard-manager"},"spans":[{"rank":5,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:27:56.789537Z","level":"INFO","fields":{"message":"Waiting for shard to be ready..."},"target":"text_generation_launcher","span":{"rank":3,"name":"shard-manager"},"spans":[{"rank":3,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:28:00.122698Z","level":"INFO","fields":{"message":"Server started at unix:///tmp/text-generation-server-1\n"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:28:00.128606Z","level":"INFO","fields":{"message":"Shard ready in 23.416943941s"},"target":"text_generation_launcher","span":{"rank":1,"name":"shard-manager"},"spans":[{"rank":1,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:28:00.172115Z","level":"INFO","fields":{"message":"Server started at unix:///tmp/text-generation-server-7\n"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:28:00.173000Z","level":"INFO","fields":{"message":"Server started at unix:///tmp/text-generation-server-6\n"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:28:00.173202Z","level":"INFO","fields":{"message":"Server started at unix:///tmp/text-generation-server-5\n"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:28:00.192257Z","level":"INFO","fields":{"message":"Shard ready in 23.478435369s"},"target":"text_generation_launcher","span":{"rank":5,"name":"shard-manager"},"spans":[{"rank":5,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:28:00.192698Z","level":"INFO","fields":{"message":"Server started at unix:///tmp/text-generation-server-4\n"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:28:00.229492Z","level":"INFO","fields":{"message":"Shard ready in 23.515906614s"},"target":"text_generation_launcher","span":{"rank":7,"name":"shard-manager"},"spans":[{"rank":7,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:28:00.230141Z","level":"INFO","fields":{"message":"Shard ready in 23.51759965s"},"target":"text_generation_launcher","span":{"rank":4,"name":"shard-manager"},"spans":[{"rank":4,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:28:00.232377Z","level":"INFO","fields":{"message":"Shard ready in 23.518370109s"},"target":"text_generation_launcher","span":{"rank":6,"name":"shard-manager"},"spans":[{"rank":6,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:28:00.234184Z","level":"INFO","fields":{"message":"Server started at unix:///tmp/text-generation-server-3\n"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:28:00.244927Z","level":"INFO","fields":{"message":"Server started at unix:///tmp/text-generation-server-2\n"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:28:00.292331Z","level":"INFO","fields":{"message":"Shard ready in 23.578778002s"},"target":"text_generation_launcher","span":{"rank":3,"name":"shard-manager"},"spans":[{"rank":3,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:28:00.294750Z","level":"INFO","fields":{"message":"Server started at unix:///tmp/text-generation-server-0\n"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:28:00.330250Z","level":"INFO","fields":{"message":"Shard ready in 23.61680067s"},"target":"text_generation_launcher","span":{"rank":0,"name":"shard-manager"},"spans":[{"rank":0,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:28:00.331639Z","level":"INFO","fields":{"message":"Shard ready in 23.621140439s"},"target":"text_generation_launcher","span":{"rank":2,"name":"shard-manager"},"spans":[{"rank":2,"name":"shard-manager"}]}
{"timestamp":"2024-02-02T09:28:00.425507Z","level":"INFO","fields":{"message":"Starting Webserver"},"target":"text_generation_launcher"}
{"timestamp":"2024-02-02T09:28:00.515552Z","level":"INFO","message":"Using the Hugging Face API","target":"text_generation_router","filename":"router/src/main.rs","line_number":175}
{"timestamp":"2024-02-02T09:28:00.515657Z","level":"INFO","message":"Token file not found \"/root/.cache/huggingface/token\"","log.target":"hf_hub","log.module_path":"hf_hub","log.file":"/usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/hf-hub-0.3.2/src/lib.rs","log.line":55,"target":"hf_hub","filename":"/usr/local/cargo/registry/src/index.crates.io-6f17d22bba15001f/hf-hub-0.3.2/src/lib.rs","line_number":55}
{"timestamp":"2024-02-02T09:28:00.766173Z","level":"INFO","message":"Serving revision 5dff7f40fb6223ef3883b6d5d190ed2b7de05480 of model TheBloke/CodeLlama-70B-Python-GPTQ","target":"text_generation_router","filename":"router/src/main.rs","line_number":425}
{"timestamp":"2024-02-02T09:28:00.766202Z","level":"INFO","message":"Using the Hugging Face API to retrieve tokenizer config","target":"text_generation_router","filename":"router/src/main.rs","line_number":236}
{"timestamp":"2024-02-02T09:28:00.779551Z","level":"INFO","message":"Warming up model","target":"text_generation_router","filename":"router/src/main.rs","line_number":285}
{"timestamp":"2024-02-02T09:29:19.246058Z","level":"INFO","message":"Setting max batch total tokens to 401504","target":"text_generation_router","filename":"router/src/main.rs","line_number":321}
{"timestamp":"2024-02-02T09:29:19.246216Z","level":"INFO","message":"Connected","target":"text_generation_router","filename":"router/src/main.rs","line_number":322}
{"timestamp":"2024-02-02T09:29:19.246224Z","level":"WARN","message":"Invalid hostname, defaulting to 0.0.0.0","target":"text_generation_router","filename":"router/src/main.rs","line_number":327}
{"timestamp":"2024-02-02T09:33:24.087569Z","level":"INFO","message":"Success","target":"text_generation_router::server","filename":"router/src/server.rs","line_number":298,"span":{"inference_time":"13.098219255s","parameters":"GenerateParameters { best_of: Some(1), temperature: Some(0.09), repetition_penalty: None, top_k: None, top_p: None, typical_p: None, do_sample: false, max_new_tokens: Some(200), return_full_text: None, stop: [\"<step>\"], truncate: None, watermark: false, details: false, decoder_input_details: false, seed: None, top_n_tokens: None }","queue_time":"101.169µs","seed":"Some(10400089617208726180)","time_per_token":"65.491096ms","total_time":"13.098796567s","validation_time":"476.263µs","name":"generate"},"spans":[{"inference_time":"13.098219255s","parameters":"GenerateParameters { best_of: Some(1), temperature: Some(0.09), repetition_penalty: None, top_k: None, top_p: None, typical_p: None, do_sample: false, max_new_tokens: Some(200), return_full_text: None, stop: [\"<step>\"], truncate: None, watermark: false, details: false, decoder_input_details: false, seed: None, top_n_tokens: None }","queue_time":"101.169µs","seed":"Some(10400089617208726180)","time_per_token":"65.491096ms","total_time":"13.098796567s","validation_time":"476.263µs","name":"generate"}]}
curl http://localhost:3002/generate -H 'Content-Type: application/json' -d '{
"inputs": "<s>Source: system\n\n You are an helpful assistant <step> Source: user\n\n write a simple python module. <step> Source: assistant\nDestination: user\n\n ",
"parameters": {"temperature": 0.09, "max_new_tokens": 200, "best_of": 1, "stop": ["<step>"]}
}'

{"generated_text":"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\r\n"}

Sign up or log in to comment