Imsg

Paused

Makhinur committed on May 1

Commit

a0a8457

verified ·

1 Parent(s): 14839a7

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -38,14 +38,16 @@ app = FastAPI()
 # --- Llama.cpp Language Model Setup (Local CPU Inference) ---
 # Repository on Hugging Face Hub containing the Qwen1.5 1.8B GGUF file
-# Using TheBloke's repo as a common source for GGUF models.
-LLM_MODEL_REPO = "TheBloke/Qwen1.5-1.8B-Chat-GGUF"
 # Specify the filename for a Q4_K_M quantized version (good balance of speed/quality on CPU)
-# CORRECTED FILENAME based on verified file listing on the Hub:
-# Changed hyphen before Q4_K_M to a DOT.
-LLM_MODEL_FILE = "qwen1_5-1.8b-chat.Q4_K_M.gguf" # This should now be correct.
 # Original model name for the tokenizer (needed by transformers)
 ORIGINAL_MODEL_NAME = "Qwen/Qwen1.5-1.8B-Chat"
 tokenizer = None # Using transformers tokenizer for chat templating
@@ -84,6 +86,7 @@ def load_language_model():
         # --- Download GGUF model file (using huggingface_hub) ---
         print(f"Downloading GGUF model file: {LLM_MODEL_FILE} from {LLM_MODEL_REPO}...")
         model_path = hf_hub_download(
             repo_id=LLM_MODEL_REPO,
             filename=LLM_MODEL_FILE,

 # --- Llama.cpp Language Model Setup (Local CPU Inference) ---
 # Repository on Hugging Face Hub containing the Qwen1.5 1.8B GGUF file
+# Using the OFFICIAL Qwen repository:
+LLM_MODEL_REPO = "Qwen/Qwen1.5-1.8B-Chat-GGUF" # Updated to official repo
 # Specify the filename for a Q4_K_M quantized version (good balance of speed/quality on CPU)
+# Based on DIRECT VERIFICATION of files in the OFFICIAL Qwen repo:
+# Changed DOT before Q4_K_M back to a HYPHEN to match the official repo's filename.
+LLM_MODEL_FILE = "qwen1_5-1.8b-chat-Q4_K_M.gguf" # Correct filename for the OFFICIAL repo
 # Original model name for the tokenizer (needed by transformers)
+# This remains the same as it points to the base model repository for the tokenizer files.
 ORIGINAL_MODEL_NAME = "Qwen/Qwen1.5-1.8B-Chat"
 tokenizer = None # Using transformers tokenizer for chat templating
         # --- Download GGUF model file (using huggingface_hub) ---
         print(f"Downloading GGUF model file: {LLM_MODEL_FILE} from {LLM_MODEL_REPO}...")
+        # hf_hub_download downloads the file to the Hugging Face cache directory
         model_path = hf_hub_download(
             repo_id=LLM_MODEL_REPO,
             filename=LLM_MODEL_FILE,