Makhinur committed on
Commit
a0a8457
·
verified ·
1 Parent(s): 14839a7

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +8 -5
main.py CHANGED
@@ -38,14 +38,16 @@ app = FastAPI()
38
 
39
  # --- Llama.cpp Language Model Setup (Local CPU Inference) ---
40
  # Repository on Hugging Face Hub containing the Qwen1.5 1.8B GGUF file
41
- # Using TheBloke's repo as a common source for GGUF models.
42
- LLM_MODEL_REPO = "TheBloke/Qwen1.5-1.8B-Chat-GGUF"
 
43
  # Specify the filename for a Q4_K_M quantized version (good balance of speed/quality on CPU)
44
- # CORRECTED FILENAME based on verified file listing on the Hub:
45
- # Changed hyphen before Q4_K_M to a DOT.
46
- LLM_MODEL_FILE = "qwen1_5-1.8b-chat.Q4_K_M.gguf" # This should now be correct.
47
 
48
  # Original model name for the tokenizer (needed by transformers)
 
49
  ORIGINAL_MODEL_NAME = "Qwen/Qwen1.5-1.8B-Chat"
50
 
51
  tokenizer = None # Using transformers tokenizer for chat templating
@@ -84,6 +86,7 @@ def load_language_model():
84
 
85
  # --- Download GGUF model file (using huggingface_hub) ---
86
  print(f"Downloading GGUF model file: {LLM_MODEL_FILE} from {LLM_MODEL_REPO}...")
 
87
  model_path = hf_hub_download(
88
  repo_id=LLM_MODEL_REPO,
89
  filename=LLM_MODEL_FILE,
 
38
 
39
  # --- Llama.cpp Language Model Setup (Local CPU Inference) ---
40
  # Repository on Hugging Face Hub containing the Qwen1.5 1.8B GGUF file
41
+ # Using the OFFICIAL Qwen repository:
42
+ LLM_MODEL_REPO = "Qwen/Qwen1.5-1.8B-Chat-GGUF" # Updated to official repo
43
+
44
  # Specify the filename for a Q4_K_M quantized version (good balance of speed/quality on CPU)
45
+ # Based on DIRECT VERIFICATION of files in the OFFICIAL Qwen repo:
46
+ # Changed DOT before Q4_K_M back to a HYPHEN to match the official repo's filename.
47
+ LLM_MODEL_FILE = "qwen1_5-1.8b-chat-Q4_K_M.gguf" # Correct filename for the OFFICIAL repo
48
 
49
  # Original model name for the tokenizer (needed by transformers)
50
+ # This remains the same as it points to the base model repository for the tokenizer files.
51
  ORIGINAL_MODEL_NAME = "Qwen/Qwen1.5-1.8B-Chat"
52
 
53
  tokenizer = None # Using transformers tokenizer for chat templating
 
86
 
87
  # --- Download GGUF model file (using huggingface_hub) ---
88
  print(f"Downloading GGUF model file: {LLM_MODEL_FILE} from {LLM_MODEL_REPO}...")
89
+ # hf_hub_download downloads the file to the Hugging Face cache directory
90
  model_path = hf_hub_download(
91
  repo_id=LLM_MODEL_REPO,
92
  filename=LLM_MODEL_FILE,