| | import os |
| | import shutil |
| | import torch |
| | from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig |
| |
|
| | |
# Source checkpoint to quantize, and destination folder for the packaged result.
INPUT_PATH = "/home/nashen/deepseek-ocr/DeepSeek-OCR-master/DeepSeek-OCR-vllm/model/"
OUTPUT_PATH = "./DeepSeek-OCR-4bit-Quantized"

print("⏳ 正在加载并量化模型 (这可能需要几分钟)...")
| |
|
# 4-bit NF4 quantization with nested (double) quantization; matmuls are
# computed in bfloat16.  The listed modules are excluded from quantization
# and stay in full precision — presumably the vision encoder / projector /
# head are too quality-sensitive to quantize (NOTE(review): confirm).
_FULL_PRECISION_MODULES = [
    "sam_model", "model.sam_model",
    "vision_model", "model.vision_model",
    "projector", "model.projector",
    "lm_head", "embed_tokens",
]

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    llm_int8_skip_modules=_FULL_PRECISION_MODULES,
)
| |
|
| | |
# Load the checkpoint with quantization applied on the fly.  trust_remote_code
# is needed because the repo ships its own modeling/configuration .py files;
# device_map="auto" lets accelerate place layers across available devices.
model = AutoModel.from_pretrained(
    INPUT_PATH,
    trust_remote_code=True,
    quantization_config=quantization_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(INPUT_PATH, trust_remote_code=True)

print("✅ 模型加载完成,准备保存...")
| |
|
| | |
| | |
# Persist the quantized weights (safetensors format) and the tokenizer files.
# save_pretrained creates OUTPUT_PATH if it does not exist yet.
model.save_pretrained(OUTPUT_PATH, safe_serialization=True)
tokenizer.save_pretrained(OUTPUT_PATH)

print(f"✅ 权重已保存至: {OUTPUT_PATH}")
| |
|
| | |
| | |
| | |
| |
|
print("📦 正在复制 Python 架构文件...")

# Copy the custom modeling/configuration .py files and auxiliary .json files
# (processor/tokenizer configs, special-tokens maps) that from_pretrained
# needs at load time.
# FIX: a hard-coded `files_to_copy` whitelist used to be defined here but was
# never consulted — the loop always copied every .py/.json.  The dead list is
# removed; copying everything keeps the original behavior and is robust to
# upstream file renames.
os.makedirs(OUTPUT_PATH, exist_ok=True)  # no-op when the save step already created it

for filename in os.listdir(INPUT_PATH):
    if not filename.endswith((".py", ".json")):
        continue
    src = os.path.join(INPUT_PATH, filename)
    dst = os.path.join(OUTPUT_PATH, filename)
    # Do not clobber the freshly saved quantized config files with the
    # originals from the unquantized checkpoint.
    if os.path.exists(dst) and "config" in filename:
        continue
    shutil.copy2(src, dst)

print(f"🎉 打包完成!成品位于: {OUTPUT_PATH}")
print(" 该文件夹现在的体积应该是 ~2.7 GB 左右。")
| |
|