```python
import gradio as gr
import torch
import os
# Qwen2.5-VL has its own model class in recent transformers releases;
# Qwen2VLForConditionalGeneration is for the older Qwen2-VL checkpoints.
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
from peft import PeftModel
from huggingface_hub import HfApi
import shutil
import gc

# Configuration - UPDATE THESE
LORA_REPO = "your-username/qwen25vl-lora-adapter"      # Your LoRA adapter repo
OUTPUT_REPO = "your-username/qwen25vl-invoice-merged"  # Output repo for the merged model
BASE_MODEL = "unsloth/Qwen2.5-VL-7B-Instruct"
HF_TOKEN = os.environ.get("HF_TOKEN")  # Set in the Space's secrets

def merge_model():
    """Merge the LoRA adapter into the base model and upload the result to the Hub."""
    try:
        # Use the Space's scratch disk for downloads and intermediate files
        work_dir = "/tmp/merge"
        if os.path.exists(work_dir):
            shutil.rmtree(work_dir)
        os.makedirs(work_dir)

        yield "Loading base model..."

        # device_map="auto" keeps layers on GPU and offloads to CPU if VRAM runs short
        model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            BASE_MODEL,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
            cache_dir=work_dir,
            low_cpu_mem_usage=True
        )
| yield "Loading processor..." | |
| processor = AutoProcessor.from_pretrained( | |
| BASE_MODEL, | |
| trust_remote_code=True, | |
| cache_dir=work_dir | |
| ) | |
| yield "Loading LoRA adapter from Hub..." | |
| model = PeftModel.from_pretrained(model, LORA_REPO) | |
| yield "Merging weights... This may take a few minutes..." | |
| model = model.merge_and_unload() | |
| # Clear GPU cache | |
| torch.cuda.empty_cache() | |
| gc.collect() | |
| yield "Saving merged model..." | |
| output_dir = os.path.join(work_dir, "merged") | |
| os.makedirs(output_dir, exist_ok=True) | |
| # Save with smaller shards | |
| model.save_pretrained( | |
| output_dir, | |
| max_shard_size="2GB", | |
| safe_serialization=True | |
| ) | |
| processor.save_pretrained(output_dir) | |
| yield "Uploading to HuggingFace Hub..." | |
| api = HfApi(token=HF_TOKEN) | |
| # Create output repo | |
| api.create_repo(OUTPUT_REPO, exist_ok=True, private=True) | |
| # Upload the merged model | |
| api.upload_folder( | |
| folder_path=output_dir, | |
| repo_id=OUTPUT_REPO, | |
| repo_type="model", | |
| commit_message="Merged LoRA adapter with base model" | |
| ) | |
| # Cleanup | |
| shutil.rmtree(work_dir) | |
| yield f"β Success! Model merged and uploaded to: {OUTPUT_REPO}" | |

    except Exception as e:
        yield f"❌ Error: {str(e)}"
        # Cleanup on error
        if os.path.exists("/tmp/merge"):
            shutil.rmtree("/tmp/merge")

# Create Gradio interface
def create_interface():
    with gr.Blocks(title="Qwen2.5-VL LoRA Merger") as demo:
        gr.Markdown(
            """
            # Qwen2.5-VL LoRA Merger
            This Space merges your LoRA adapter with the base model and uploads the result to the Hugging Face Hub.

            **Configuration:**
            - Base Model: `{}`
            - LoRA Adapter: `{}`
            - Output Repo: `{}`
            """.format(BASE_MODEL, LORA_REPO, OUTPUT_REPO)
        )
        status = gr.Textbox(label="Status", lines=10)
        merge_btn = gr.Button("Start Merge", variant="primary")

        # merge_model is a generator, so each yield streams a status update into the textbox
        merge_btn.click(
            fn=merge_model,
            inputs=[],
            outputs=[status]
        )

    return demo

if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
```
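For the Space to build, its `requirements.txt` must cover the libraries imported above plus `accelerate`, which `device_map="auto"` and `low_cpu_mem_usage=True` rely on. A minimal sketch; pin versions to match whatever your training environment used:

```text
gradio
torch
transformers
peft
accelerate
huggingface_hub
```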
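Once the merge finishes, the output repo is a standalone checkpoint that loads without `peft`. A minimal inference sketch, assuming the placeholder repo name above and a local `invoice.png` (both hypothetical):

```python
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

repo = "your-username/qwen25vl-invoice-merged"  # placeholder: the Space's OUTPUT_REPO
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    repo, torch_dtype=torch.float16, device_map="auto"
)
processor = AutoProcessor.from_pretrained(repo)

# Build a chat-style prompt with one image and one text instruction
image = Image.open("invoice.png")
messages = [{"role": "user", "content": [
    {"type": "image"},
    {"type": "text", "text": "Extract the invoice number and total amount."},
]}]
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(model.device)
generated = model.generate(**inputs, max_new_tokens=256)

# Strip the prompt tokens before decoding so only the model's answer is printed
answer = processor.batch_decode(
    generated[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True
)[0]
print(answer)
```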