import gradio as gr
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer, AutoConfig
from huggingface_hub import HfApi, login
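
# Environment sketch (assumption, not pinned by this file): the app expects
# `pip install autoawq transformers gradio huggingface_hub` and a CUDA-capable
# GPU with enough memory to hold the full-precision model during quantization.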

def quantize_model(
    model_id: str,
    hf_token: str,
    repo_name: str,
    progress=gr.Progress(track_tqdm=True)
):
    """Quantize a Hub model to 4-bit AWQ and push the result to `repo_name`."""
    try:
        # Validate credentials first
        login(token=hf_token, add_to_git_credential=True)
        api = HfApi(token=hf_token)
        
        # Check model accessibility
        try:
            api.model_info(model_id)
        except Exception as e:
            raise ValueError(f"Model access error: {str(e)}. Check:\n1. Token permissions\n2. Model existence\n3. Accept model terms at https://huggingface.co/{model_id}")

        # Load config with proper auth
        config = AutoConfig.from_pretrained(
            model_id,
            token=hf_token,
            trust_remote_code=True
        )
        
        # Llama 3.1-style configs ship an extended rope_scaling dict; collapse it
        # to the legacy {"type", "factor"} schema that older loaders accept
        if hasattr(config, 'rope_scaling') and isinstance(config.rope_scaling, dict):
            config.rope_scaling = {
                "type": config.rope_scaling.get("rope_type", "linear"),
                "factor": config.rope_scaling.get("factor", 1.0)
            }
        
        # Load model with validated credentials
        model = AutoAWQForCausalLM.from_pretrained(
            model_id,
            config=config,
            token=hf_token,
            trust_remote_code=True,
            device_map="auto"
        )
        
        # Load tokenizer with same credentials
        tokenizer = AutoTokenizer.from_pretrained(
            model_id,
            token=hf_token,
            trust_remote_code=True
        )
        
        # Quantize with standard AWQ settings (4-bit, group size 128);
        # the kernel version is picked heuristically from the model name
        model.quantize(tokenizer, quant_config={
            "zero_point": True,
            "q_group_size": 128,
            "w_bit": 4,
            "version": "GEMM" if "llama" in model_id.lower() else "GEMV"
        })
        
        # Save quantized weights plus tokenizer locally, then upload the folder
        save_path = f"{model_id.split('/')[-1]}-awq"
        model.save_quantized(save_path)
        tokenizer.save_pretrained(save_path)
        api.create_repo(repo_name, exist_ok=True)
        api.upload_folder(folder_path=save_path, repo_id=repo_name)
        
        return f"βœ… Success!\nSaved: {save_path}\nPushed to: {repo_name}"
    
    except Exception as e:
        return f"❌ Critical Error:\n{str(e)}"

with gr.Blocks() as app:
    gr.Markdown("## πŸ” Secure AutoAWQ Quantizer")
    
    with gr.Row():
        model_id = gr.Textbox(label="Model ID", 
                            placeholder="meta-llama/Meta-Llama-3-8B-Instruct",
                            info="Must have access rights")
        hf_token = gr.Textbox(label="HF Token", 
                            type="password",
                            info="Required for gated models")
        repo_name = gr.Textbox(label="Destination Repo",
                             info="Format: username/repo-name")
    
    go_btn = gr.Button("Start Quantization", variant="primary")
    output = gr.Markdown()
    
    go_btn.click(
        quantize_model,
        inputs=[model_id, hf_token, repo_name],
        outputs=output
    )

app.launch()
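
# --- Usage sketch (assumption, not part of the app) ---------------------------
# A minimal example of loading a repo produced by this app back through AutoAWQ.
# The repo id below is a hypothetical placeholder; run this on a CUDA machine.
#
#   from awq import AutoAWQForCausalLM
#   from transformers import AutoTokenizer
#
#   quant_repo = "username/Meta-Llama-3-8B-Instruct-awq"  # hypothetical repo id
#   model = AutoAWQForCausalLM.from_quantized(quant_repo, fuse_layers=True)
#   tokenizer = AutoTokenizer.from_pretrained(quant_repo)
#   inputs = tokenizer("Hello, world!", return_tensors="pt").to("cuda")
#   print(tokenizer.decode(model.generate(**inputs, max_new_tokens=32)[0]))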