# woq-inference / app.py
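"""Gradio demo for weight-only-quantized (TEQ) model inference.

Lists TEQ checkpoints published under the fbaldassarri Hub account, downloads
the selected .pt/.json pair, and shells out to teq_inference.py for CPU-only
text generation.
"""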
import os
import subprocess

import gradio as gr
from huggingface_hub import hf_hub_download, list_models, list_repo_files

HF_USER = "fbaldassarri"
TEQ_KEYWORD = "TEQ"
def list_teq_models():
    """Return ids of the user's Hub models whose name contains the TEQ keyword."""
    models = list_models(author=HF_USER)
    return [model.id for model in models if TEQ_KEYWORD in model.id]
def list_model_files(model_id):
    """Split a repo's files into weight checkpoints (.pt) and configs (.json)."""
    files = list_repo_files(model_id)
    weights = [f for f in files if f.endswith(".pt")]
    configs = [f for f in files if f.endswith(".json")]
    return weights, configs
def run_teq_inference(model_id, weights_file, config_file, base_model, prompt, max_new_tokens, debug):
    """Download the selected files and run teq_inference.py in a subprocess."""
    if not weights_file or not config_file:
        return "Please select both a weights file and a config file."
    local_model_dir = f"./models/{model_id.replace('/', '_')}"
    os.makedirs(local_model_dir, exist_ok=True)
    hf_hub_download(model_id, weights_file, local_dir=local_model_dir)
    hf_hub_download(model_id, config_file, local_dir=local_model_dir)
    cmd = [
        "python", "teq_inference.py",
        "--model_dir", local_model_dir,
        "--weights_file", weights_file,
        "--config_file", config_file,
        "--base_model", base_model,
        "--prompt", prompt,
        "--max_new_tokens", str(max_new_tokens),
        "--device", "cpu",
    ]
    if debug:
        cmd.append("--debug")
    result = subprocess.run(cmd, capture_output=True, text=True)
    output = result.stdout + "\n" + result.stderr
    # The inference script prints the generation after this marker; return only
    # that part when present, otherwise surface full stdout/stderr for debugging.
    marker = "Generated text:"
    if marker in output:
        return output.split(marker)[-1].strip()
    return output
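# teq_inference.py itself is not part of this file; run_teq_inference assumes it
# accepts the flags passed above and prints the "Generated text:" marker. A
# hypothetical sketch of that contract, inferred from the subprocess call (the
# real script ships with this Space and may differ):
#
#   import argparse
#   parser = argparse.ArgumentParser()
#   parser.add_argument("--model_dir")
#   parser.add_argument("--weights_file")
#   parser.add_argument("--config_file")
#   parser.add_argument("--base_model")
#   parser.add_argument("--prompt")
#   parser.add_argument("--max_new_tokens", type=int)
#   parser.add_argument("--device", default="cpu")
#   parser.add_argument("--debug", action="store_true")
#   args = parser.parse_args()
#   ...
#   print("Generated text:", text)  # marker parsed by run_teq_inference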
def update_files(model_id):
    """Refresh the weights/config dropdowns when a different model is selected."""
    weights, configs = list_model_files(model_id)
    # Default to the first file of each kind if available, else leave unset.
    weights_val = weights[0] if weights else None
    configs_val = configs[0] if configs else None
    return (
        gr.Dropdown(choices=weights, value=weights_val, label="Weights File (.pt)", interactive=True),
        gr.Dropdown(choices=configs, value=configs_val, label="Config File (.json)", interactive=True),
    )
def build_ui():
    """Assemble the Gradio Blocks interface."""
    teq_models = list_teq_models()
    with gr.Blocks() as demo:
        gr.Markdown("# TEQ Quantized Model Inference Demo")
        model_id = gr.Dropdown(teq_models, label="Select TEQ Model", interactive=True)
        weights_file = gr.Dropdown(choices=[], label="Weights File (.pt)", interactive=True)
        config_file = gr.Dropdown(choices=[], label="Config File (.json)", interactive=True)
        base_model = gr.Textbox(label="Base Model Name", value="facebook/opt-350m")
        prompt = gr.Textbox(label="Prompt", value="Once upon a time, a little girl")
        max_new_tokens = gr.Slider(10, 512, value=100, label="Max New Tokens")
        debug = gr.Checkbox(label="Debug Mode")
        output = gr.Textbox(label="Generated Text", lines=10)
        run_btn = gr.Button("Run Inference")
        # Repopulate the file dropdowns whenever the selected model changes.
        model_id.change(
            update_files,
            inputs=model_id,
            outputs=[weights_file, config_file],
        )
        run_btn.click(
            run_teq_inference,
            inputs=[model_id, weights_file, config_file, base_model, prompt, max_new_tokens, debug],
            outputs=output,
        )
    return demo
if __name__ == "__main__":
    build_ui().launch()
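# To try this locally (a sketch, assuming teq_inference.py sits next to this
# file and the dependencies are installed):
#
#   pip install gradio huggingface_hub
#   python app.py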