# woq-inference / app.py
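"""Gradio demo for weight-only-quantized (TEQ) model inference.

Lists TEQ checkpoints published under the fbaldassarri Hub account, downloads
the selected .pt/.json pair, and shells out to teq_inference.py for CPU-only
text generation.
"""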
import os
import subprocess

import gradio as gr
from huggingface_hub import hf_hub_download, list_models, list_repo_files

HF_USER = "fbaldassarri"
TEQ_KEYWORD = "TEQ"
def list_teq_models():
    """Return ids of the user's Hub models whose name contains the TEQ keyword."""
    models = list_models(author=HF_USER)
    return [model.id for model in models if TEQ_KEYWORD in model.id]
def list_model_files(model_id):
    """Split a repo's files into weight checkpoints (.pt) and configs (.json)."""
    files = list_repo_files(model_id)
    weights = [f for f in files if f.endswith(".pt")]
    configs = [f for f in files if f.endswith(".json")]
    return weights, configs
def run_teq_inference(model_id, weights_file, config_file, base_model, prompt, max_new_tokens, debug):
    """Download the selected files and run teq_inference.py in a subprocess."""
    if not weights_file or not config_file:
        return "Please select both a weights file and a config file."
    local_model_dir = f"./models/{model_id.replace('/', '_')}"
    os.makedirs(local_model_dir, exist_ok=True)
    hf_hub_download(model_id, weights_file, local_dir=local_model_dir)
    hf_hub_download(model_id, config_file, local_dir=local_model_dir)
    cmd = [
        "python", "teq_inference.py",
        "--model_dir", local_model_dir,
        "--weights_file", weights_file,
        "--config_file", config_file,
        "--base_model", base_model,
        "--prompt", prompt,
        "--max_new_tokens", str(max_new_tokens),
        "--device", "cpu",
    ]
    if debug:
        cmd.append("--debug")
    result = subprocess.run(cmd, capture_output=True, text=True)
    output = result.stdout + "\n" + result.stderr
    # The inference script prints the generation after this marker; return only
    # that part when present, otherwise surface full stdout/stderr for debugging.
    marker = "Generated text:"
    if marker in output:
        return output.split(marker)[-1].strip()
    return output
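# teq_inference.py itself is not part of this file; run_teq_inference assumes it
# accepts the flags passed above and prints the "Generated text:" marker. A
# hypothetical sketch of that contract, inferred from the subprocess call (the
# real script ships with this Space and may differ):
#
#   import argparse
#   parser = argparse.ArgumentParser()
#   parser.add_argument("--model_dir")
#   parser.add_argument("--weights_file")
#   parser.add_argument("--config_file")
#   parser.add_argument("--base_model")
#   parser.add_argument("--prompt")
#   parser.add_argument("--max_new_tokens", type=int)
#   parser.add_argument("--device", default="cpu")
#   parser.add_argument("--debug", action="store_true")
#   args = parser.parse_args()
#   ...
#   print("Generated text:", text)  # marker parsed by run_teq_inference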
def update_files(model_id):
    """Refresh the weights/config dropdowns when a different model is selected."""
    weights, configs = list_model_files(model_id)
    # Default to the first file of each kind if available, else leave unset.
    weights_val = weights[0] if weights else None
    configs_val = configs[0] if configs else None
    return (
        gr.Dropdown(choices=weights, value=weights_val, label="Weights File (.pt)", interactive=True),
        gr.Dropdown(choices=configs, value=configs_val, label="Config File (.json)", interactive=True),
    )
def build_ui():
    """Assemble the Gradio Blocks interface."""
    teq_models = list_teq_models()
    with gr.Blocks() as demo:
        gr.Markdown("# TEQ Quantized Model Inference Demo")
        model_id = gr.Dropdown(teq_models, label="Select TEQ Model", interactive=True)
        weights_file = gr.Dropdown(choices=[], label="Weights File (.pt)", interactive=True)
        config_file = gr.Dropdown(choices=[], label="Config File (.json)", interactive=True)
        base_model = gr.Textbox(label="Base Model Name", value="facebook/opt-350m")
        prompt = gr.Textbox(label="Prompt", value="Once upon a time, a little girl")
        max_new_tokens = gr.Slider(10, 512, value=100, label="Max New Tokens")
        debug = gr.Checkbox(label="Debug Mode")
        output = gr.Textbox(label="Generated Text", lines=10)
        run_btn = gr.Button("Run Inference")
        # Repopulate the file dropdowns whenever the selected model changes.
        model_id.change(
            update_files,
            inputs=model_id,
            outputs=[weights_file, config_file],
        )
        run_btn.click(
            run_teq_inference,
            inputs=[model_id, weights_file, config_file, base_model, prompt, max_new_tokens, debug],
            outputs=output,
        )
    return demo
if __name__ == "__main__":
    build_ui().launch()
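# To try this locally (a sketch, assuming teq_inference.py sits next to this
# file and the dependencies are installed):
#
#   pip install gradio huggingface_hub
#   python app.py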