# Spaces: Running on Zero
import os | |
import gradio as gr | |
import spaces | |
import torch | |
import logging | |
import time | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
from transformers.utils import logging as hf_logging | |
# Application log: records at DEBUG and above are appended to /tmp/app.log.
# The file handler is created explicitly with UTF-8 so that non-ASCII log
# messages (this app logs emoji) are written regardless of the process locale.
logging.basicConfig(
    handlers=[logging.FileHandler("/tmp/app.log", encoding="utf-8")],
    level=logging.DEBUG,
    format="%(asctime)s %(levelname)s: %(message)s",
)
logging.info("Starting app.py logging")
# Send `transformers` library logs to its default handler and to
# /tmp/transformers.log at INFO verbosity. (A preceding set_verbosity_debug()
# call was immediately overridden by set_verbosity_info(), so INFO has always
# been the effective level; only the effective call is kept.)
hf_logging.set_verbosity_info()
hf_logging.enable_default_handler()
# Attach the file handler BEFORE enabling the explicit format: the format is
# applied only to handlers that are already bound when it is called, so a
# handler added afterwards would be left with the bare default format.
hf_logging.add_handler(logging.FileHandler("/tmp/transformers.log", encoding="utf-8"))
hf_logging.enable_explicit_format()
# Identifier handed to from_pretrained for both the tokenizer and the model.
model_id = "futurehouse/ether0"

# Loaded once at import time and shared by every request.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Weights are requested in half precision (float16); device_map="auto" leaves
# device placement to the loader.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
def chat_fn(prompt, max_tokens=512):
    """Generate the model's reply to a single user message.

    The prompt is wrapped in a one-message chat, rendered to text with the
    tokenizer's chat template, passed to ``model.generate`` with
    low-temperature sampling, and only the newly generated tokens are decoded.
    The duration of each stage is written to the log.

    Args:
        prompt: Text of the user message.
        max_tokens: Upper bound on the number of newly generated tokens.
            ``None`` falls back to 512; any other value is converted with
            ``int`` and clamped to the range 1..32000.

    Returns:
        The decoded completion, or the string "⚠️ Generation failed" when
        anything goes wrong, including a ``max_tokens`` value that cannot be
        converted to an integer. The traceback is written to the log.
    """
    # NOTE(review): `spaces` is imported at file level but nothing is decorated
    # with `@spaces.GPU`; if this runs on ZeroGPU hardware, confirm whether
    # this handler needs that decorator.
    t0 = time.perf_counter()
    try:
        # Validate inside the try block so a bad value produces the same
        # user-facing failure message as any other error instead of raising.
        # None is treated as "use the default" (presumably what an empty
        # numeric input delivers -- verify against the UI).
        requested = 512 if max_tokens is None else int(max_tokens)
        # At least one new token must be requested; cap the upper end too.
        max_tokens = max(1, min(requested, 32_000))

        messages = [{"role": "user", "content": prompt}]
        chat_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        # The rendered template already carries the special tokens it needs;
        # letting the tokenizer add them again could duplicate e.g. BOS.
        inputs = tokenizer(chat_prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
        t1 = time.perf_counter()
        logging.info("🧠 Tokenization complete in %.2fs", t1 - t0)

        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=0.1,
            pad_token_id=tokenizer.eos_token_id,
        )
        t2 = time.perf_counter()
        logging.info("⚡️ Generation complete in %.2fs (max_tokens=%s)", t2 - t1, max_tokens)

        # Skip the prompt tokens so only the completion is decoded.
        prompt_length = inputs['input_ids'].shape[1]
        generated_text = tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True,
        )
        t3 = time.perf_counter()
        logging.info("🔓 Decoding complete in %.2fs (output length: %s)", t3 - t2, len(generated_text))
        return generated_text
    except Exception:
        # Top-level UI boundary: log the traceback, show a short message.
        logging.exception("❌ Exception during generation")
        return "⚠️ Generation failed"
# Web UI: a prompt box and a numeric token budget feed chat_fn, whose return
# value is shown as plain text.
prompt_box = gr.Textbox(label="prompt")
max_tokens_box = gr.Number(label="max_tokens", value=512, precision=0)

demo = gr.Interface(
    fn=chat_fn,
    inputs=[prompt_box, max_tokens_box],
    outputs="text",
    title="Ether0",
)
# Start the app with ssr_mode turned off.
demo.launch(ssr_mode=False)