# fantecchi/app.py
import os
import sys
# Make the interface package importable in case shared helpers are needed.
sys.path.append(os.path.join(os.path.dirname(__file__), "interface"))
# This file mirrors the core logic of interface/app.py with paths rewritten
# relative to the repository root: the interface version relies on
# "../training/..." style relative paths, which break when the app is
# launched from the root of the repo (as Spaces does).
import gradio as gr
import json
import glob
import re
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
# Paths adjusted for root execution
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
ADAPTER_PATH = "core/training_pipeline/trainers/outputs/fantecchi-nsfw-bot"
CHATBOT_PROFILES = "chatbots/profiles_json/*.json"
# Load Chatbot Profiles
profiles = {}
for filepath in glob.glob(CHATBOT_PROFILES):
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)
        profiles[data['name']] = data
print(f"Loaded {len(profiles)} chatbot profiles.")
model = None
tokenizer = None
def load_model():
    global model, tokenizer
    if model is not None:
        return "Model already loaded."
    print("Loading tokenizer and model (bfloat16)...")
    try:
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL,
            torch_dtype=torch.bfloat16,
            device_map="auto",  # let accelerate pick a GPU if the Space has one, else CPU
        )
        if os.path.exists(ADAPTER_PATH):
            from peft import PeftModel
            model = PeftModel.from_pretrained(model, ADAPTER_PATH)
            print("Successfully loaded custom LoRA adapter.")
        else:
            print(f"WARNING: Adapter path {ADAPTER_PATH} not found. Running base model.")
        return "Model loaded successfully!"
    except Exception as e:
        return f"Error loading model: {str(e)}"
FORMATTING_RULE = (
"\n\n[CRITICAL INSTRUCTION: You are controlling multiple NPCs. "
"Every single paragraph or line of dialogue/action MUST begin with the specific character's name followed by a colon.]"
)
def parse_multi_character_output(generated_text):
    """Split a generated reply into per-character messages based on 'Name:' prefixes."""
    parsed_messages = []
    pattern = re.compile(r'(?m)^([A-Za-z0-9\'\- ]+):\s*(.*?)(?=(?:^[A-Za-z0-9\'\- ]+:)|\Z)', re.DOTALL)
    matches = pattern.findall(generated_text.strip())
    if not matches:
        # No recognizable speaker prefixes: attribute the whole text to the narrator.
        parsed_messages.append({"role": "assistant", "content": generated_text.strip(), "metadata": {"title": "Narrator"}})
        return parsed_messages
    for name, text in matches:
        name = name.strip()
        text = text.strip()
        if text:
            parsed_messages.append({"role": "assistant", "content": text, "metadata": {"title": name}})
    return parsed_messages
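# Quick illustrative sanity check of the parser on a hypothetical two-speaker
# reply (the sample text and names below are made up, not from any profile):
if __debug__:
    _sample = "Mira: She smiles warmly.\nNarrator: The tavern falls quiet."
    assert [m["metadata"]["title"] for m in parse_multi_character_output(_sample)] == ["Mira", "Narrator"]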
def generate_response(history, profile_name, temp, top_p, max_tokens):
    # The user's latest message is already the last entry in `history`
    # (appended by user_submit below), so it is not passed in separately;
    # the textbox has been cleared by the time this runs.
    if model is None or tokenizer is None:
        history.append({"role": "assistant", "content": "Please load the model first.", "metadata": {"title": "System"}})
        yield history, 0
        return
    profile = profiles.get(profile_name)
    if not profile:
        history.append({"role": "assistant", "content": "Error: Profile not found.", "metadata": {"title": "System"}})
        yield history, 0
        return

    # Build the system prompt from the selected scenario profile.
    scenario = profile.get("scenario", "")
    chars = profile.get("characters", [])
    char_desc = "\n".join([f"{c.get('name', 'NPC')}: {c.get('behavior', '')} {c.get('appearance', '')}" for c in chars])
    sys_prompt = f"Scenario: {scenario}\nCharacters:\n{char_desc}\n" + FORMATTING_RULE

    # Re-serialize the last few turns into ChatML, prefixing assistant turns
    # with the speaking character's name so the model keeps attribution.
    chatml_messages = [{"role": "system", "content": str(sys_prompt)}]
    for msg in history[-10:]:
        content = str(msg["content"])
        if msg["role"] == "user":
            chatml_messages.append({"role": "user", "content": content})
        else:
            name = msg.get("metadata", {}).get("title", "Narrator")
            chatml_messages.append({"role": "assistant", "content": f"{name}: {content}"})

    prompt = tokenizer.apply_chat_template(chatml_messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    token_count = inputs.input_ids.shape[1]

    # Stream tokens from a background thread so the UI can update incrementally.
    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        **inputs,
        max_new_tokens=int(max_tokens),
        temperature=float(temp),
        top_p=float(top_p),
        do_sample=float(temp) > 0,
        pad_token_id=tokenizer.pad_token_id,
        streamer=streamer,
    )
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    full_response = ""
    history.append({"role": "assistant", "content": "...", "metadata": {"title": "Generating..."}})
    for new_text in streamer:
        full_response += new_text
        history[-1]["content"] = full_response
        yield history, token_count

    # Replace the streaming placeholder with per-character messages.
    parsed = parse_multi_character_output(full_response)
    if parsed:
        history.pop()
        history.extend(parsed)
    yield history, token_count
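# Note: gr.Chatbot(type="messages") consumes dicts of the form
# {"role": ..., "content": ..., "metadata": {"title": ...}}; the metadata
# title is what labels each message with the speaking character's name,
# which is how the multi-NPC attribution shows up in the UI.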
with gr.Blocks() as demo:
    gr.Markdown("# Fantecchi Hugging Face Interface")
    with gr.Row():
        with gr.Column(scale=1):
            profile_dropdown = gr.Dropdown(
                choices=list(profiles.keys()),
                label="Select Scenario",
                value=list(profiles.keys())[0] if profiles else None,
            )
            load_btn = gr.Button("Load Model", variant="primary")
            load_status = gr.Textbox(label="Status", interactive=False)
            token_display = gr.Number(label="Prompt tokens", interactive=False)
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=600, type="messages")
            msg_input = gr.Textbox(placeholder="Type your response...")

    def user_submit(user_text, history):
        # Clear the textbox and append the user's turn to the chat history.
        return "", history + [{"role": "user", "content": user_text}]

    msg_input.submit(
        user_submit, [msg_input, chatbot], [msg_input, chatbot]
    ).then(
        generate_response,
        [chatbot, profile_dropdown, gr.State(0.8), gr.State(0.9), gr.State(256)],
        [chatbot, token_display],  # generate_response yields (history, token_count)
    )
    load_btn.click(load_model, outputs=load_status)
if __name__ == "__main__":
    demo.launch()