# core/interviewer.py
"""
🇬🇧 Interviewer logic module (no instructions)
Generates random MBTI-style questions using the fine-tuned model.
🇷🇺 Модуль интервьюера.
Использует fine-tuned модель для генерации вопросов без инструкций.
"""
import random
import re
import torch
from transformers import AutoModelForSeq2SeqLM, T5Tokenizer
# --------------------------------------------------------------
# 1️⃣ Model setup
# --------------------------------------------------------------
QG_MODEL = "f3nsmart/ft-flan-t5-base-qgen"
# ✅ Force the original (slow) SentencePiece tokenizer
tokenizer = T5Tokenizer.from_pretrained(QG_MODEL, use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained(QG_MODEL)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()
print(f"✅ Loaded interviewer model (slow tokenizer): {QG_MODEL}")
print(f"Device set to use {device}")
# --------------------------------------------------------------
# 2️⃣ Thematic seed prompts (aligned with the MBTI axes, without naming MBTI directly)
# --------------------------------------------------------------
BASE_INSTRUCTION = (
    "Generate one natural, open-ended question about human thinking, emotions, or decision-making. "
    "Avoid mentioning MBTI or personality types directly. "
    "Do not ask what type the person belongs to. "
    "You may include ideas related to intuition, logic, feelings, perception, or judgment naturally."
)
PROMPTS = [
    f"{BASE_INSTRUCTION} Explore how people usually recharge their energy and interact with others.",
    f"{BASE_INSTRUCTION} Explore the difference between noticing small details and seeing the bigger picture.",
    f"{BASE_INSTRUCTION} Ask about trusting intuition versus relying on concrete evidence in daily life.",
    f"{BASE_INSTRUCTION} Ask about what typically inspires or motivates someone to take action.",
    f"{BASE_INSTRUCTION} Create a question about balancing emotions and logic when making decisions.",
    f"{BASE_INSTRUCTION} Write about preferences between careful planning and spontaneous action.",
    f"{BASE_INSTRUCTION} Explore how individuals deal with uncertainty or unexpected changes.",
    f"{BASE_INSTRUCTION} Ask about understanding other people’s emotions or empathy in relationships.",
    f"{BASE_INSTRUCTION} Create a question about staying organized versus adapting flexibly to new situations.",
    f"{BASE_INSTRUCTION} Explore curiosity, creativity, and how people find meaning in what they do."
]
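# For reference, each entry expands to BASE_INSTRUCTION followed by a theme,
# e.g. the first one reads:
#   "Generate one natural, open-ended question about human thinking, emotions,
#    or decision-making. ... Explore how people usually recharge their energy
#    and interact with others."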
# --------------------------------------------------------------
# 3️⃣ Text cleanup
# --------------------------------------------------------------
def _clean_question(text: str) -> str:
    """Keep only the first phrase that ends with a question mark."""
    text = text.strip()
    # Cut at the first '?' so any trailing generation is discarded.
    m = re.search(r"(.+?\?)", text)
    if m:
        text = m.group(1)
    text = text.replace("\n", " ").strip()
    # Very short outputs are likely fragments; at least normalize casing.
    if len(text.split()) < 3:
        text = text.capitalize()
    if not text.endswith("?"):
        text += "?"
    return text
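# A quick illustration of the cleaner (hypothetical model outputs, not taken
# from the repo): everything after the first '?' is dropped, and a missing
# question mark is appended.
#
#   _clean_question("How do you recharge after a long day? Explain why.")
#   -> "How do you recharge after a long day?"
#   _clean_question("Describe your ideal weekend")
#   -> "Describe your ideal weekend?"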
# --------------------------------------------------------------
# 4️⃣ Question generation
# --------------------------------------------------------------
def generate_question(user_id: str = "default_user", **kwargs) -> str:
    """Generate a single MBTI-style question from a random seed prompt."""
    prompt = random.choice(PROMPTS)
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    with torch.no_grad():
        out = model.generate(
            **inputs,
            do_sample=True,
            top_p=0.9,
            temperature=1.1,
            repetition_penalty=1.5,
            max_new_tokens=80,
        )
    text = tokenizer.decode(out[0], skip_special_tokens=True)
    return _clean_question(text)
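# --------------------------------------------------------------
# 5️⃣ Quick smoke test
# --------------------------------------------------------------
# Minimal usage sketch (assumes the script is run directly and the model
# weights are downloadable): samples a few questions and prints them.
if __name__ == "__main__":
    for _ in range(3):
        print(generate_question())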