# core/interviewer.py
"""
🇬🇧 Interviewer logic module (no instructions)
Generates random MBTI-style questions using the fine-tuned model.
🇷🇺 Модуль интервьюера.
Использует fine-tuned модель для генерации вопросов без инструкций.
"""
import random
import re
import torch
from transformers import AutoModelForSeq2SeqLM, T5Tokenizer
# --------------------------------------------------------------
# 1️⃣ Model setup
# --------------------------------------------------------------
QG_MODEL = "f3nsmart/ft-flan-t5-base-qgen"
# ✅ Force the original (slow) SentencePiece tokenizer
tokenizer = T5Tokenizer.from_pretrained(QG_MODEL, use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained(QG_MODEL)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()
print(f"✅ Loaded interviewer model (slow tokenizer): {QG_MODEL}")
print(f"Device set to use {device}")
# --------------------------------------------------------------
# 2️⃣ Thematic seed prompts (aligned with the MBTI axes, without naming MBTI directly)
# --------------------------------------------------------------
BASE_INSTRUCTION = (
    "Generate one natural, open-ended question about human thinking, emotions, or decision-making. "
    "Avoid mentioning MBTI or personality types directly. "
    "Do not ask what type the person belongs to. "
    "You may include ideas related to intuition, logic, feelings, perception, or judgment naturally."
)
PROMPTS = [
    f"{BASE_INSTRUCTION} Explore how people usually recharge their energy and interact with others.",
    f"{BASE_INSTRUCTION} Explore the difference between noticing small details and seeing the bigger picture.",
    f"{BASE_INSTRUCTION} Ask about trusting intuition versus relying on concrete evidence in daily life.",
    f"{BASE_INSTRUCTION} Ask about what typically inspires or motivates someone to take action.",
    f"{BASE_INSTRUCTION} Create a question about balancing emotions and logic when making decisions.",
    f"{BASE_INSTRUCTION} Write about preferences between careful planning and spontaneous action.",
    f"{BASE_INSTRUCTION} Explore how individuals deal with uncertainty or unexpected changes.",
    f"{BASE_INSTRUCTION} Ask about understanding other people’s emotions or empathy in relationships.",
    f"{BASE_INSTRUCTION} Create a question about staying organized versus adapting flexibly to new situations.",
    f"{BASE_INSTRUCTION} Explore curiosity, creativity, and how people find meaning in what they do."
]
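# For reference, each entry expands to BASE_INSTRUCTION followed by a theme,
# e.g. the first one reads:
#   "Generate one natural, open-ended question about human thinking, emotions,
#    or decision-making. ... Explore how people usually recharge their energy
#    and interact with others."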
# --------------------------------------------------------------
# 3️⃣ Text cleanup
# --------------------------------------------------------------
def _clean_question(text: str) -> str:
    """Keep only the first phrase that ends with a question mark."""
    text = text.strip()
    # Cut at the first '?' so any trailing generation is discarded.
    m = re.search(r"(.+?\?)", text)
    if m:
        text = m.group(1)
    text = text.replace("\n", " ").strip()
    # Very short outputs are likely fragments; at least normalize casing.
    if len(text.split()) < 3:
        text = text.capitalize()
    if not text.endswith("?"):
        text += "?"
    return text
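# A quick illustration of the cleaner (hypothetical model outputs, not taken
# from the repo): everything after the first '?' is dropped, and a missing
# question mark is appended.
#
#   _clean_question("How do you recharge after a long day? Explain why.")
#   -> "How do you recharge after a long day?"
#   _clean_question("Describe your ideal weekend")
#   -> "Describe your ideal weekend?"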
# --------------------------------------------------------------
# 4️⃣ Question generation
# --------------------------------------------------------------
def generate_question(user_id: str = "default_user", **kwargs) -> str:
    """Generate a single MBTI-style question from a random seed prompt."""
    prompt = random.choice(PROMPTS)
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    with torch.no_grad():
        out = model.generate(
            **inputs,
            do_sample=True,
            top_p=0.9,
            temperature=1.1,
            repetition_penalty=1.5,
            max_new_tokens=80,
        )
    text = tokenizer.decode(out[0], skip_special_tokens=True)
    return _clean_question(text)
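# --------------------------------------------------------------
# 5️⃣ Quick smoke test
# --------------------------------------------------------------
# Minimal usage sketch (assumes the script is run directly and the model
# weights are downloadable): samples a few questions and prints them.
if __name__ == "__main__":
    for _ in range(3):
        print(generate_question())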