import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import time
import logging
import os
import json
from datetime import datetime
# --- Configuration ---
# ★★★ Model weights and tokenizer are loaded from separate IDs ★★★
MODEL_ID_FOR_WEIGHTS = "unsloth/gemma-3-1b-it-bnb-4bit"  # model weights are loaded from here
TOKENIZER_ID = "google/gemma-3-1b-it"  # the tokenizer is loaded from the original repo
# Use CPU (HF Spaces free tier)
DEVICE = "cpu"
# Memory file path
MEMORY_FILE = "thought_memory.json"
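# Note (assumption about the hosting environment): on the HF Spaces free tier the
# filesystem is ephemeral, so thought_memory.json is lost on restart unless
# persistent storage is attached.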
# Thinking interval (seconds)
THINKING_INTERVAL_SECONDS = 120  # e.g. think every 2 minutes
# Maximum number of new tokens to generate
MAX_NEW_TOKENS = 150
# Initial thought prompt
INITIAL_PROMPT = "I am an AI that continuously thinks on its own. My first thought is:"
# Logging setup
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# (load_memory and save_memory are unchanged from the previous version)
def load_memory():
    """Load the history of previous thoughts from the memory file."""
    if os.path.exists(MEMORY_FILE):
        try:
            with open(MEMORY_FILE, 'r', encoding='utf-8') as f:
                memory = json.load(f)
            if not isinstance(memory, list):
                logging.warning(f"{MEMORY_FILE} does not contain a list; resetting memory.")
                return []
            logging.info(f"Loaded {len(memory)} previous thoughts.")
            return memory
        except json.JSONDecodeError:
            logging.error(f"Failed to parse {MEMORY_FILE}; resetting memory.")
            return []
        except Exception as e:
            logging.error(f"Error while loading memory: {e}", exc_info=True)
            return []
    else:
        logging.info("No memory file found; starting fresh.")
        return []
def save_memory(memory):
    """Save the current thought history to the memory file."""
    try:
        with open(MEMORY_FILE, 'w', encoding='utf-8') as f:
            json.dump(memory, f, ensure_ascii=False, indent=2)
        logging.debug(f"Saved memory to {MEMORY_FILE}.")
    except Exception as e:
        logging.error(f"Error while saving memory: {e}", exc_info=True)
def generate_thought(tokenizer, model, prompt_history):
    """Generate the next thought based on the prompt history."""
    if not prompt_history:
        chat = [{"role": "user", "content": INITIAL_PROMPT}]
    else:
        last_thought = prompt_history[-1]['content']
        prompt = (f"Previous thought: \"{last_thought}\"\n\n"
                  "Based on this thought, what is the next idea, question, or expanded concept that comes to mind? Answer concisely.")
        chat = [{"role": "user", "content": prompt}]
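    # apply_chat_template wraps the single user turn in the model's chat format
    # and, with add_generation_prompt=True, appends the assistant turn marker so
    # generation starts in the right place.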
    prompt_formatted = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    logging.info(f"--- Model input prompt ---\n{prompt_formatted}\n-----------------------")
    inputs = tokenizer(prompt_formatted, return_tensors="pt").to(DEVICE)
    start_time = time.time()
    logging.info("Starting model inference...")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            pad_token_id=tokenizer.eos_token_id
        )
    end_time = time.time()
    logging.info(f"Model inference finished ({end_time - start_time:.2f}s).")
    # Decode only the newly generated tokens, skipping the prompt portion.
    input_token_length = inputs.input_ids.shape[1]
    generated_ids = outputs[0, input_token_length:]
    new_thought_raw = tokenizer.decode(generated_ids, skip_special_tokens=True)
    logging.info(f"Raw model output: {new_thought_raw}")
    return new_thought_raw.strip()
if __name__ == "__main__":
    logging.info("Starting AI thought process...")
    logging.info(f"Tokenizer ID: {TOKENIZER_ID}")
    logging.info(f"Model Weights ID: {MODEL_ID_FOR_WEIGHTS}")
    logging.info(f"Device: {DEVICE}")
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        logging.info("Using a Hugging Face token.")
    else:
        logging.info("No Hugging Face token set (add one to Secrets if needed).")
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16  # switch to float32 if the CPU lacks bfloat16 support
        # bnb_4bit_compute_dtype=torch.float32  # use this line if bfloat16 causes problems
    )
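    # Caveat (general bitsandbytes behavior, not verified for this exact setup):
    # 4-bit bitsandbytes quantization primarily targets CUDA GPUs, so loading a
    # bnb-4bit checkpoint on a CPU-only Space may fail regardless of the compute
    # dtype chosen above; the try/except below catches that case.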
    try:
        logging.info("Loading tokenizer...")
        # ★★★ Load the tokenizer from the original model ID ★★★
        tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_ID, token=hf_token)
        logging.info("Loading quantized model... (applying bitsandbytes config)")
        # ★★★ Load the model weights from the quantized model ID ★★★
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID_FOR_WEIGHTS,
            quantization_config=bnb_config,
            device_map=DEVICE,
            token=hf_token
        )
        model.eval()
        logging.info("Model and tokenizer loaded.")
    except Exception as e:
        logging.error(f"Fatal error while loading the model or tokenizer: {e}", exc_info=True)
        # If the CPU does not support bfloat16, the error may surface here.
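        # A possible fallback (a sketch, not exercised here) would be to retry with
        # the unquantized weights, e.g.
        # AutoModelForCausalLM.from_pretrained(TOKENIZER_ID, token=hf_token),
        # at the cost of a much larger memory footprint.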
        raise SystemExit(1)
    thought_history = load_memory()
    try:
        while True:
            logging.info("=== Starting a new thought cycle ===")
            new_thought = generate_thought(tokenizer, model, thought_history)
            if new_thought:
                logging.info(f"New thought generated: {new_thought}")
                thought_entry = {"role": "assistant", "content": new_thought, "timestamp": datetime.now().isoformat()}
                thought_history.append(thought_entry)
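                # Note: thought_history grows without bound, although only the most
                # recent entry is used as context in generate_thought; persisting the
                # full list keeps the file as a complete log.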
                save_memory(thought_history)
            else:
                logging.warning("The model generated an empty thought.")
            logging.info(f"Waiting {THINKING_INTERVAL_SECONDS}s until the next thought...")
            time.sleep(THINKING_INTERVAL_SECONDS)
    except KeyboardInterrupt:
        logging.info("AI process stopped at the user's request.")
    except Exception as e:
        logging.error(f"Error in the main loop: {e}", exc_info=True)
    finally:
        logging.info("AI thought process terminating. Attempting final memory save.")
        save_memory(thought_history)