import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import time
import logging
import os
import json
from datetime import datetime
# --- Configuration ---
# Model weights and tokenizer are loaded from separate repos
MODEL_ID_FOR_WEIGHTS = "unsloth/gemma-3-1b-it-bnb-4bit"  # model weights are loaded from here
TOKENIZER_ID = "google/gemma-3-1b-it"                    # tokenizer is loaded from the original repo

# Use CPU (HF Spaces free tier)
DEVICE = "cpu"

# Memory file path
MEMORY_FILE = "thought_memory.json"

# Thinking interval (seconds)
THINKING_INTERVAL_SECONDS = 120  # e.g. think every 2 minutes

# Maximum number of tokens to generate
MAX_NEW_TOKENS = 150

# Initial thought prompt
INITIAL_PROMPT = "You are an AI that keeps thinking on its own. Your first thought is as follows:"

# Logging setup
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# (load_memory and save_memory are unchanged from the previous version)
def load_memory():
    """Load the previous thought history from the memory file."""
    if os.path.exists(MEMORY_FILE):
        try:
            with open(MEMORY_FILE, 'r', encoding='utf-8') as f:
                memory = json.load(f)
            if not isinstance(memory, list):
                logging.warning(f"Contents of {MEMORY_FILE} are not a list; resetting memory.")
                return []
            logging.info(f"Loaded {len(memory)} previous thoughts.")
            return memory
        except json.JSONDecodeError:
            logging.error(f"Failed to parse {MEMORY_FILE}; resetting memory.")
            return []
        except Exception as e:
            logging.error(f"Error while loading memory: {e}", exc_info=True)
            return []
    else:
        logging.info("No memory file found; starting fresh.")
        return []

def save_memory(memory):
    """Save the current thought history to the memory file."""
    try:
        with open(MEMORY_FILE, 'w', encoding='utf-8') as f:
            json.dump(memory, f, ensure_ascii=False, indent=2)
        logging.debug(f"Saved memory to {MEMORY_FILE}.")
    except Exception as e:
        logging.error(f"Error while saving memory: {e}", exc_info=True)
def generate_thought(tokenizer, model, prompt_history):
    """Generate the next thought based on the given prompt history."""
    if not prompt_history:
        chat = [{"role": "user", "content": INITIAL_PROMPT}]
    else:
        last_thought = prompt_history[-1]['content']
        prompt = (
            f"Previous thought: \"{last_thought}\"\n\n"
            "Based on this thought, what is the next thought, question, or expanded idea that comes to mind? "
            "Please answer concisely."
        )
        chat = [{"role": "user", "content": prompt}]
    prompt_formatted = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    logging.info(f"--- Model input prompt ---\n{prompt_formatted}\n--------------------------")
    inputs = tokenizer(prompt_formatted, return_tensors="pt").to(DEVICE)
    start_time = time.time()
    logging.info("Starting model inference...")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            pad_token_id=tokenizer.eos_token_id
        )
    end_time = time.time()
    logging.info(f"Model inference finished ({end_time - start_time:.2f}s)")
    input_token_length = inputs.input_ids.shape[1]
    generated_ids = outputs[0, input_token_length:]
    new_thought_raw = tokenizer.decode(generated_ids, skip_special_tokens=True)
    logging.info(f"Raw model output: {new_thought_raw}")
    return new_thought_raw.strip()
if __name__ == "__main__":
    logging.info("Starting AI thinking process...")
    logging.info(f"Tokenizer ID: {TOKENIZER_ID}")
    logging.info(f"Model Weights ID: {MODEL_ID_FOR_WEIGHTS}")
    logging.info(f"Device: {DEVICE}")

    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        logging.info("Using Hugging Face token.")
    else:
        logging.info("No Hugging Face token set (add it to Secrets if needed).")

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16  # switch to float32 if bfloat16 is not supported on CPU
        # bnb_4bit_compute_dtype=torch.float32  # use this line if bfloat16 causes problems
    )
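    # NOTE (assumption): bitsandbytes 4-bit quantization primarily targets CUDA GPUs;
    # CPU support is limited, so loading 4-bit weights on a CPU-only Space may fail here.
    # A possible fallback is to drop quantization_config and load the original
    # google/gemma-3-1b-it weights in float32 instead.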
    try:
        logging.info("Loading tokenizer...")
        # Use the original model ID for the tokenizer
        tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_ID, token=hf_token)

        logging.info("Loading quantized model... (applying bitsandbytes config)")
        # Use the pre-quantized model ID for the weights
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID_FOR_WEIGHTS,
            quantization_config=bnb_config,
            device_map=DEVICE,
            token=hf_token
        )
        model.eval()
        logging.info("Model and tokenizer loaded.")
    except Exception as e:
        logging.error(f"Fatal error while loading model or tokenizer: {e}", exc_info=True)
        # If the CPU does not support bfloat16, the error may surface here
        exit(1)
    thought_history = load_memory()

    try:
        while True:
            logging.info("=== Starting new thought cycle ===")
            new_thought = generate_thought(tokenizer, model, thought_history)
            if new_thought:
                logging.info(f"New thought: {new_thought}")
                thought_entry = {"role": "assistant", "content": new_thought, "timestamp": datetime.now().isoformat()}
                thought_history.append(thought_entry)
                save_memory(thought_history)
            else:
                logging.warning("Model produced an empty thought.")
            logging.info(f"Waiting {THINKING_INTERVAL_SECONDS}s until the next thought...")
            time.sleep(THINKING_INTERVAL_SECONDS)
    except KeyboardInterrupt:
        logging.info("AI process stopped by user request.")
    except Exception as e:
        logging.error(f"Error in main loop: {e}", exc_info=True)
    finally:
        logging.info("AI thinking process terminating. Attempting final memory save.")
        save_memory(thought_history)