import os
import time
from typing import Optional, Tuple

from transformers import AutoTokenizer, PreTrainedTokenizer

from .base_engine import BaseEngine
from ..configs import MODEL_PATH

# Tokenizer source for the debug engine; falls back to the real MODEL_PATH
# when FAKE_MODEL_PATH is not set in the environment.
FAKE_MODEL_PATH = os.environ.get("FAKE_MODEL_PATH", MODEL_PATH)
FAKE_RESPONSE = "Wow that's very very cool, please try again."


class DebugEngine(BaseEngine):
    """
    Debugging engine that never loads a real model: every generation call
    streams the canned FAKE_RESPONSE instead.
    """

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        self._model = None
        self._tokenizer = None

    @property
    def tokenizer(self) -> PreTrainedTokenizer:
        # Lazily load the tokenizer on first access so constructing the
        # engine stays cheap.
        if self._tokenizer is None:
            self._tokenizer = AutoTokenizer.from_pretrained(FAKE_MODEL_PATH, trust_remote_code=True)
        return self._tokenizer

    def load_model(self):
        # Nothing to load; accessing self.tokenizer forces tokenizer init.
        print(f"Load fake model with tokenizer: {self.tokenizer}")

    def generate_yield_string(self, prompt: str, temperature: float, max_tokens: int, stop_strings: Optional[Tuple[str, ...]] = None, **kwargs):
        # Stream progressively longer prefixes of FAKE_RESPONSE, pairing each
        # with a token count, to mimic token-by-token generation.
        num_tokens = len(self.tokenizer.encode(prompt))
        response = FAKE_RESPONSE
        for i in range(len(response)):
            time.sleep(0.01)  # simulate per-token generation latency
            yield response[:i], num_tokens
        # Final yield: the full response with the token count of prompt + response.
        num_tokens = len(self.tokenizer.encode(prompt + response))
        yield response, num_tokens

    def batch_generate(self, prompts, temperature, max_tokens, stop_strings: Optional[Tuple[str, ...]] = None, **kwargs):
        # Echo each prompt with a marker instead of running real batch inference.
        return [p + " -- Test" for p in prompts]
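

# --- Usage sketch (illustrative; not part of the original module) ---
# A minimal way to exercise the streaming interface, assuming the file is run
# as a module (e.g. `python -m <package>.debug_engine`) so the relative
# imports resolve, FAKE_MODEL_PATH points at a directory with a loadable
# tokenizer, and BaseEngine.__init__ requires no extra arguments.
if __name__ == "__main__":
    engine = DebugEngine()
    engine.load_model()
    for partial_text, num_tokens in engine.generate_yield_string(
        "Hello!", temperature=0.7, max_tokens=64
    ):
        # Each step yields a growing prefix of FAKE_RESPONSE plus a token count.
        print(f"{partial_text!r} ({num_tokens} tokens)")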