Update app.py
app.py CHANGED
@@ -112,12 +112,13 @@ def load_model_and_tokenizer():
 # Create FastAPI app
 app = FastAPI(title="DeepSeek QA with KV Cache API")
 
-# Global variables to store the cache, origin length, and model/tokenizer
-cache_store = {}
 
+os.environ["HF_HOME"] = "/tmp/hf_cache"
+os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
 # Initialize model and tokenizer at startup
 model, tokenizer = load_model_and_tokenizer()
-
+# Global variables to store the cache, origin length, and model/tokenizer
+cache_store = {}
 class QueryRequest(BaseModel):
     query: str
     max_new_tokens: Optional[int] = 150
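
For reference, a minimal sketch of how this region of app.py reads after the commit. The import lines and the stub comment for load_model_and_tokenizer are assumptions inferred from the names used in the hunk (os, FastAPI, BaseModel, Optional); only the statements between them are taken verbatim from the diff. Redirecting HF_HOME and TRANSFORMERS_CACHE to /tmp/hf_cache before the model is loaded presumably points the Hugging Face download cache at a writable location in the container.

# Assumed imports, inferred from the identifiers used in this hunk
import os
from typing import Optional
from fastapi import FastAPI
from pydantic import BaseModel

# load_model_and_tokenizer() is defined earlier in app.py (see the hunk header) and is unchanged here.

# Create FastAPI app
app = FastAPI(title="DeepSeek QA with KV Cache API")

# Point the Hugging Face cache at a writable path before any model download happens
os.environ["HF_HOME"] = "/tmp/hf_cache"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"

# Initialize model and tokenizer at startup
model, tokenizer = load_model_and_tokenizer()

# Global variables to store the cache, origin length, and model/tokenizer
cache_store = {}

class QueryRequest(BaseModel):
    query: str
    max_new_tokens: Optional[int] = 150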