Update app.py
app.py CHANGED
@@ -112,12 +112,13 @@ def load_model_and_tokenizer():
 # Create FastAPI app
 app = FastAPI(title="DeepSeek QA with KV Cache API")
 
-# Global variables to store the cache, origin length, and model/tokenizer
-cache_store = {}
 
+os.environ["HF_HOME"] = "/tmp/hf_cache"
+os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
 # Initialize model and tokenizer at startup
 model, tokenizer = load_model_and_tokenizer()
-
+# Global variables to store the cache, origin length, and model/tokenizer
+cache_store = {}
 class QueryRequest(BaseModel):
     query: str
     max_new_tokens: Optional[int] = 150
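
For reference, a minimal sketch of how this region of app.py reads after the commit. The import lines and the stub comment for load_model_and_tokenizer are assumptions inferred from the names used in the hunk (os, FastAPI, BaseModel, Optional); only the statements between them are taken verbatim from the diff. Redirecting HF_HOME and TRANSFORMERS_CACHE to /tmp/hf_cache before the model is loaded presumably points the Hugging Face download cache at a writable location in the container.

# Assumed imports, inferred from the identifiers used in this hunk
import os
from typing import Optional
from fastapi import FastAPI
from pydantic import BaseModel

# load_model_and_tokenizer() is defined earlier in app.py (see the hunk header) and is unchanged here.

# Create FastAPI app
app = FastAPI(title="DeepSeek QA with KV Cache API")

# Point the Hugging Face cache at a writable path before any model download happens
os.environ["HF_HOME"] = "/tmp/hf_cache"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"

# Initialize model and tokenizer at startup
model, tokenizer = load_model_and_tokenizer()

# Global variables to store the cache, origin length, and model/tokenizer
cache_store = {}

class QueryRequest(BaseModel):
    query: str
    max_new_tokens: Optional[int] = 150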