kouki321 committed on
Commit
9eef4ac
·
verified ·
1 Parent(s): 5e09876

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -112,12 +112,13 @@ def load_model_and_tokenizer():
112
  # Create FastAPI app
113
  app = FastAPI(title="DeepSeek QA with KV Cache API")
114
 
115
- # Global variables to store the cache, origin length, and model/tokenizer
116
- cache_store = {}
117
 
 
 
118
  # Initialize model and tokenizer at startup
119
  model, tokenizer = load_model_and_tokenizer()
120
-
 
121
  class QueryRequest(BaseModel):
122
  query: str
123
  max_new_tokens: Optional[int] = 150
 
112
  # Create FastAPI app
113
  app = FastAPI(title="DeepSeek QA with KV Cache API")
114
 
 
 
115
 
116
+ os.environ["HF_HOME"] = "/tmp/hf_cache"
117
+ os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
118
  # Initialize model and tokenizer at startup
119
  model, tokenizer = load_model_and_tokenizer()
120
+ # Global variables to store the cache, origin length, and model/tokenizer
121
+ cache_store = {}
122
  class QueryRequest(BaseModel):
123
  query: str
124
  max_new_tokens: Optional[int] = 150