ffreemt committed on
Commit
cde2d91
1 Parent(s): 0e238ed
Files changed (2) hide show
  1. Dockerfile +4 -3
  2. m3_server.py +3 -1
Dockerfile CHANGED
@@ -1,6 +1,7 @@
1
  FROM python:3.10
2
  ENV PIP_ROOT_USER_ACTION=ignore \
3
  TZ=Asia/Shanghai
 
4
 
5
  WORKDIR /app
6
  COPY . .
@@ -20,8 +21,8 @@ RUN pip install --no-cache-dir --upgrade pip && \
20
  # CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
21
  # CMD ["TRANSFORMERS_CACHE=./", "infinity_emb", "--model-name-or-path", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "--port", "7860"]
22
 
23
- # x ["sh", "-c", "'FOO=BAR python app.py'"]
24
  # CMD ["python", "m3_server.py"]
 
 
25
 
26
- # CMD ["sh", "start-m3-server.sh"] # OK
27
- CMD ["sh", "-c", "HF_HOME=/tmp/cache", "python", "m3_server.py"]
 
1
  FROM python:3.10
2
  ENV PIP_ROOT_USER_ACTION=ignore \
3
  TZ=Asia/Shanghai
4
+ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
5
 
6
  WORKDIR /app
7
  COPY . .
 
21
  # CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
22
  # CMD ["TRANSFORMERS_CACHE=./", "infinity_emb", "--model-name-or-path", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "--port", "7860"]
23
 
 
24
  # CMD ["python", "m3_server.py"]
25
+ # x ["sh", "-c", "'FOO=BAR python m3_server.py'"]
26
+ # CMD ["sh", "-c", "HF_HOME=/tmp/cache", "python", "m3_server.py"]
27
 
28
+ CMD ["sh", "start-m3-server.sh"] # OK
 
m3_server.py CHANGED
@@ -14,10 +14,12 @@ from FlagEmbedding import BGEM3FlagModel
14
  from pydantic import BaseModel
15
  from starlette.status import HTTP_504_GATEWAY_TIMEOUT
16
 
 
17
  Path("/tmp/cache").mkdir(exist_ok=True)
18
  os.environ["HF_HOME"] = "/tmp/cache"
19
  os.environ["TRANSFORMERS_CACHE"] = "/tmp/cache"
20
  # does not quite work
 
21
 
22
  batch_size = 2 # gpu batch_size in order of your available vram
23
  max_request = 10 # max request for future improvements on api calls / gpu batches (for now is pretty basic)
@@ -242,5 +244,5 @@ async def rerank(request: RerankRequest):
242
 
243
  if __name__ == "__main__":
244
  import uvicorn
245
-
246
  uvicorn.run(app, host="0.0.0.0", port=port)
 
14
  from pydantic import BaseModel
15
  from starlette.status import HTTP_504_GATEWAY_TIMEOUT
16
 
17
+ _ = """
18
  Path("/tmp/cache").mkdir(exist_ok=True)
19
  os.environ["HF_HOME"] = "/tmp/cache"
20
  os.environ["TRANSFORMERS_CACHE"] = "/tmp/cache"
21
  # does not quite work
22
+ # """
23
 
24
  batch_size = 2 # gpu batch_size in order of your available vram
25
  max_request = 10 # max request for future improvements on api calls / gpu batches (for now is pretty basic)
 
244
 
245
  if __name__ == "__main__":
246
  import uvicorn
247
+ print("started")
248
  uvicorn.run(app, host="0.0.0.0", port=port)