muryshev committed on
Commit 804ddc3
1 Parent(s): 751678e

Hf related changes

.gitignore CHANGED
@@ -1 +1,2 @@
- *.bat
+ *.bat
+ __pycache__
Dockerfile CHANGED
@@ -1,13 +1,13 @@
  # Use an official Python runtime as a parent image
  FROM nvidia/cuda:12.3.1-runtime-ubuntu22.04
  # Set Python to use unbuffered mode
- ENV PYTHONUNBUFFERED 1
+ ENV PYTHONUNBUFFERED=1
  RUN apt-get update && \
      apt-get install -y --no-install-recommends python3-pip python3-dev && \
      rm -rf /var/lib/apt/lists/*
  # Set the working directory in the container
  RUN mkdir /var/www
- ENV HOME /var/www
+ ENV HOME=/var/www
  WORKDIR /var/www
  
  # Copy the current directory contents into the container at /app
@@ -15,18 +15,15 @@ COPY . /var/www
  
  RUN pip install -r requirements.txt
  RUN pip install torch==1.12.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
- # RUN python -c "import nltk; nltk.download('punkt')"
- # RUN python -c "import nltk; nltk.download('stopwords')"
- # RUN python -m nltk.downloader -d $HOME/nltk_data punkt stopwords
  
- EXPOSE 7866
+ EXPOSE 7860
  
  RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=true \
      HF_TOKEN=$(cat /run/secrets/HF_TOKEN) && export HF_TOKEN
  
  # Define environment variable
- ENV FLASK_APP app.py \
-     HF_MODEL_NAME "muryshev/e5_large_rus_finetuned_20240120_122822_ep6"
+ ENV FLASK_APP=app.py \
+     HF_MODEL_NAME="muryshev/e5_large_rus_finetuned_20240120_122822_ep6"
  
  # Run app.py when the container launches
- CMD flask run --host=0.0.0.0 --port=7869
+ CMD flask run --host=0.0.0.0 --port=7860
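The EXPOSE and CMD ports now agree on 7860, which is the port Hugging Face Spaces expects by default unless app_port is overridden. On the application side these ENV values are read back through os.environ; a minimal sketch of that consumption, assuming the model is loaded via transformers (the repo's actual loading code lives in semantic_search.py and may differ):

import os

from transformers import AutoModel, AutoTokenizer

# Values injected by the Dockerfile's ENV instructions / the Space secret.
hf_token = os.environ.get("HF_TOKEN", "")
hf_model_name = os.environ.get("HF_MODEL_NAME",
                               "muryshev/e5_large_rus_finetuned_20240120_122822_ep6")

# Hypothetical loading call: `token=` is the current transformers keyword;
# older releases expect `use_auth_token=` instead.
tokenizer = AutoTokenizer.from_pretrained(hf_model_name, token=hf_token or None)
model = AutoModel.from_pretrained(hf_model_name, token=hf_token or None)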
app.py CHANGED
@@ -3,10 +3,8 @@ import os
  from flask import Flask, jsonify, request
  from semantic_search import SemanticSearch
  
- print(os.environ.get("HF_TOKEN", ""))
- print(os.environ.get("HF_MODEL_NAME", ""))
  
- search = SemanticSearch('cuda')
+ search = SemanticSearch()
  search.test_search()
  
  app = Flask(__name__)
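The routes themselves are outside this hunk; below is a minimal sketch of the kind of endpoint such an app would expose. The /search route, request shape, and a search.search() method are assumptions for illustration, not taken from the repo:

from flask import Flask, jsonify, request
from semantic_search import SemanticSearch

search = SemanticSearch()   # device selection now happens inside the class
search.test_search()
app = Flask(__name__)       # the container starts this via `flask run` (see the Dockerfile CMD)

@app.route("/search", methods=["POST"])   # hypothetical route
def run_search():
    payload = request.get_json(force=True)
    query = payload.get("query", "")
    results = search.search(query)        # assumed method returning ranked matches
    return jsonify({"results": results})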
{semantic_search_service_data → legal_info_search_data}/data_ids.json RENAMED
File without changes
{semantic_search_service_data → legal_info_search_data}/data_jsons_20240119.pkl RENAMED
File without changes
semantic_search.py CHANGED
@@ -16,8 +16,7 @@ from legal_info_search_utils.metrics import calculate_metrics_at_k
  
  
  global_data_path = os.environ.get("GLOBAL_DATA_PATH", "legal_info_search_data/")
- global_model_path = os.environ.get("GLOBAL_MODEL_PATH",
-                                    "legal_info_search_model/20240120_122822_ep6/")
+ global_model_path = os.environ.get("GLOBAL_MODEL_PATH", "e5_large_rus_finetuned_20240120_122822_ep6")
  
  # labeled consultations
  data_path_consult = os.environ.get("DATA_PATH_CONSULT",
@@ -51,7 +50,7 @@ hf_token = os.environ.get("HF_TOKEN", "")
  hf_model_name = os.environ.get("HF_MODEL_NAME", "")
  
  class SemanticSearch:
-     def __init__(self, device, index_type="IndexFlatIP", do_embedding_norm=True,
+     def __init__(self, index_type="IndexFlatIP", do_embedding_norm=True,
                   faiss_batch_size=8, do_normalization=True):
          self.device = device
          self.do_embedding_norm = do_embedding_norm
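The constructor keeps index_type="IndexFlatIP" alongside the normalization flags; with L2-normalized embeddings, an inner-product FAISS index ranks by cosine similarity. A self-contained sketch of that pattern (dimensions and arrays below are illustrative stand-ins, not the repo's data):

import faiss
import numpy as np

d = 1024  # embedding size of e5-large models

# Stand-in embeddings; in the service these come from the fine-tuned e5 model.
doc_embeddings = np.random.rand(100, d).astype("float32")
query_embedding = np.random.rand(1, d).astype("float32")

# L2-normalize so that inner product equals cosine similarity.
faiss.normalize_L2(doc_embeddings)
faiss.normalize_L2(query_embedding)

index = faiss.IndexFlatIP(d)   # exact inner-product index
index.add(doc_embeddings)

scores, doc_ids = index.search(query_embedding, 5)   # top-5 matches
print(doc_ids[0], scores[0])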