Spaces:
Runtime error
Runtime error
Hf related changes
Browse files
.gitignore
CHANGED
@@ -1 +1,2 @@
|
|
1 |
-
*.bat
|
|
|
|
1 |
+
*.bat
|
2 |
+
__pycache__
|
Dockerfile
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
# Use the NVIDIA CUDA runtime image as the parent image (Python is installed via apt below)
|
2 |
FROM nvidia/cuda:12.3.1-runtime-ubuntu22.04
|
3 |
# Set Python to use unbuffered mode
|
4 |
-
ENV PYTHONUNBUFFERED
|
5 |
RUN apt-get update && \
|
6 |
apt-get install -y --no-install-recommends python3-pip python3-dev && \
|
7 |
rm -rf /var/lib/apt/lists/*
|
8 |
# Set the working directory in the container
|
9 |
RUN mkdir /var/www
|
10 |
-
ENV HOME
|
11 |
WORKDIR /var/www
|
12 |
|
13 |
# Copy the current directory contents into the container at /var/www
|
@@ -15,18 +15,15 @@ COPY . /var/www
|
|
15 |
|
16 |
RUN pip install -r requirements.txt
|
17 |
RUN pip install torch==1.12.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
|
18 |
-
# RUN python -c "import nltk; nltk.download('punkt')"
|
19 |
-
# RUN python -c "import nltk; nltk.download('stopwords')"
|
20 |
-
# RUN python -m nltk.downloader -d $HOME/nltk_data punkt stopwords
|
21 |
|
22 |
-
EXPOSE
|
23 |
|
24 |
RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=true \
|
25 |
HF_TOKEN=$(cat /run/secrets/HF_TOKEN) && export HF_TOKEN
|
26 |
|
27 |
# Define environment variable
|
28 |
-
ENV FLASK_APP
|
29 |
-
HF_MODEL_NAME
|
30 |
|
31 |
# Run app.py when the container launches
|
32 |
-
CMD flask run --host=0.0.0.0 --port=
|
|
|
1 |
# Use the NVIDIA CUDA runtime image as the parent image (Python is installed via apt below)
|
2 |
FROM nvidia/cuda:12.3.1-runtime-ubuntu22.04
|
3 |
# Set Python to use unbuffered mode
|
4 |
+
ENV PYTHONUNBUFFERED=1
|
5 |
RUN apt-get update && \
|
6 |
apt-get install -y --no-install-recommends python3-pip python3-dev && \
|
7 |
rm -rf /var/lib/apt/lists/*
|
8 |
# Set the working directory in the container
|
9 |
RUN mkdir /var/www
|
10 |
+
ENV HOME=/var/www
|
11 |
WORKDIR /var/www
|
12 |
|
13 |
# Copy the current directory contents into the container at /var/www
|
|
|
15 |
|
16 |
RUN pip install -r requirements.txt
|
17 |
RUN pip install torch==1.12.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
|
|
|
|
|
|
|
18 |
|
19 |
+
EXPOSE 7860
|
20 |
|
21 |
RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=true \
|
22 |
HF_TOKEN=$(cat /run/secrets/HF_TOKEN) && export HF_TOKEN
|
23 |
|
24 |
# Define environment variable
|
25 |
+
ENV FLASK_APP=app.py \
|
26 |
+
HF_MODEL_NAME="muryshev/e5_large_rus_finetuned_20240120_122822_ep6"
|
27 |
|
28 |
# Run app.py when the container launches
|
29 |
+
CMD flask run --host=0.0.0.0 --port=7860
|
app.py
CHANGED
@@ -3,10 +3,8 @@ import os
|
|
3 |
from flask import Flask, jsonify, request
|
4 |
from semantic_search import SemanticSearch
|
5 |
|
6 |
-
print(os.environ.get("HF_TOKEN", ""))
|
7 |
-
print(os.environ.get("HF_MODEL_NAME", ""))
|
8 |
|
9 |
-
search = SemanticSearch(
|
10 |
search.test_search()
|
11 |
|
12 |
app = Flask(__name__)
|
|
|
3 |
from flask import Flask, jsonify, request
|
4 |
from semantic_search import SemanticSearch
|
5 |
|
|
|
|
|
6 |
|
7 |
+
search = SemanticSearch()
|
8 |
search.test_search()
|
9 |
|
10 |
app = Flask(__name__)
|
{semantic_search_service_data → legal_info_search_data}/data_ids.json
RENAMED
File without changes
|
{semantic_search_service_data → legal_info_search_data}/data_jsons_20240119.pkl
RENAMED
File without changes
|
semantic_search.py
CHANGED
@@ -16,8 +16,7 @@ from legal_info_search_utils.metrics import calculate_metrics_at_k
|
|
16 |
|
17 |
|
18 |
global_data_path = os.environ.get("GLOBAL_DATA_PATH", "legal_info_search_data/")
|
19 |
-
global_model_path = os.environ.get("GLOBAL_MODEL_PATH",
|
20 |
-
"legal_info_search_model/20240120_122822_ep6/")
|
21 |
|
22 |
# размеченные консультации
|
23 |
data_path_consult = os.environ.get("DATA_PATH_CONSULT",
|
@@ -51,7 +50,7 @@ hf_token = os.environ.get("HF_TOKEN", "")
|
|
51 |
hf_model_name = os.environ.get("HF_MODEL_NAME", "")
|
52 |
|
53 |
class SemanticSearch:
|
54 |
-
def __init__(self,
|
55 |
faiss_batch_size=8, do_normalization=True):
|
56 |
self.device = device
|
57 |
self.do_embedding_norm = do_embedding_norm
|
|
|
16 |
|
17 |
|
18 |
global_data_path = os.environ.get("GLOBAL_DATA_PATH", "legal_info_search_data/")
|
19 |
+
global_model_path = os.environ.get("GLOBAL_MODEL_PATH", "e5_large_rus_finetuned_20240120_122822_ep6")
|
|
|
20 |
|
21 |
# размеченные консультации
|
22 |
data_path_consult = os.environ.get("DATA_PATH_CONSULT",
|
|
|
50 |
hf_model_name = os.environ.get("HF_MODEL_NAME", "")
|
51 |
|
52 |
class SemanticSearch:
|
53 |
+
def __init__(self, index_type="IndexFlatIP", do_embedding_norm=True,
|
54 |
faiss_batch_size=8, do_normalization=True):
|
55 |
self.device = device
|
56 |
self.do_embedding_norm = do_embedding_norm
|