Spaces:
Sleeping
Sleeping
Commit
•
341fcdf
1
Parent(s):
2e113bf
Improve deployment issues - reqs, dockerfile, gitignore,...
Browse files- .gitignore +1 -7
- Dockerfile +12 -4
- data/04_prompts/prompt_template_for_explaning_why_is_a_good_fit.json +16 -0
- requirements.in +4 -0
- requirements.txt +24 -20
- src/app/app.py +1 -1
.gitignore
CHANGED
@@ -10,18 +10,12 @@ conf/local/**
|
|
10 |
conf/**/*credentials*
|
11 |
|
12 |
# ignore everything in the following folders
|
13 |
-
data/**
|
14 |
logs/**
|
|
|
15 |
|
16 |
# except their sub-folders
|
17 |
-
!data/**/
|
18 |
!logs/**/
|
19 |
|
20 |
-
# also keep the example dataset
|
21 |
-
!data/01_raw/*.csv
|
22 |
-
!data/02_processed/**
|
23 |
-
!data/02_processed/**/**
|
24 |
-
|
25 |
# also keep all .gitkeep files
|
26 |
!.gitkeep
|
27 |
|
|
|
10 |
conf/**/*credentials*
|
11 |
|
12 |
# ignore everything in the following folders
|
|
|
13 |
logs/**
|
14 |
+
data/03_indexed/**
|
15 |
|
16 |
# except their sub-folders
|
|
|
17 |
!logs/**/
|
18 |
|
|
|
|
|
|
|
|
|
|
|
19 |
# also keep all .gitkeep files
|
20 |
!.gitkeep
|
21 |
|
Dockerfile
CHANGED
@@ -15,6 +15,9 @@ ENV REQUIREMENTS_PATH=$REQUIREMENTS_PATH
|
|
15 |
ARG HF_HOME=".cache/huggingface/hub"
|
16 |
ENV HF_HOME=$HF_HOME
|
17 |
|
|
|
|
|
|
|
18 |
ARG ENTRYPOINT_PATH="./entrypoint.sh"
|
19 |
ENV ENTRYPOINT_PATH=$ENTRYPOINT_PATH
|
20 |
|
@@ -26,10 +29,10 @@ RUN mkdir -p /code/&& \
|
|
26 |
WORKDIR /code
|
27 |
|
28 |
# Create a virtual environment in the directory /venv
|
29 |
-
RUN python -m venv venv
|
30 |
|
31 |
# Activate the virtual environment by adding it to the PATH environment variable
|
32 |
-
ENV PATH="/venv/bin:$PATH"
|
33 |
|
34 |
RUN apt update && \
|
35 |
python -m ensurepip --upgrade && \
|
@@ -41,13 +44,18 @@ RUN pip install --no-cache-dir -r ./requirements.txt
|
|
41 |
|
42 |
RUN mkdir -p $HF_HOME && \
|
43 |
chmod -R 777 $HF_HOME && \
|
|
|
44 |
export TRANSFORMERS_CACHE=$HF_HOME && \
|
45 |
-
|
|
|
|
|
46 |
|
47 |
COPY . .
|
48 |
|
49 |
RUN pip install -e . && \
|
50 |
-
python src/resume_worth/pipelines/data_indexing/pipeline.py
|
|
|
|
|
51 |
chmod +x $ENTRYPOINT_PATH
|
52 |
|
53 |
ENTRYPOINT $ENTRYPOINT_PATH
|
|
|
15 |
ARG HF_HOME=".cache/huggingface/hub"
|
16 |
ENV HF_HOME=$HF_HOME
|
17 |
|
18 |
+
ARG MPLCONFIGDIR=".config/matplotlib"
|
19 |
+
ENV MPLCONFIGDIR=$MPLCONFIGDIR
|
20 |
+
|
21 |
ARG ENTRYPOINT_PATH="./entrypoint.sh"
|
22 |
ENV ENTRYPOINT_PATH=$ENTRYPOINT_PATH
|
23 |
|
|
|
29 |
WORKDIR /code
|
30 |
|
31 |
# Create a virtual environment in the directory /code/.venv
|
32 |
+
RUN python -m venv .venv
|
33 |
|
34 |
# Activate the virtual environment by adding it to the PATH environment variable
|
35 |
+
ENV PATH="/code/.venv/bin:$PATH"
|
36 |
|
37 |
RUN apt update && \
|
38 |
python -m ensurepip --upgrade && \
|
|
|
44 |
|
45 |
RUN mkdir -p $HF_HOME && \
|
46 |
chmod -R 777 $HF_HOME && \
|
47 |
+
export HF_HOME=$HF_HOME && \
|
48 |
export TRANSFORMERS_CACHE=$HF_HOME && \
|
49 |
+
mkdir -p $MPLCONFIGDIR && \
|
50 |
+
chmod -R 777 $MPLCONFIGDIR && \
|
51 |
+
export MPLCONFIGDIR=$MPLCONFIGDIR
|
52 |
|
53 |
COPY . .
|
54 |
|
55 |
RUN pip install -e . && \
|
56 |
+
python src/resume_worth/pipelines/data_indexing/pipeline.py
|
57 |
+
|
58 |
+
RUN python src/resume_worth/pipelines/text_generation/pipeline.py && \
|
59 |
chmod +x $ENTRYPOINT_PATH
|
60 |
|
61 |
ENTRYPOINT $ENTRYPOINT_PATH
|
data/04_prompts/prompt_template_for_explaning_why_is_a_good_fit.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"name": null,
|
3 |
+
"input_variables": [
|
4 |
+
"job",
|
5 |
+
"resume"
|
6 |
+
],
|
7 |
+
"input_types": {},
|
8 |
+
"output_parser": null,
|
9 |
+
"partial_variables": {},
|
10 |
+
"metadata": null,
|
11 |
+
"tags": null,
|
12 |
+
"template": "<|im_start|>user\nExplain why the following RESUME is a good match for the presented JOB VACANCY.\nKeep your answer grounded in the facts of the RESUME and JOB VACANCY.\nWrite a maximum of three points in clear and concise language.\n\nRESUME: \n{resume}\n\nJOB VACANCY: \n{job}<|im_end|>\n<|im_start|>assistant\n",
|
13 |
+
"template_format": "f-string",
|
14 |
+
"validate_template": false,
|
15 |
+
"_type": "prompt"
|
16 |
+
}
|
requirements.in
CHANGED
@@ -10,6 +10,10 @@ langchain-community
|
|
10 |
sentence-transformers
|
11 |
chromadb
|
12 |
|
|
|
|
|
|
|
|
|
13 |
# to build the user interface
|
14 |
gradio
|
15 |
|
|
|
10 |
sentence-transformers
|
11 |
chromadb
|
12 |
|
13 |
+
# to generate text
|
14 |
+
transformers
|
15 |
+
torch
|
16 |
+
|
17 |
# to build the user interface
|
18 |
gradio
|
19 |
|
requirements.txt
CHANGED
@@ -6,7 +6,7 @@
|
|
6 |
#
|
7 |
aiofiles==23.2.1
|
8 |
# via gradio
|
9 |
-
aiohttp==3.9.
|
10 |
# via
|
11 |
# langchain
|
12 |
# langchain-community
|
@@ -73,7 +73,7 @@ fastapi==0.110.1
|
|
73 |
# gradio
|
74 |
ffmpy==0.3.2
|
75 |
# via gradio
|
76 |
-
filelock==3.13.
|
77 |
# via
|
78 |
# huggingface-hub
|
79 |
# torch
|
@@ -95,9 +95,9 @@ google-auth==2.29.0
|
|
95 |
# via kubernetes
|
96 |
googleapis-common-protos==1.63.0
|
97 |
# via opentelemetry-exporter-otlp-proto-grpc
|
98 |
-
gradio==4.
|
99 |
# via -r requirements.in
|
100 |
-
gradio-client==0.15.
|
101 |
# via gradio
|
102 |
grpcio==1.62.1
|
103 |
# via
|
@@ -124,7 +124,7 @@ huggingface-hub==0.22.2
|
|
124 |
# transformers
|
125 |
humanfriendly==10.0
|
126 |
# via coloredlogs
|
127 |
-
idna==3.
|
128 |
# via
|
129 |
# anyio
|
130 |
# httpx
|
@@ -141,7 +141,7 @@ jinja2==3.1.3
|
|
141 |
# altair
|
142 |
# gradio
|
143 |
# torch
|
144 |
-
joblib==1.
|
145 |
# via scikit-learn
|
146 |
jsonpatch==1.33
|
147 |
# via
|
@@ -157,20 +157,20 @@ kiwisolver==1.4.5
|
|
157 |
# via matplotlib
|
158 |
kubernetes==29.0.0
|
159 |
# via chromadb
|
160 |
-
langchain==0.1.
|
161 |
# via -r requirements.in
|
162 |
-
langchain-community==0.0.
|
163 |
# via
|
164 |
# -r requirements.in
|
165 |
# langchain
|
166 |
-
langchain-core==0.1.
|
167 |
# via
|
168 |
# langchain
|
169 |
# langchain-community
|
170 |
# langchain-text-splitters
|
171 |
langchain-text-splitters==0.0.1
|
172 |
# via langchain
|
173 |
-
langsmith==0.1.
|
174 |
# via
|
175 |
# langchain
|
176 |
# langchain-community
|
@@ -221,7 +221,7 @@ oauthlib==3.2.2
|
|
221 |
# via
|
222 |
# kubernetes
|
223 |
# requests-oauthlib
|
224 |
-
onnxruntime==1.17.
|
225 |
# via chromadb
|
226 |
opentelemetry-api==1.24.0
|
227 |
# via
|
@@ -279,7 +279,7 @@ packaging==23.2
|
|
279 |
# matplotlib
|
280 |
# onnxruntime
|
281 |
# transformers
|
282 |
-
pandas==2.2.
|
283 |
# via
|
284 |
# -r requirements.in
|
285 |
# altair
|
@@ -296,7 +296,7 @@ protobuf==4.25.3
|
|
296 |
# googleapis-common-protos
|
297 |
# onnxruntime
|
298 |
# opentelemetry-proto
|
299 |
-
pulsar-client==3.
|
300 |
# via chromadb
|
301 |
pyasn1==0.6.0
|
302 |
# via
|
@@ -304,7 +304,7 @@ pyasn1==0.6.0
|
|
304 |
# rsa
|
305 |
pyasn1-modules==0.4.0
|
306 |
# via google-auth
|
307 |
-
pydantic==2.
|
308 |
# via
|
309 |
# chromadb
|
310 |
# fastapi
|
@@ -312,7 +312,7 @@ pydantic==2.6.4
|
|
312 |
# langchain
|
313 |
# langchain-core
|
314 |
# langsmith
|
315 |
-
pydantic-core==2.
|
316 |
# via pydantic
|
317 |
pydub==0.25.1
|
318 |
# via gradio
|
@@ -376,11 +376,11 @@ rpds-py==0.18.0
|
|
376 |
# referencing
|
377 |
rsa==4.9
|
378 |
# via google-auth
|
379 |
-
ruff==0.3.
|
380 |
# via gradio
|
381 |
safetensors==0.4.2
|
382 |
# via transformers
|
383 |
-
scikit-learn==1.4.
|
384 |
# via sentence-transformers
|
385 |
scipy==1.13.0
|
386 |
# via
|
@@ -428,7 +428,9 @@ tomlkit==0.12.0
|
|
428 |
toolz==0.12.1
|
429 |
# via altair
|
430 |
torch==2.2.2
|
431 |
-
# via
|
|
|
|
|
432 |
tqdm==4.66.2
|
433 |
# via
|
434 |
# chromadb
|
@@ -436,8 +438,10 @@ tqdm==4.66.2
|
|
436 |
# sentence-transformers
|
437 |
# transformers
|
438 |
transformers==4.39.3
|
439 |
-
# via
|
440 |
-
|
|
|
|
|
441 |
# via
|
442 |
# chromadb
|
443 |
# gradio
|
|
|
6 |
#
|
7 |
aiofiles==23.2.1
|
8 |
# via gradio
|
9 |
+
aiohttp==3.9.4
|
10 |
# via
|
11 |
# langchain
|
12 |
# langchain-community
|
|
|
73 |
# gradio
|
74 |
ffmpy==0.3.2
|
75 |
# via gradio
|
76 |
+
filelock==3.13.4
|
77 |
# via
|
78 |
# huggingface-hub
|
79 |
# torch
|
|
|
95 |
# via kubernetes
|
96 |
googleapis-common-protos==1.63.0
|
97 |
# via opentelemetry-exporter-otlp-proto-grpc
|
98 |
+
gradio==4.26.0
|
99 |
# via -r requirements.in
|
100 |
+
gradio-client==0.15.1
|
101 |
# via gradio
|
102 |
grpcio==1.62.1
|
103 |
# via
|
|
|
124 |
# transformers
|
125 |
humanfriendly==10.0
|
126 |
# via coloredlogs
|
127 |
+
idna==3.7
|
128 |
# via
|
129 |
# anyio
|
130 |
# httpx
|
|
|
141 |
# altair
|
142 |
# gradio
|
143 |
# torch
|
144 |
+
joblib==1.4.0
|
145 |
# via scikit-learn
|
146 |
jsonpatch==1.33
|
147 |
# via
|
|
|
157 |
# via matplotlib
|
158 |
kubernetes==29.0.0
|
159 |
# via chromadb
|
160 |
+
langchain==0.1.16
|
161 |
# via -r requirements.in
|
162 |
+
langchain-community==0.0.32
|
163 |
# via
|
164 |
# -r requirements.in
|
165 |
# langchain
|
166 |
+
langchain-core==0.1.42
|
167 |
# via
|
168 |
# langchain
|
169 |
# langchain-community
|
170 |
# langchain-text-splitters
|
171 |
langchain-text-splitters==0.0.1
|
172 |
# via langchain
|
173 |
+
langsmith==0.1.47
|
174 |
# via
|
175 |
# langchain
|
176 |
# langchain-community
|
|
|
221 |
# via
|
222 |
# kubernetes
|
223 |
# requests-oauthlib
|
224 |
+
onnxruntime==1.17.3
|
225 |
# via chromadb
|
226 |
opentelemetry-api==1.24.0
|
227 |
# via
|
|
|
279 |
# matplotlib
|
280 |
# onnxruntime
|
281 |
# transformers
|
282 |
+
pandas==2.2.2
|
283 |
# via
|
284 |
# -r requirements.in
|
285 |
# altair
|
|
|
296 |
# googleapis-common-protos
|
297 |
# onnxruntime
|
298 |
# opentelemetry-proto
|
299 |
+
pulsar-client==3.5.0
|
300 |
# via chromadb
|
301 |
pyasn1==0.6.0
|
302 |
# via
|
|
|
304 |
# rsa
|
305 |
pyasn1-modules==0.4.0
|
306 |
# via google-auth
|
307 |
+
pydantic==2.7.0
|
308 |
# via
|
309 |
# chromadb
|
310 |
# fastapi
|
|
|
312 |
# langchain
|
313 |
# langchain-core
|
314 |
# langsmith
|
315 |
+
pydantic-core==2.18.1
|
316 |
# via pydantic
|
317 |
pydub==0.25.1
|
318 |
# via gradio
|
|
|
376 |
# referencing
|
377 |
rsa==4.9
|
378 |
# via google-auth
|
379 |
+
ruff==0.3.7
|
380 |
# via gradio
|
381 |
safetensors==0.4.2
|
382 |
# via transformers
|
383 |
+
scikit-learn==1.4.2
|
384 |
# via sentence-transformers
|
385 |
scipy==1.13.0
|
386 |
# via
|
|
|
428 |
toolz==0.12.1
|
429 |
# via altair
|
430 |
torch==2.2.2
|
431 |
+
# via
|
432 |
+
# -r requirements.in
|
433 |
+
# sentence-transformers
|
434 |
tqdm==4.66.2
|
435 |
# via
|
436 |
# chromadb
|
|
|
438 |
# sentence-transformers
|
439 |
# transformers
|
440 |
transformers==4.39.3
|
441 |
+
# via
|
442 |
+
# -r requirements.in
|
443 |
+
# sentence-transformers
|
444 |
+
typer[all]==0.12.3
|
445 |
# via
|
446 |
# chromadb
|
447 |
# gradio
|
src/app/app.py
CHANGED
@@ -54,7 +54,7 @@ def run():
|
|
54 |
)
|
55 |
|
56 |
# Use share=True to create a public link to share. This share link expires in 72 hours.
|
57 |
-
app.launch(server_name=app_config['host'], server_port=app_config['port']
|
58 |
|
59 |
|
60 |
if __name__ == "__main__":
|
|
|
54 |
)
|
55 |
|
56 |
# Use share=True to create a public link to share. This share link expires in 72 hours.
|
57 |
+
app.launch(server_name=app_config['host'], server_port=app_config['port'])
|
58 |
|
59 |
|
60 |
if __name__ == "__main__":
|