Spaces:
Sleeping
Sleeping
root
committed on
Commit
·
2c2d40a
1
Parent(s):
7df77f3
switch to docker sdk
Browse files- .gitignore +0 -2
- Dockerfile +18 -3
- README.md +3 -5
- app.py +1 -1
- setup.py +4 -0
- src/config.py +1 -0
- src/model.py +1 -1
.gitignore
CHANGED
@@ -37,6 +37,4 @@ tokenizer.model
|
|
37 |
*$py.class
|
38 |
.env
|
39 |
/embedding_model/
|
40 |
-
setup.py
|
41 |
-
embedding_setup.sh
|
42 |
src/db/*.json
|
|
|
37 |
*$py.class
|
38 |
.env
|
39 |
/embedding_model/
|
|
|
|
|
40 |
src/db/*.json
|
Dockerfile
CHANGED
@@ -15,7 +15,22 @@ RUN mkdir /code/embedding_model/
|
|
15 |
# RUN chmod +x /code/embedding_setup.sh
|
16 |
RUN python setup.py
|
17 |
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
15 |
# RUN chmod +x /code/embedding_setup.sh
|
16 |
RUN python setup.py
|
17 |
|
18 |
+
# Set up a new user named "user" with user ID 1000
|
19 |
+
RUN useradd -m -u 1000 user
|
20 |
+
|
21 |
+
# Switch to the "user" user
|
22 |
+
USER user
|
23 |
+
# Set home to the user's home directory
|
24 |
+
ENV HOME=/home/user \
|
25 |
+
PATH=/home/user/.local/bin:$PATH
|
26 |
+
|
27 |
+
# Set the working directory to the user's home directory
|
28 |
+
WORKDIR $HOME/app
|
29 |
|
30 |
+
RUN pip install --no-cache-dir --upgrade pip
|
31 |
+
|
32 |
+
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
33 |
+
COPY --chown=user . $HOME/app
|
34 |
+
|
35 |
+
EXPOSE 7860
|
36 |
+
CMD python app.py
|
README.md
CHANGED
@@ -2,10 +2,9 @@
|
|
2 |
title: Phoenix-Byte
|
3 |
colorFrom: green
|
4 |
colorTo: indigo
|
5 |
-
sdk:
|
6 |
-
sdk_version: 3.39.0
|
7 |
-
app_file: app.py
|
8 |
pinned: false
|
|
|
9 |
---
|
10 |
|
11 |
## Introduction
|
@@ -19,5 +18,4 @@ Training data for this project was gathered from Justia using the basic requests
|
|
19 |
## Model
|
20 |
The base model is Meta’s Llama2 7B, chosen because it can be trained on an 8GB consumer GPU with quantization. The model finetuning was performed on a laptop RTX 4060 using 4-bit NormalFloat (NF4) quantization and low-rank adapters (LoRA) through the Hugging Face transformers and PEFT libraries. LoRA updates were merged with the model following training completion.
|
21 |
|
22 |
-
|
23 |
-
This app runs as a Gradio app inside a Docker container built on Google Cloud Build and deployed to Compute Engine on a T4 instance. The model weights themselves are stored on Google Cloud Storage.
|
|
|
2 |
title: Phoenix-Byte
|
3 |
colorFrom: green
|
4 |
colorTo: indigo
|
5 |
+
sdk: docker
|
|
|
|
|
6 |
pinned: false
|
7 |
+
app_port: 7860
|
8 |
---
|
9 |
|
10 |
## Introduction
|
|
|
18 |
## Model
|
19 |
The base model is Meta’s Llama2 7B, chosen because it can be trained on an 8GB consumer GPU with quantization. The model finetuning was performed on a laptop RTX 4060 using 4-bit NormalFloat (NF4) quantization and low-rank adapters (LoRA) through the Hugging Face transformers and PEFT libraries. LoRA updates were merged with the model following training completion.
|
20 |
|
21 |
+
|
|
app.py
CHANGED
@@ -33,7 +33,7 @@ def run():
|
|
33 |
btn2.click(lambda x: x, inputs=[txt], outputs=cache, queue=False).then(
|
34 |
model.inference, inputs=[cache, dropdown], outputs=txt2)
|
35 |
|
36 |
-
demo.queue().launch(share=False)
|
37 |
|
38 |
if __name__=='__main__':
|
39 |
run()
|
|
|
33 |
btn2.click(lambda x: x, inputs=[txt], outputs=cache, queue=False).then(
|
34 |
model.inference, inputs=[cache, dropdown], outputs=txt2)
|
35 |
|
36 |
+
demo.queue().launch(share=False, server_name="0.0.0.0")
|
37 |
|
38 |
if __name__=='__main__':
|
39 |
run()
|
setup.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sentence_transformers import SentenceTransformer
|
2 |
+
|
3 |
+
embedding_model = SentenceTransformer('multi-qa-mpnet-base-dot-v1')
|
4 |
+
embedding_model.save('/embedding_model/')
|
src/config.py
CHANGED
@@ -47,6 +47,7 @@ headers = {
|
|
47 |
"Content-Type": "application/json"
|
48 |
}
|
49 |
|
|
|
50 |
streaming_url = "https://api.runpod.ai/v2/o4tke61qpopsz0/stream/"
|
51 |
job_url = "https://api.runpod.ai/v2/o4tke61qpopsz0/run"
|
52 |
|
|
|
47 |
"Content-Type": "application/json"
|
48 |
}
|
49 |
|
50 |
+
embedding_path = os.environ.get('EMBEDDING_PATH')
|
51 |
streaming_url = "https://api.runpod.ai/v2/o4tke61qpopsz0/stream/"
|
52 |
job_url = "https://api.runpod.ai/v2/o4tke61qpopsz0/run"
|
53 |
|
src/model.py
CHANGED
@@ -15,7 +15,7 @@ class Model:
|
|
15 |
max_new_tokens:int=max_new_tokens):
|
16 |
self.max_new_tokens = max_new_tokens
|
17 |
# self.embedding_model = SentenceTransformer('multi-qa-mpnet-base-dot-v1')
|
18 |
-
self.embedding_model = SentenceTransformer(
|
19 |
|
20 |
|
21 |
def inference(self, query:str, table:str):
|
|
|
15 |
max_new_tokens:int=max_new_tokens):
|
16 |
self.max_new_tokens = max_new_tokens
|
17 |
# self.embedding_model = SentenceTransformer('multi-qa-mpnet-base-dot-v1')
|
18 |
+
self.embedding_model = SentenceTransformer("/embedding_model/")
|
19 |
|
20 |
|
21 |
def inference(self, query:str, table:str):
|