Spaces:
Running
Running
seanpedrickcase
committed on
Commit
•
99d13a7
1
Parent(s):
043d5fc
Rearranged interface to focus on API. Optimised Dockerfile.
Browse files- Dockerfile +9 -5
- README.md +1 -1
- app.py +13 -11
- requirements.txt +5 -5
- requirements_aws.txt +11 -0
- tools/constants.py +1 -1
Dockerfile
CHANGED
@@ -1,16 +1,18 @@
|
|
1 |
FROM public.ecr.aws/docker/library/python:3.11.9-slim-bookworm
|
2 |
|
3 |
# Install Lambda web adapter in case you want to run with an AWS Lambda function URL
|
4 |
-
COPY --from=public.ecr.aws/awsguru/aws-lambda-adapter:0.8.3 /lambda-adapter /opt/extensions/lambda-adapter
|
5 |
|
|
|
|
|
6 |
|
7 |
WORKDIR /src
|
8 |
|
9 |
COPY requirements.txt .
|
10 |
|
11 |
-
RUN pip install --no-cache-dir -
|
12 |
-
|
13 |
-
|
14 |
|
15 |
# Set up a new user named "user" with user ID 1000
|
16 |
RUN useradd -m -u 1000 user
|
@@ -19,7 +21,8 @@ RUN useradd -m -u 1000 user
|
|
19 |
RUN chown -R user:user /home/user
|
20 |
|
21 |
# Make output folder
|
22 |
-
RUN mkdir -p /home/user/app/output && chown -R user:user /home/user/app/output
|
|
|
23 |
|
24 |
# Switch to the "user" user
|
25 |
USER user
|
@@ -29,6 +32,7 @@ ENV HOME=/home/user \
|
|
29 |
PATH=/home/user/.local/bin:$PATH \
|
30 |
PYTHONPATH=$HOME/app \
|
31 |
PYTHONUNBUFFERED=1 \
|
|
|
32 |
GRADIO_ALLOW_FLAGGING=never \
|
33 |
GRADIO_NUM_PORTS=1 \
|
34 |
GRADIO_SERVER_NAME=0.0.0.0 \
|
|
|
1 |
FROM public.ecr.aws/docker/library/python:3.11.9-slim-bookworm
|
2 |
|
3 |
# Install Lambda web adapter in case you want to run with an AWS Lambda function URL
|
4 |
+
#COPY --from=public.ecr.aws/awsguru/aws-lambda-adapter:0.8.3 /lambda-adapter /opt/extensions/lambda-adapter
|
5 |
|
6 |
+
# Update apt
|
7 |
+
RUN apt-get update && rm -rf /var/lib/apt/lists/*
|
8 |
|
9 |
WORKDIR /src
|
10 |
|
11 |
COPY requirements.txt .
|
12 |
|
13 |
+
RUN pip install --no-cache-dir torch==2.4.0+cpu --index-url https://download.pytorch.org/whl/cpu && \
|
14 |
+
pip install --no-cache-dir -r requirements_aws.txt && \
|
15 |
+
pip install --no-cache-dir gradio==4.42.0
|
16 |
|
17 |
# Set up a new user named "user" with user ID 1000
|
18 |
RUN useradd -m -u 1000 user
|
|
|
21 |
RUN chown -R user:user /home/user
|
22 |
|
23 |
# Make output folder
|
24 |
+
RUN mkdir -p /home/user/app/output && chown -R user:user /home/user/app/output && \
|
25 |
+
mkdir -p /home/user/app/output/api && chown -R user:user /home/user/app/output/api
|
26 |
|
27 |
# Switch to the "user" user
|
28 |
USER user
|
|
|
32 |
PATH=/home/user/.local/bin:$PATH \
|
33 |
PYTHONPATH=$HOME/app \
|
34 |
PYTHONUNBUFFERED=1 \
|
35 |
+
PYTHONDONTWRITEBYTECODE=1 \
|
36 |
GRADIO_ALLOW_FLAGGING=never \
|
37 |
GRADIO_NUM_PORTS=1 \
|
38 |
GRADIO_SERVER_NAME=0.0.0.0 \
|
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: π
|
|
4 |
colorFrom: purple
|
5 |
colorTo: gray
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 4.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
|
|
4 |
colorFrom: purple
|
5 |
colorTo: gray
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.42.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
app.py
CHANGED
@@ -47,14 +47,15 @@ with block:
|
|
47 |
""")
|
48 |
|
49 |
with gr.Tab("Match addresses"):
|
|
|
|
|
|
|
50 |
|
51 |
-
with gr.Accordion("I have multiple addresses", open =
|
52 |
in_file = gr.File(label="Input addresses from file", file_count= "multiple")
|
53 |
-
in_colnames = gr.Dropdown(choices=[], multiselect=True, label="Select columns that make up the address. Make sure postcode is at the end")
|
54 |
-
in_existing = gr.Dropdown(choices=[], multiselect=False, label="Select columns that indicate existing matches.")
|
55 |
|
56 |
-
with gr.Accordion("I only have a single address", open = False):
|
57 |
-
in_text = gr.Textbox(label="Input a single address as text")
|
58 |
|
59 |
gr.Markdown(
|
60 |
"""
|
@@ -62,18 +63,19 @@ with block:
|
|
62 |
Upload a reference file to match against, or alternatively call the Addressbase API (requires API key). Fuzzy matching will work on any address format, but the neural network will only work with the LLPG LPI format, e.g. with columns SaoText, SaoStartNumber etc.. This joins on the UPRN column. If any of these are different for you,
|
63 |
open 'Custom reference file format or join columns' below.
|
64 |
""")
|
65 |
-
|
66 |
-
in_ref = gr.File(label="Input reference addresses from file", file_count= "multiple")
|
67 |
|
68 |
-
with gr.Accordion("Use Addressbase API instead of reference file", open =
|
69 |
in_api = gr.Dropdown(label="Choose API type", multiselect=False, value=None, choices=["Postcode"])#["Postcode", "UPRN"]) #choices=["Address", "Postcode", "UPRN"])
|
70 |
in_api_key = gr.Textbox(label="Addressbase API key", type='password')
|
|
|
|
|
|
|
71 |
|
72 |
with gr.Accordion("Custom reference file format or join columns (i.e. not LLPG LPI format)", open = False):
|
73 |
-
in_refcol = gr.Dropdown(choices=[], multiselect=True, label="Select columns that make up the reference address. Make sure postcode is at the end")
|
74 |
-
in_joincol = gr.Dropdown(choices=[], multiselect=True, label="Select columns you want to join on to the search dataset")
|
75 |
|
76 |
-
match_btn = gr.Button("Match addresses")
|
77 |
|
78 |
with gr.Row():
|
79 |
output_summary = gr.Textbox(label="Output summary")
|
|
|
47 |
""")
|
48 |
|
49 |
with gr.Tab("Match addresses"):
|
50 |
+
|
51 |
+
with gr.Accordion("Quick check - single address", open = True):
|
52 |
+
in_text = gr.Textbox(label="Input a single address as text")
|
53 |
|
54 |
+
with gr.Accordion("I have multiple addresses", open = False):
|
55 |
in_file = gr.File(label="Input addresses from file", file_count= "multiple")
|
56 |
+
in_colnames = gr.Dropdown(value=[], choices=[], multiselect=True, label="Select columns that make up the address. Make sure postcode is at the end")
|
57 |
+
in_existing = gr.Dropdown(value=[], choices=[], multiselect=False, label="Select columns that indicate existing matches.")
|
58 |
|
|
|
|
|
59 |
|
60 |
gr.Markdown(
|
61 |
"""
|
|
|
63 |
Upload a reference file to match against, or alternatively call the Addressbase API (requires API key). Fuzzy matching will work on any address format, but the neural network will only work with the LLPG LPI format, e.g. with columns SaoText, SaoStartNumber etc.. This joins on the UPRN column. If any of these are different for you,
|
64 |
open 'Custom reference file format or join columns' below.
|
65 |
""")
|
|
|
|
|
66 |
|
67 |
+
with gr.Accordion("Use Addressbase API (instead of reference file)", open = True):
|
68 |
in_api = gr.Dropdown(label="Choose API type", multiselect=False, value=None, choices=["Postcode"])#["Postcode", "UPRN"]) #choices=["Address", "Postcode", "UPRN"])
|
69 |
in_api_key = gr.Textbox(label="Addressbase API key", type='password')
|
70 |
+
|
71 |
+
with gr.Accordion("Match against reference file of addresses", open = False):
|
72 |
+
in_ref = gr.File(label="Input reference addresses from file", file_count= "multiple")
|
73 |
|
74 |
with gr.Accordion("Custom reference file format or join columns (i.e. not LLPG LPI format)", open = False):
|
75 |
+
in_refcol = gr.Dropdown(value=[], choices=[], multiselect=True, label="Select columns that make up the reference address. Make sure postcode is at the end")
|
76 |
+
in_joincol = gr.Dropdown(value=[], choices=[], multiselect=True, label="Select columns you want to join on to the search dataset")
|
77 |
|
78 |
+
match_btn = gr.Button("Match addresses", variant="primary")
|
79 |
|
80 |
with gr.Row():
|
81 |
output_summary = gr.Textbox(label="Output summary")
|
requirements.txt
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
-
|
2 |
pandas==2.2.2
|
3 |
rapidfuzz==3.8.1
|
4 |
-
torch==2.2.1
|
5 |
recordlinkage==0.16
|
6 |
pyap==0.3.1
|
7 |
pytest==7.4.3
|
8 |
pyarrow==14.0.1
|
9 |
openpyxl==3.1.2
|
10 |
-
gradio==4.
|
11 |
-
boto3==1.34.
|
12 |
-
polars==0.20.19
|
|
|
|
1 |
+
torch==2.4.0
|
2 |
pandas==2.2.2
|
3 |
rapidfuzz==3.8.1
|
|
|
4 |
recordlinkage==0.16
|
5 |
pyap==0.3.1
|
6 |
pytest==7.4.3
|
7 |
pyarrow==14.0.1
|
8 |
openpyxl==3.1.2
|
9 |
+
gradio==4.42.0
|
10 |
+
boto3==1.34.158
|
11 |
+
polars==0.20.19
|
12 |
+
numpy==1.26.4
|
requirements_aws.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pandas==2.2.2
|
2 |
+
rapidfuzz==3.8.1
|
3 |
+
recordlinkage==0.16
|
4 |
+
pyap==0.3.1
|
5 |
+
pytest==7.4.3
|
6 |
+
pyarrow==14.0.1
|
7 |
+
openpyxl==3.1.2
|
8 |
+
gradio==4.42.0
|
9 |
+
boto3==1.34.158
|
10 |
+
polars==0.20.19
|
11 |
+
numpy==1.26.4
|
tools/constants.py
CHANGED
@@ -197,7 +197,7 @@ if os.path.exists(model_path):
|
|
197 |
|
198 |
out_model_path = os.path.join(out_model_dir, out_model_file_name)
|
199 |
print("Model location: ", out_model_path)
|
200 |
-
exported_model.load_state_dict(torch.load(out_model_path, map_location=torch.device('cpu')))
|
201 |
exported_model.eval()
|
202 |
|
203 |
device='cpu'
|
|
|
197 |
|
198 |
out_model_path = os.path.join(out_model_dir, out_model_file_name)
|
199 |
print("Model location: ", out_model_path)
|
200 |
+
exported_model.load_state_dict(torch.load(out_model_path, map_location=torch.device('cpu'), weights_only=False))
|
201 |
exported_model.eval()
|
202 |
|
203 |
device='cpu'
|