Instantaneous1 committed on
Commit
3187e25
1 Parent(s): faf8b3f

pretrained embedding azure blobs

Browse files
Files changed (3) hide show
  1. README.md +10 -0
  2. app.py +33 -0
  3. requirements.txt +2 -1
README.md CHANGED
@@ -48,10 +48,20 @@ pip install -r requirements.txt
48
 
49
  3. Run the Streamlit app:
50
 
 
 
51
  ```bash
52
  streamlit run app.py
53
  ```
54
 
 
 
 
 
 
 
 
 
55
  4. Access the app in your web browser (usually at http://localhost:8501).
56
 
57
  ## Technology Stack
 
48
 
49
  3. Run the Streamlit app:
50
 
51
+ to quickly download the pretrained embeddings and skip training
52
+
53
  ```bash
54
  streamlit run app.py
55
  ```
56
 
57
+ or
58
+
59
+ to rebuild embeddings
60
+
61
+ ```bash
62
+ streamlit run app.py -- --dev
63
+ ```
64
+
65
  4. Access the app in your web browser (usually at http://localhost:8501).
66
 
67
  ## Technology Stack
app.py CHANGED
@@ -10,6 +10,8 @@ from PIL import ImageFile
10
  from slugify import slugify
11
  import opendatasets as od
12
  import json
 
 
13
 
14
  ImageFile.LOAD_TRUNCATED_IMAGES = True
15
  FOLDER = "images/"
@@ -17,6 +19,32 @@ NUM_TREES = 100
17
  FEATURES = 1000
18
  FILETYPES = [".png", ".jpg", ".jpeg", ".tiff", ".bmp"]
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  @st.cache_resource
22
  def load_dataset():
@@ -168,6 +196,11 @@ if __name__ == "__main__":
168
 
169
  try:
170
  load_dataset()
 
 
 
 
 
171
  save_embedding(FOLDER)
172
 
173
  # File uploader
 
10
  from slugify import slugify
11
  import opendatasets as od
12
  import json
13
+ import argparse
14
+
15
 
16
  ImageFile.LOAD_TRUNCATED_IMAGES = True
17
  FOLDER = "images/"
 
19
  FEATURES = 1000
20
  FILETYPES = [".png", ".jpg", ".jpeg", ".tiff", ".bmp"]
21
 
22
+ from azure.storage.blob import BlobServiceClient
23
+
24
+
25
+ @st.cache_resource
26
+ def dl_embeddings():
27
+ """Download pretrained embeddings in the production environment instead of building them."""
28
+ # Connect to your Blob Storage account
29
+ connect_str = st.secrets["connectionstring"]
30
+ blob_service_client = BlobServiceClient.from_connection_string(connect_str)
31
+
32
+ # Specify container and blob names
33
+ container_name = "imagessearch"
34
+ blob_name = f"{slugify(FOLDER)}.tree"
35
+
36
+ # Get a reference to the blob
37
+ blob_client = blob_service_client.get_blob_client(
38
+ container=container_name, blob=blob_name
39
+ )
40
+
41
+ # Download the binary data
42
+ download_file_path = f"{slugify(FOLDER)}.tree" # Path to save the downloaded file
43
+ with open(download_file_path, "wb") as download_file:
44
+ download_file.write(blob_client.download_blob().readall())
45
+
46
+ print(f"File downloaded to: {download_file_path}")
47
+
48
 
49
  @st.cache_resource
50
  def load_dataset():
 
196
 
197
  try:
198
  load_dataset()
199
+ # download pretrained embeddings unless running in the development environment (--dev)
200
+ ap = argparse.ArgumentParser()
201
+ ap.add_argument("--dev", action="store_true")
202
+ if not ap.parse_args().dev:
203
+ dl_embeddings()
204
  save_embedding(FOLDER)
205
 
206
  # File uploader
requirements.txt CHANGED
@@ -4,4 +4,5 @@ torchvision
4
  streamlit
5
  tqdm
6
  python-slugify
7
- opendatasets
 
 
4
  streamlit
5
  tqdm
6
  python-slugify
7
+ opendatasets
8
+ azure-storage-blob