Spaces:

quasara-io
/

Semantic-Search-Frontend

Sleeping

App Files Files Community

inie2003 commited on Oct 20, 2024

Commit

a3902aa

verified ·

1 Parent(s): 25475ff

Added small object search

Browse files

Files changed (1) hide show

app.py +51 -21

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import streamlit as st
-from helper import load_hf_datasets, search, get_file_paths, get_images_from_s3_to_display
 import os
 import time
@@ -7,8 +7,11 @@ import time
 AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
 AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
 # Predefined list of datasets
-datasets = ["WayveScenes","MajorTom-Europe"]  # Example dataset names
 # AWS S3 bucket name
 bucket_name = "datasets-quasara-io"
@@ -18,8 +21,12 @@ def main():
     # Select dataset from dropdown
     dataset_name = st.selectbox("Select Dataset", datasets)
-    folder_path = f'{dataset_name}/'
     # Progress bar for loading dataset
     loading_text = st.empty()  # Placeholder for dynamic text
     loading_text.text("Loading dataset...")
@@ -31,7 +38,7 @@ def main():
         progress_bar.progress(i + 25)
     # Load the selected dataset
-    df = load_hf_datasets(dataset_name)
     # Complete progress when loading is done
     progress_bar.progress(100)
@@ -42,7 +49,13 @@ def main():
     # Number of results to display
     limit = st.number_input("Number of results to display", min_value=1, max_value=10, value=10)
     # Search button
     if st.button("Search"):
         # Validate input
@@ -59,22 +72,39 @@ def main():
                 time.sleep(0.3)  # Simulate work being done
                 search_progress_bar.progress(i + 25)
-            # Perform the search
-            results = search(query, df, limit, 0, "cosine", search_in_images=True, search_in_small_objects=False)
-            # Complete the search progress
-            search_progress_bar.progress(100)
-            search_loading_text.text("Search completed!")
-            # Get the S3 file paths of the top results
-            top_k_paths = get_file_paths(df, results)
-            # Display images from S3
-            if top_k_paths:
-                st.write(f"Displaying top {len(top_k_paths)} results for query '{query}':")
-                get_images_from_s3_to_display(bucket_name, top_k_paths, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path)
             else:
-                st.write("No results found.")
 if __name__ == "__main__":
-    main()

 import streamlit as st
+from helper import load_dataset, parallel_load_and_combine,search, get_file_paths, get_cordinates, get_images_from_s3_to_display, get_images_with_bounding_boxes_from_s3,  batch_search
 import os
 import time
 AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
 AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
 # Predefined list of datasets
+datasets = ["StopSign_test","WayveScenes","MajorTom-Europe"]  # Example dataset names
+description = {
+        "StopSign_test" : "A test dataset for me",
+        "WayveScenes": "A large-scale dataset featuring diverse urban driving scenes, captured from autonomous vehicles to advance AI perception and navigation in complex environments.",
+        "MajorTom-Europe": "A geospatial dataset containing satellite imagery from across Europe, designed for tasks like land-use classification, environmental monitoring, and earth observation analytics."}
 # AWS S3 bucket name
 bucket_name = "datasets-quasara-io"
     # Select dataset from dropdown
     dataset_name = st.selectbox("Select Dataset", datasets)
+    if dataset_name == 'StopSign_test':
+        folder_path = ""
+    else:
+        folder_path = f'{dataset_name}/'
+    st.caption(description[dataset_name]) #trial area
     # Progress bar for loading dataset
     loading_text = st.empty()  # Placeholder for dynamic text
     loading_text.text("Loading dataset...")
         progress_bar.progress(i + 25)
     # Load the selected dataset
+    dataset = load_dataset(f"quasara-io/{dataset_name}")
     # Complete progress when loading is done
     progress_bar.progress(100)
     # Number of results to display
     limit = st.number_input("Number of results to display", min_value=1, max_value=10, value=10)
+    if st.checkbox("Enable Small Object Search"):
+        search_in_small_objects = True
+        st.text("Small Object Search Enabled")
+    else:
+        search_in_small_objects = False
+        st.text("Small Object Search Disabled")
     # Search button
     if st.button("Search"):
         # Validate input
                 time.sleep(0.3)  # Simulate work being done
                 search_progress_bar.progress(i + 25)
+            #Get Dataset Keys to speed up processing/search
+            dataset_keys = dataset.keys()
+            main_df,split_df = parallel_load_and_combine(dataset_keys,dataset)
+            #Small Search
+            if search_in_small_objects:
+                # Perform the search
+                results = batch_search(query, split_df)
+                top_k_paths = get_file_paths(split_df,results)
+                top_k_cordinates = get_cordinates(split_df, results)
+                # Complete the search progress
+                search_progress_bar.progress(100)
+                search_loading_text.text("Search completed!")
+                #Load Images with Bounding boxes
+                if top_k_paths and top_k_cordinates:
+                    get_images_with_bounding_boxes_from_s3(bucket_name,top_k_paths, top_k_cordinates, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path)
+                else:
+                    st.write("No results found.")
             else:
+            #Normal Search
+                results = batch_search(query, main_df)
+                top_k_paths = get_file_paths(main_df, results)
+                # Complete the search progress
+                search_progress_bar.progress(100)
+                search_loading_text.text("Search completed!")
+                #Load Images
+                # Display images from S3
+                if top_k_paths:
+                    st.write(f"Displaying top {len(top_k_paths)} results for query '{query}':")
+                    get_images_from_s3_to_display(bucket_name, top_k_paths, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path)
+                else:
+                    st.write("No results found.")
 if __name__ == "__main__":
+    main()