"""Streamlit helpers for loading CSV data into Weaviate and searching it."""

from io import StringIO

import pandas as pd
import streamlit as st
import weaviate
from weaviate import Client
from weaviate.embedded import EmbeddedOptions


def hybrid_search_weaviate(client, selected_class, query, limit=100):
    """Run a hybrid search against one Weaviate class.

    Args:
        client: Weaviate client used for the schema lookup and the query.
        selected_class: Name of the class to search.
        query: Search text passed to the hybrid operator.
        limit: Maximum number of objects to request (defaults to 100).

    Returns:
        A list of dictionaries, one per hit, keyed by property name. An empty
        list is returned when the class has no properties to select or when
        the response carries GraphQL errors (which are shown via ``st.error``).
    """
    # A GraphQL ``Get`` query must name the properties it selects, so read
    # them from the class schema instead of hard-coding them.
    class_schema = get_class_schema(client, selected_class) or {}
    property_names = [
        prop["name"] for prop in class_schema.get("properties") or []
    ]
    if not property_names:
        return []

    response = (
        client.query.get(selected_class, property_names)
        .with_hybrid(query=query)
        .with_limit(limit)
        .do()
    )

    if response.get("errors"):
        st.error(f"Error running hybrid search: {response['errors']}")
        return []

    # Hits are nested under data -> Get -> <class name> in the response.
    hits = ((response.get("data") or {}).get("Get") or {}).get(selected_class)
    return hits or []


def convert_to_tapas_format(data):
    """Convert a list of dictionaries into a header-plus-rows table.

    Args:
        data: Records (e.g. search hits) accepted by ``pd.DataFrame``.

    Returns:
        A list of lists: the first entry holds the column names, every
        following entry holds the values of one row.
    """
    df = pd.DataFrame(data)
    header = df.columns.tolist()
    rows = df.values.tolist()
    return [header] + rows


def initialize_weaviate_client():
    """Return a Weaviate client configured with default embedded options."""
    return weaviate.Client(embedded_options=EmbeddedOptions())


def class_exists(client, class_name):
    """Return True if ``class_name`` is present in the Weaviate schema.

    Any error raised while reading the schema is treated as "does not
    exist", so this function never raises.
    """
    try:
        return get_class_schema(client, class_name) is not None
    except Exception:
        # Best effort by design: an unreachable or failing schema endpoint
        # is reported as "class not found" rather than crashing the caller.
        return False


def map_dtype_to_weaviate(dtype):
    """Map a pandas/NumPy dtype to a Weaviate data type name.

    The decision is made on the dtype's string form: anything containing
    ``int`` maps to ``"int"``, ``float`` to ``"number"``, ``bool`` to
    ``"boolean"``, and everything else to ``"string"``.
    """
    dtype_name = str(dtype)
    if "int" in dtype_name:
        return "int"
    if "float" in dtype_name:
        return "number"
    if "bool" in dtype_name:
        return "boolean"
    return "string"


def create_new_class_schema(client, class_name, class_description):
    """Create a new, property-less class and report the outcome in the UI.

    Args:
        client: Weaviate client used to create the class.
        class_name: Name of the class to create.
        class_description: Description stored with the class.

    Success is reported with ``st.success`` and any failure with
    ``st.error``; nothing is returned and no exception propagates.
    """
    class_schema = {
        "class": class_name,
        "description": class_description,
        "properties": [],
    }
    try:
        client.schema.create({"classes": [class_schema]})
        st.success(f"Class {class_name} created successfully!")
    except Exception as e:
        st.error(f"Error creating class: {e}")


def ingest_data_to_weaviate(client, csv_file, selected_class):
    """Load an uploaded CSV file into ``selected_class``.

    If the class has no properties yet, one property per CSV column is
    created, typed via ``map_dtype_to_weaviate``. Otherwise the CSV columns
    must match the existing property names exactly; on a mismatch an error
    is shown and nothing is ingested.

    Args:
        client: Weaviate client used for schema and object operations.
        csv_file: Binary file-like object whose ``read()`` returns UTF-8
            encoded CSV bytes.
        selected_class: Name of the target class.
    """
    # Read the CSV data
    raw_text = csv_file.read().decode("utf-8")
    dataframe = pd.read_csv(StringIO(raw_text))

    # Fetch the properties currently defined for the selected class. A
    # missing class or a missing/empty property list is treated as "empty".
    class_schema = get_class_schema(client, selected_class) or {}
    existing_properties = class_schema.get("properties") or []

    if not existing_properties:
        # Empty schema: derive one property per CSV column.
        for column_name, data_type in zip(dataframe.columns, dataframe.dtypes):
            property_schema = {
                "name": column_name,
                "description": f"Property for {column_name}",
                "dataType": [map_dtype_to_weaviate(data_type)],
            }
            try:
                client.schema.property.create(selected_class, property_schema)
            except weaviate.exceptions.SchemaValidationException:
                # Property might already exist, so we can continue
                pass
    else:
        # Non-empty schema: the CSV must provide exactly the same columns.
        schema_columns = {prop["name"] for prop in existing_properties}
        if set(dataframe.columns) != schema_columns:
            st.error("The columns in the uploaded CSV do not match the schema of the selected class. Please check and upload the correct CSV or create a new class.")
            return

    # Ingest the data one object per row: ``data_object.create`` takes a
    # single object dict, not a list of records.
    for record in dataframe.to_dict(orient="records"):
        # Leave out missing cells; NaN has no valid JSON representation.
        properties = {
            name: value for name, value in record.items() if pd.notna(value)
        }
        client.data_object.create(properties, selected_class)

    # Display a preview of the ingested data
    st.write(f"Your CSV was successfully integrated into the vector database under the class '{selected_class}'")
    st.write(dataframe.head())  # First few rows of the dataframe as a preview


def get_class_schema(client, class_name):
    """Return the schema dictionary of ``class_name``, or None.

    The full schema is fetched and scanned for a class whose ``"class"``
    entry equals ``class_name``. None is returned when no class matches or
    when the client raises ``SchemaValidationException``.
    """
    try:
        schema = client.schema.get()
    except weaviate.exceptions.SchemaValidationException:
        return None

    for cls in schema.get("classes") or []:
        if cls["class"] == class_name:
            return cls
    return None