"""Embed question/answer metadata and load it into a Supabase vector store.

Reads ``metadata.jsonl``, embeds each entry with a HuggingFace sentence
transformer (all-mpnet-base-v2, 768-dim), bulk-inserts the rows into the
Supabase ``documents`` table, and runs a sample similarity query through
the ``match_documents`` RPC as a sanity check.
"""

import json
import os

from dotenv import load_dotenv
from langchain.schema import Document  # noqa: F401 -- kept; may be relied on by importers of this module
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import SupabaseVectorStore
from supabase.client import Client, create_client

# === Load environment variables ===
load_dotenv()
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_KEY")

# Fail fast with a clear message instead of an opaque client-library error
# when the credentials are missing from the environment.
if not SUPABASE_URL or not SUPABASE_KEY:
    raise RuntimeError(
        "SUPABASE_URL and SUPABASE_SERVICE_KEY must be set in the environment"
    )

# === Initialize Supabase and embeddings ===
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)  # dim=768


# === Load metadata.jsonl ===
def load_jsonl(file_path: str) -> list[dict]:
    """Parse a JSON Lines file into a list of dicts (one dict per line)."""
    with open(file_path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f]


# === Convert JSON entries to documents with embeddings ===
def build_documents(json_data: list[dict]) -> list[dict]:
    """Turn metadata entries into Supabase rows with precomputed embeddings.

    Each entry is expected to carry 'Question', 'Final answer', and
    'task_id' keys -- TODO confirm against the metadata.jsonl producer.
    """
    docs = []
    for item in json_data:
        content = f"Question: {item['Question']}\n\nFinal answer: {item['Final answer']}"
        docs.append({
            "content": content,
            # must contain 'source' key for SupabaseVectorStore
            "metadata": {"source": item['task_id']},
            "embedding": embeddings.embed_query(content),
        })
    return docs


# === Insert documents into Supabase ===
def insert_documents(docs: list[dict]) -> None:
    """Bulk-insert prepared rows into the Supabase 'documents' table.

    Deliberately non-fatal: failures are reported, not raised, so a
    partial pipeline run remains visible.
    """
    try:
        supabase.table("documents").insert(docs).execute()
        print("Inserted documents:", len(docs))
    except Exception as e:
        print("❌ Error inserting data into Supabase:", e)


# === (Optional) Save docs to CSV ===
def save_docs_to_csv(docs: list[dict], filename: str = "supabase_docs.csv") -> None:
    """Dump the prepared rows to a CSV file for manual upload."""
    import pandas as pd

    df = pd.DataFrame(docs)
    df.to_csv(filename, index=False)
    # BUG FIX: the f-string previously contained no placeholder, so the
    # actual output path was never shown.
    print(f"Saved documents to {filename}")


# === Test querying the vector store ===
def test_vector_query(query: str) -> None:
    """Run a similarity search against the vector store and print the top hit."""
    vector_store = SupabaseVectorStore(
        client=supabase,
        embedding=embeddings,
        table_name="documents",
        query_name="match_documents",  # make sure this function exists in Supabase
    )
    retriever = vector_store.as_retriever()
    results = retriever.invoke(query)
    # BUG FIX: guard the empty-result case (previously raised IndexError
    # on results[0] when the table had no matches).
    if not results:
        print("No matches found.")
        return
    print("Top match:")
    print(results[0].page_content)


# === Main execution ===
def main() -> None:
    """Load metadata, insert embedded rows, and run a sanity-check query."""
    json_data = load_jsonl("metadata.jsonl")
    documents = build_documents(json_data)
    insert_documents(documents)
    # Uncomment below if you want to manually upload the data
    # save_docs_to_csv(documents)
    query = (
        "On June 6, 2023, an article by Carolyn Collins Petersen was published in Universe Today. "
        "This article mentions a team that produced a paper about their observations, linked at the bottom. "
        "Find this paper. Under what NASA award number was the work performed by R. G. Arendt supported by?"
    )
    test_vector_query(query)


if __name__ == "__main__":
    main()