import os

import faiss
import gradio as gr
import numpy as np
import pandas as pd
from huggingface_hub import upload_file
from sentence_transformers import SentenceTransformer

# 1. Charger et Prétraiter les données
def preprocess_data(file_path):
    """Load the Excel sheet at ``file_path`` and add a ``combined`` text column.

    Args:
        file_path: Location of the spreadsheet, passed to ``pandas.read_excel``.

    Returns:
        The loaded DataFrame with one extra column, ``combined``, holding a
        single descriptive string per row built from the ``Product_name``,
        ``Product_Category``, ``price``, ``Region_address`` and
        ``Local_address`` columns.
    """
    def describe(row):
        # One line of searchable text per row; this is what gets embedded.
        return f"Product: {row['Product_name']} | Category: {row['Product_Category']} | Price: {row['price']} | Location: {row['Region_address']} - {row['Local_address']}"

    frame = pd.read_excel(file_path)
    frame['combined'] = frame.apply(describe, axis=1)
    return frame

# 2. Générer les Embeddings et Sauvegarder
def generate_embeddings(data, model_name="all-MiniLM-L6-v2", embedding_path="embeddings.npy"):
    """Encode the ``combined`` column and save the resulting vectors to disk.

    Args:
        data: DataFrame providing a ``combined`` column of text.
        model_name: Name handed to ``SentenceTransformer``.
        embedding_path: Destination passed to ``np.save``.

    Returns:
        A ``(embeddings, model)`` tuple: the encoder output and the encoder
        instance that produced it.
    """
    encoder = SentenceTransformer(model_name)
    texts = data['combined'].tolist()
    vectors = encoder.encode(texts)
    # Persist the vectors so later runs can load them instead of re-encoding.
    np.save(embedding_path, vectors)
    return vectors, encoder

# 3. Créer et Sauvegarder FAISS
def create_and_save_faiss_index(embeddings, index_path="faiss_index.bin"):
    """Build a flat L2 FAISS index over ``embeddings`` and write it to disk.

    Args:
        embeddings: 2-D array of vectors; the second axis gives the index
            dimensionality.
        index_path: File the index is written to via ``faiss.write_index``.

    Returns:
        The populated ``faiss.IndexFlatL2`` instance.
    """
    flat_index = faiss.IndexFlatL2(embeddings.shape[1])
    flat_index.add(embeddings)
    # Persist the index so later runs can load it instead of rebuilding.
    faiss.write_index(flat_index, index_path)
    return flat_index

# 4. Charger les Embeddings et FAISS
def load_embeddings_and_index(embedding_path="embeddings.npy", index_path="faiss_index.bin"):
    """Read previously saved embeddings and FAISS index from disk.

    Args:
        embedding_path: File read with ``np.load``.
        index_path: File read with ``faiss.read_index``.

    Returns:
        An ``(embeddings, index)`` tuple.
    """
    # The embeddings are read first, so a missing embeddings file is reported
    # before the index file is touched.
    vectors = np.load(embedding_path)
    return vectors, faiss.read_index(index_path)

# 5. Sauvegarder sur HuggingFace
def upload_to_huggingface(local_path, repo_id, token):
    """Upload one local file to the root of a Hugging Face Space repository.

    Args:
        local_path: Path of the file to upload (``str`` or ``os.PathLike``).
        repo_id: Target repository identifier.
        token: Access token forwarded to ``upload_file``.

    The file is stored in the repository under its base name only; any
    directory part of ``local_path`` is dropped.
    """
    # os.path.basename understands the platform's separators and accepts
    # path-like objects, unlike splitting the string on "/".
    filename = os.path.basename(local_path)
    upload_file(
        path_or_fileobj=local_path,
        path_in_repo=filename,
        repo_id=repo_id,
        repo_type="space",
        token=token,
    )
    print(f"Uploaded {local_path} to HuggingFace repo: {repo_id}")

# 6. Rechercher dans FAISS
def query_faiss_index(query, model, index, data, top_k=5):
    """Return the ``combined`` text of the rows nearest to ``query``.

    Args:
        query: Free-text search string.
        model: Encoder exposing ``encode(list_of_str)``; called with
            ``[query]``.
        index: Index exposing ``search(vectors, k)`` that returns a
            ``(distances, indices)`` pair.
        data: DataFrame whose ``combined`` column is looked up positionally
            with the indices returned by ``index``.
        top_k: Maximum number of neighbours to request.

    Returns:
        A list of at most ``top_k`` strings, in the order returned by the
        index.
    """
    query_embedding = model.encode([query])
    _, indices = index.search(query_embedding, top_k)
    combined = data['combined']
    # FAISS pads the result with -1 when it finds fewer than top_k neighbours.
    # Passing -1 to iloc would silently return the last row as a bogus hit,
    # so those placeholder slots are dropped.
    return [combined.iloc[idx] for idx in indices[0] if idx >= 0]

# 7. Interface Gradio (fonction d'exécution principale)
def main(file_path, repo_id, hf_token):
    """Prepare the search artifacts and launch a Gradio text-search UI.

    The data is loaded with ``preprocess_data``. Cached embeddings and FAISS
    index are reused when they can be loaded; otherwise they are generated,
    saved locally, and uploaded to the Hugging Face repository.

    Args:
        file_path: Spreadsheet passed to ``preprocess_data``.
        repo_id: Hugging Face repository that receives freshly generated
            artifacts.
        hf_token: Access token used for the uploads.
    """
    # Load and preprocess the data.
    data = preprocess_data(file_path)

    # Reuse cached embeddings / FAISS index when present.
    try:
        embeddings, index = load_embeddings_and_index()
    except (FileNotFoundError, RuntimeError):
        # np.load raises FileNotFoundError for a missing embeddings file;
        # faiss.read_index reports an unreadable index file as RuntimeError.
        # Either way the artifacts are rebuilt from the data.
        print("Génération des embeddings et index FAISS...")
        embeddings, model = generate_embeddings(data)
        index = create_and_save_faiss_index(embeddings)

        # Artifacts are already saved locally; mirror them to Hugging Face.
        upload_to_huggingface("embeddings.npy", repo_id, hf_token)
        upload_to_huggingface("faiss_index.bin", repo_id, hf_token)
        print("Embeddings et index FAISS sauvegardés.")
    else:
        print("Embeddings et index FAISS chargés.")
        # The encoder is still needed to embed queries when the artifacts come
        # from cache. It must be the same model that produced the cached
        # embeddings (the default of generate_embeddings).
        model = SentenceTransformer("all-MiniLM-L6-v2")
        # NOTE(review): cached artifacts are not checked against `data`; a
        # changed spreadsheet requires deleting the cached files.

    # Gradio callback: run one query against the index.
    def gradio_search(query):
        return query_faiss_index(query, model, index, data)

    # Build and start the Gradio interface.
    interface = gr.Interface(fn=gradio_search, inputs="text", outputs="text")
    interface.launch()

# Exemple d'utilisation
if __name__ == "__main__":
    # Hugging Face parameters
    file_path = "avito.xls"  # Your data file
    repo_id = "halimbahae/EDA-INPT-Workshop"  # ID of your Hugging Face repository

    # The token is read from the environment so it is never committed with the
    # code. When HF_TOKEN is unset this is None, which leaves huggingface_hub
    # to fall back to a locally saved login, if any.
    hf_token = os.environ.get("HF_TOKEN")

    # Run the main entry point.
    main(file_path, repo_id, hf_token)