import ollama
import faiss
import os
from google import genai
from google.genai import types
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from utils.rag_retriever import search_with_full_query, search_with_multiple_sentences
from utils.prompts import icf_classifier_prompt, icf_gemini_prompt
# Load the Gemini API key from environment variables for security.
# Make sure the 'GEMINI_API_KEY' environment variable is set on your system.
load_dotenv()
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
if not GEMINI_API_KEY:
print("AVISO: A variável de ambiente 'GEMINI_API_KEY' não está definida. A API do Gemini pode não funcionar.")
# WARNING: The 'GEMINI_API_KEY' environment variable is not set. The Gemini API may not work.
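# Example .env file entry consumed by load_dotenv() (illustrative placeholder):
#   GEMINI_API_KEY=your-key-here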
def _generate_context_for_llm(
input_phrase: str,
documents: list,
index: faiss.Index,
embedder: SentenceTransformer,
search_strategy: str = 'multiple'
) -> str:
"""
Generates relevant context for the input phrase using the RAG system.
This function allows choosing between two context search strategies:
'full': Searches for the most relevant contexts for the entire question.
'multiple': Segments the question into sentences and searches for multiple contexts, ensuring uniqueness.
Args:
input_phrase (str): The user's phrase or question for which context will be generated.
documents (list): A list of strings, representing the documents/texts from which context will be retrieved.
index (faiss.Index): The pre-built FAISS index for similarity search in document embeddings.
embedder (SentenceTransformer): The embedding model used to convert text into vectors.
search_strategy (str, optional): The context search strategy to be used.
Can be 'full' or 'multiple'. Defaults to 'multiple'.
    Returns:
        str: A string containing the retrieved contexts, joined by newlines.
             Returns an empty string if no context is found.

    Raises:
        ValueError: If the provided search strategy is invalid.
"""
    retrieved_contexts_with_distance = []
    if search_strategy == 'full':
        # k=5 as the default number of contexts for the full-query search
        retrieved_contexts_with_distance = search_with_full_query(
            input_phrase, documents, index, embedder, k=5
        )
    elif search_strategy == 'multiple':
        # k_per_sentence=3 contexts per segmented sentence
        retrieved_contexts_with_distance = search_with_multiple_sentences(
            input_phrase, documents, index, embedder, k_per_sentence=3
        )
    else:
        raise ValueError(
            f"Estratégia de busca de contexto inválida: '{search_strategy}'. Use 'full' ou 'multiple'."
            # Invalid context search strategy: '{search_strategy}'. Use 'full' or 'multiple'.
        )
    # Extract only the document text from the list of (index, text, distance) tuples
context_texts = [text for _, text, _ in retrieved_contexts_with_distance]
context_string = "\n".join(context_texts)
return context_string
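
# Illustrative sketch (not part of the original module): one way to build the
# FAISS index and embedder that _generate_context_for_llm expects. The model
# name 'all-MiniLM-L6-v2' and the flat L2 index are assumptions; substitute
# whatever the project actually uses.
def build_faiss_index_sketch(documents: list) -> tuple:
    """Builds a flat L2 FAISS index over document embeddings (illustrative only)."""
    embedder = SentenceTransformer('all-MiniLM-L6-v2')  # assumed embedding model
    embeddings = embedder.encode(documents, convert_to_numpy=True)
    index = faiss.IndexFlatL2(embeddings.shape[1])  # exact L2 similarity search
    index.add(embeddings)
    return index, embedder
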
def generate_ollama_response(input_phrase: str, context: str) -> str:
"""
Generates a response using the Ollama language model locally.
Constructs a detailed prompt with the user's input phrase and the retrieved
context to guide the model in generating an informed response about ICF.
Args:
input_phrase (str): The user's original phrase or question.
context (str): A string containing the relevant context retrieved from RAG.
Returns:
str: The response generated by the Ollama model.
Raises:
ollama.ResponseError: If there is an error communicating with the Ollama server.
Exception: For other unexpected errors during response generation.
"""
    # Prompt with detailed instructions about the ICF
prompt_text = icf_classifier_prompt(context, input_phrase)
print("\n--- Prompt Gerado para Ollama ---") # Generated Prompt for Ollama
print(prompt_text)
print("--- Fim do Prompt Ollama ---") # End of Ollama Prompt
try:
# Assume 'gemma2:latest' or similar appropriate model for Ollama
response_data = ollama.generate(model='gemma2:latest', prompt=prompt_text)
return response_data.get('response', 'Nenhuma resposta gerada pelo Ollama.') # No response generated by Ollama.
except ollama.ResponseError as e:
print(f"Erro de resposta do Ollama: {e}") # Ollama response error
return f"Desculpe, ocorreu um erro ao gerar a resposta com Ollama: {e}" # Sorry, an error occurred while generating the response with Ollama
except Exception as e:
print(f"Erro inesperado ao gerar resposta com Ollama: {e}") # Unexpected error generating response with Ollama
return f"Desculpe, ocorreu um erro inesperado: {e}" # Sorry, an unexpected error occurred
def generate_gemini_response(input_phrase: str, context: str) -> str:
"""
Generates a response using the Google Gemini API.
Connects to the Gemini API, constructs a model-specific prompt,
and sends the request to obtain a response based on the user's phrase and context.
Args:
input_phrase (str): The user's original phrase or question.
context (str): A string containing the relevant context retrieved from RAG.
Returns:
str: The response generated by the Gemini model.
"""
if not GEMINI_API_KEY:
return "Erro: Chave de API do Gemini não configurada. Por favor, defina a variável de ambiente 'GEMINI_API_KEY'."
# Error: Gemini API key not configured. Please set the 'GEMINI_API_KEY' environment variable.
try:
client = genai.Client(api_key=GEMINI_API_KEY)
        # Prompt with detailed instructions about the ICF
gemini_prompt_text = icf_gemini_prompt(context, input_phrase)
print("\n--- Prompt Gerado para Gemini ---") # Generated Prompt for Gemini
print(gemini_prompt_text)
print("--- Fim do Prompt Gemini ---") # End of Gemini Prompt
        # Request configuration for the Gemini model
        # model_name = "gemini-2.5-flash-preview-05-20"  # alternative preview model
        model_name = "gemini-2.0-flash-001"
api_response = client.models.generate_content(
model=model_name, contents=gemini_prompt_text
)
return api_response.text
    except Exception as e:
        # Gemini API error: check your GEMINI_API_KEY and the error details
        print(f"Erro da API do Gemini: {e}. Verifique sua GEMINI_API_KEY e os detalhes do erro.")
        if "Authentication" in str(e) or "API key" in str(e):
            # Authentication error with the Gemini API: check your API key and permissions
            return "Erro de autenticação com a API do Gemini. Verifique sua chave de API e permissões."
        return f"Desculpe, ocorreu um erro na API do Gemini: {e}"  # Sorry, a Gemini API error occurred
# Unified entry point: generates a response with the chosen LLM (Ollama or Gemini)
def generate_response_with_llm(
input_phrase: str,
documents: list,
index: faiss.Index,
embedder: SentenceTransformer,
llm_choice: str = 'gemini',
rag_strategy: str = 'multiple'
) -> str:
"""
Main function to generate a response using an LLM (Ollama or Gemini),
based on context retrieved via RAG.
Args:
input_phrase (str): The user's original phrase or question.
documents (list): A list of strings, representing the documents from which context will be retrieved.
index (faiss.Index): The pre-built FAISS index for similarity search in embeddings.
embedder (SentenceTransformer): The embedding model.
llm_choice (str, optional): The LLM to be used ('ollama' or 'gemini'). Defaults to 'gemini'.
rag_strategy (str, optional): The context search strategy ('full' or 'multiple'). Defaults to 'multiple'.
Returns:
str: The response generated by the LLM.
"""
    # 1. Retrieve context via RAG
retrieved_context = ""
try:
retrieved_context = _generate_context_for_llm(
input_phrase, documents, index, embedder, search_strategy=rag_strategy
)
    except ValueError as e:
        # Return the error message for an invalid search strategy
        return str(e)
    except Exception as e:
        return f"Erro ao recuperar contexto: {e}"  # Error retrieving context
    if not retrieved_context:
        # Could not find relevant context; ask the user to rephrase or give details
        return "Não foi possível encontrar contexto relevante para a sua pergunta. Por favor, reformule ou forneça mais detalhes."
    # 2. Generate the response with the chosen LLM
if llm_choice.lower() == 'ollama':
return generate_ollama_response(input_phrase, retrieved_context)
elif llm_choice.lower() == 'gemini':
return generate_gemini_response(input_phrase, retrieved_context)
else:
# Error: Invalid LLM choice ('{llm_choice}'). Valid options are 'ollama' or 'gemini'.
return f"Erro: Escolha de LLM inválida ('{llm_choice}'). Opções válidas são 'ollama' ou 'gemini'."