|
import ollama |
|
import faiss |
|
import os |
|
|
|
from google import genai |
|
from google.genai import types |
|
from dotenv import load_dotenv |
|
from sentence_transformers import SentenceTransformer |
|
|
|
from utils.rag_retriever import search_with_full_query, search_with_multiple_sentences |
|
from utils.prompts import icf_classifier_prompt, icf_gemini_prompt |
|
|
|
|
|
|
|
# Load environment variables from a local .env file (no-op if the file is absent).
load_dotenv()

# Gemini API key, read once at import time; may legitimately be None when
# only the Ollama backend is used.
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')



# Warn instead of failing hard: the Ollama code path works without a Gemini key.
if not GEMINI_API_KEY:

    print("AVISO: A variável de ambiente 'GEMINI_API_KEY' não está definida. A API do Gemini pode não funcionar.")
|
|
|
|
|
def _generate_context_for_llm(
    input_phrase: str,
    documents: list,
    index: faiss.Index,
    embedder: SentenceTransformer,
    search_strategy: str = 'multiple'
) -> str:
    """
    Generates relevant context for the input phrase using the RAG system.

    Two context search strategies are supported:
        'full': Searches for the most relevant contexts for the entire question.
        'multiple': Segments the question into sentences and searches for
            multiple contexts, ensuring uniqueness.

    Args:
        input_phrase (str): The user's phrase or question for which context will be generated.
        documents (list): Documents/texts from which context will be retrieved.
        index (faiss.Index): The pre-built FAISS index for similarity search in document embeddings.
        embedder (SentenceTransformer): The embedding model used to convert text into vectors.
        search_strategy (str, optional): The context search strategy to be used.
            Can be 'full' or 'multiple'. Defaults to 'multiple'.

    Returns:
        str: The retrieved contexts joined by newlines; an empty string when
            nothing was retrieved.

    Raises:
        ValueError: If the provided search strategy is invalid.
    """
    if search_strategy == 'full':
        retrieved_contexts_with_distance = search_with_full_query(
            input_phrase, documents, index, embedder, k=5
        )
    elif search_strategy == 'multiple':
        retrieved_contexts_with_distance = search_with_multiple_sentences(
            input_phrase, documents, index, embedder, k_per_sentence=3
        )
    else:
        # Bug fix: the message previously suggested 'completo'/'multiplo',
        # values this function never accepts; advertise the real options.
        raise ValueError(
            f"Estratégia de busca de contexto inválida: '{search_strategy}'. "
            "Use 'full' ou 'multiple'."
        )

    # Each retrieved item is a (position, text, distance) triple; keep only the text.
    context_texts = [text for _, text, _ in retrieved_contexts_with_distance]
    return "\n".join(context_texts)
|
|
|
def generate_ollama_response(input_phrase: str, context: str) -> str:
    """
    Generates a response with a locally hosted Ollama language model.

    Builds the ICF classifier prompt from the retrieved context and the
    user's phrase, echoes it for debugging, and asks the 'gemma2:latest'
    model to answer.

    Args:
        input_phrase (str): The user's original phrase or question.
        context (str): Relevant context retrieved via RAG.

    Returns:
        str: The model's answer, or a Portuguese error message when the
            Ollama call fails.
    """
    prompt_text = icf_classifier_prompt(context, input_phrase)

    # Print the full prompt so retrieval problems are easy to diagnose.
    print("\n--- Prompt Gerado para Ollama ---")
    print(prompt_text)
    print("--- Fim do Prompt Ollama ---")

    try:
        result = ollama.generate(model='gemma2:latest', prompt=prompt_text)
        answer = result.get('response', 'Nenhuma resposta gerada pelo Ollama.')
    except ollama.ResponseError as e:
        print(f"Erro de resposta do Ollama: {e}")
        return f"Desculpe, ocorreu um erro ao gerar a resposta com Ollama: {e}"
    except Exception as e:
        print(f"Erro inesperado ao gerar resposta com Ollama: {e}")
        return f"Desculpe, ocorreu um erro inesperado: {e}"
    else:
        return answer
|
|
|
def generate_gemini_response(input_phrase: str, context: str) -> str:
    """
    Generates a response using the Google Gemini API.

    Creates a Gemini client, builds the model-specific prompt from the
    user's phrase and the RAG context, and requests a completion from
    the 'gemini-2.0-flash-001' model.

    Args:
        input_phrase (str): The user's original phrase or question.
        context (str): Relevant context retrieved via RAG.

    Returns:
        str: The text generated by the Gemini model, or a Portuguese
            error message when the key is missing or the API call fails.
    """
    # Fail fast with a clear message when no key was configured at import time.
    if not GEMINI_API_KEY:
        return "Erro: Chave de API do Gemini não configurada. Por favor, defina a variável de ambiente 'GEMINI_API_KEY'."

    try:
        gemini_client = genai.Client(api_key=GEMINI_API_KEY)
        prompt_for_gemini = icf_gemini_prompt(context, input_phrase)

        # Print the full prompt so retrieval problems are easy to diagnose.
        print("\n--- Prompt Gerado para Gemini ---")
        print(prompt_for_gemini)
        print("--- Fim do Prompt Gemini ---")

        api_response = gemini_client.models.generate_content(
            model="gemini-2.0-flash-001",
            contents=prompt_for_gemini,
        )
        return api_response.text
    except Exception as e:
        print(f"Erro da API do Gemini: {e}. Verifique sua GEMINI_API_KEY e os detalhes do erro.")
        # Distinguish credential problems from other API failures.
        error_text = str(e)
        if any(marker in error_text for marker in ("Authentication", "API key")):
            return "Erro de autenticação com a API do Gemini. Verifique sua chave de API e permissões."
        return f"Desculpe, ocorreu um erro na API do Gemini: {e}"
|
|
|
|
|
|
|
def generate_response_with_llm(
    input_phrase: str,
    documents: list,
    index: faiss.Index,
    embedder: SentenceTransformer,
    llm_choice: str = 'gemini',
    rag_strategy: str = 'multiple'
) -> str:
    """
    Main entry point: answers a question with an LLM (Ollama or Gemini)
    grounded in context retrieved via RAG.

    Args:
        input_phrase (str): The user's original phrase or question.
        documents (list): Documents from which context will be retrieved.
        index (faiss.Index): The pre-built FAISS index for similarity search in embeddings.
        embedder (SentenceTransformer): The embedding model.
        llm_choice (str, optional): The LLM to use ('ollama' or 'gemini'). Defaults to 'gemini'.
        rag_strategy (str, optional): The context search strategy ('full' or 'multiple').
            Defaults to 'multiple'.

    Returns:
        str: The LLM's answer, or a Portuguese error message when retrieval
            fails, no context is found, or `llm_choice` is invalid.
    """
    try:
        retrieved_context = _generate_context_for_llm(
            input_phrase, documents, index, embedder, search_strategy=rag_strategy
        )
    except ValueError as err:
        # Invalid RAG strategy: surface the validation message directly.
        return str(err)
    except Exception as err:
        return f"Erro ao recuperar contexto: {err}"

    # Without grounding context, refuse rather than hallucinate.
    if not retrieved_context:
        return "Não foi possível encontrar contexto relevante para a sua pergunta. Por favor, reformule ou forneça mais detalhes."

    # Case-insensitive dispatch to the chosen backend.
    backends = {
        'ollama': generate_ollama_response,
        'gemini': generate_gemini_response,
    }
    backend = backends.get(llm_choice.lower())
    if backend is None:
        return f"Erro: Escolha de LLM inválida ('{llm_choice}'). Opções válidas são 'ollama' ou 'gemini'."
    return backend(input_phrase, retrieved_context)
|
|