import ollama
import faiss
import os
from google import genai
from google.genai import types
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from utils.rag_retriever import search_with_full_query, search_with_multiple_sentences
from utils.prompts import icf_classifier_prompt, icf_gemini_prompt
# Load the Gemini API key from environment variables for security.
# Make sure the 'GEMINI_API_KEY' environment variable is set on your system.
load_dotenv()
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
if not GEMINI_API_KEY:
print("AVISO: A variável de ambiente 'GEMINI_API_KEY' não está definida. A API do Gemini pode não funcionar.")
# WARNING: The 'GEMINI_API_KEY' environment variable is not set. The Gemini API may not work.
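# Example .env file entry consumed by load_dotenv() (illustrative placeholder):
#   GEMINI_API_KEY=your-key-here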
def _generate_context_for_llm(
input_phrase: str,
documents: list,
index: faiss.Index,
embedder: SentenceTransformer,
search_strategy: str = 'multiple'
) -> str:
"""
Generates relevant context for the input phrase using the RAG system.
This function allows choosing between two context search strategies:
'full': Searches for the most relevant contexts for the entire question.
'multiple': Segments the question into sentences and searches for multiple contexts, ensuring uniqueness.
Args:
input_phrase (str): The user's phrase or question for which context will be generated.
documents (list): A list of strings, representing the documents/texts from which context will be retrieved.
index (faiss.Index): The pre-built FAISS index for similarity search in document embeddings.
embedder (SentenceTransformer): The embedding model used to convert text into vectors.
search_strategy (str, optional): The context search strategy to be used.
Can be 'full' or 'multiple'. Defaults to 'multiple'.
    Returns:
        str: A string containing the retrieved contexts, joined by newlines.
             Returns an empty string if no context is found.

    Raises:
        ValueError: If the provided search strategy is invalid.
"""
    retrieved_contexts_with_distance = []
    if search_strategy == 'full':
        # k=5 as the default number of contexts for the full-query search
        retrieved_contexts_with_distance = search_with_full_query(
            input_phrase, documents, index, embedder, k=5
        )
    elif search_strategy == 'multiple':
        # k_per_sentence=3 contexts per segmented sentence
        retrieved_contexts_with_distance = search_with_multiple_sentences(
            input_phrase, documents, index, embedder, k_per_sentence=3
        )
    else:
        raise ValueError(
            f"Estratégia de busca de contexto inválida: '{search_strategy}'. Use 'full' ou 'multiple'."
            # Invalid context search strategy: '{search_strategy}'. Use 'full' or 'multiple'.
        )
    # Extract only the document text from the list of (index, text, distance) tuples
context_texts = [text for _, text, _ in retrieved_contexts_with_distance]
context_string = "\n".join(context_texts)
return context_string
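
# Illustrative sketch (not part of the original module): one way to build the
# FAISS index and embedder that _generate_context_for_llm expects. The model
# name 'all-MiniLM-L6-v2' and the flat L2 index are assumptions; substitute
# whatever the project actually uses.
def build_faiss_index_sketch(documents: list) -> tuple:
    """Builds a flat L2 FAISS index over document embeddings (illustrative only)."""
    embedder = SentenceTransformer('all-MiniLM-L6-v2')  # assumed embedding model
    embeddings = embedder.encode(documents, convert_to_numpy=True)
    index = faiss.IndexFlatL2(embeddings.shape[1])  # exact L2 similarity search
    index.add(embeddings)
    return index, embedder
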
def generate_ollama_response(input_phrase: str, context: str) -> str:
"""
Generates a response using the Ollama language model locally.
Constructs a detailed prompt with the user's input phrase and the retrieved
context to guide the model in generating an informed response about ICF.
Args:
input_phrase (str): The user's original phrase or question.
context (str): A string containing the relevant context retrieved from RAG.
Returns:
str: The response generated by the Ollama model.
Raises:
ollama.ResponseError: If there is an error communicating with the Ollama server.
Exception: For other unexpected errors during response generation.
"""
    # Prompt with detailed instructions about the ICF
prompt_text = icf_classifier_prompt(context, input_phrase)
print("\n--- Prompt Gerado para Ollama ---") # Generated Prompt for Ollama
print(prompt_text)
print("--- Fim do Prompt Ollama ---") # End of Ollama Prompt
try:
# Assume 'gemma2:latest' or similar appropriate model for Ollama
response_data = ollama.generate(model='gemma2:latest', prompt=prompt_text)
return response_data.get('response', 'Nenhuma resposta gerada pelo Ollama.') # No response generated by Ollama.
except ollama.ResponseError as e:
print(f"Erro de resposta do Ollama: {e}") # Ollama response error
return f"Desculpe, ocorreu um erro ao gerar a resposta com Ollama: {e}" # Sorry, an error occurred while generating the response with Ollama
except Exception as e:
print(f"Erro inesperado ao gerar resposta com Ollama: {e}") # Unexpected error generating response with Ollama
return f"Desculpe, ocorreu um erro inesperado: {e}" # Sorry, an unexpected error occurred
def generate_gemini_response(input_phrase: str, context: str) -> str:
"""
Generates a response using the Google Gemini API.
Connects to the Gemini API, constructs a model-specific prompt,
and sends the request to obtain a response based on the user's phrase and context.
Args:
input_phrase (str): The user's original phrase or question.
context (str): A string containing the relevant context retrieved from RAG.
Returns:
str: The response generated by the Gemini model.
"""
if not GEMINI_API_KEY:
return "Erro: Chave de API do Gemini não configurada. Por favor, defina a variável de ambiente 'GEMINI_API_KEY'."
# Error: Gemini API key not configured. Please set the 'GEMINI_API_KEY' environment variable.
try:
client = genai.Client(api_key=GEMINI_API_KEY)
        # Prompt with detailed instructions about the ICF
gemini_prompt_text = icf_gemini_prompt(context, input_phrase)
print("\n--- Prompt Gerado para Gemini ---") # Generated Prompt for Gemini
print(gemini_prompt_text)
print("--- Fim do Prompt Gemini ---") # End of Gemini Prompt
        # Request configuration for the Gemini model
        # model_name = "gemini-2.5-flash-preview-05-20"  # alternative preview model
        model_name = "gemini-2.0-flash-001"
api_response = client.models.generate_content(
model=model_name, contents=gemini_prompt_text
)
return api_response.text
    except Exception as e:
        # Gemini API error: check your GEMINI_API_KEY and the error details
        print(f"Erro da API do Gemini: {e}. Verifique sua GEMINI_API_KEY e os detalhes do erro.")
        if "Authentication" in str(e) or "API key" in str(e):
            # Authentication error with the Gemini API: check your API key and permissions
            return "Erro de autenticação com a API do Gemini. Verifique sua chave de API e permissões."
        return f"Desculpe, ocorreu um erro na API do Gemini: {e}"  # Sorry, a Gemini API error occurred
# Unified entry point: generates a response with the chosen LLM (Ollama or Gemini)
def generate_response_with_llm(
input_phrase: str,
documents: list,
index: faiss.Index,
embedder: SentenceTransformer,
llm_choice: str = 'gemini',
rag_strategy: str = 'multiple'
) -> str:
"""
Main function to generate a response using an LLM (Ollama or Gemini),
based on context retrieved via RAG.
Args:
input_phrase (str): The user's original phrase or question.
documents (list): A list of strings, representing the documents from which context will be retrieved.
index (faiss.Index): The pre-built FAISS index for similarity search in embeddings.
embedder (SentenceTransformer): The embedding model.
llm_choice (str, optional): The LLM to be used ('ollama' or 'gemini'). Defaults to 'gemini'.
rag_strategy (str, optional): The context search strategy ('full' or 'multiple'). Defaults to 'multiple'.
Returns:
str: The response generated by the LLM.
"""
    # 1. Retrieve context via RAG
retrieved_context = ""
try:
retrieved_context = _generate_context_for_llm(
input_phrase, documents, index, embedder, search_strategy=rag_strategy
)
    except ValueError as e:
        # Return the error message for an invalid search strategy
        return str(e)
    except Exception as e:
        return f"Erro ao recuperar contexto: {e}"  # Error retrieving context
    if not retrieved_context:
        # Could not find relevant context; ask the user to rephrase or give details
        return "Não foi possível encontrar contexto relevante para a sua pergunta. Por favor, reformule ou forneça mais detalhes."
    # 2. Generate the response with the chosen LLM
if llm_choice.lower() == 'ollama':
return generate_ollama_response(input_phrase, retrieved_context)
elif llm_choice.lower() == 'gemini':
return generate_gemini_response(input_phrase, retrieved_context)
else:
# Error: Invalid LLM choice ('{llm_choice}'). Valid options are 'ollama' or 'gemini'.
return f"Erro: Escolha de LLM inválida ('{llm_choice}'). Opções válidas são 'ollama' ou 'gemini'."