import os import re from typing import Tuple, List from dotenv import load_dotenv from msal import ConfidentialClientApplication from langchain.schema import format_document def init_env(): try: load_dotenv() except: pass def get_token() -> str | None: app = ConfidentialClientApplication( client_id=os.getenv("CLIENT_ID"), client_credential=os.getenv("CLIENT_SECRET"), authority=f"https://login.microsoftonline.com/{os.getenv('TENANT_ID')}", ) result = app.acquire_token_for_client(scopes=[os.getenv("SCOPE")]) if result is not None: return result["access_token"] def get_llm(): os.environ["OPENAI_API_KEY"] = get_token() os.environ["AZURE_OPENAI_ENDPOINT"] = ( f"{os.getenv('OPENAI_API_ENDPOINT')}{os.getenv('DEPLOYMENT_ID')}/chat/completions?api-version={os.getenv('OPENAI_API_VERSION')}" ) return AzureChatOpenAI() def _combine_documents(docs, document_prompt, document_separator="\n\n"): doc_strings = [ f"Document {i}: \n'''\n{format_document(doc, document_prompt)}\n'''" for i, doc in enumerate(docs, 1) ] return document_separator.join(doc_strings) def _format_chat_history(chat_history: List[Tuple]) -> str: turn = 1 buffer = [] for dialogue in chat_history: buffer.append(("Human: " if turn else "Assistant: ") + dialogue.content) turn ^= 1 return "\n".join(buffer) + "\n" def make_pairs(lst): """from a list of even lenght, make tupple pairs""" return [(lst[i], lst[i + 1]) for i in range(0, len(lst), 2)] def make_html_source(i, doc): if doc.metadata["source"] == "ESRS": return f"""
{doc.page_content}
{doc.page_content}