File size: 5,334 Bytes
65b715e
c32a46b
 
 
 
 
 
46be023
c32a46b
 
 
 
 
 
46be023
c32a46b
 
 
 
 
46be023
 
 
 
c32a46b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46be023
 
c32a46b
 
 
 
 
 
059b43a
 
 
 
 
 
c32a46b
 
 
 
 
059b43a
 
c32a46b
 
 
46be023
059b43a
c32a46b
 
 
 
 
 
46be023
c32a46b
 
 
 
 
46be023
 
 
 
 
 
c32a46b
 
 
 
 
 
 
5099aac
c32a46b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5099aac
cbca936
5099aac
cbca936
 
 
 
 
 
 
5099aac
c32a46b
 
 
 
 
 
059b43a
c32a46b
 
 
 
 
 
 
 
 
 
46be023
c32a46b
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import os
from pathlib import Path
from tempfile import NamedTemporaryFile
import pandas as pd
import streamlit as st
from pdf2image import convert_from_path
from core import init_llm, invoke
import pickle 

# Use the full browser width for the three-column layout below.
st.set_page_config(layout="wide")

# LLM initialization
init_llm()
# Max characters of a justification shown on a proof button label.
PROOF_LIMIT_SIZE = 296
# Pickle file used to persist the chat transcript between sessions.
HISTORY_FILE_NAME = "prompt-history.pkl"

def get_llm_response(input_text):
    """Forward *input_text* to the LLM backend and return its raw response."""
    return invoke(input_text)

def save_history():
    """Persist the chat transcript to HISTORY_FILE_NAME so it survives restarts."""
    Path(HISTORY_FILE_NAME).write_bytes(pickle.dumps(st.session_state.messages))

def get_all_external_files():
    """Load the external-resource catalogue from ``./files.csv``.

    The CSV is semicolon-separated with columns ``file_name``, ``url``,
    ``file`` and ``tags``.

    Returns:
        pd.DataFrame with columns ``name``, ``external-link``,
        ``internal-link`` and ``tags`` (one row per referenced file).
    """
    catalogue = pd.read_csv("./files.csv", sep=";")
    # Vectorized rename/reorder instead of the original iterrows()+append
    # loop: same resulting frame, no per-row Python overhead.
    renamed = catalogue.rename(columns={
        "file_name": "name",
        "url": "external-link",
        "file": "internal-link",
    })
    return renamed[["name", "external-link", "internal-link", "tags"]]

def show_all_external_files():
    """Render a link for every catalogue entry whose tag is currently selected.

    Reads the module-level ``all_files`` DataFrame and the ``options``
    multiselect value.
    """
    st.write("<h6>Elevage bovin</h6>", unsafe_allow_html=True)
    selected = all_files[all_files["tags"].isin(options)]
    for _, entry in selected.iterrows():
        link = f"<a href='{entry['external-link']}'>{entry['name']}</a>"
        st.write(link, unsafe_allow_html=True)

def click_proof(lien, full_content):
    """Proof-button callback: post the full justification to the chat and,
    when the source document is found on disk, render the referenced page.

    Args:
        lien: source identifier, optionally suffixed with "PAGENUMBER<n>"
            selecting a 0-based page index (e.g. "reportPAGENUMBER3").
        full_content: untruncated justification text appended to the chat.
    """
    st.session_state.messages.append({"role": "assistant", "content": full_content})
    path_to_file = None
    # Match the source against the catalogue by file-name stem; when several
    # rows match, the last one wins (no early break).
    # NOTE(review): if `lien` carries a PAGENUMBER suffix, Path(lien).stem
    # includes it — confirm the catalogue stems are built the same way.
    for _, f in all_files.iterrows():
        stemed_fn = Path(f["internal-link"].split("/")[-1]).stem
        if stemed_fn  == Path(lien).stem:
            path_to_file = Path(f["internal-link"])
    if path_to_file and path_to_file.is_file():
        # "PAGENUMBER" separates the document name from the page index;
        # without it we fall back to the first page.
        page_num = lien.split("PAGENUMBER")
        if len(page_num) == 2:
            page_num = int(page_num[1])
        else:
            page_num = 0
        show_pdf(path_to_file, page_num)
    else:
        print(f"File {path_to_file} not found")
    # Refresh the proof buttons in the right-hand column after the click.
    with column_proof:
        update_proofs(st.session_state.proofs)

def show_pdf(link, page_num):
    """Render one page of a PDF to a PNG, record it in the chat history and
    persist the history.

    Args:
        link: path to the PDF file.
        page_num: 0-based index of the page to render.
    """
    # Only convert the requested page (pdf2image pages are 1-based) instead
    # of rasterizing the whole document and indexing into the result.
    page_image = convert_from_path(link, first_page=page_num + 1,
                                   last_page=page_num + 1)[0]
    # delete=False: the file must outlive this function so the chat renderer
    # can display it later; close the handle immediately so the descriptor
    # is not leaked (and the path can be re-opened on Windows).
    with NamedTemporaryFile(delete=False, suffix=".png") as tmp_file:
        tmp_name = tmp_file.name
    page_image.save(tmp_name, 'PNG')
    # Role "ai" marks image messages: the chat replay loop shows them with
    # st.image instead of st.markdown.
    st.session_state.messages.append({"role": "ai", "content": tmp_name})
    save_history()
    print(f"Successfully converted PDF ({link} page {page_num}) to images ({tmp_name})")

def update_proofs(proofs):
    """Render one clickable button per justification of the latest proof set.

    Args:
        proofs: list of {"justifications": [...], "sources": [...]} dicts;
            only the most recent entry is displayed.
    """
    if not proofs:
        return
    latest = proofs[-1]
    pairs = zip(latest["justifications"], latest["sources"])
    for idx, (justification, source) in enumerate(pairs, start=1):
        # Slicing already clamps to the string length, so no min() needed.
        label = f"[{idx}] - {justification[:PROOF_LIMIT_SIZE]} ..."
        st.button(
            label,
            on_click=click_proof,
            use_container_width=True,
            kwargs={"lien": source, "full_content": justification},
        )

# Catalogue of external documents, shared by several callbacks below.
all_files = get_all_external_files()

# Initialize chat history, restoring a previous session when available.
if "messages" not in st.session_state:
    if Path(HISTORY_FILE_NAME).is_file():
        # NOTE(review): pickle.load on a file this app wrote itself; never
        # point HISTORY_FILE_NAME at untrusted data.
        with open(HISTORY_FILE_NAME, "rb") as fn:
            history = pickle.load(fn)
    else:
        history = []
    st.session_state.messages = history

# Bug fix: the guard previously tested the key "proof" while the value was
# stored under "proofs", so the proofs list was reset on every rerun and
# proof buttons never survived an interaction.
if "proofs" not in st.session_state:
    st.session_state.proofs = []

# Key name "curent_result" (sic) kept as-is: it is referenced further down.
if "curent_result" not in st.session_state:
    st.session_state.curent_result = []

# Page header.
st.image("./resources/logo_sebastien.png")
st.title('Hackathon Ferme Digitale 2024 - Crédit Agricole')
st.subheader("Equipe Crédit Agricole")
st.divider()
# Three-column layout: external resources | chat | proof buttons.
column_file_view, column_chat, column_proof = st.columns([0.2, 0.6, 0.2])

with column_file_view:
    st.write("<h5>Ressources externes</h5>", unsafe_allow_html=True)
    # Tag filter for the resource list; defaults to every tag selected.
    options = st.multiselect(
        'Sélectionnez les thématiques',
        all_files["tags"].unique(),
        all_files["tags"].unique(),
    )
    show_all_external_files()


with column_chat:
    # Replay the stored transcript. Role "ai" entries hold image file paths
    # (PDF pages saved by show_pdf); every other role is markdown text.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            if message["role"] == "ai":
                st.image(message["content"])
            else:
                st.markdown(message["content"])

with column_proof:
    # Show the proof buttons for the most recent answer (if any).
    update_proofs(st.session_state.proofs)

prompt_element = st.chat_input("Comment puis je vous aider aujourd'hui?")

# Cosmetic overrides for the chat input.
# NOTE(review): these class names (st-co, st-dv, st-emotion-cache-*) are
# auto-generated by Streamlit and can change between versions — fragile.
st.markdown("""
    <style> textarea.st-co {color: #212121 } </style>
""", unsafe_allow_html=True)
st.markdown("""
    <style> textarea.st-dv { caret-color: red } </style>
""", unsafe_allow_html=True)
st.markdown(
    """<style> .st-emotion-cache-15vaqhh a {color: #CCCCCC } </style>""",
    unsafe_allow_html=True
)

with column_chat:
    # Main interaction: executed once per Streamlit rerun when the user has
    # just submitted a prompt.
    if prompt := prompt_element:
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        # Temporary spinner, replaced by the assistant message once ready.
        placeholder = st.empty()
        placeholder.status("Veuillez patienter ...", expanded=False)
        # Blocking call to the LLM backend; expected to return a dict with
        # "content", "justifications" and "sources" keys.
        response = get_llm_response(prompt)
        st.session_state.messages.append({"role": "assistant", "content": response["content"]})
        st.session_state.proofs.append({"justifications": response["justifications"], "sources": response["sources"]})
        st.session_state.curent_result.append({
            "prompt": prompt,
            "content": response["content"],
            "justifications": response["justifications"],
            "sources": response["sources"]

        })
        # Persist the transcript before rendering, so a crash while drawing
        # does not lose the exchange.
        save_history()
        with placeholder.chat_message("assistant"):
            st.markdown(response["content"])
        with column_proof:
            update_proofs(st.session_state.proofs)