File size: 5,334 Bytes
65b715e
c32a46b
 
 
 
 
 
46be023
c32a46b
 
 
 
 
 
46be023
c32a46b
 
 
 
 
46be023
 
 
 
c32a46b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46be023
 
c32a46b
 
 
 
 
 
059b43a
 
 
 
 
 
c32a46b
 
 
 
 
059b43a
 
c32a46b
 
 
46be023
059b43a
c32a46b
 
 
 
 
 
46be023
c32a46b
 
 
 
 
46be023
 
 
 
 
 
c32a46b
 
 
 
 
 
 
5099aac
c32a46b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5099aac
cbca936
5099aac
cbca936
 
 
 
 
 
 
5099aac
c32a46b
 
 
 
 
 
059b43a
c32a46b
 
 
 
 
 
 
 
 
 
46be023
c32a46b
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import os
from pathlib import Path
from tempfile import NamedTemporaryFile
import pandas as pd
import streamlit as st
from pdf2image import convert_from_path
from core import init_llm, invoke
import pickle 

# Use the full browser width for the three-column layout below.
st.set_page_config(layout="wide")

# LLM initialization
init_llm()
# Max characters of a justification shown on a proof button label.
PROOF_LIMIT_SIZE = 296
# Pickle file used to persist the chat transcript between sessions.
HISTORY_FILE_NAME = "prompt-history.pkl"

def get_llm_response(input_text):
    """Forward *input_text* to the LLM backend and return its raw response."""
    return invoke(input_text)

def save_history():
    """Persist the chat transcript to HISTORY_FILE_NAME so it survives restarts."""
    Path(HISTORY_FILE_NAME).write_bytes(pickle.dumps(st.session_state.messages))

def get_all_external_files():
    """Load the external-resource catalogue from ``./files.csv``.

    The CSV is semicolon-separated with columns ``file_name``, ``url``,
    ``file`` and ``tags``.

    Returns:
        pd.DataFrame with columns ``name``, ``external-link``,
        ``internal-link`` and ``tags`` (one row per referenced file).
    """
    catalogue = pd.read_csv("./files.csv", sep=";")
    # Vectorized rename/reorder instead of the original iterrows()+append
    # loop: same resulting frame, no per-row Python overhead.
    renamed = catalogue.rename(columns={
        "file_name": "name",
        "url": "external-link",
        "file": "internal-link",
    })
    return renamed[["name", "external-link", "internal-link", "tags"]]

def show_all_external_files():
    """Render a link for every catalogue entry whose tag is currently selected.

    Reads the module-level ``all_files`` DataFrame and the ``options``
    multiselect value.
    """
    st.write("<h6>Elevage bovin</h6>", unsafe_allow_html=True)
    selected = all_files[all_files["tags"].isin(options)]
    for _, entry in selected.iterrows():
        link = f"<a href='{entry['external-link']}'>{entry['name']}</a>"
        st.write(link, unsafe_allow_html=True)

def click_proof(lien, full_content):
    """Proof-button callback: post the full justification to the chat and,
    when the source document is found on disk, render the referenced page.

    Args:
        lien: source identifier, optionally suffixed with "PAGENUMBER<n>"
            selecting a 0-based page index (e.g. "reportPAGENUMBER3").
        full_content: untruncated justification text appended to the chat.
    """
    st.session_state.messages.append({"role": "assistant", "content": full_content})
    path_to_file = None
    # Match the source against the catalogue by file-name stem; when several
    # rows match, the last one wins (no early break).
    # NOTE(review): if `lien` carries a PAGENUMBER suffix, Path(lien).stem
    # includes it — confirm the catalogue stems are built the same way.
    for _, f in all_files.iterrows():
        stemed_fn = Path(f["internal-link"].split("/")[-1]).stem
        if stemed_fn  == Path(lien).stem:
            path_to_file = Path(f["internal-link"])
    if path_to_file and path_to_file.is_file():
        # "PAGENUMBER" separates the document name from the page index;
        # without it we fall back to the first page.
        page_num = lien.split("PAGENUMBER")
        if len(page_num) == 2:
            page_num = int(page_num[1])
        else:
            page_num = 0
        show_pdf(path_to_file, page_num)
    else:
        print(f"File {path_to_file} not found")
    # Refresh the proof buttons in the right-hand column after the click.
    with column_proof:
        update_proofs(st.session_state.proofs)

def show_pdf(link, page_num):
    """Render one page of a PDF to a PNG, record it in the chat history and
    persist the history.

    Args:
        link: path to the PDF file.
        page_num: 0-based index of the page to render.
    """
    # Only convert the requested page (pdf2image pages are 1-based) instead
    # of rasterizing the whole document and indexing into the result.
    page_image = convert_from_path(link, first_page=page_num + 1,
                                   last_page=page_num + 1)[0]
    # delete=False: the file must outlive this function so the chat renderer
    # can display it later; close the handle immediately so the descriptor
    # is not leaked (and the path can be re-opened on Windows).
    with NamedTemporaryFile(delete=False, suffix=".png") as tmp_file:
        tmp_name = tmp_file.name
    page_image.save(tmp_name, 'PNG')
    # Role "ai" marks image messages: the chat replay loop shows them with
    # st.image instead of st.markdown.
    st.session_state.messages.append({"role": "ai", "content": tmp_name})
    save_history()
    print(f"Successfully converted PDF ({link} page {page_num}) to images ({tmp_name})")

def update_proofs(proofs):
    """Render one clickable button per justification of the latest proof set.

    Args:
        proofs: list of {"justifications": [...], "sources": [...]} dicts;
            only the most recent entry is displayed.
    """
    if not proofs:
        return
    latest = proofs[-1]
    pairs = zip(latest["justifications"], latest["sources"])
    for idx, (justification, source) in enumerate(pairs, start=1):
        # Slicing already clamps to the string length, so no min() needed.
        label = f"[{idx}] - {justification[:PROOF_LIMIT_SIZE]} ..."
        st.button(
            label,
            on_click=click_proof,
            use_container_width=True,
            kwargs={"lien": source, "full_content": justification},
        )

# Catalogue of external documents, shared by several callbacks below.
all_files = get_all_external_files()

# Initialize chat history, restoring a previous session when available.
if "messages" not in st.session_state:
    if Path(HISTORY_FILE_NAME).is_file():
        # NOTE(review): pickle.load on a file this app wrote itself; never
        # point HISTORY_FILE_NAME at untrusted data.
        with open(HISTORY_FILE_NAME, "rb") as fn:
            history = pickle.load(fn)
    else:
        history = []
    st.session_state.messages = history

# Bug fix: the guard previously tested the key "proof" while the value was
# stored under "proofs", so the proofs list was reset on every rerun and
# proof buttons never survived an interaction.
if "proofs" not in st.session_state:
    st.session_state.proofs = []

# Key name "curent_result" (sic) kept as-is: it is referenced further down.
if "curent_result" not in st.session_state:
    st.session_state.curent_result = []

# Page header.
st.image("./resources/logo_sebastien.png")
st.title('Hackathon Ferme Digitale 2024 - Crédit Agricole')
st.subheader("Equipe Crédit Agricole")
st.divider()
# Three-column layout: external resources | chat | proof buttons.
column_file_view, column_chat, column_proof = st.columns([0.2, 0.6, 0.2])

with column_file_view:
    st.write("<h5>Ressources externes</h5>", unsafe_allow_html=True)
    # Tag filter for the resource list; defaults to every tag selected.
    options = st.multiselect(
        'Sélectionnez les thématiques',
        all_files["tags"].unique(),
        all_files["tags"].unique(),
    )
    show_all_external_files()


with column_chat:
    # Replay the stored transcript. Role "ai" entries hold image file paths
    # (PDF pages saved by show_pdf); every other role is markdown text.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            if message["role"] == "ai":
                st.image(message["content"])
            else:
                st.markdown(message["content"])

with column_proof:
    # Show the proof buttons for the most recent answer (if any).
    update_proofs(st.session_state.proofs)

prompt_element = st.chat_input("Comment puis je vous aider aujourd'hui?")

# Cosmetic overrides for the chat input.
# NOTE(review): these class names (st-co, st-dv, st-emotion-cache-*) are
# auto-generated by Streamlit and can change between versions — fragile.
st.markdown("""
    <style> textarea.st-co {color: #212121 } </style>
""", unsafe_allow_html=True)
st.markdown("""
    <style> textarea.st-dv { caret-color: red } </style>
""", unsafe_allow_html=True)
st.markdown(
    """<style> .st-emotion-cache-15vaqhh a {color: #CCCCCC } </style>""",
    unsafe_allow_html=True
)

with column_chat:
    # Main interaction: executed once per Streamlit rerun when the user has
    # just submitted a prompt.
    if prompt := prompt_element:
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        # Temporary spinner, replaced by the assistant message once ready.
        placeholder = st.empty()
        placeholder.status("Veuillez patienter ...", expanded=False)
        # Blocking call to the LLM backend; expected to return a dict with
        # "content", "justifications" and "sources" keys.
        response = get_llm_response(prompt)
        st.session_state.messages.append({"role": "assistant", "content": response["content"]})
        st.session_state.proofs.append({"justifications": response["justifications"], "sources": response["sources"]})
        st.session_state.curent_result.append({
            "prompt": prompt,
            "content": response["content"],
            "justifications": response["justifications"],
            "sources": response["sources"]

        })
        # Persist the transcript before rendering, so a crash while drawing
        # does not lose the exchange.
        save_history()
        with placeholder.chat_message("assistant"):
            st.markdown(response["content"])
        with column_proof:
            update_proofs(st.session_state.proofs)