File size: 4,429 Bytes
95fca27
21583be
 
2145fa2
 
 
95fca27
 
2145fa2
95fca27
2145fa2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95fca27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21583be
 
2343455
95fca27
 
 
 
 
 
 
 
 
 
 
 
2145fa2
95fca27
2343455
95fca27
 
 
 
 
 
 
 
 
 
 
2343455
95fca27
21583be
95fca27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21583be
 
ab33443
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import io
import os
import textwrap
import time

import pandas as pd
import streamlit as st
from docx import Document
from dotenv import load_dotenv
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

import search  # Import the search module

# Load settings from a local .env file (python-dotenv) before anything reads
# the environment.
load_dotenv()

# Page-level configuration: browser tab title/icon and the wide layout.
# NOTE(review): Streamlit expects set_page_config to run before other
# Streamlit commands, so keep this call ahead of the st.markdown below.
st.set_page_config(
        page_title="DocGPT GT",
        page_icon="speech_balloon",
        layout="wide",
    )

# CSS injected into the page: hides the element with id "MainMenu" and the
# <footer>, then re-exposes a footer:after pseudo-element containing '2023'.
hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            footer:after {
	content:'2023'; 
	visibility: visible;
	display: block;
	position: relative;
	padding: 5px;
	top: 2px;
}
            </style>
            """
# unsafe_allow_html is required so the <style> tag is emitted as HTML rather
# than escaped text.
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

def _wrap_pdf_lines(text, width):
    """Split *text* into lines of at most *width* characters, keeping blank lines."""
    wrapped = []
    for raw_line in str(text).splitlines() or [""]:
        # textwrap.wrap returns [] for an empty string; keep it as a blank line.
        wrapped.extend(textwrap.wrap(raw_line, width=width) or [""])
    return wrapped


def save_as_pdf(conversation):
    """Render the conversation as a PDF and offer it as a browser download.

    The document is built in memory and handed to ``st.download_button``
    instead of being written to a fixed filename in the working directory:
    a relative markdown link to such a file is not served by Streamlit, and
    a shared on-disk file would be overwritten by concurrent sessions.

    Long entries are wrapped and the output continues on a new page when the
    current one is full, so text is no longer drawn below the page edge.

    Args:
        conversation: Iterable of ``(question, answer)`` pairs.
    """
    pdf_filename = "conversation.pdf"
    title_y = 750        # baseline of the heading / first line of a new page
    first_line_y = 730   # baseline of the first Q/A line on the first page
    bottom_margin = 50   # do not draw below this baseline
    line_height = 20
    # Character-based approximation of the printable width at the default
    # 12pt font; drawString itself performs no wrapping.
    wrap_width = 80

    buffer = io.BytesIO()
    pdf = canvas.Canvas(buffer, pagesize=letter)

    pdf.drawString(100, title_y, "Conversation:")
    y_position = first_line_y
    for q, a in conversation:
        entry_lines = _wrap_pdf_lines(f"Q: {q}", wrap_width)
        entry_lines += _wrap_pdf_lines(f"A: {a}", wrap_width)
        for line in entry_lines:
            if y_position < bottom_margin:
                # Current page is full: finish it and continue at the top
                # of a fresh one.
                pdf.showPage()
                y_position = title_y
            pdf.drawString(120, y_position, line)
            y_position -= line_height

    pdf.save()

    st.download_button(
        "Download PDF",
        data=buffer.getvalue(),
        file_name=pdf_filename,
        mime="application/pdf",
    )

def save_as_docx(conversation):
    """Build a Word document of the conversation and offer it as a download.

    The document is saved to an in-memory stream and handed to
    ``st.download_button``; a relative markdown link to a file in the
    working directory is not served by Streamlit, and a fixed on-disk
    filename would be shared by every session.

    Args:
        conversation: Iterable of ``(question, answer)`` pairs.
    """
    doc_filename = "conversation.docx"

    doc = Document()
    doc.add_heading('Conversation', 0)
    for q, a in conversation:
        doc.add_paragraph(f'Q: {q}')
        doc.add_paragraph(f'A: {a}')

    buffer = io.BytesIO()
    doc.save(buffer)

    st.download_button(
        "Download DOCX",
        data=buffer.getvalue(),
        file_name=doc_filename,
        mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )

def save_as_xlsx(conversation):
    """Export the conversation to an Excel workbook and offer it as a download.

    The workbook has two columns, "Question" and "Answer", one row per
    exchange. It is written to an in-memory buffer and handed to
    ``st.download_button``; a relative markdown link to a file in the
    working directory is not served by Streamlit, and a fixed on-disk
    filename would be shared by every session.

    Args:
        conversation: Iterable of ``(question, answer)`` pairs.
    """
    xlsx_filename = "conversation.xlsx"

    df = pd.DataFrame(conversation, columns=["Question", "Answer"])
    buffer = io.BytesIO()
    df.to_excel(buffer, index=False)

    st.download_button(
        "Download XLSX",
        data=buffer.getvalue(),
        file_name=xlsx_filename,
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )

def save_as_txt(conversation):
    """Export the conversation as plain text and offer it as a download.

    Each exchange is written as ``Q: ...`` / ``A: ...`` followed by a blank
    line. The text is encoded explicitly as UTF-8 (the platform default
    encoding may not be able to represent every character) and handed to
    ``st.download_button``; a relative markdown link to a file in the
    working directory is not served by Streamlit.

    Args:
        conversation: Iterable of ``(question, answer)`` pairs.
    """
    txt_filename = "conversation.txt"
    text = "".join(f"Q: {q}\nA: {a}\n\n" for q, a in conversation)

    st.download_button(
        "Download TXT",
        data=text.encode("utf-8"),
        file_name=txt_filename,
        mime="text/plain",
    )

def main():
    """Render the page: upload documents, ask a question, show and export answers.

    Flow on each Streamlit rerun:
      1. Show the upload widget and the question box.
      2. If nothing is uploaded, prompt for a document and stop.
      3. Parse every uploaded file into paragraphs via the ``search`` module
         and collect them into one DataFrame (progress bar tracks the files).
      4. If the question is non-empty and differs from the previous one,
         query ``search.answer_query_with_context`` and record the exchange
         in ``st.session_state["interactions"]``.
      5. Show the interaction history, a sample of parsed paragraphs and the
         export buttons.
    """
    st.markdown('<h1>Ask anything from Legal Texts</h1><p style="font-size: 12; color: gray;"></p>', unsafe_allow_html=True)
    st.markdown("<h2>Upload documents</h2>", unsafe_allow_html=True)
    uploaded_files = st.file_uploader("Upload one or more documents", type=['pdf', 'docx'], accept_multiple_files=True)
    question = st.text_input("Ask a question based on the documents", key="question_input")

    if not uploaded_files:
        st.markdown("<h2>Please upload a document to proceed.</h2>", unsafe_allow_html=True)
        return

    columns = ["page_num", "paragraph_num", "content", "tokens"]
    rows = []
    # The bar reflects real work: it advances as each document is parsed.
    progress = st.progress(0)
    total_files = len(uploaded_files)
    for done, uploaded_file in enumerate(uploaded_files, start=1):
        # The uploader only accepts pdf/docx, so anything that is not a PDF
        # is handled as DOCX.
        if uploaded_file.type == "application/pdf":
            paragraphs = search.read_pdf_pdfminer(uploaded_file)
        else:
            paragraphs = search.read_docx(uploaded_file)
        rows.extend(
            (p.page_num, p.paragraph_num, p.content, search.count_tokens(p.content))
            for p in paragraphs
        )
        progress.progress(100 * done // total_files)
    # Build the frame once instead of concatenating inside the loop.
    df = pd.DataFrame(rows, columns=columns)

    if "interactions" not in st.session_state:
        st.session_state["interactions"] = []

    # Only query for a real, new question: a blank box (e.g. after the user
    # clears it) must not trigger a search or add an empty history entry.
    is_new_question = question != st.session_state.get("last_question", "")
    if question.strip() and is_new_question:
        with st.spinner("Searching..."):
            answer = search.answer_query_with_context(question, df)
        st.session_state["interactions"].append((question, answer))
        st.write(answer)

    st.markdown("### Interaction History")
    for q, a in st.session_state["interactions"]:
        st.write(f"**Q:** {q}\n\n**A:** {a}")

    # Remembered so that reruns caused by other widgets (e.g. the export
    # buttons) do not repeat the same query.
    st.session_state["last_question"] = question

    st.markdown("<h2>Sample paragraphs</h2>", unsafe_allow_html=True)
    sample_size = min(len(df), 5)
    st.dataframe(df.sample(n=sample_size))

    if st.button("Save as PDF"):
        save_as_pdf(st.session_state["interactions"])
    if st.button("Save as DOCX"):
        save_as_docx(st.session_state["interactions"])
    if st.button("Save as XLSX"):
        save_as_xlsx(st.session_state["interactions"])
    if st.button("Save as TXT"):
        save_as_txt(st.session_state["interactions"])

# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()