|
import time |
|
import streamlit as st |
|
import pandas as pd |
|
import os |
|
from dotenv import load_dotenv |
|
import search |
|
from reportlab.lib.pagesizes import letter |
|
from reportlab.pdfgen import canvas |
|
from docx import Document |
|
|
|
load_dotenv() |
|
|
|
st.set_page_config( |
|
page_title="DocGPT GT", |
|
page_icon="speech_balloon", |
|
layout="wide", |
|
) |
|
|
|
hide_streamlit_style = """ |
|
<style> |
|
#MainMenu {visibility: hidden;} |
|
footer {visibility: hidden;} |
|
footer:after { |
|
content:'2023'; |
|
visibility: visible; |
|
display: block; |
|
position: relative; |
|
padding: 5px; |
|
top: 2px; |
|
} |
|
</style> |
|
""" |
|
st.markdown(hide_streamlit_style, unsafe_allow_html=True) |
|
|
|
def save_as_pdf(conversation): |
|
pdf_filename = "conversation.pdf" |
|
c = canvas.Canvas(pdf_filename, pagesize=letter) |
|
|
|
c.drawString(100, 750, "Conversation:") |
|
y_position = 730 |
|
for q, a in conversation: |
|
c.drawString(120, y_position, f"Q: {q}") |
|
c.drawString(120, y_position - 20, f"A: {a}") |
|
y_position -= 40 |
|
|
|
c.save() |
|
|
|
st.markdown(f"Download [PDF](./{pdf_filename})") |
|
|
|
def save_as_docx(conversation): |
|
doc = Document() |
|
doc.add_heading('Conversation', 0) |
|
|
|
for q, a in conversation: |
|
doc.add_paragraph(f'Q: {q}') |
|
doc.add_paragraph(f'A: {a}') |
|
|
|
doc_filename = "conversation.docx" |
|
doc.save(doc_filename) |
|
|
|
st.markdown(f"Download [DOCX](./{doc_filename})") |
|
|
|
def save_as_xlsx(conversation): |
|
df = pd.DataFrame(conversation, columns=["Question", "Answer"]) |
|
xlsx_filename = "conversation.xlsx" |
|
df.to_excel(xlsx_filename, index=False) |
|
|
|
st.markdown(f"Download [XLSX](./{xlsx_filename})") |
|
|
|
def save_as_txt(conversation): |
|
txt_filename = "conversation.txt" |
|
with open(txt_filename, "w") as txt_file: |
|
for q, a in conversation: |
|
txt_file.write(f"Q: {q}\nA: {a}\n\n") |
|
|
|
st.markdown(f"Download [TXT](./{txt_filename})") |
|
|
|
def main(): |
|
st.markdown('<h1>Ask anything from Legal Texts</h1><p style="font-size: 12; color: gray;"></p>', unsafe_allow_html=True) |
|
st.markdown("<h2>Upload documents</h2>", unsafe_allow_html=True) |
|
uploaded_files = st.file_uploader("Upload one or more documents", type=['pdf', 'docx'], accept_multiple_files=True) |
|
question = st.text_input("Ask a question based on the documents", key="question_input") |
|
|
|
progress = st.progress(0) |
|
for i in range(100): |
|
progress.progress(i + 1) |
|
time.sleep(0.01) |
|
|
|
if uploaded_files: |
|
df = pd.DataFrame(columns=["page_num", "paragraph_num", "content", "tokens"]) |
|
for uploaded_file in uploaded_files: |
|
paragraphs = search.read_pdf_pdfminer(uploaded_file) if uploaded_file.type == "application/pdf" else search.read_docx(uploaded_file) |
|
temp_df = pd.DataFrame( |
|
[(p.page_num, p.paragraph_num, p.content, search.count_tokens(p.content)) |
|
for p in paragraphs], |
|
columns=["page_num", "paragraph_num", "content", "tokens"] |
|
) |
|
df = pd.concat([df, temp_df], ignore_index=True) |
|
|
|
if "interactions" not in st.session_state: |
|
st.session_state["interactions"] = [] |
|
|
|
answer = "" |
|
if question != st.session_state.get("last_question", ""): |
|
st.text("Searching...") |
|
answer = search.answer_query_with_context(question, df) |
|
st.session_state["interactions"].append((question, answer)) |
|
st.write(answer) |
|
|
|
st.markdown("### Interaction History") |
|
for q, a in st.session_state["interactions"]: |
|
st.write(f"**Q:** {q}\n\n**A:** {a}") |
|
|
|
st.session_state["last_question"] = question |
|
|
|
st.markdown("<h2>Sample paragraphs</h2>", unsafe_allow_html=True) |
|
sample_size = min(len(df), 5) |
|
st.dataframe(df.sample(n=sample_size)) |
|
|
|
if st.button("Save as PDF"): |
|
save_as_pdf(st.session_state["interactions"]) |
|
if st.button("Save as DOCX"): |
|
save_as_docx(st.session_state["interactions"]) |
|
if st.button("Save as XLSX"): |
|
save_as_xlsx(st.session_state["interactions"]) |
|
if st.button("Save as TXT"): |
|
save_as_txt(st.session_state["interactions"]) |
|
|
|
|
|
else: |
|
st.markdown("<h2>Please upload a document to proceed.</h2>", unsafe_allow_html=True) |
|
|
|
if __name__ == "__main__": |
|
main() |
|
|