File size: 2,841 Bytes
2198d6d
 
bd4e82f
2198d6d
 
bd4e82f
 
2198d6d
47d3f08
bd4e82f
b988347
2198d6d
1076eae
2198d6d
bd4e82f
b988347
 
d93a313
bd4e82f
cd03df2
bd4e82f
 
 
 
156aa34
bd4e82f
c143b5c
a132eda
 
bd4e82f
 
 
2198d6d
bd4e82f
 
2198d6d
bd4e82f
2198d6d
c143b5c
2198d6d
 
 
 
 
 
 
 
 
 
c143b5c
2198d6d
 
 
 
bd4e82f
2198d6d
bd4e82f
2198d6d
 
bd4e82f
2198d6d
bd4e82f
 
 
 
 
 
 
 
 
a132eda
bd4e82f
 
a132eda
bd4e82f
11173d1
bd4e82f
 
11173d1
bd4e82f
 
 
2198d6d
bd4e82f
 
2198d6d
 
bd4e82f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import streamlit as st
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.llms.gemini import Gemini
from llama_index.core import Document
import google.generativeai as genai
import os
import PyPDF2
from io import BytesIO
from llama_index.embeddings.fastembed import FastEmbedEmbedding



# Global llama_index configuration.
# Embeddings come from the FastEmbed BAAI/bge-small-en-v1.5 model; Gemini is
# used only as the LLM, with its API key read from GOOGLE_API_KEY.
Settings.embed_model = FastEmbedEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
)
Settings.llm = Gemini(
    model_name="models/gemini-pro",
    temperature=0.1,
    api_key=os.getenv("GOOGLE_API_KEY"),
)

def write_to_file(content, filename="./files/test.pdf"):
    """Write raw bytes to ``filename``, creating its parent directory if needed.

    Args:
        content: Bytes-like payload to store (the file is opened in binary mode).
        filename: Destination path. Defaults to ``./files/test.pdf``.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    # open() does not create missing parent directories, so the first upload
    # on a fresh checkout (no ./files yet) would otherwise fail.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filename, "wb") as f:
        f.write(content)

def ingest_documents():
    """Load every file under ``./files/`` with ``SimpleDirectoryReader``.

    The loaded documents are also printed to stdout.

    Returns:
        Whatever ``SimpleDirectoryReader.load_data()`` returns for the folder.
    """
    loaded = SimpleDirectoryReader("./files/").load_data()
    print(loaded)
    return loaded

def load_data(documents):
    """Build and return a ``VectorStoreIndex`` over ``documents``."""
    return VectorStoreIndex.from_documents(documents)

# Generate legal document summary
def generate_summary(index, document_text):
    """Query the index for a structured legal summary of ``document_text``.

    Args:
        index: Object exposing ``as_query_engine()``; the returned engine's
            ``query()`` is called once with the full prompt.
        document_text: Document text, inserted verbatim into the prompt.

    Returns:
        The ``response`` attribute of the query result.
    """
    engine = index.as_query_engine()
    prompt = f"""
    You are a skilled legal analyst. Your task is to provide a comprehensive summary of the given legal document.
    Analyze the following document and summarize it:
    {document_text}
    
    Please cover the following aspects:
    1. Document type and purpose
    2. Key parties involved
    3. Main clauses and provisions
    4. Important dates and deadlines
    5. Potential legal implications
    6. Any notable or unusual elements
    
    Provide a clear, concise, and professional summary
    """
    result = engine.query(prompt)
    return result.response

# Streamlit app
def main():
    """Render the Streamlit page: upload a document, index it, show a summary.

    Flow: the user uploads a ``.txt`` or ``.pdf`` file; its text is extracted
    for the prompt, the raw bytes are saved under ``./files/`` so
    ``ingest_documents()`` can load them, a vector index is built, and the
    generated summary is written to the page. Nothing happens until a file
    has been uploaded.
    """
    st.title("Legal Document Summarizer")
    st.write("Upload a legal document, and let our AI summarize it!")

    # File uploader
    uploaded_file = st.file_uploader("Choose a legal document file", type=["txt", "pdf"])

    if uploaded_file is None:
        return

    raw_bytes = uploaded_file.getvalue()
    is_pdf = uploaded_file.type == "application/pdf"

    # Extract the text that is embedded in the summary prompt.
    if is_pdf:
        pdf_reader = PyPDF2.PdfReader(BytesIO(raw_bytes))
        # Join once instead of repeated +=, and treat a page that yields no
        # text (e.g. an image-only page) as empty rather than failing.
        document_text = "".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )
    else:
        # Replace undecodable bytes so a non-UTF-8 text file does not crash
        # the app with UnicodeDecodeError.
        document_text = raw_bytes.decode("utf-8", errors="replace")

    # Save the upload under an extension that matches its content: the
    # directory reader chooses a parser by file extension, so a text upload
    # stored as "test.pdf" would be handed to the PDF parser.
    upload_dir = "./files"
    os.makedirs(upload_dir, exist_ok=True)
    if is_pdf:
        saved_name, stale_name = "test.pdf", "test.txt"
    else:
        saved_name, stale_name = "test.txt", "test.pdf"
    # Drop the other type's file left by an earlier upload so only the
    # current document is indexed.
    try:
        os.remove(f"{upload_dir}/{stale_name}")
    except FileNotFoundError:
        pass
    write_to_file(raw_bytes, f"{upload_dir}/{saved_name}")

    st.write("Analyzing legal document...")

    # Ingest documents using SimpleDirectoryReader
    documents = ingest_documents()

    # Load data and generate summary
    index = load_data(documents)
    summary = generate_summary(index, document_text)

    st.write("## Legal Document Summary")
    st.write(summary)

# Run the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()