import os
from dotenv import load_dotenv
# from langchain_google_genai import GoogleGenerativeAI
from langchain_groq import ChatGroq
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
load_dotenv()

class Summary:
    def __init__(self):
        # Groq chat model; the API key and model name are read from .env (GROQ_API, MODEL).
        # Previous backend: GoogleGenerativeAI(api_key=os.getenv("API_KEY"), model=os.getenv("MODEL"))
        self.model = ChatGroq(api_key=os.getenv("GROQ_API"), model=os.getenv("MODEL"))

    def load_doc(self, doc_path: str):
        """Load the PDF at doc_path and return its pages as LangChain documents."""
        try:
            doc_loader = PyMuPDFLoader(file_path=doc_path)
            return doc_loader.load()
        except Exception as e:
            print(e)

    def doc_chunk(self, docs: list, chunk_size: int = 3000, chunk_overlap: int = 100):
        """Split the loaded documents into overlapping chunks; return (chunks, chunk count)."""
        splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        chunks = splitter.split_documents(docs)
        return chunks, len(chunks)

    def get_summary(self, document: list, len_document: int):
        """Summarize the chunks: 'stuff' for a single chunk, 'map_reduce' for several."""
        try:
            chain_type = "stuff" if len_document == 1 else "map_reduce"
            chain = load_summarize_chain(chain_type=chain_type, llm=self.model)
            result = chain.invoke({"input_documents": document})
            return result["output_text"]
        except Exception as e:
            print(e)
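

# Minimal usage sketch (not part of the original module): assumes GROQ_API and MODEL
# are set in .env and "sample.pdf" is a placeholder path to a local PDF.
if __name__ == "__main__":
    summarizer = Summary()
    pages = summarizer.load_doc("sample.pdf")
    if pages:
        chunks, n_chunks = summarizer.doc_chunk(pages)
        print(summarizer.get_summary(chunks, n_chunks))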