|
import os |
|
from dotenv import load_dotenv |
|
|
|
from langchain_groq import ChatGroq |
|
from langchain_community.document_loaders import PyMuPDFLoader |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain.chains.summarize import load_summarize_chain |
|
# Load GROQ_API / MODEL from a local .env file so os.getenv can see them below.
load_dotenv()
|
|
|
class Summary:
    """Summarize PDF documents with a Groq-hosted LLM via LangChain.

    Expects the ``GROQ_API`` and ``MODEL`` environment variables to be set
    (e.g. loaded from a .env file) before instantiation.
    """

    def __init__(self):
        # NOTE(review): if GROQ_API / MODEL are unset, os.getenv returns None
        # and ChatGroq receives it — confirm the .env is loaded by the caller.
        self.model = ChatGroq(
            api_key=os.getenv("GROQ_API"),
            model=os.getenv("MODEL"),
        )

    def load_doc(self, doc_path: str):
        """Load the PDF at *doc_path* and return its pages as a list of
        LangChain ``Document`` objects.

        Best-effort: on any failure the error is printed and ``None`` is
        returned (preserves the original error contract) — callers must
        check for ``None`` before chunking.
        """
        try:
            doc_loader = PyMuPDFLoader(file_path=doc_path)
            return doc_loader.load()
        except Exception as e:
            print(e)
            return None

    def doc_chunk(self, docs_path: list, CHUNK_SIZE: int = 3000, CHUNK_OVERLAP: int = 100):
        """Split loaded documents into overlapping text chunks.

        ``docs_path`` is the list of ``Document`` objects from
        :meth:`load_doc`. Returns a tuple ``(chunks, chunk_count)`` so the
        caller can pick a summarization strategy based on the count.
        """
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=CHUNK_SIZE,
            chunk_overlap=CHUNK_OVERLAP,
        )
        chunks = splitter.split_documents(docs_path)
        return chunks, len(chunks)

    def get_summary(self, document: list, len_document: int):
        """Summarize *document* and return the summary text.

        Uses the "stuff" chain when there is a single chunk (it fits in one
        prompt) and "map_reduce" otherwise. Best-effort: prints the error
        and returns ``None`` on failure.
        """
        try:
            # Only the chain type differs between the two cases — build once.
            chain_type = "stuff" if len_document == 1 else "map_reduce"
            chain = load_summarize_chain(chain_type=chain_type, llm=self.model)
            result = chain.invoke(document)
            return result["output_text"]
        except Exception as e:
            print(e)
            return None