import os
from dotenv import load_dotenv
# from langchain_google_genai import GoogleGenerativeAI
from langchain_groq import ChatGroq
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
load_dotenv()

class Summary:
    def __init__(self):
        # Groq chat model; the API key and model name are read from the .env file.
        self.model = ChatGroq(api_key=os.getenv("GROQ_API"), model=os.getenv("MODEL"))
        # Previous provider: GoogleGenerativeAI(api_key=os.getenv("API_KEY"), model=os.getenv("MODEL"))
    def load_doc(self, doc_path: str):
        """Load a PDF with PyMuPDF and return its pages as Documents."""
        try:
            doc_loader = PyMuPDFLoader(file_path=doc_path)
            return doc_loader.load()
        except Exception as e:
            print(e)
    def doc_chunk(self, docs: list, CHUNK_SIZE: int = 3000, CHUNK_OVERLAP: int = 100):
        """Split loaded documents into chunks; return the chunks and their count."""
        splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
        chunks = splitter.split_documents(docs)
        return chunks, len(chunks)
    def get_summary(self, document: list, len_document: int):
        """Summarize with the 'stuff' chain for a single chunk, 'map_reduce' otherwise."""
        try:
            chain_type = "stuff" if len_document == 1 else "map_reduce"
            chain = load_summarize_chain(chain_type=chain_type, llm=self.model)
            result = chain.invoke(document)
            return result["output_text"]
        except Exception as e:
            print(e)
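
# Illustrative usage sketch (an assumption, not part of the original module): it
# presumes a .env providing GROQ_API and MODEL, and a local "sample.pdf" to summarize.
if __name__ == "__main__":
    summarizer = Summary()
    pages = summarizer.load_doc("sample.pdf")
    if pages:
        chunks, num_chunks = summarizer.doc_chunk(pages)
        print(summarizer.get_summary(chunks, num_chunks))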