# Spaces: Sleeping  (status text captured along with the source; not part of the program)
from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader | |
from langchain_community.document_loaders import UnstructuredPowerPointLoader | |
from langchain_cohere.llms import Cohere | |
from langchain.chains.summarize import load_summarize_chain | |
from pathlib import Path | |
def summarize_files(method, files):
    """Summarize each document in *files* with a Cohere-backed LangChain chain.

    Args:
        method: Summarization strategy, forwarded unchanged as the
            ``chain_type`` argument of ``load_summarize_chain``.
        files: Iterable of file paths. The loader is chosen from the file
            suffix (case-insensitive): ``.pdf``, ``.docx`` or ``.pptx``.

    Returns:
        A list with one summary per input file, in input order.

    Raises:
        ValueError: If a file has a suffix other than the three supported
            ones. The error is raised when that file is reached, so any
            summaries already produced for earlier files are discarded.
    """
    # Map each supported suffix to the loader class that reads it.
    loaders_by_suffix = {
        ".pdf": PyPDFLoader,
        ".docx": Docx2txtLoader,
        ".pptx": UnstructuredPowerPointLoader,
    }

    # Initialize the LLM; temperature=0 is passed to Cohere as in the original.
    llm = Cohere(temperature=0)

    # The chain depends only on the LLM and the method, never on the file
    # being processed, so build it once instead of once per file.
    summarization_chain = load_summarize_chain(llm=llm, chain_type=method)

    summaries = []
    for file in files:
        ext = Path(file).suffix.lower()
        loader_cls = loaders_by_suffix.get(ext)
        if loader_cls is None:
            raise ValueError(f"Unsupported file extension: {ext}")

        # Load the file and split it into documents for the chain.
        docs = loader_cls(file).load_and_split()
        summaries.append(summarization_chain.run(docs))

    return summaries