Spaces:
Sleeping
Sleeping
from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader | |
from langchain_community.document_loaders import UnstructuredPowerPointLoader | |
from langchain_cohere.llms import Cohere | |
from langchain.chains.summarize import load_summarize_chain | |
from pathlib import Path | |
def summarize_file(method, file):
    """Summarize a single document using a Cohere LLM summarization chain.

    Args:
        method: Forwarded as ``chain_type`` to ``load_summarize_chain``.
        file: Path of the document to summarize. Its suffix (compared
            case-insensitively) selects the loader: ``.pdf``, ``.docx``
            or ``.pptx``.

    Returns:
        The value produced by running the summarization chain over the
        loaded and split documents.

    Raises:
        ValueError: If the file suffix is not one of the supported ones.
    """
    # The model is built before the suffix is examined.
    model = Cohere(temperature=0)

    # One loader class per supported suffix.
    loaders_by_suffix = {
        '.pdf': PyPDFLoader,
        '.docx': Docx2txtLoader,
        '.pptx': UnstructuredPowerPointLoader,
    }
    suffix = Path(file).suffix.lower()
    loader_cls = loaders_by_suffix.get(suffix)
    if loader_cls is None:
        raise ValueError(f"Unsupported file extension: {suffix}")

    chunks = loader_cls(file).load_and_split()

    # Build the chain for the requested method and run it on the chunks.
    chain = load_summarize_chain(llm=model, chain_type=method)
    return chain.run(chunks)
# def summarize_files(method, files):
#     # Initialize the LLM
#     llm = Cohere(temperature=0)
#     summaries = []
#     # Load and read each file
#     for file in files:
#         ext = Path(file).suffix.lower()
#         if ext == '.pdf':
#             loader = PyPDFLoader(file)
#         elif ext == '.docx':
#             loader = Docx2txtLoader(file)
#         elif ext == '.pptx':
#             loader = UnstructuredPowerPointLoader(file)
#         else:
#             raise ValueError(f"Unsupported file extension: {ext}")
#         docs = loader.load_and_split()
#         # Initialize a summarization chain with the specified method
#         summarization_chain = load_summarize_chain(llm=llm, chain_type=method)
#         summary = summarization_chain.run(docs)
#         summaries.append(summary)
#     return summaries