Spaces:
Runtime error
Runtime error
File size: 2,432 Bytes
63337f5 7d0a6ff 11ef280 63337f5 c312545 6f9cc9b 7de3632 078b2c2 7de3632 078b2c2 6f9cc9b 63337f5 6f9cc9b 7de3632 63337f5 7de3632 d4376fd 63337f5 30bc38f 63337f5 9018ff8 63337f5 c312545 63337f5 e89f971 db38720 63337f5 db38720 e89f971 0c332ef db38720 63337f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import base64
import os
import tempfile

import streamlit as st
import torch
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import pipeline
#import os
#from dotenv import load_dotenv
#from huggingface_hub import HfApi
#api = HfApi()
#token = api.retrieve_token("secret_token") # Replace with your secret name
#load_dotenv()
#token = os.environ.get("HF_TOKEN")
checkpoint = "MBZUAI/LaMini-Flan-T5-248M"


#model and tokenizer loading
@st.cache_resource
def _load_model_and_tokenizer(name):
    """Load the tokenizer and T5 model for checkpoint `name`.

    Streamlit re-executes this script from top to bottom on every widget
    interaction; caching the loaded objects avoids reloading the weights on
    each rerun.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = T5Tokenizer.from_pretrained(name)
    loaded_model = T5ForConditionalGeneration.from_pretrained(
        name, device_map='auto', torch_dtype=torch.float32
    )
    return loaded_tokenizer, loaded_model


# Module-level names are kept because the rest of the file reads them directly.
tokenizer, base_model = _load_model_and_tokenizer(checkpoint)
#file loader and preprocessing
def file_preprocessing(file):
    """Load a PDF and return the text of all its chunks as one string.

    The document is loaded with ``PyPDFLoader``, split into chunks with a
    ``RecursiveCharacterTextSplitter`` (chunk_size=200, chunk_overlap=50), and
    the ``page_content`` of every chunk is concatenated with no separator.

    Args:
        file: Value handed to ``PyPDFLoader`` (the caller in this file passes
            a filesystem path to a temporary PDF).

    Returns:
        A single ``str``; empty when the loader yields no chunks.
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)
    chunks = text_splitter.split_documents(pages)
    # NOTE(review): with a non-zero chunk_overlap, neighbouring chunks may share
    # text, so the joined string can contain repeated passages -- confirm
    # whether that is intended before changing the splitter settings.
    return "".join(chunk.page_content for chunk in chunks)
#LLM pipeline
def llm_pipeline(filepath):
    """Summarize the PDF at `filepath` and return the summary text.

    A ``'summarization'`` pipeline is built from the module-level
    ``base_model`` and ``tokenizer`` with output length limits of 50 to 500,
    then run on the text produced by ``file_preprocessing``.

    Args:
        filepath: Passed straight through to ``file_preprocessing``.

    Returns:
        The ``'summary_text'`` field of the first pipeline result.
    """
    length_limits = {"max_length": 500, "min_length": 50}
    summarizer = pipeline(
        'summarization',
        model=base_model,
        tokenizer=tokenizer,
        **length_limits,
    )
    document_text = file_preprocessing(filepath)
    outputs = summarizer(document_text)
    return outputs[0]['summary_text']
def main():
    """Render the Streamlit UI: upload a PDF, summarize it, show the result.

    The uploaded bytes are written to a temporary file so that the summarizer
    can be given a filesystem path. The temporary file is removed afterwards
    whether or not summarization succeeds.
    """
    st.title("Document Summarization App")
    uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])

    # The button is only rendered once a file has been chosen (short-circuit).
    if uploaded_file is None or not st.button("Summarize"):
        return

    # delete=False: the file must outlive the `with` block so it can be
    # reopened by path; closing it here also flushes the contents to disk.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        # getvalue() returns the whole buffer regardless of the current read
        # position, unlike read().
        temp_file.write(uploaded_file.getvalue())
        filepath = temp_file.name

    try:
        summary = llm_pipeline(filepath)
        st.success(summary)  # Display only the summary
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        st.error(f"An error occurred during summarization: {e}")
    finally:
        # Clean up the temporary file
        os.remove(filepath)
# Script entry point: launch the app when this file is executed directly.
if __name__ == "__main__":
    main()