# Spaces: Runtime error
| import streamlit as st | |
| import pandas as pd | |
| from transformers import pipeline | |
| import base64 | |
def upload_and_extract_text():
    """Show a PDF uploader and return text generated from the uploaded file.

    Renders a Streamlit file-upload widget restricted to ``.pdf`` files. When
    a file is present, a ``text2text-generation`` pipeline backed by
    ``google/t5-v1_1-large`` is built and called on the file's contents.

    Returns:
        The ``"generated_text"`` entry of the pipeline's first result, or
        ``None`` when no file has been uploaded.
    """
    uploaded_file = st.file_uploader("Upload PDF file", type=["pdf"])
    if uploaded_file is None:
        return None

    # The pipeline is constructed on every call that has a file available.
    generator = pipeline("text2text-generation", model="google/t5-v1_1-large")

    # NOTE(review): the pipeline receives the raw bytes of the upload. A
    # text2text-generation pipeline is presumably meant to take strings, so a
    # PDF text-extraction step may be missing here -- confirm before relying
    # on this output.
    pdf_bytes = uploaded_file.read()
    outputs = generator(pdf_bytes, max_length=1024, do_sample=False)
    return outputs[0]["generated_text"]
def text_to_dataframe(text: str) -> pd.DataFrame:
    """Convert a block of text into a one-column DataFrame, one row per line.

    Lines are split with ``str.splitlines()`` so that ``\\n``, ``\\r\\n`` and
    ``\\r`` endings (and other line boundaries such as form feeds) are all
    recognised, no stray ``\\r`` is left in the cells, and a trailing newline
    does not produce a spurious empty last row. Blank lines inside the text
    are kept as empty-string rows.

    Args:
        text: The text to split into rows.

    Returns:
        A DataFrame with a single ``"Text"`` column holding one line per row.
        Empty input yields an empty DataFrame that still has the ``"Text"``
        column.
    """
    lines = text.splitlines()
    return pd.DataFrame({"Text": lines})
def main():
    """Render the page: upload a PDF, preview its text, and offer a CSV download.

    Shows the title and instructions, calls ``upload_and_extract_text`` and,
    when it returns text, displays that text raw and as a one-column table
    built by ``text_to_dataframe``. The table is then serialised to CSV and
    exposed through an HTML download link embedded as a ``data:`` URL.
    """
    st.title("PDF to Spreadsheet Converter")
    st.write("Upload a PDF file to extract text and save it as a spreadsheet.")

    extracted_text = upload_and_extract_text()
    if extracted_text is None:
        # No text was returned on this run, so there is nothing to preview.
        return

    st.write("### Extracted Text")
    st.write(extracted_text)

    df = text_to_dataframe(extracted_text)
    st.write("### Spreadsheet Preview")
    st.write(df)

    # Named csv_text rather than csv so it is not confused with the stdlib
    # module of that name.
    csv_text = df.to_csv(index=False)
    # A data: URL carries the payload inline, so the CSV is base64-encoded.
    b64 = base64.b64encode(csv_text.encode("utf-8")).decode("ascii")
    # text/csv is the registered media type for CSV; the charset parameter
    # tells the browser the payload is UTF-8 so non-ASCII text survives.
    href = (
        f'<a href="data:text/csv;charset=utf-8;base64,{b64}" '
        'download="extracted_text.csv">Download Extracted Text as CSV</a>'
    )
    # Raw HTML is required for the anchor tag to be rendered as a link.
    st.markdown(href, unsafe_allow_html=True)
# Run the app only when this file is executed as the main module, not when
# it is imported.
if __name__ == "__main__":
    main()