# Spaces: Sleeping
import gradio as gr | |
import io | |
import fitz # PyMuPDF | |
def redact_submission_ids(input_pdf):
    """Remove Submission IDs from a PDF and blank the band above 'Document Details'.

    On every page, each hit for the text ``Submission ID trn:oid:::`` is
    widened by 100 points to the right (so the box also takes in whatever
    follows the prefix on that line) and is then permanently redacted: the
    covered text is deleted from the page content instead of merely being
    hidden behind a white shape, so it can no longer be copied or extracted.

    On the first page only, a white, full-width, 50-point-tall rectangle is
    additionally drawn directly above each ``Document Details`` hit.

    Args:
        input_pdf: The uploaded PDF, either as a filesystem path string or
            as an object exposing that path through a ``name`` attribute.

    Returns:
        A ``(output_pdf, filename)`` tuple: an ``io.BytesIO`` holding the
        processed PDF, rewound to position 0, and the name/path taken from
        ``input_pdf``.

    Raises:
        ValueError: If ``input_pdf`` is ``None`` (nothing was uploaded).
    """
    if input_pdf is None:
        raise ValueError("No PDF file was provided.")

    # Uploads may arrive as a plain path string or as a file wrapper that
    # carries the path in ``.name``; support both.
    filename = input_pdf if isinstance(input_pdf, str) else input_pdf.name

    output_pdf = io.BytesIO()
    # The context manager closes the document even if processing fails.
    with fitz.open(input_pdf) as doc:
        for page_num, page in enumerate(doc):
            # Only the fixed prefix is searched for; the hit is widened to the
            # right so the ID that follows it is covered as well.
            id_hits = page.search_for("Submission ID trn:oid:::")
            for hit in id_hits:
                id_rect = fitz.Rect(hit.x0, hit.y0, hit.x1 + 100, hit.y1)  # Expand width as needed
                page.add_redact_annot(id_rect, fill=(1, 1, 1))  # White fill
            if id_hits:
                # Redaction annotations only take effect once applied; this is
                # the step that actually deletes the covered text.
                page.apply_redactions()

            # First page only: white band directly above "Document Details".
            # NOTE(review): this band is drawn, not redacted, so any text under
            # it stays extractable. It is kept as a drawing because its lower
            # edge touches the heading and a redaction there could presumably
            # remove the heading too - confirm whether it must be scrubbed.
            if page_num == 0:
                for hit in page.search_for("Document Details"):
                    band = fitz.Rect(0, hit.y0 - 50, page.rect.x1, hit.y0)  # Full page width
                    page.draw_rect(band, color=(1, 1, 1), fill=(1, 1, 1))

        # Save the processed PDF to an in-memory byte stream.
        doc.save(output_pdf)

    output_pdf.seek(0)
    # NOTE(review): a Gradio File output may expect a file path rather than a
    # BytesIO - confirm against the installed Gradio version.
    return output_pdf, filename
# Gradio UI: a single PDF upload goes in; the processed PDF and the file
# name reported by redact_submission_ids come out.
iface = gr.Interface(
    title="Redact Submission IDs",
    fn=redact_submission_ids,
    inputs=gr.File(label="Upload PDF"),
    outputs=[
        gr.File(label="Download Redacted PDF"),
        gr.Textbox(label="File Name"),
    ],
    live=False,
)

# Start the app only when this file is executed directly.
if __name__ == "__main__":
    iface.launch(debug=True)