iiif_downloader / app.py
Gabriel's picture
Update app.py
a874957 verified
raw
history blame
3.07 kB
import os
import requests
import shutil
import gradio as gr
from concurrent.futures import ThreadPoolExecutor
from zipfile import ZipFile
def get_image_ids(batch_id: str, timeout: float = 30.0) -> list[str]:
    """Return the image IDs listed in the manifest of the given batch.

    Fetches ``https://iiifintern.ra.se/arkis!{batch_id}/manifest`` and builds
    one image ID per entry of the manifest's ``items`` list: the second
    ``!``-separated segment of the entry's ``id``, cut to 14 characters.

    Arguments:
        batch_id: Batch ID whose manifest is requested
        timeout: Seconds to wait on the server before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Raises:
        requests.HTTPError: The server answered with an error status.
        requests.Timeout: The server did not respond within ``timeout``.
    """
    response = requests.get(
        f"https://iiifintern.ra.se/arkis!{batch_id}/manifest",
        timeout=timeout,
    )
    response.raise_for_status()
    manifest = response.json()
    return [item["id"].split("!")[1][:14] for item in manifest["items"]]
def download_image(url: str, dest: str, timeout: float = 60.0) -> None:
    """
    Download an image

    The response body is streamed to disk in chunks, so the whole image is
    never held in memory.

    Arguments:
        url: Image url
        dest: Destination file name
        timeout: Seconds to wait on the server before giving up

    Raises:
        requests.HTTPError: The server answered with an error status. In
            that case ``dest`` is not created.
        requests.Timeout: The server did not respond within ``timeout``.
    """
    # The context manager releases the streamed connection even on errors.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Check the status before opening `dest`, so an HTTP error page is
        # never saved under an image file name.
        response.raise_for_status()
        with open(dest, "wb") as out_file:
            # iter_content applies any content decoding; reading
            # response.raw directly would not.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                out_file.write(chunk)
def download_image_by_image_id(image_id: str):
    """
    Fetch the image that belongs to the given image ID

    The first 8 characters of the image ID are taken as the batch ID. A
    directory with that name is created if it does not exist yet, and the
    image is stored inside it as ``<image_id>.jpg``.
    """
    target_dir = image_id[:8]
    os.makedirs(target_dir, exist_ok=True)
    download_image(
        f"https://iiifintern.ra.se/arkis!{image_id}/full/max/0/default.jpg",
        os.path.join(target_dir, f"{image_id}.jpg"),
    )
def download_batch_images(batch_id: str, workers: int = 2, progress=None):
    """Download every image of a batch and pack them into ``<batch_id>.zip``.

    The image IDs come from ``get_image_ids``; each image is fetched with
    ``download_image_by_image_id`` on a thread pool. Once all downloads have
    succeeded, the files ``<batch_id>/<image_id>.jpg`` are added to the zip
    archive under their bare file names.

    Arguments:
        batch_id: Batch to download
        workers: Number of download threads
        progress: Optional callable ``progress(fraction, desc=...)`` used to
            report how far the batch has come

    Returns:
        The name of the zip file that was written.

    Raises:
        Exception: Whatever a failed download raised. The archive is not
            written in that case.
    """
    image_ids = get_image_ids(batch_id)
    total_images = len(image_ids)

    # Compare against None instead of relying on truthiness: a progress
    # tracker object may define its own length or boolean value.
    report = progress is not None
    if report:
        progress(0, desc=f"Starting download for {batch_id}...")

    with ThreadPoolExecutor(max_workers=workers) as executor:
        futures = [
            executor.submit(download_image_by_image_id, image_id)
            for image_id in image_ids
        ]
        try:
            for done, future in enumerate(futures, start=1):
                # result() re-raises an exception from the worker thread;
                # a discarded future would hide failed downloads.
                future.result()
                if report:
                    progress(
                        done / total_images,
                        desc=f"Downloading {batch_id} ({done}/{total_images})",
                    )
        except Exception:
            # Do not start downloads that are still queued once one failed.
            for future in futures:
                future.cancel()
            raise

    # Zip the folder with downloaded images
    zip_filename = f"{batch_id}.zip"
    with ZipFile(zip_filename, "w") as zipf:
        for image_id in image_ids:
            img_path = os.path.join(batch_id, f"{image_id}.jpg")
            zipf.write(img_path, arcname=os.path.basename(img_path))

    if report:
        progress(1, desc=f"Completed {batch_id}")
    return zip_filename
def gradio_interface(batch_ids_input, progress=gr.Progress()):
    """Gradio handler: download each listed batch and return the zip files.

    Arguments:
        batch_ids_input: Text with one batch ID per line. Surrounding
            whitespace is stripped and blank lines are ignored.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            intentional; it is how Gradio is told to inject a tracker.

    Returns:
        A list with the zip file name of every batch, in input order.

    Raises:
        gr.Error: A batch could not be downloaded. The message of the
            underlying exception is shown in the UI.
    """
    batch_ids = [line.strip() for line in batch_ids_input.split("\n") if line.strip()]
    zip_files = []
    try:
        for batch_id in progress.tqdm(batch_ids, desc="Processing batches"):
            zip_files.append(download_batch_images(batch_id, progress=progress))
    except Exception as e:
        # The output component expects file paths; returning the message as
        # a string would make it be treated as a path. Raising gr.Error
        # shows the message to the user instead.
        raise gr.Error(str(e)) from e
    return zip_files  # Return the list of zip files for download
# UI layout: batch-ID input and button on the left, resulting zip files on the right.
with gr.Blocks() as app:
    gr.Markdown(value="# Batch Image Downloader")

    with gr.Row():
        with gr.Column():
            batch_ids_input = gr.Textbox(
                label="Batch IDs (one per line)",
                placeholder="Enter batch IDs, one per line.",
            )
            download_button = gr.Button(value="Download Images")

        with gr.Column():
            output_files = gr.File(
                label="Download Zip Files",
                file_count="multiple",
            )

    # Clicking the button runs the download handler on the entered batch IDs.
    download_button.click(fn=gradio_interface, inputs=[batch_ids_input], outputs=[output_files])

# Enable the request queue, then start the server.
app.queue().launch()