iiif_downloader / ifff_downloader.py
Gabriel's picture
Create ifff_downloader.py
73c784b verified
raw
history blame
2.63 kB
import os
import requests
import shutil
import gradio as gr
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
from zipfile import ZipFile
def get_image_ids(batch_id: str, timeout: float = 30.0) -> list[str]:
    """
    Return the image IDs listed in the manifest of the given batch

    Arguments:
        batch_id: Batch ID used to build the manifest URL
        timeout: Seconds to wait for the manifest server before giving up

    Raises:
        requests.HTTPError: If the manifest request returns an error status
    """
    # Without a timeout an unresponsive server would block this call forever.
    response = requests.get(
        f"https://iiifintern.ra.se/arkis!{batch_id}/manifest",
        timeout=timeout,
    )
    response.raise_for_status()
    manifest = response.json()
    # Each item id contains "!"; the image ID is the first 14 characters
    # of the part that follows the first "!".
    return [item["id"].split("!")[1][:14] for item in manifest["items"]]
def download_image(url: str, dest: str, timeout: float = 60.0) -> None:
    """
    Download an image

    Arguments:
        url: Image url
        dest: Destination file name
        timeout: Seconds to wait for the server before giving up

    Raises:
        requests.HTTPError: If the server answers with an error status
    """
    # The context manager releases the streamed connection even on failure.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail before opening dest so an HTTP error page is never saved
        # to disk as if it were an image.
        response.raise_for_status()
        with open(dest, "wb") as out_file:
            # iter_content decodes any transfer encoding (gzip/deflate),
            # which reading response.raw directly does not.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                out_file.write(chunk)
def download_image_by_image_id(image_id: str, progress=None) -> None:
    """
    Download the image with the given image ID

    Creates a directory named after the first 8 characters of the image ID
    (the batch ID) and saves the image in that directory as
    "<image_id>.jpg".

    Arguments:
        image_id: ID of the image to download
        progress: Optional progress bar; its update(1) method is called
            once the image has been downloaded
    """
    batch_id = image_id[:8]
    os.makedirs(batch_id, exist_ok=True)
    url = f"https://lbiiif.riksarkivet.se/arkis!{image_id}/full/max/0/default.jpg"
    dest = os.path.join(batch_id, image_id + ".jpg")
    download_image(url, dest)
    # Compare against None explicitly: progress-bar objects may define their
    # own truthiness (e.g. tqdm bases it on the bar's total), so a plain
    # truth test could skip the update or raise.
    if progress is not None:
        progress.update(1)
def download_batch_images(batch_id: str, workers: int = 2) -> str:
    """
    Download every image of a batch and pack the images into a zip file

    The images are downloaded concurrently into a directory named after the
    batch ID, then written to "<batch_id>.zip" in the current directory.

    Arguments:
        batch_id: Batch ID whose images should be downloaded
        workers: Number of download threads

    Returns:
        The file name of the created zip archive

    Raises:
        Exception: The first error raised by any of the image downloads
    """
    image_ids = get_image_ids(batch_id)
    progress = tqdm(total=len(image_ids), desc=f"Downloading {batch_id}", leave=False)
    try:
        with ThreadPoolExecutor(max_workers=workers) as executor:
            futures = [
                executor.submit(download_image_by_image_id, image_id, progress)
                for image_id in image_ids
            ]
        # Leaving the executor block waits for all downloads. result()
        # re-raises any exception from a worker, so a failed download is
        # reported here instead of as a missing file while zipping.
        for future in futures:
            future.result()
    finally:
        # Close the bar even when a download failed.
        progress.close()

    # Zip the folder with downloaded images
    zip_filename = f"{batch_id}.zip"
    with ZipFile(zip_filename, "w") as zipf:
        for image_id in image_ids:
            img_path = os.path.join(batch_id, f"{image_id}.jpg")
            # Store only the file name so the archive has no directory level.
            zipf.write(img_path, arcname=os.path.basename(img_path))
    return zip_filename
def gradio_interface(batch_id):
    """
    Gradio event handler: download a batch and return the zip file path

    Arguments:
        batch_id: Batch ID entered by the user

    Returns:
        Path of the zip file, for the file output component

    Raises:
        gr.Error: If the download or zipping fails; carries the original
            error message so the UI can show it to the user
    """
    try:
        return download_batch_images(batch_id)  # zip file path for download
    except Exception as e:
        # The output is a file component, so returning the message string
        # would be treated as a file path. Raise a UI error instead.
        raise gr.Error(str(e)) from e
# Build the UI: a title, a row holding the batch ID input and the zip file
# output, and a button that triggers the download.
# NOTE(review): the nesting below is reconstructed from a source whose
# indentation was lost — confirm which components belong inside the row.
with gr.Blocks() as app:
    gr.Markdown("# Batch Image Downloader")

    with gr.Row():
        batch_id_input = gr.Textbox(label="Batch ID")
        output_file = gr.File(label="Download Zip File")

    download_button = gr.Button("Download Images")

    # Clicking the button passes the entered batch ID to gradio_interface
    # and sends its return value to the file output.
    download_button.click(
        fn=gradio_interface,
        inputs=[batch_id_input],
        outputs=[output_file],
    )

# Start the web server for the app.
app.launch()