# Spaces: Running
import os | |
import requests | |
import shutil | |
import gradio as gr | |
from tqdm import tqdm | |
from concurrent.futures import ThreadPoolExecutor | |
from zipfile import ZipFile | |
def get_image_ids(batch_id: str) -> list[str]:
    """
    A list of image IDs in the given batch

    Arguments:
        batch_id: Batch ID, inserted into the manifest URL

    Returns:
        One ID per entry of the manifest's "items" list: the text after
        the first "!" of the entry's "id", cut to 14 characters

    Raises:
        requests.HTTPError: The manifest request returned an error status
        requests.Timeout: The server did not answer within the timeout
    """
    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get(
        f"https://iiifintern.ra.se/arkis!{batch_id}/manifest",
        timeout=30,
    )
    response.raise_for_status()
    manifest = response.json()
    return [item["id"].split("!")[1][:14] for item in manifest["items"]]
def download_image(url: str, dest: str) -> None:
    """
    Download an image

    Arguments:
        url: Image url
        dest: Destination file name

    Raises:
        requests.HTTPError: The server answered with an error status
        requests.Timeout: The server did not answer within the timeout
    """
    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=60) as response:
        # Checked before `dest` is opened, so an error page is never
        # saved under an image file name.
        response.raise_for_status()
        with open(dest, "wb") as out_file:
            # iter_content decodes any Content-Encoding; copying from
            # response.raw would write the still-encoded bytes.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                out_file.write(chunk)
def download_image_by_image_id(image_id: str, progress=None):
    """
    Download the image with the given image ID

    Creates a directory named after the batch ID (the first 8 characters
    of the image ID) and saves the image in that directory as
    "<image_id>.jpg".

    Arguments:
        image_id: Image ID, inserted into the download URL
        progress: Optional progress bar, advanced by one after the download
    """
    batch_id = image_id[:8]
    os.makedirs(batch_id, exist_ok=True)
    url = f"https://lbiiif.riksarkivet.se/arkis!{image_id}/full/max/0/default.jpg"
    dest = os.path.join(batch_id, image_id + ".jpg")
    download_image(url, dest)
    # Compare against None instead of testing truthiness: a progress bar
    # object may define its own __bool__ (tqdm's depends on its total),
    # which would skip or break the update for a bar that was passed in.
    if progress is not None:
        progress.update(1)
def download_batch_images(batch_id: str, workers: int = 2):
    """
    Download all images of a batch and pack them into a zip file

    The images are read back from the directory named after the batch ID,
    and stored in the zip under their bare file names.

    Arguments:
        batch_id: Batch whose images are downloaded
        workers: Number of download threads

    Returns:
        Name of the zip file that was written, "<batch_id>.zip"

    Raises:
        Exception: The exception of the first failed download, re-raised
            from its worker thread
    """
    image_ids = get_image_ids(batch_id)
    progress = tqdm(total=len(image_ids), desc=f"Downloading {batch_id}", leave=False)
    try:
        # Leaving the executor block waits for every submitted download.
        with ThreadPoolExecutor(max_workers=workers) as executor:
            futures = [
                executor.submit(download_image_by_image_id, image_id, progress)
                for image_id in image_ids
            ]
        # submit() alone drops worker exceptions; result() re-raises them
        # here so a failed download is reported before zipping starts.
        for future in futures:
            future.result()
    finally:
        progress.close()

    # Zip the folder with downloaded images
    zip_filename = f"{batch_id}.zip"
    with ZipFile(zip_filename, "w") as zipf:
        for image_id in image_ids:
            img_path = os.path.join(batch_id, f"{image_id}.jpg")
            zipf.write(img_path, arcname=os.path.basename(img_path))
    return zip_filename
def gradio_interface(batch_id):
    """
    Click handler: download a batch and return its zip file

    Arguments:
        batch_id: Batch ID entered in the textbox

    Returns:
        Path of the zip file, for the file output component

    Raises:
        gr.Error: Downloading or zipping failed; carries the original
            exception's message
    """
    try:
        return download_batch_images(batch_id)
    except Exception as e:
        # The handler's output is a gr.File, so a returned message would
        # be taken for a file path. gr.Error shows the message in the UI.
        raise gr.Error(str(e)) from e
# Page layout: a title, a row with the batch ID input next to the zip file
# output, and a button that starts the download.
# NOTE(review): the nesting below was reconstructed from a listing without
# indentation - confirm that the button belongs below the row, not in it.
with gr.Blocks() as app:
    gr.Markdown("# Batch Image Downloader")

    with gr.Row():
        batch_id_input = gr.Textbox(label="Batch ID")
        output_file = gr.File(label="Download Zip File")

    download_button = gr.Button("Download Images")
    # A click passes the textbox value to gradio_interface and sends its
    # result to the file component.
    download_button.click(
        fn=gradio_interface,
        inputs=[batch_id_input],
        outputs=[output_file],
    )

app.launch()