Spaces:

VinitT
/

ImageHarvester

Sleeping

App Files Files Community

VinitT committed on Aug 12, 2024

Commit

f4cd3ce

verified ·

1 Parent(s): bcbac0e

Create app.py

Browse files

Files changed (1) hide show

app.py +281 -0

app.py ADDED Viewed

	@@ -0,0 +1,281 @@

+import html
+import io
+import mimetypes
+import re
+import urllib.parse
+import zipfile
+
+import requests
+import streamlit as st
+from bs4 import BeautifulSoup
+# Page configuration, issued ahead of every other st.* call in this script.
+# The browser-tab title uses the same spelling as the app name shown by
+# st.title ("ImageHarvester").
+st.set_page_config(page_title="ImageHarvester", layout="wide")
+# Custom CSS: the stylesheet lives in a named constant so the markup is kept
+# apart from the call that injects it into the page.
+CUSTOM_CSS = """
+<style>
+    .main {
+        padding: 1rem;
+        border-radius: 0.5rem;
+        background-color: #f0f2f6;
+    }
+    .stButton>button {
+        width: 100%;
+    }
+    .image-card {
+        background: white;
+        border-radius: 8px;
+        box-shadow: 0 2px 4px rgba(0,0,0,0.2);
+        overflow: hidden;
+        margin-bottom: 20px;
+        transition: transform 0.2s, box-shadow 0.2s, border 0.2s;
+    }
+    .image-container {
+        position: relative;
+        padding-top: 75%; /* 4:3 Aspect Ratio */
+    }
+    .image-container img {
+        position: absolute;
+        top: 0;
+        left: 0;
+        width: 100%;
+        height: 100%;
+        object-fit: cover;
+    }
+    .image-info {
+        padding: 10px;
+        font-size: 14px;
+    }
+    h1 {
+        color: #1e3a8a;
+        text-align: center;
+        margin-bottom: 2rem;
+    }
+    .stSuccess {
+        background-color: #d1fae5;
+        color: #065f46;
+    }
+    .stWarning {
+        background-color: #fef3c7;
+        color: #92400e;
+    }
+    .stError {
+        background-color: #fee2e2;
+        color: #991b1b;
+    }
+    .selected {
+        border: 2px solid #000000;
+        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.5);
+        transform: scale(1.05);
+    }
+    .subtitle {
+        color: #1e3a8a;
+        text-align: center;
+        margin-bottom: 2rem;
+        font-size: 1.2rem;
+    }
+    .url-input, .number-input {
+        border: 2px solid #1e3a8a;
+        border-radius: 4px;
+        padding: 0.5rem;
+        margin-bottom: 1rem;
+        width: 100%;
+        box-sizing: border-box;
+    }
+    @media only screen and (max-width: 600px) {
+        .main {
+            padding: 1rem;
+        }
+        .stButton>button {
+            width: 100%;
+        }
+        .image-card {
+            margin-bottom: 10px;
+        }
+    }
+</style>
+"""
+st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
+st.title("ImageHarvester")
+# Seed the session's URL list with a single empty entry whenever the key is
+# absent, so the form below always has at least one row to draw at start-up.
+if 'urls' not in st.session_state:
+    st.session_state['urls'] = ['']
+def add_url():
+    """Append one blank entry to the URL list kept in ``st.session_state``."""
+    url_list = st.session_state['urls']
+    url_list.append('')
+def remove_url(index):
+    """Remove the entry at position ``index`` from the session's URL list.
+
+    An out-of-range ``index`` raises ``IndexError`` (from ``list.pop``).
+    """
+    url_list = st.session_state['urls']
+    url_list.pop(index)
+# Compiled once at import time rather than on every call.
+_URL_PATTERN = re.compile(
+    r'^https?://'  # http:// or https:// only; the app fetches pages over HTTP(S)
+    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
+    r'localhost|'  # localhost...
+    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|'  # ...or ipv4
+    r'\[?[A-F0-9]*:[A-F0-9:]+\]?)'  # ...or ipv6
+    r'(?::\d+)?'  # optional port
+    r'(?:/?|[/?]\S+)\Z',  # optional path/query; \Z (unlike $) rejects a trailing newline
+    re.IGNORECASE,
+)
+def is_valid_url(url):
+    """Return True if ``url`` looks like an absolute http(s) URL.
+
+    The check is purely syntactic: scheme, then a domain name, ``localhost``,
+    an IPv4-style dotted quad or an IPv6-style literal, then an optional port
+    and an optional path/query without whitespace. No network access is made.
+
+    Args:
+        url: Candidate value, normally the text typed into a URL field.
+
+    Returns:
+        bool: False for non-string input, empty strings, other schemes
+        (e.g. ``ftp://``) and anything else the pattern rejects.
+    """
+    if not isinstance(url, str):
+        return False
+    return _URL_PATTERN.match(url) is not None
+def get_file_extension(content_type):
+    """Translate a MIME type into a file extension, falling back to ``.jpg``.
+
+    Args:
+        content_type: Bare MIME type such as ``image/png`` (no parameters).
+
+    Returns:
+        str: The extension reported by ``mimetypes.guess_extension`` (leading
+        dot included), or ``'.jpg'`` when the type is unknown or empty.
+    """
+    return mimetypes.guess_extension(content_type) or '.jpg'
+def fetch_images(url, max_images):
+    """Collect up to ``max_images`` image URLs from the page at ``url``.
+
+    The page is requested through the module-level ``session`` with
+    browser-like headers, parsed with BeautifulSoup, and every ``<img>`` tag
+    that has a ``src`` contributes one URL. Progress and problems are reported
+    to the user through ``st.info`` / ``st.warning`` / ``st.error``.
+
+    Args:
+        url: Address of the page to scan; rejected with a warning when
+            ``is_valid_url`` returns False.
+        max_images: Upper bound on the number of image URLs returned.
+
+    Returns:
+        list: Image URLs in document order. ``src`` values that do not start
+        with ``http://`` or ``https://`` are resolved against ``url``. The list
+        is empty when the URL is invalid, the request fails, or the page has
+        no ``<img>`` tags.
+    """
+    if not is_valid_url(url):
+        st.warning(f"Invalid URL: {url}")
+        return []
+    try:
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+            'Accept-Language': 'en-US,en;q=0.9',
+            'Referer': url
+        }
+        # Bounded wait so an unresponsive host cannot hang the app; the same
+        # limit download_images applies to image requests.
+        response = session.get(url, headers=headers, timeout=10)
+        st.info(f"Status code for {url}: {response.status_code}")
+        if response.status_code != 200:
+            st.warning(f"Unexpected status code for {url}: {response.status_code}. Attempting to proceed anyway.")
+        soup = BeautifulSoup(response.content, 'html.parser')
+        img_tags = soup.find_all('img')
+        if not img_tags:
+            st.warning(f"No images found on {url}.")
+            return []
+        images = []
+        for img in img_tags:
+            # The limit applies to URLs actually collected, so <img> tags
+            # without a src do not use up the quota.
+            if len(images) >= max_images:
+                break
+            img_url = img.get('src')
+            if not img_url:
+                continue
+            if not img_url.startswith(('http://', 'https://')):
+                img_url = urllib.parse.urljoin(url, img_url)
+            images.append(img_url)
+        return images
+    except requests.exceptions.RequestException as e:
+        st.error(f"An error occurred for {url}: {str(e)}")
+        return []
+def download_images(selected_images):
+    """Download the given image URLs and bundle them into an in-memory ZIP.
+
+    Each image is requested through the module-level ``session`` with a
+    ``Referer`` built from the image's own scheme and host, so the function no
+    longer depends on a variable named ``url`` existing at module level.
+    An image that cannot be downloaded is skipped with a warning instead of
+    discarding the whole archive. Archive members are named
+    ``image_<n><ext>``, where ``n`` is the 1-based position in
+    ``selected_images`` and ``ext`` comes from the response's content type.
+
+    Args:
+        selected_images: Sequence of absolute image URLs.
+
+    Returns:
+        io.BytesIO positioned at offset 0 and holding the archive, or ``None``
+        when downloads were attempted and every one of them failed.
+    """
+    base_headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+        'Accept-Language': 'en-US,en;q=0.9',
+    }
+    zip_buffer = io.BytesIO()
+    saved = 0
+    failed = 0
+    with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
+        for i, img_url in enumerate(selected_images):
+            parts = urllib.parse.urlsplit(img_url)
+            headers = dict(base_headers, Referer=f"{parts.scheme}://{parts.netloc}/")
+            try:
+                img_response = session.get(img_url, headers=headers, timeout=10)
+                img_response.raise_for_status()
+            except requests.exceptions.RequestException as e:
+                failed += 1
+                st.warning(f"Skipped {img_url}: {str(e)}")
+                continue
+            # Drop parameters such as "; charset=..." before the MIME lookup.
+            content_type = img_response.headers.get('content-type', '').split(';')[0].strip()
+            file_extension = get_file_extension(content_type)
+            file_name = f'image_{i+1}{file_extension}'
+            zip_file.writestr(file_name, img_response.content)
+            saved += 1
+    if failed and not saved:
+        st.error("An error occurred while downloading images: none of the selected images could be downloaded.")
+        return None
+    # Rewind so the caller reads the archive from its first byte.
+    zip_buffer.seek(0)
+    return zip_buffer
+# Initialize the requests session shared by fetch_images and download_images.
+session = requests.Session()
+# st.rerun is the supported replacement for the deprecated
+# st.experimental_rerun; use whichever one this Streamlit install provides.
+_rerun = getattr(st, "rerun", None) or st.experimental_rerun
+# Input fields for URLs: one text box plus a "Remove" button per stored entry.
+# NOTE(review): widget keys are positional (url_0, url_1, ...), so after a
+# removal the remaining boxes may keep the text previously typed at those
+# positions - confirm against Streamlit's keyed-widget behaviour.
+st.subheader("Enter Website URLs")
+for i, url in enumerate(st.session_state.urls):
+    col1, col2 = st.columns([10, 1])
+    with col1:
+        st.session_state.urls[i] = st.text_input(f"URL {i+1}", value=url, key=f"url_{i}", help="Enter the URL of the website from which you want to download images.", placeholder="https://example.com", )
+    with col2:
+        if st.button("Remove", key=f"remove_{i}"):
+            remove_url(i)
+            # Restart the script right away so the list is redrawn without
+            # the removed row (and the loop stops iterating a mutated list).
+            _rerun()
+if st.button("Add URL"):
+    add_url()
+    # The loop above has already drawn the rows for this pass; rerun so the
+    # new empty field appears immediately instead of on the next interaction.
+    _rerun()
+max_images_per_url = st.number_input("Max images per URL:", min_value=1, value=10, step=1)
+# "Fetch Images": scan every valid URL and store the combined result, together
+# with a matching all-False selection list, in session state.
+if st.button("Fetch Images", key="fetch"):
+    all_images = []
+    for url in st.session_state.urls:
+        if not is_valid_url(url):
+            st.warning(f"Invalid URL: {url}")
+            continue
+        with st.spinner(f"Fetching images from {url}..."):
+            all_images.extend(fetch_images(url, max_images_per_url))
+    if all_images:
+        # Forget checkbox state left over from a previous fetch. The gallery
+        # checkboxes are keyed by position ("check_<i>"), so old ticks could
+        # otherwise carry over to the newly fetched images.
+        for stale_key in [k for k in st.session_state.keys() if str(k).startswith("check_")]:
+            del st.session_state[stale_key]
+        st.session_state.images = all_images
+        st.session_state.selected_images = [False] * len(all_images)
+        st.success(f"Found {len(all_images)} images in total. Select the images you want to download.")
+    else:
+        st.warning("No images found or could not fetch images from any of the provided URLs.")
+# Gallery: show every fetched image with a checkbox, then offer the ticked
+# ones as a ZIP download.
+if 'images' in st.session_state:
+    st.subheader("Fetched Images")
+    images = st.session_state.images
+    # Buttons for Select All and Clear Selection (third column is a spacer).
+    col1, col2, _ = st.columns([1, 1, 1])
+    with col1:
+        select_all = st.button("Select All")
+    with col2:
+        clear_all = st.button("Clear")
+    if clear_all:
+        bulk_value = False
+    elif select_all:
+        bulk_value = True
+    else:
+        bulk_value = None
+    if bulk_value is not None:
+        st.session_state.selected_images = [bulk_value] * len(images)
+        # Write the checkbox widgets' own state too (they are created further
+        # down in this run); updating only the list above could be overridden
+        # by whatever value each keyed checkbox already holds.
+        for i, _img in enumerate(images):
+            st.session_state[f"check_{i}"] = bulk_value
+    # Lay the images out in a fixed number of columns, filled row by row.
+    num_cols = 4
+    columns = st.columns(num_cols)
+    selected_images = []
+    for i, img_url in enumerate(images):
+        checkbox_key = f"check_{i}"
+        # Seed the widget state once; the checkbox then reads its value from
+        # session state through its key rather than from a `value=` default.
+        if checkbox_key not in st.session_state:
+            st.session_state[checkbox_key] = st.session_state.selected_images[i]
+        with columns[i % num_cols]:
+            is_selected = st.checkbox("Select Image", key=checkbox_key)
+            st.session_state.selected_images[i] = is_selected
+            img_class = "selected" if is_selected else ""
+            # The URL comes from a scraped page; escape it before placing it
+            # inside an HTML attribute rendered with unsafe_allow_html=True.
+            safe_src = html.escape(img_url, quote=True)
+            label = f"image_{i+1}"
+            st.markdown(f"""
+            <div class="image-card {img_class}">
+                <div class="image-container">
+                    <img src="{safe_src}" alt="{label}">
+                </div>
+                <div class="image-info">
+                    {label}
+                </div>
+            </div>
+            """, unsafe_allow_html=True)
+            if is_selected:
+                selected_images.append(img_url)
+    if selected_images:
+        if st.button("Download Selected Images"):
+            with st.spinner("Preparing download..."):
+                zip_buffer = download_images(selected_images)
+            if zip_buffer:
+                st.download_button(
+                    label="Download ZIP",
+                    data=zip_buffer,
+                    file_name="selected_images.zip",
+                    mime="application/zip"
+                )
+            else:
+                st.error("Failed to prepare the download. Please try again.")
+    else:
+        st.info("Select one or more images to download.")