VinitT committed on
Commit
f4cd3ce
·
verified ·
1 Parent(s): bcbac0e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +281 -0
app.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import html
import io
import mimetypes
import re
import urllib.parse
import zipfile

import requests
import streamlit as st
from bs4 import BeautifulSoup
9
+
10
# Single source of truth for the application name, so the browser tab title
# and the on-page heading cannot drift apart (they previously read
# "ImageHarvesters" and "ImageHarvester" respectively).
APP_NAME = "ImageHarvester"

# Page configuration
st.set_page_config(page_title=APP_NAME, layout="wide")

# Custom CSS injected into the page; the classes below are referenced by the
# image-card markup rendered further down in this script.
st.markdown("""
    <style>
    .main {
        padding: 1rem;
        border-radius: 0.5rem;
        background-color: #f0f2f6;
    }
    .stButton>button {
        width: 100%;
    }
    .image-card {
        background: white;
        border-radius: 8px;
        box-shadow: 0 2px 4px rgba(0,0,0,0.2);
        overflow: hidden;
        margin-bottom: 20px;
        transition: transform 0.2s, box-shadow 0.2s, border 0.2s;
    }
    .image-container {
        position: relative;
        padding-top: 75%; /* 4:3 Aspect Ratio */
    }
    .image-container img {
        position: absolute;
        top: 0;
        left: 0;
        width: 100%;
        height: 100%;
        object-fit: cover;
    }
    .image-info {
        padding: 10px;
        font-size: 14px;
    }
    h1 {
        color: #1e3a8a;
        text-align: center;
        margin-bottom: 2rem;
    }
    .stSuccess {
        background-color: #d1fae5;
        color: #065f46;
    }
    .stWarning {
        background-color: #fef3c7;
        color: #92400e;
    }
    .stError {
        background-color: #fee2e2;
        color: #991b1b;
    }
    .selected {
        border: 2px solid #000000;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.5);
        transform: scale(1.05);
    }
    .subtitle {
        color: #1e3a8a;
        text-align: center;
        margin-bottom: 2rem;
        font-size: 1.2rem;
    }
    .url-input, .number-input {
        border: 2px solid #1e3a8a;
        border-radius: 4px;
        padding: 0.5rem;
        margin-bottom: 1rem;
        width: 100%;
        box-sizing: border-box;
    }
    @media only screen and (max-width: 600px) {
        .main {
            padding: 1rem;
        }
        .stButton>button {
            width: 100%;
        }
        .image-card {
            margin-bottom: 10px;
        }
    }
    </style>
    """, unsafe_allow_html=True)

st.title(APP_NAME)

# Initialize session state for URLs: start with a single blank entry so one
# input field is rendered on first load.
if 'urls' not in st.session_state:
    st.session_state.urls = ['']
103
+
104
def add_url():
    """Append one blank entry to the URL list stored in session state."""
    url_entries = st.session_state.urls
    url_entries.append('')
106
+
107
def remove_url(index):
    """Delete the entry at position ``index`` from the session-state URL list.

    Raises:
        IndexError: if ``index`` is outside the bounds of the list.
    """
    url_entries = st.session_state.urls
    del url_entries[index]
109
+
110
# Compiled once at import time instead of on every call.
# The pattern ends in ``\Z`` rather than ``$``: ``$`` also matches just before
# a trailing newline, which let strings such as "http://example.com\n" pass.
_URL_PATTERN = re.compile(
    r'^(?:http|ftp)s?://'  # scheme: http, https, ftp or ftps
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|'  # ...or ipv4
    r'\[?[A-F0-9]*:[A-F0-9:]+\]?)'  # ...or ipv6
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)\Z',  # optional path/query, then strict end of string
    re.IGNORECASE,
)


def is_valid_url(url):
    """Return True if ``url`` looks like an absolute http(s)/ftp(s) URL.

    Args:
        url: The candidate value. Anything that is not a ``str`` (for example
            ``None``) is reported as invalid instead of raising.

    Returns:
        bool: True when the whole string matches the URL pattern, i.e. a
        supported scheme followed by a domain name, ``localhost``, an IPv4 or
        IPv6 literal, an optional port and an optional path/query that
        contains no whitespace.
    """
    if not isinstance(url, str):
        return False
    return _URL_PATTERN.match(url) is not None
120
+
121
def get_file_extension(content_type):
    """Map a MIME type to a file extension, defaulting to ``'.jpg'``.

    Args:
        content_type: MIME type string such as ``"image/png"``.

    Returns:
        str: The extension (with leading dot) reported by
        ``mimetypes.guess_extension``, or ``'.jpg'`` when it reports none.
    """
    return mimetypes.guess_extension(content_type) or '.jpg'
124
+
125
def fetch_images(url, max_images):
    """Collect image URLs from the ``<img>`` tags of a web page.

    Progress and problems are reported to the user through Streamlit
    messages (``st.info`` / ``st.warning`` / ``st.error``).

    Args:
        url: Address of the page to scan; checked with ``is_valid_url``.
        max_images: Maximum number of image URLs to return.

    Returns:
        list[str]: Absolute http(s) image URLs in document order, at most
        ``max_images`` of them. Empty when the URL is invalid, the request
        fails, or the page yields no usable image sources.
    """
    if not is_valid_url(url):
        st.warning(f"Invalid URL: {url}")
        return []

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': url
    }

    # Only the network call can raise RequestException, so keep the try
    # narrow. The timeout matches the one used for image downloads and keeps
    # an unresponsive host from hanging the app indefinitely.
    try:
        response = session.get(url, headers=headers, timeout=10)
    except requests.exceptions.RequestException as e:
        st.error(f"An error occurred for {url}: {str(e)}")
        return []

    st.info(f"Status code for {url}: {response.status_code}")

    if response.status_code != 200:
        st.warning(f"Unexpected status code for {url}: {response.status_code}. Attempting to proceed anyway.")

    soup = BeautifulSoup(response.content, 'html.parser')

    images = []
    for img in soup.find_all('img'):
        # Limit the number of *collected* URLs, so <img> tags without a
        # usable src do not consume the quota.
        if len(images) >= max_images:
            break

        img_url = img.get('src')
        if not img_url:
            continue

        if not img_url.startswith(('http://', 'https://')):
            # Resolve relative and protocol-relative sources against the page.
            img_url = urllib.parse.urljoin(url, img_url)

        # Skip sources that are still not http(s) after resolution (for
        # example data: or javascript: URIs); they cannot be downloaded later.
        if not img_url.startswith(('http://', 'https://')):
            continue

        images.append(img_url)

    if not images:
        st.warning(f"No images found on {url}.")

    return images
165
+
166
def download_images(selected_images, referer=None):
    """Download the given image URLs and bundle them into an in-memory ZIP.

    Each image is stored as ``image_<position>`` plus an extension derived
    from the response's Content-Type header. An image that cannot be fetched
    is reported with ``st.warning`` and skipped, so one broken link does not
    discard the rest of the archive.

    Args:
        selected_images: Iterable of image URLs to download.
        referer: Optional value for the ``Referer`` request header. When
            omitted, the module-level name ``url`` is used if it exists
            (this function previously read that global directly, which
            raised ``NameError`` whenever it was unbound); if that is missing
            or empty, the origin of each image URL is sent instead.

    Returns:
        io.BytesIO | None: Buffer positioned at offset 0 containing the ZIP
        archive, or ``None`` when downloads were attempted and every one of
        them failed.
    """
    if referer is None:
        # Legacy behaviour: fall back to the script-level ``url`` variable,
        # but tolerate its absence instead of crashing.
        referer = globals().get('url')

    base_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
    }

    zip_buffer = io.BytesIO()
    saved = 0
    failed = 0
    with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
        for i, img_url in enumerate(selected_images):
            if referer:
                img_referer = referer
            else:
                parts = urllib.parse.urlsplit(img_url)
                img_referer = f"{parts.scheme}://{parts.netloc}/"
            headers = dict(base_headers, Referer=img_referer)

            try:
                img_response = session.get(img_url, headers=headers, timeout=10)
                img_response.raise_for_status()
            except requests.exceptions.RequestException as e:
                failed += 1
                st.warning(f"Skipping {img_url}: {str(e)}")
                continue

            # Drop any parameters such as "; charset=..." before the lookup.
            content_type = img_response.headers.get('content-type', '').split(';')[0].strip()
            file_extension = get_file_extension(content_type)

            # Numbering follows the position in the selection, so names stay
            # stable even when an earlier image was skipped.
            file_name = f'image_{i+1}{file_extension}'

            zip_file.writestr(file_name, img_response.content)
            saved += 1

    if failed and not saved:
        return None

    zip_buffer.seek(0)
    return zip_buffer
191
+
192
# Initialize the requests session (shared by fetch_images and download_images)
session = requests.Session()

# Input fields for URLs
st.subheader("Enter Website URLs")
# NOTE(review): widget keys are positional (url_{i}); after a removal the
# remaining inputs may keep stale text - confirm against Streamlit's
# widget-state behaviour.
# The loop variable must stay named ``url``: download_images reads this
# module-level name for its Referer header.
for i, url in enumerate(st.session_state.urls):
    col1, col2 = st.columns([10, 1])
    with col1:
        st.session_state.urls[i] = st.text_input(
            f"URL {i+1}",
            value=url,
            key=f"url_{i}",
            help="Enter the URL of the website from which you want to download images.",
            placeholder="https://example.com",
        )
    with col2:
        if st.button("Remove", key=f"remove_{i}"):
            remove_url(i)
            # st.experimental_rerun is deprecated in favour of st.rerun and is
            # missing from recent Streamlit releases; prefer the new name and
            # fall back only on old installations.
            rerun = getattr(st, "rerun", None) or st.experimental_rerun
            rerun()

if st.button("Add URL"):
    add_url()

max_images_per_url = st.number_input("Max images per URL:", min_value=1, value=10, step=1)

if st.button("Fetch Images", key="fetch"):
    all_images = []
    for url in st.session_state.urls:
        if not is_valid_url(url):
            st.warning(f"Invalid URL: {url}")
            continue
        with st.spinner(f"Fetching images from {url}..."):
            images = fetch_images(url, max_images_per_url)
            all_images.extend(images)
    if all_images:
        # Persist the results so they survive the reruns triggered by the
        # selection widgets below.
        st.session_state.images = all_images
        st.session_state.selected_images = [False] * len(all_images)
        st.success(f"Found {len(all_images)} images in total. Select the images you want to download.")
    else:
        st.warning("No images found or could not fetch images from any of the provided URLs.")

if 'images' in st.session_state:
    st.subheader("Fetched Images")

    # Buttons for Select All and Clear Selection (third column is a spacer)
    col1, col2, _ = st.columns([1, 1, 1])
    with col1:
        if st.button("Select All"):
            st.session_state.selected_images = [True] * len(st.session_state.images)
    with col2:
        if st.button("Clear"):
            st.session_state.selected_images = [False] * len(st.session_state.images)

    # Calculate the number of columns
    num_cols = 4
    columns = st.columns(num_cols)

    selected_images = []
    for i, img_url in enumerate(st.session_state.images):
        checkbox_key = f"check_{i}"

        # Determine the column to place the image in (round-robin)
        col = columns[i % num_cols]

        # Display the image and checkbox in the determined column
        with col:
            st.session_state.selected_images[i] = st.checkbox("Select Image", key=checkbox_key, value=st.session_state.selected_images[i])
            img_class = "selected" if st.session_state.selected_images[i] else ""
            # The URL comes from a scraped third-party page and is rendered
            # with unsafe_allow_html, so escape it before placing it inside
            # the attribute to prevent markup injection.
            safe_src = html.escape(img_url, quote=True)
            st.markdown(f"""
                <div class="image-card {img_class}">
                    <div class="image-container">
                        <img src="{safe_src}" alt="image_{i+1}">
                    </div>
                    <div class="image-info">
                        image_{i+1}
                    </div>
                </div>
                """, unsafe_allow_html=True)
            if st.session_state.selected_images[i]:
                selected_images.append(img_url)

    if selected_images:
        if st.button("Download Selected Images"):
            with st.spinner("Preparing download..."):
                zip_buffer = download_images(selected_images)
                if zip_buffer:
                    st.download_button(
                        label="Download ZIP",
                        data=zip_buffer,
                        file_name="selected_images.zip",
                        mime="application/zip"
                    )
                else:
                    st.error("Failed to prepare the download. Please try again.")
    else:
        st.info("Select one or more images to download.")