image download functionality working
Browse files- ebird_taxonomy_v2023.csv +0 -0
- fetch_img.py +65 -1
ebird_taxonomy_v2023.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
fetch_img.py
CHANGED
@@ -2,4 +2,68 @@ import os
|
|
2 |
import requests
|
3 |
from bs4 import BeautifulSoup
|
4 |
from urllib.parse import urljoin
|
5 |
-
import urllib.request
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import requests
|
3 |
from bs4 import BeautifulSoup
|
4 |
from urllib.parse import urljoin
|
5 |
+
import urllib.request
|
6 |
+
import pandas as pd
|
7 |
+
import warnings
|
8 |
+
from requests.packages.urllib3.exceptions import InsecureRequestWarning
|
9 |
+
|
10 |
+
# Silence urllib3's InsecureRequestWarning, which would otherwise be emitted
# for the requests in this file that are made with verify=False.
warnings.simplefilter('ignore', InsecureRequestWarning)
|
11 |
+
|
12 |
+
# Template for requests to the eBird taxonomy endpoint: target URL, query
# parameters and authentication header.
REQ_FMT = {
    "url": 'https://api.ebird.org/v2/ref/taxonomy/ebird',
    "params": {
        # Placeholder value; a real species code must be supplied per request.
        'species': 'CHANGE THIS TO SPECIES CODE',
    },
    "headers": {
        # NOTE(review): API tokens should not live in source control. The
        # EBIRD_API_TOKEN environment variable takes precedence; the literal
        # is kept only as a backward-compatible fallback.
        'X-eBirdApiToken': os.environ.get('EBIRD_API_TOKEN', 'id1a0e3q2lt3'),
    },
}
|
21 |
+
# eBird taxonomy table, loaded once when the module is imported;
# scientific_to_species_code reads its SCI_NAME and SPECIES_CODE columns.
# The relative path is resolved against the current working directory.
bird_df = pd.read_csv("ebird_taxonomy_v2023.csv")
|
22 |
+
|
23 |
+
|
24 |
+
def scientific_to_species_code(scientific_name: str, taxonomy=None):
    """Return the eBird species code for a scientific name.

    Args:
        scientific_name: Value matched exactly against the ``SCI_NAME`` column.
        taxonomy: Optional DataFrame with ``SCI_NAME`` and ``SPECIES_CODE``
            columns. Defaults to the module-level ``bird_df``.

    Returns:
        The ``SPECIES_CODE`` value of the first matching row.

    Raises:
        IndexError: If no row has the given scientific name.
    """
    table = bird_df if taxonomy is None else taxonomy
    # One .loc call with a row mask and a column label, instead of chained
    # indexing (.loc[mask][column]).
    matches = table.loc[table['SCI_NAME'] == scientific_name, 'SPECIES_CODE']
    if matches.empty:
        # Same exception type the positional lookup below would raise, but
        # with a message that names the missing species.
        raise IndexError(
            f"No species code found for scientific name {scientific_name!r}"
        )
    return matches.array[0]
|
27 |
+
|
28 |
+
def get_bird_info(species_code: str):
    """Fetch taxonomic information for one species from the eBird API.

    Sends a GET request to ``REQ_FMT["url"]`` with ``REQ_FMT["headers"]`` and
    a ``species`` query parameter set to ``species_code``.

    Args:
        species_code: eBird species code to look up.

    Returns:
        The raw response body as bytes (``response.content``). The HTTP
        status is not checked, so the body of an error response is returned
        unchanged.
    """
    # Build the parameters per call instead of overwriting REQ_FMT['params'],
    # so the shared module-level template is never mutated.
    params = {"species": species_code}
    # NOTE(review): verify=False disables TLS certificate verification while
    # an API token is being sent. Kept for compatibility -- confirm whether
    # it is still required.
    response = requests.get(
        REQ_FMT["url"],
        headers=REQ_FMT["headers"],
        params=params,
        verify=False,
        timeout=30,  # requests has no default timeout and could block forever
    )
    return response.content
|
35 |
+
|
36 |
+
def download_images(url, folder_path='assets'):
    """Download selected images referenced by a web page.

    Fetches ``url``, collects the ``src`` of every ``<img>`` tag, keeps only
    the URLs containing both "api" and "asset", and saves each of them into
    ``folder_path`` as ``image_1.jpg``, ``image_2.jpg``, ... The ``.jpg``
    extension is used regardless of the actual image format.

    Args:
        url: Page to scan; relative image URLs are resolved against it.
        folder_path: Destination directory, created if it does not exist.

    Returns:
        None. Progress and failures are reported with ``print``; if the page
        does not return HTTP 200 nothing is downloaded.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder_path, exist_ok=True)

    # Fetch the HTML content of the webpage.
    # NOTE(review): verify=False disables TLS certificate verification; kept
    # for compatibility -- confirm whether it is still required.
    response = requests.get(
        url,
        verify=False,
        timeout=30,  # requests has no default timeout and could block forever
    )
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return

    # Parse the HTML and collect absolute URLs of all <img> tags with a src.
    soup = BeautifulSoup(response.content, 'html.parser')
    img_urls = [
        urljoin(url, img['src'])
        for img in soup.find_all('img')
        if 'src' in img.attrs
    ]
    # Keep only URLs that contain both "api" and "asset".
    img_urls = [el for el in img_urls if "api" in el and "asset" in el]

    # Download each image; numbering starts at 1.
    for i, img_url in enumerate(img_urls, start=1):
        img_path = os.path.join(folder_path, f'image_{i}.jpg')
        try:
            urllib.request.urlretrieve(img_url, img_path)
        except Exception as e:
            # Deliberately best-effort: one failed image must not stop the
            # remaining downloads.
            print(f"Failed to download {img_url}. Error: {e}")
        else:
            print(f"Downloaded: {img_url}")
|
64 |
+
|
65 |
+
if __name__ == '__main__':
    # Demo run: resolve one scientific name to its species code, print the
    # API's taxonomy response for it, then download images from its eBird
    # species page. The taxonomy CSV is already loaded at module level, so it
    # is not read a second time here.
    scode = scientific_to_species_code("Melanocharis striativentris")
    print(get_bird_info(scode))
    download_images(f"https://ebird.org/species/{scode}")
|