Spaces:
biodivx
/
Sleeping

amroa committed on
Commit
9bed9a7
·
1 Parent(s): a9a9482

image download functionality working

Browse files
Files changed (2) hide show
  1. ebird_taxonomy_v2023.csv +0 -0
  2. fetch_img.py +65 -1
ebird_taxonomy_v2023.csv ADDED
The diff for this file is too large to render. See raw diff
 
fetch_img.py CHANGED
@@ -2,4 +2,68 @@ import os
2
  import requests
3
  from bs4 import BeautifulSoup
4
  from urllib.parse import urljoin
5
- import urllib.request
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import requests
3
  from bs4 import BeautifulSoup
4
  from urllib.parse import urljoin
5
+ import urllib.request
6
+ import pandas as pd
7
+ import warnings
8
+ from requests.packages.urllib3.exceptions import InsecureRequestWarning
9
+
10
+ warnings.simplefilter('ignore', InsecureRequestWarning)
11
+
12
# Request template for the eBird taxonomy endpoint: target URL, default query
# parameters, and authentication headers.
#
# NOTE(security): an API token was committed in source. It is kept below only
# as a fallback so existing setups keep working; set the EBIRD_API_TOKEN
# environment variable to override it, and rotate the committed key.
REQ_FMT = {
    "url": 'https://api.ebird.org/v2/ref/taxonomy/ebird',
    "params": {
        # Placeholder; callers are expected to supply a real species code.
        'species': 'CHANGE THIS TO SPECIES CODE'
    },
    "headers": {
        'X-eBirdApiToken': os.environ.get('EBIRD_API_TOKEN', 'id1a0e3q2lt3')
    }
}
21
+ bird_df = pd.read_csv("ebird_taxonomy_v2023.csv")
22
+
23
+
24
def scientific_to_species_code(scientific_name: str, taxonomy=None):
    """Look up the species code for a scientific name.

    Args:
        scientific_name: Value to match exactly against the ``SCI_NAME``
            column.
        taxonomy: Optional DataFrame with ``SCI_NAME`` and ``SPECIES_CODE``
            columns. Defaults to the module-level ``bird_df`` table.

    Returns:
        The ``SPECIES_CODE`` value of the first matching row.

    Raises:
        IndexError: If no row matches ``scientific_name``.
    """
    table = bird_df if taxonomy is None else taxonomy
    # Single .loc lookup (row mask + column) instead of chained indexing.
    codes = table.loc[table['SCI_NAME'] == scientific_name, 'SPECIES_CODE']
    if codes.empty:
        # Same exception type an empty positional lookup would raise, but with
        # a message that names the missing species.
        raise IndexError(
            f"No species code found for scientific name {scientific_name!r}"
        )
    return codes.iloc[0]
27
+
28
# Gets taxonomical info on a bird. (Only called from the __main__ demo below.)
def get_bird_info(species_code: str):
    """Fetch the eBird taxonomy record for one species code.

    Sends a GET request to ``REQ_FMT["url"]`` with ``REQ_FMT["headers"]`` and
    a ``species`` query parameter.

    Args:
        species_code: Species code sent as the ``species`` query parameter.

    Returns:
        The raw response body (``response.content``). The HTTP status code is
        not checked, so error responses are returned as-is.
    """
    # Build the query per call instead of overwriting the shared REQ_FMT
    # template, so calls do not leak state into each other.
    params = {"species": species_code}
    # NOTE(review): verify=False disables TLS certificate verification; kept
    # to preserve existing behavior, but it should be removed if possible.
    response = requests.get(
        REQ_FMT["url"],
        headers=REQ_FMT["headers"],
        params=params,
        verify=False,
        timeout=30,  # avoid hanging forever on an unresponsive server
    )
    return response.content
35
+
36
def download_images(url, folder_path='assets'):
    """Download the asset images referenced by a web page.

    Fetches ``url``, collects the ``src`` of every ``<img>`` tag, keeps only
    the URLs containing both "api" and "asset", and saves each one as
    ``image_<n>.jpg`` (numbered from 1) inside ``folder_path``.

    Args:
        url: Page to scan for images; also the base for relative ``src`` values.
        folder_path: Directory to save into. Created if it does not exist.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder_path, exist_ok=True)

    # Fetch the HTML content of the webpage.
    # NOTE(review): verify=False disables TLS certificate verification; kept
    # to preserve existing behavior.
    response = requests.get(url, verify=False, timeout=30)
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.content, 'html.parser')

    # Resolve every <img src> against the page URL, then keep asset URLs only.
    img_urls = [
        urljoin(url, img['src'])
        for img in soup.find_all('img')
        if 'src' in img.attrs
    ]
    img_urls = [el for el in img_urls if "api" in el and "asset" in el]

    # Download each image; one failure must not abort the remaining downloads.
    for i, img_url in enumerate(img_urls, start=1):
        img_path = os.path.join(folder_path, f'image_{i}.jpg')
        try:
            urllib.request.urlretrieve(img_url, img_path)
        except Exception as e:  # deliberate best-effort per-image boundary
            print(f"Failed to download {img_url}. Error: {e}")
        else:
            print(f"Downloaded: {img_url}")
64
+
65
if __name__ == '__main__':
    # Demo run: resolve one species, print its taxonomy record, then download
    # the images from its eBird species page into the default folder.
    # (The taxonomy CSV is already loaded at module level, so it is not
    # re-read here.)
    scode = scientific_to_species_code("Melanocharis striativentris")
    print(get_bird_info(scode))
    download_images(f"https://ebird.org/species/{scode}")