voj

Sleeping

App Files Files Community

amroa committed on Jun 8, 2024

Commit

9bed9a7

1 Parent(s): a9a9482

image download functionality working

Browse files

Files changed (2) hide show

ebird_taxonomy_v2023.csv +0 -0
fetch_img.py +65 -1

ebird_taxonomy_v2023.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

fetch_img.py CHANGED Viewed

@@ -2,4 +2,68 @@ import os
 import requests
 from bs4 import BeautifulSoup
 from urllib.parse import urljoin
-import urllib.request

 import requests
 from bs4 import BeautifulSoup
 from urllib.parse import urljoin
+import urllib.request
+import pandas as pd
+import warnings
+from requests.packages.urllib3.exceptions import InsecureRequestWarning
# The HTTP calls in this module pass verify=False, for which urllib3 emits
# InsecureRequestWarning; ignore that warning category process-wide.
warnings.filterwarnings('ignore', category=InsecureRequestWarning)
# Template for requests to the eBird taxonomy endpoint: target URL, query
# parameters and authentication header.
# NOTE(review): an API token is committed here in plain text. It can be
# overridden through the EBIRD_API_TOKEN environment variable; the literal is
# kept only as a fallback so existing setups keep working, and should be
# rotated and removed from source.
REQ_FMT = {
    "url": 'https://api.ebird.org/v2/ref/taxonomy/ebird',
    "params": {
        # Placeholder value; the real species code is supplied per request.
        'species': 'CHANGE THIS TO SPECIES CODE',
    },
    "headers": {
        # `or` (not a .get default) so an empty variable also falls back.
        'X-eBirdApiToken': os.environ.get('EBIRD_API_TOKEN') or 'id1a0e3q2lt3',
    },
}
# Taxonomy table read once at import time from the CSV next to this script;
# scientific_to_species_code() uses its SCI_NAME and SPECIES_CODE columns.
bird_df = pd.read_csv(filepath_or_buffer="ebird_taxonomy_v2023.csv")
def scientific_to_species_code(scientific_name: str):
    """Return the species code recorded for a scientific name.

    Selects the rows of the module-level ``bird_df`` table whose ``SCI_NAME``
    equals ``scientific_name`` exactly and returns the ``SPECIES_CODE`` value
    of the first matching row.

    Args:
        scientific_name: Scientific name to look up (exact match).

    Returns:
        The ``SPECIES_CODE`` value of the first matching row.

    Raises:
        IndexError: If no row has that scientific name.
    """
    # Single .loc call with row mask and column label instead of chained indexing.
    matches = bird_df.loc[bird_df['SCI_NAME'] == scientific_name, 'SPECIES_CODE']
    if matches.empty:
        # Same exception type an empty positional lookup raises, but with a
        # message that names the offending input.
        raise IndexError(
            f"No species code found for scientific name {scientific_name!r}"
        )
    return matches.iloc[0]
def get_bird_info(species_code: str, timeout: float = 30.0):
    """Fetch the eBird taxonomy response for one species.

    Sends a GET request to ``REQ_FMT["url"]`` with ``REQ_FMT["headers"]`` and
    a ``species`` query parameter, and returns the raw response body. The HTTP
    status code is not checked.

    Args:
        species_code: Species code sent as the ``species`` query parameter.
        timeout: Seconds ``requests`` waits for the server before raising a
            timeout error.

    Returns:
        The response body as bytes (``response.content``).
    """
    # Build the query parameters locally instead of writing them into the
    # shared REQ_FMT template, so a call leaves no module state behind.
    params = {"species": species_code}
    # NOTE(review): verify=False disables TLS certificate verification;
    # confirm this is still required.
    response = requests.get(
        REQ_FMT["url"],
        headers=REQ_FMT["headers"],
        params=params,
        verify=False,
        timeout=timeout,
    )
    return response.content
def download_images(url, folder_path='assets', timeout=30.0):
    """Download the asset images referenced by a web page.

    Fetches ``url``, collects the ``src`` of every ``<img>`` tag, keeps the
    URLs that contain both ``"api"`` and ``"asset"``, and saves each one as
    ``image_<n>.jpg`` (numbered from 1) inside ``folder_path``. Progress and
    failures are reported with ``print``; a failed image does not stop the
    remaining downloads.

    Args:
        url: Address of the page to scan for images.
        folder_path: Directory the images are written to; created if missing.
        timeout: Seconds to wait on the page request and on each image
            request before giving up.

    Returns:
        None. Returns early, after printing the status code, when the page
        request does not answer with HTTP 200.
    """
    # exist_ok avoids the race between a separate existence check and creation.
    os.makedirs(folder_path, exist_ok=True)

    # NOTE(review): verify=False disables TLS certificate verification;
    # confirm this is still required.
    response = requests.get(url, verify=False, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    # Resolve each src against the page URL so relative links become absolute.
    absolute_urls = (
        urljoin(url, img['src'])
        for img in soup.find_all('img')
        if 'src' in img.attrs
    )
    img_urls = [el for el in absolute_urls if "api" in el and "asset" in el]

    for i, img_url in enumerate(img_urls, start=1):
        img_path = os.path.join(folder_path, f'image_{i}.jpg')
        try:
            # Read the whole image before opening the target file so a failed
            # transfer does not leave a partial file behind.
            with urllib.request.urlopen(img_url, timeout=timeout) as remote:
                payload = remote.read()
            with open(img_path, 'wb') as out_file:
                out_file.write(payload)
            print(f"Downloaded: {img_url}")
        except Exception as e:
            # Deliberately broad: downloads are best-effort per image.
            print(f"Failed to download {img_url}. Error: {e}")
if __name__ == '__main__':
    # Example run: resolve a species code from its scientific name, print the
    # raw taxonomy response, then download the images found on that species'
    # eBird page. (The taxonomy CSV is already loaded at module level, so it
    # is not read again here.)
    scode = scientific_to_species_code("Melanocharis striativentris")
    print(get_bird_info(scode))
    download_images(f"https://ebird.org/species/{scode}")