# Source: Hugging Face Space "biodivx/voj" (status: Sleeping), file fetch_img.py.
# Last commit: "remove local storage of images" (503f310) by amroa.
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import urllib.request
import pandas as pd
import warnings
from requests.packages.urllib3.exceptions import InsecureRequestWarning
warnings.simplefilter('ignore', InsecureRequestWarning)
# Request template for the eBird taxonomy endpoint: target URL, query
# parameters and authentication header. The "species" value is only a
# placeholder; the real species code is supplied at request time.
REQ_FMT = {
    "url": "https://api.ebird.org/v2/ref/taxonomy/ebird",
    "params": {
        "species": "CHANGE THIS TO SPECIES CODE",
    },
    "headers": {
        # NOTE(review): an API token committed to source control is a leaked
        # credential. Set EBIRD_API_TOKEN in the environment to override it;
        # the literal is kept only as a backward-compatible fallback and
        # should be rotated and removed.
        "X-eBirdApiToken": os.environ.get("EBIRD_API_TOKEN", "id1a0e3q2lt3"),
    },
}
# Taxonomy table, read once when the module is imported. The lookup helper in
# this file searches its SCI_NAME column and returns values from SPECIES_CODE.
bird_df = pd.read_csv(filepath_or_buffer="ebird_taxonomy_v2023.csv")
def scientific_to_species_code(scientific_name: str):
    """Look up the species code for a scientific name.

    The name is matched as a literal, case-sensitive substring of the
    ``SCI_NAME`` column of the module-level ``bird_df`` table. Rows whose
    name is missing never match.

    Args:
        scientific_name: Scientific name (or a fragment of one) to search for.

    Returns:
        The ``SPECIES_CODE`` value of the first matching row, or an empty
        list when no row matches.
    """
    # regex=False: the argument is plain text, not a pattern. With the default
    # regex matching, a name containing metacharacters such as
    # "Columba livia (Feral Pigeon)" would fail to match itself.
    is_match = bird_df["SCI_NAME"].str.contains(
        scientific_name, na=False, regex=False
    )
    codes = bird_df.loc[is_match, "SPECIES_CODE"]
    # Series.empty is an explicit emptiness check; it avoids relying on the
    # truth value of an array object.
    if codes.empty:
        return []
    return codes.iloc[0]
# Gets taxonomical info on a bird. Within this file it is only called from the
# __main__ demo.
def get_bird_info(species_code: str, timeout: float = 30):
    """Fetch the eBird taxonomy record for one species code.

    Args:
        species_code: Value sent as the ``species`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        The raw response body as bytes. The HTTP status code is not checked.
    """
    # Build the query per call rather than writing into the shared REQ_FMT
    # template, so one call cannot leak its species code into another.
    query = {"species": species_code}
    response = requests.get(
        REQ_FMT["url"],
        headers=REQ_FMT["headers"],
        params=query,
        verify=False,  # TLS verification is disabled, as elsewhere in this file
        timeout=timeout,
    )
    return response.content
def download_images(url, timeout=30):
    """Collect the asset image URLs referenced by a web page.

    Despite the name, nothing is downloaded or written to disk: the page is
    fetched, its ``<img>`` tags are scanned, and the matching URLs are
    returned.

    Args:
        url: Address of the page to scan; also the base for resolving
            relative ``src`` values.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        A list of absolute image URLs that contain both "api" and "asset",
        or None (after printing a message) when the page does not answer
        with HTTP 200.
    """
    # Fetch the HTML content of the webpage
    response = requests.get(url, verify=False, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return None

    # Parse the HTML and resolve every <img src=...> against the page URL
    soup = BeautifulSoup(response.content, 'html.parser')
    resolved = (
        urljoin(url, img['src'])
        for img in soup.find_all('img')
        if 'src' in img.attrs
    )
    # Keep only URLs that contain both substrings
    return [link for link in resolved if "api" in link and "asset" in link]
if __name__ == '__main__':
    # Demo run for one hard-coded species: resolve its species code, print the
    # raw taxonomy response, then print the image URLs found on its eBird page.
    # The taxonomy CSV is already loaded into bird_df at import time, so it is
    # not read a second time here.
    scode = scientific_to_species_code("Melanocharis striativentris")
    print(get_bird_info(scode))
    # The URL list was previously computed and discarded; show it instead.
    print(download_images(f"https://ebird.org/species/{scode}"))