Nicky Nicolson
committed on
Commit
•
16e5414
1
Parent(s):
c3dab06
Get download metadata from GBIF, write doi/licence to datasette metadata
Browse files- Dockerfile +1 -1
- getdDownloadMetadata.py +23 -0
- metadata.json +3 -1
- requirements.txt +2 -1
Dockerfile
CHANGED
@@ -25,6 +25,6 @@ RUN sqlite-utils enable-fts /code/gbifocc.db gbifocc collectorNameAndNumber
|
|
25 |
|
26 |
RUN chmod 755 /code/gbifocc.db
|
27 |
|
28 |
-
|
29 |
|
30 |
CMD ["datasette", "/code/gbifocc.db", "-m", "/code/metadata.json", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
25 |
|
26 |
RUN chmod 755 /code/gbifocc.db
|
27 |
|
28 |
+
RUN python getDownloadMetadata.py ./metadata.json /code/metadata.json --download_id=$GBIF_DOWNLOAD_ID
|
29 |
|
30 |
CMD ["datasette", "/code/gbifocc.db", "-m", "/code/metadata.json", "--host", "0.0.0.0", "--port", "7860"]
|
getdDownloadMetadata.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
from pygbif import occurrences as occ
|
3 |
+
import json
|
4 |
+
|
5 |
+
def with_gbif_provenance(datasette_metadata, gbif_metadata):
    """Return a copy of *datasette_metadata* annotated with GBIF provenance.

    Args:
        datasette_metadata: Parsed Datasette metadata (a JSON object / dict).
        gbif_metadata: Download metadata as returned by
            ``pygbif.occurrences.download_meta``; the ``license`` and ``doi``
            entries are read.

    Returns:
        A new dict holding every original entry plus ``licence``, ``license``
        and ``source_url``. The input mapping is left unmodified.

    Raises:
        KeyError: If *gbif_metadata* has no ``license`` or ``doi`` entry.
    """
    enriched = dict(datasette_metadata)

    licence = gbif_metadata['license']
    # 'licence' is the key this script has always written; it is kept so
    # anything already reading it continues to work.
    enriched['licence'] = licence
    # Datasette's own metadata key uses the American spelling, so the value
    # is also stored under 'license' for Datasette to pick up.
    # NOTE(review): GBIF appears to report the licence as a URL - if so,
    # 'license_url' may be the better target; confirm against a real download.
    enriched['license'] = licence

    # The DOI must be separated from the resolver host by a slash, otherwise
    # the result is an unresolvable URL such as "https://doi.org10.15468/...".
    enriched['source_url'] = 'https://doi.org/{}'.format(gbif_metadata['doi'])
    return enriched


def main(argv=None):
    """Read Datasette metadata, add GBIF licence/DOI details, write it out.

    Args:
        argv: Command-line arguments to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("inputfile")
    parser.add_argument("--download_id", type=str)
    parser.add_argument("outputfile")

    args = parser.parse_args(argv)

    # Fail with a clear usage error instead of sending a missing/empty key to
    # the GBIF API (e.g. when the value comes from an unset build variable).
    if not args.download_id:
        parser.error("--download_id is required and must not be empty")

    with open(args.inputfile, 'r', encoding='utf-8') as f_in:
        datasette_metadata = json.load(f_in)

    gbif_metadata = occ.download_meta(key=args.download_id)

    with open(args.outputfile, 'w', encoding='utf-8') as f_out:
        json.dump(with_gbif_provenance(datasette_metadata, gbif_metadata), f_out)


if __name__ == '__main__':
    main()
|
metadata.json
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
{
|
2 |
-
"title": "
|
|
|
|
|
3 |
"databases": {
|
4 |
"gbifocc": {
|
5 |
"tables": {
|
|
|
1 |
{
|
2 |
+
"title": "GBIF-mediated specimen occurrences",
|
3 |
+
"description": "This is a datasette instance containing GBIF-mediated specimen occurrences. It can be used to browse specimen records (with options to filter and facet records) and to run SQL queries. It is also configured to run an Open Refine compatible reconciliation service on collector name and number, allowing a user to easily link specimen references (as found in taxonomic literature) to these specimen records.",
|
4 |
+
"source": "Global Biodiversity Information Facility (GBIF)",
|
5 |
"databases": {
|
6 |
"gbifocc": {
|
7 |
"tables": {
|
requirements.txt
CHANGED
@@ -6,4 +6,5 @@ csvs-to-sqlite
|
|
6 |
pandas==1.5.3
|
7 |
bananompy
|
8 |
datasette-jellyfish
|
9 |
-
tqdm
|
|
|
|
6 |
pandas==1.5.3
|
7 |
bananompy
|
8 |
datasette-jellyfish
|
9 |
+
tqdm
|
10 |
+
pygbif
|