giswqs committed on
Commit
1da1c98
1 Parent(s): 615838e

Initial commit

Browse files
.gitignore ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ data/*.csv
12
+ .vscode/
13
+ private/
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ pip-wheel-metadata/
27
+ share/python-wheels/
28
+ *.egg-info/
29
+ .installed.cfg
30
+ *.egg
31
+ MANIFEST
32
+
33
+ # PyInstaller
34
+ # Usually these files are written by a python script from a template
35
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
36
+ *.manifest
37
+ *.spec
38
+
39
+ # Installer logs
40
+ pip-log.txt
41
+ pip-delete-this-directory.txt
42
+
43
+ # Unit test / coverage reports
44
+ htmlcov/
45
+ .tox/
46
+ .nox/
47
+ .coverage
48
+ .coverage.*
49
+ .cache
50
+ nosetests.xml
51
+ coverage.xml
52
+ *.cover
53
+ *.py,cover
54
+ .hypothesis/
55
+ .pytest_cache/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98
+ __pypackages__/
99
+
100
+ # Celery stuff
101
+ celerybeat-schedule
102
+ celerybeat.pid
103
+
104
+ # SageMath parsed files
105
+ *.sage.py
106
+
107
+ # Environments
108
+ .env
109
+ .venv
110
+ env/
111
+ venv/
112
+ ENV/
113
+ env.bak/
114
+ venv.bak/
115
+
116
+ # Spyder project settings
117
+ .spyderproject
118
+ .spyproject
119
+
120
+ # Rope project settings
121
+ .ropeproject
122
+
123
+ # mkdocs documentation
124
+ /site
125
+
126
+ # mypy
127
+ .mypy_cache/
128
+ .dmypy.json
129
+ dmypy.json
130
+
131
+ # Pyre type checker
132
+ .pyre/
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2021 Qiusheng Wu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: sh setup.sh && streamlit run streamlit_app.py
app.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import scholarpy
2
+ import streamlit as st
3
+ from multiapp import MultiApp
4
+ from apps import grant, home, journal, orcid, publication, researcher
5
+
6
+ st.set_page_config(layout="wide")
7
+
8
+ if "dsl" not in st.session_state:
9
+ st.session_state["dsl"] = scholarpy.Dsl()
10
+
11
+ apps = MultiApp()
12
+
13
+ # Add all your applications here
14
+
15
+ apps.add_app("Home", home.app)
16
+ apps.add_app("Grant", grant.app)
17
+ apps.add_app("Journal", journal.app)
18
+ apps.add_app("Publication", publication.app)
19
+ apps.add_app("Researcher", researcher.app)
20
+ apps.add_app("ORCID", orcid.app)
21
+
22
+ # The main app
23
+ apps.run()
apps/google.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import scholarpy
3
+ import tempfile
4
+ import pandas as pd
5
+ import streamlit as st
6
+ import leafmap.foliumap as leafmap
7
+ import plotly.express as px
8
+ from scholarly import scholarly
9
+
10
+ # if "dsl" not in st.session_state:
11
+ # st.session_state["dsl"] = scholarpy.Dsl()
12
+
13
+
14
+ def app():
15
+
16
+ st.title("Search Google Scholar")
17
+
18
+ row1_col1, row1_col2 = st.columns([1, 1])
19
+ placeholder = st.empty()
20
+
21
+ with row1_col1:
22
+ name = st.text_input("Enter a researcher name:", "")
23
+
24
+ if name:
25
+ placeholder.text("Searching...")
26
+ if name not in st.session_state:
27
+ authors = scholarpy.get_author_list(name)
28
+ st.session_state[name] = authors
29
+ else:
30
+ authors = st.session_state[name]
31
+ placeholder.empty()
32
+
33
+ if len(authors) == 0:
34
+ with row1_col1:
35
+ st.write("No results found")
36
+ else:
37
+ with row1_col1:
38
+ st.write("Found {} results:".format(len(authors)))
39
+
40
+ author = st.selectbox("Select a researcher:", authors)
41
+
42
+ if author:
43
+ placeholder.text("Retrieving data...")
44
+ id = author.split("|")[1].strip()
45
+ if id not in st.session_state:
46
+ record = scholarpy.get_author_record(id=id)
47
+ st.session_state[id] = record
48
+ else:
49
+ record = st.session_state[id]
50
+ basics = scholarpy.get_author_basics(
51
+ record=record, return_df=True)
52
+ out_csv = os.path.join(tempfile.gettempdir(), "basics.csv")
53
+ basics.to_csv(out_csv, sep="\t", index=False)
54
+ df = pd.read_csv(out_csv, sep="\t")
55
+ with row1_col1:
56
+ st.header("Basic information")
57
+ markdown = f"""Google Scholar Profile: <https://scholar.google.com/citations?user={id}>"""
58
+ st.markdown(markdown)
59
+ if "url_picture" in record and len(record["url_picture"]) > 0:
60
+ st.image(record["url_picture"])
61
+ st.dataframe(df)
62
+ leafmap.st_download_button(
63
+ "Download data", df, csv_sep="\t")
64
+
65
+ pubs = scholarpy.get_author_pubs(record=record, return_df=True)
66
+ with row1_col1:
67
+ st.header("Publications")
68
+ st.text(f"Total number of publications: {len(pubs)}")
69
+ st.dataframe(pubs)
70
+ leafmap.st_download_button(
71
+ "Download data", pubs, csv_sep="\t")
72
+
73
+ pubs_stats, pubs_fig = scholarpy.author_pubs_by_year(
74
+ record=record, return_plot=True)
75
+ citations_stats, citations_fig = scholarpy.author_citations_by_year(
76
+ record=record, return_plot=True)
77
+
78
+ with row1_col2:
79
+ st.header("Plots")
80
+ st.plotly_chart(pubs_fig)
81
+ leafmap.st_download_button("Download data", pubs_stats,
82
+ file_name="data.csv", csv_sep="\t")
83
+ st.plotly_chart(citations_fig)
84
+ leafmap.st_download_button(
85
+ "Download data", citations_stats, file_name="data.csv", csv_sep="\t")
86
+ if len(record["coauthors"]) > 0:
87
+ st.header("Co-authors")
88
+ st.text(
89
+ "Co-authors listed on Google Scholar profile only.")
90
+ coauthors = scholarpy.get_author_coauthors(
91
+ record=record, return_df=True)
92
+ st.dataframe(coauthors)
93
+ leafmap.st_download_button(
94
+ "Download data", coauthors, file_name="data.csv", csv_sep="\t")
95
+ placeholder.empty()
apps/grant.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import scholarpy
3
+ import pandas as pd
4
+ import streamlit as st
5
+ import leafmap.foliumap as leafmap
6
+ import plotly.express as px
7
+
8
+ if "dsl" not in st.session_state:
9
+ st.session_state["dsl"] = scholarpy.Dsl()
10
+
11
+
12
def app():
    """Render the "Search Grants" page.

    Reads the shared query client from ``st.session_state["dsl"]``, collects
    the search options from the first row of widgets and, once a keyword has
    been entered, calls ``dsl.search_grants_by_keyword`` and shows the result
    as a table with a download button.  A second row of widgets optionally
    narrows the table to a single value of one column.
    """
    import ast  # local import: only needed to parse stringified country lists

    def _first_country_name(raw):
        """Return the name of the first funder country in ``raw`` ("" if none)."""
        try:
            # literal_eval accepts Python literals only, so text that came
            # back from the remote service cannot execute code the way the
            # previous eval() call could.
            countries = ast.literal_eval(raw) if isinstance(raw, str) else raw
            return countries[0]["name"]
        except (ValueError, SyntaxError, TypeError, IndexError, KeyError):
            # Missing, empty or malformed country data: leave the cell blank
            # instead of failing the whole page.
            return ""

    st.title("Search Grants")
    dsl = st.session_state["dsl"]

    column_widths = [1, 0.5, 1, 1, 1]
    row1_col1, row1_col2, row1_col3, row1_col4, row1_col5 = st.columns(
        column_widths
    )
    row2_col1, row2_col2, row2_col3, row2_col4, row2_col5 = st.columns(
        column_widths
    )

    with row1_col1:
        keywords = st.text_input("Enter a keyword to search for")

    with row1_col2:
        exact_match = st.checkbox("Exact match", True)

    with row1_col3:
        scope = st.selectbox(
            "Select a search scope",
            [
                "concepts",
                "full_data",
                "investigators",
                "title_abstract_only",
                "title_only",
            ],
            index=4,
        )

    with row1_col4:
        years = st.slider("Select the start and end year:", 1950, 2030, (2010, 2025))

    with row1_col5:
        limit = st.slider("Select the number of grants to return", 1, 1000, 100)

    if keywords:
        result = dsl.search_grants_by_keyword(
            keywords,
            exact_match,
            scope,
            start_year=years[0],
            end_year=years[1],
            limit=limit,
        )
        df = scholarpy.json_to_df(result)

        # Report the number actually returned, which cannot exceed the total.
        if limit > result.count_total:
            limit = result.count_total
        markdown = f"""
        Returned grants: {limit} (total = {result.count_total})

        """
        with row2_col1:
            st.markdown(markdown)

        with row2_col2:
            apply_filter = st.checkbox("Apply a filter")

        if apply_filter:
            df["funder_country"] = [
                _first_country_name(row.funder_countries) for row in df.itertuples()
            ]
            with row2_col3:
                filter_by = st.selectbox(
                    "Select a filter",
                    [
                        "funder_country",
                        "funding_org_name",
                        "funding_org_acronym",
                        "research_org_name",
                    ],
                )

            # Cast to str so that missing values can be sorted with the rest.
            df["funding_org_acronym"] = df["funding_org_acronym"].astype(str)
            df["research_org_name"] = df["research_org_name"].astype(str)
            options = df[filter_by].unique()
            options.sort()

            with row2_col4:
                selected = st.selectbox("Select a filter value", options)
            df = df[df[filter_by] == selected]

            with row2_col5:
                st.write("")

        if df is not None:
            st.dataframe(df)
            leafmap.st_download_button("Download data", df, csv_sep="\t")
apps/h_index.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dimcli
2
+ import pandas as pd
3
+ import sys
4
+ import os
5
+ import streamlit as st
6
+ import scholarpy
7
+
8
+ if "dsl" not in st.session_state:
9
+ st.session_state["dsl"] = scholarpy.Dsl()
10
+
11
+
12
+ @st.cache
13
def the_H_function(sorted_citations_list, n=1):
    """Return the h-index for citation counts sorted in descending order.

    The h-index is the largest rank ``h`` (1-based) such that the publication
    at rank ``h`` has at least ``h`` citations.

    Args:
        sorted_citations_list: Citation counts, one per publication, ordered
            from most cited to least cited.  ``None`` or an empty list gives
            ``n - 1``.
        n: Rank assigned to the first element.  Callers normally leave this
            at 1; it is kept for compatibility with the earlier recursive
            implementation, which used it as its running counter.

    Returns:
        The number of leading entries whose citation count is at least their
        rank, plus ``n - 1``.

    Examples:
        >>> the_H_function([10, 8, 5, 4, 3])
        4
        >>> the_H_function([25, 8, 5, 3, 3])
        3
        >>> the_H_function([1000, 20])
        2
    """
    rank = n
    # A plain loop replaces the former recursion on list slices, which copied
    # the list at every step and hit the recursion limit for a large h-index.
    for citations in sorted_citations_list or ():
        if not citations >= rank:
            break
        rank += 1
    return rank - 1
26
+
27
+
28
def dim_login(key=None, endpoint=None):
    """Log in to Dimensions with ``dimcli`` and return a ``dimcli.Dsl`` object.

    Args:
        key: API key.  When None, the value of the ``DIM_TOKEN`` environment
            variable is used.
        endpoint: API endpoint URL.  When None, ``https://app.dimensions.ai``
            is used.

    Returns:
        The object created by ``dimcli.Dsl()`` after a successful login.

    Raises:
        Exception: "Failed to login to Dimensions" if the login or the Dsl
            creation fails; the original error is attached as the cause.
    """
    # Resolve the defaults into the same names that are used below, so that
    # explicitly passed values are honoured as well.
    if key is None:
        key = os.environ.get("DIM_TOKEN")
    if endpoint is None:
        endpoint = "https://app.dimensions.ai"

    try:
        dimcli.login(key=key, endpoint=endpoint)
        return dimcli.Dsl()
    except Exception as err:
        raise Exception("Failed to login to Dimensions") from err
42
+
43
+
44
+ @st.cache
45
def get_pubs_df(dsl, researcher_id):
    """Query the publications of one researcher and return them as a DataFrame.

    Args:
        dsl: Query client exposing ``query(text)``; the result must provide
            ``as_dataframe()``.
        researcher_id: Researcher identifier inserted into the query text.

    Returns:
        Whatever ``as_dataframe()`` yields for the query result (at most 1000
        publications with id, title, doi and times_cited, as requested by the
        query).
    """
    query_text = f"""search publications where researchers.id = "{researcher_id}" return publications[id+title+doi+times_cited] sort by times_cited limit 1000"""
    response = dsl.query(query_text)
    return response.as_dataframe()
51
+
52
+
53
+ @st.cache
54
def get_citations(df):
    """Return the ``times_cited`` column as a list, with missing values as 0.

    Args:
        df: DataFrame containing a ``times_cited`` column.

    Returns:
        A list with one entry per row of ``df``.
    """
    without_gaps = df.fillna(0)
    return list(without_gaps["times_cited"])
56
+
57
+
58
def app():
    """Render the H-index page.

    Asks for a researcher ID, shows that researcher's publications as a table
    and writes the h-index computed from their citation counts.
    """
    dsl = st.session_state["dsl"]

    researcher_id = st.text_input("Enter researcher ID:", "ur.013632443777.66")
    pubs_df = get_pubs_df(dsl, researcher_id)
    st.dataframe(pubs_df)

    h_index = the_H_function(get_citations(pubs_df))
    st.write(f"H-index: {h_index}")
apps/home.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+
4
+ def app():
5
+ st.title("Home")
6
+ st.text(
7
+ "Welcome to the Scholar Web App. Click on the left sidebar menu to explore."
8
+ )
9
+ markdown = """
10
+ Disclaimer: The data records are pulled from the [Dimensions.ai database](https://app.dimensions.ai), which might not be the most complete bibliometric database.
11
+ We plan to incorporate [Scopus](https://www.scopus.com) and [Google Scholar](https://scholar.google.com) in the near future. Don't be surprised if you see that
12
+ your publication records are not the same as your Google Scholar profile. This is a very preliminary version. A lot more features will be added in the future.
13
+ We would welcome any feedback. Please send feedback to Qiusheng Wu (qwu18@utk.edu).
14
+ """
15
+ st.info(markdown)
16
+ st.image("https://i.imgur.com/ZNUJ9fF.gif")
apps/journal.py ADDED
@@ -0,0 +1,310 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import dimcli
4
+ import pandas as pd
5
+ import plotly.express as px
6
+ import streamlit as st
7
+ import scholarpy
8
+ import leafmap.foliumap as leafmap
9
+
10
+ if "dsl" not in st.session_state:
11
+ st.session_state["dsl"] = scholarpy.Dsl()
12
+
13
+ # create output data folder
14
+ FOLDER_NAME = "data"
15
+ if not (os.path.exists(FOLDER_NAME)):
16
+ os.mkdir(FOLDER_NAME)
17
+
18
+
19
def save(df, filename_dot_csv):
    """Write ``df`` as CSV (without the index) into the ``FOLDER_NAME`` folder.

    Args:
        df: DataFrame to write.
        filename_dot_csv: File name, including the ``.csv`` extension.
    """
    target = f"{FOLDER_NAME}/{filename_dot_csv}"
    df.to_csv(target, index=False)
21
+
22
+
23
def read(filename_dot_csv):
    """Load a CSV file from the ``FOLDER_NAME`` folder.

    Args:
        filename_dot_csv: File name, including the ``.csv`` extension.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    source = f"{FOLDER_NAME}/{filename_dot_csv}"
    return pd.read_csv(source)
26
+
27
+
28
+ @st.cache
29
def get_token():
    """Return the value of the ``DIM_TOKEN`` environment variable, or None."""
    token = os.environ.get("DIM_TOKEN")
    return token
32
+
33
+
34
+ @st.cache
35
def get_journals():
    """Load and return the parsed content of ``data/journals.json``.

    Returns:
        The object produced by ``json.load`` for that file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Read as UTF-8 explicitly: the platform default encoding (for example
    # cp1252 on Windows) would mis-decode non-ASCII text in the file.
    with open("data/journals.json", encoding="utf-8") as f:
        return json.load(f)
41
+
42
+
43
+ @st.cache
44
def read_excel(sheet_name):
    """Read one sheet of ``data/journals.xlsx``, indexed by its ``Rank`` column.

    Args:
        sheet_name: Name of the worksheet to load.

    Returns:
        The sheet as a DataFrame whose index is the ``Rank`` column.
    """
    sheet = pd.read_excel(
        "data/journals.xlsx",
        sheet_name=sheet_name,
        index_col=False,
        engine="openpyxl",
    )
    return sheet.set_index("Rank")
51
+
52
+
53
+ def app():
54
+
55
+ st.title("Search Journals")
56
+ dsl = st.session_state["dsl"]
57
+ search_type = st.radio(
58
+ "Select a search type",
59
+ ["Search by journal title", "List Google Scholar journal categories"],
60
+ )
61
+
62
+ if search_type == "Search by journal title":
63
+ row1_col1, row1_col2, row1_col3, _ = st.columns([1, 1, 2, 1])
64
+ with row1_col1:
65
+ name = st.text_input("Enter a journal title")
66
+
67
+ with row1_col2:
68
+ exact_match = st.checkbox("Exact match")
69
+
70
+ with row1_col3:
71
+ options = [
72
+ "book",
73
+ "book_series",
74
+ "proceeding",
75
+ "journal",
76
+ "preprint_platform",
77
+ ]
78
+ types = st.multiselect(
79
+ "Select journal types", options, ["journal", "book_series"]
80
+ )
81
+
82
+ if name:
83
+ result = dsl.search_journal_by_title(name, exact_match=exact_match)
84
+ if result is not None:
85
+ titles = result.as_dataframe()
86
+ titles = titles[titles["type"].isin(types)]
87
+ titles.sort_values("title", inplace=True)
88
+ else:
89
+ titles = pd.DataFrame()
90
+ # titles = titles.astype({"start_year": int})
91
+ if not titles.empty:
92
+
93
+ markdown = f"""
94
+ Returned Journals: {len(titles)}
95
+
96
+ """
97
+ st.markdown(markdown)
98
+
99
+ st.dataframe(titles)
100
+ titles["uid"] = (
101
+ titles["id"] + " | " + titles["type"] + " | " + titles["title"]
102
+ )
103
+
104
+ row2_col1, row2_col2, row2_col3, row2_col4, row2_col5 = st.columns(
105
+ [2.4, 1, 0.6, 1, 1]
106
+ )
107
+
108
+ with row2_col1:
109
+ title = st.selectbox(
110
+ "Select a journal title", titles["uid"].values.tolist()
111
+ )
112
+
113
+ with row2_col2:
114
+ keyword = st.text_input("Enter a keyword to search for")
115
+
116
+ with row2_col3:
117
+ exact_match = st.checkbox("Exact match", True)
118
+
119
+ with row2_col4:
120
+ scope = st.selectbox(
121
+ "Select a search scope",
122
+ [
123
+ "authors",
124
+ "concepts",
125
+ "full_data",
126
+ "full_data_exact",
127
+ "title_abstract_only",
128
+ "title_only",
129
+ ],
130
+ index=5,
131
+ )
132
+
133
+ with row2_col5:
134
+ years = st.slider(
135
+ "Select the start and end year:", 1950, 2021, (1980, 2021)
136
+ )
137
+
138
+ if title:
139
+ journal_id = title.split(" | ")[0]
140
+ if keyword:
141
+ pubs = dsl.search_pubs_by_keyword(
142
+ keyword, exact_match, scope, years[0], years[1], journal_id
143
+ )
144
+ else:
145
+ pubs = dsl.search_pubs_by_journal_id(
146
+ journal_id, years[0], years[1]
147
+ )
148
+ pubs_df = pubs.as_dataframe()
149
+ if pubs_df is not None and (not pubs_df.empty):
150
+ st.write(
151
+ f"Total number of pulications: {pubs.count_total:,}. Display {min(pubs.count_total, 1000)} publications below."
152
+ )
153
+ try:
154
+ st.dataframe(pubs_df)
155
+ except Exception as e:
156
+ st.dataframe(scholarpy.json_to_df(pubs))
157
+ # st.error("An error occurred: " + str(e))
158
+ leafmap.st_download_button(
159
+ "Download data", pubs_df, csv_sep="\t"
160
+ )
161
+ else:
162
+ st.text("No results found")
163
+
164
+ elif search_type == "List Google Scholar journal categories":
165
+
166
+ st.markdown(
167
+ """
168
+ The journal categories are adopted from [Google Scholar](https://scholar.google.com/citations?view_op=top_venues&hl=en&inst=9897619243961157265).
169
+ See the list of journals [here](https://docs.google.com/spreadsheets/d/1uCEi3TsJCWl9QEZimvjlM8wjt7hNq3QvMqHGeT44HXQ/edit?usp=sharing).
170
+ """
171
+ )
172
+
173
+ st.session_state["orcids"] = None
174
+ # dsl = st.session_state["dsl"]
175
+ # token = get_token()
176
+ # dimcli.login(key=token, endpoint="https://app.dimensions.ai")
177
+ # dsl = dimcli.Dsl()
178
+
179
+ categories = get_journals()
180
+
181
+ row1_col1, row1_col2, _, row1_col3 = st.columns([1, 1, 0.05, 1])
182
+
183
+ with row1_col1:
184
+ category = st.selectbox("Select a category:", categories.keys())
185
+
186
+ if category:
187
+ with row1_col2:
188
+ journal = st.selectbox("Select a journal:", categories[category].keys())
189
+
190
+ with row1_col3:
191
+ years = st.slider(
192
+ "Select the start and end year:", 1950, 2021, (1980, 2021)
193
+ )
194
+
195
+ if journal:
196
+ pubs = read_excel(sheet_name=category)
197
+ with st.expander("Show journal metrics"):
198
+ st.dataframe(pubs)
199
+
200
+ journal_id = categories[category][journal]
201
+ if journal_id is not None and str(journal_id).startswith("jour"):
202
+ q_template = """search publications where
203
+ journal.id="{}" and
204
+ year>={} and
205
+ year<={}
206
+ return publications[id+title+doi+year+authors+type+pages+journal+issue+volume+altmetric+times_cited]
207
+ limit 1000"""
208
+ q = q_template.format(journal_id, years[0], years[1])
209
+ else:
210
+ q_template = """search publications where
211
+ journal.title="{}" and
212
+ year>={} and
213
+ year<={}
214
+ return publications[id+title+doi+year+authors+type+pages+journal+issue+volume+altmetric+times_cited]
215
+ limit 1000"""
216
+ q = q_template.format(journal, years[0], years[1])
217
+
218
+ pubs = dsl.query(q)
219
+ if pubs.count_total > 0:
220
+ st.header("Publications")
221
+ st.write(
222
+ f"Total number of pulications: {pubs.count_total:,}. Display 1,000 publications below."
223
+ )
224
+ df_pubs = pubs.as_dataframe()
225
+ save(df_pubs, "publications.csv")
226
+ df_pubs = read("publications.csv")
227
+ st.dataframe(df_pubs)
228
+
229
+ st.header("Authors")
230
+ authors = pubs.as_dataframe_authors()
231
+ st.write(
232
+ f"Total number of authors of the 1,000 pubs shown above: {authors.shape[0]:,}"
233
+ )
234
+ save(authors, "authors.csv")
235
+ df_authors = read("authors.csv")
236
+ st.dataframe(df_authors)
237
+
238
+ df_authors_orcid = df_authors[~df_authors["orcid"].isna()]
239
+ # st.dataframe(df_authors_orcid)
240
+ orcids = list(set(df_authors_orcid["orcid"].values.tolist()))
241
+ orcids = [i[2:21] for i in orcids]
242
+ orcids.sort()
243
+ # st.write(orcids)
244
+ st.session_state["orcids"] = orcids
245
+
246
+ st.header("Affiliations")
247
+ affiliations = pubs.as_dataframe_authors_affiliations()
248
+ st.write(
249
+ f"Total number of affiliations of the 1,000 pubs shown above: {affiliations.shape[0]:,}"
250
+ )
251
+ save(affiliations, "affiliations.csv")
252
+ df_affiliations = read("affiliations.csv")
253
+ st.dataframe(df_affiliations)
254
+
255
+ researchers = authors.query("researcher_id!=''")
256
+ #
257
+ df_researchers = pd.DataFrame(
258
+ {
259
+ "measure": [
260
+ "Authors in total (non unique)",
261
+ "Authors with a researcher ID",
262
+ "Authors with a researcher ID (unique)",
263
+ ],
264
+ "count": [
265
+ len(authors),
266
+ len(researchers),
267
+ researchers["researcher_id"].nunique(),
268
+ ],
269
+ }
270
+ )
271
+ fig_researchers = px.bar(
272
+ df_researchers,
273
+ x="measure",
274
+ y="count",
275
+ title=f"Author Research ID stats for {journal} ({years[0]}-{years[1]})",
276
+ )
277
+
278
+ orcids = authors.query("orcid!=''")
279
+ #
280
+ df_orcids = pd.DataFrame(
281
+ {
282
+ "measure": [
283
+ "Authors in total (non unique)",
284
+ "Authors with a ORCID",
285
+ "Authors with a ORCID (unique)",
286
+ ],
287
+ "count": [
288
+ len(authors),
289
+ len(orcids),
290
+ orcids["orcid"].nunique(),
291
+ ],
292
+ }
293
+ )
294
+ fig_orcids = px.bar(
295
+ df_orcids,
296
+ x="measure",
297
+ y="count",
298
+ title=f"Author ORCID stats for {journal} ({years[0]}-{years[1]})",
299
+ )
300
+
301
+ st.header("Stats")
302
+
303
+ row2_col1, row1_col2 = st.columns(2)
304
+ with row2_col1:
305
+ st.plotly_chart(fig_researchers)
306
+ with row1_col2:
307
+ st.plotly_chart(fig_orcids)
308
+
309
+ else:
310
+ st.warning("No publications found")
apps/orcid.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from geemap.common import ee_initialize
2
+ import requests
3
+ import folium
4
+ import scholarpy
5
+ import streamlit as st
6
+ import geemap.foliumap as geemap
7
+
8
+ if "dsl" not in st.session_state:
9
+ st.session_state["dsl"] = scholarpy.Dsl()
10
+
11
+
12
def get_orcid_data(orcid, info_type=None):
    """Retrieve ORCID data based on an ORCID and information type.

    Args:
        orcid (str): The ORCID to retrieve data for, e.g., 0000-0001-5437-4073
        info_type (str): The type of information to retrieve, e.g.,
            educations, employments, works. When None, the URL of the record
            itself is requested.

    Returns:
        dict: The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            when no response arrives within the timeout.
    """
    url = f"https://pub.orcid.org/v3.0/{orcid}"
    if info_type is not None:
        url = f"{url}/{info_type}"

    # requests waits forever by default; bound the wait so that a stalled
    # connection cannot hang the page.
    response = requests.get(
        url,
        headers={"Accept": "application/vnd.orcid+json"},
        timeout=30,
    )
    return response.json()
33
+
34
+
35
def get_education_data(orcid):
    """Collect geocoded education records for an ORCID.

    Fetches the "educations" section through ``get_orcid_data`` and geocodes
    each organization address with ``geemap.geocode``.  The function is
    best-effort: a record that is incomplete or cannot be geocoded is skipped
    and the remaining records are still returned.

    Args:
        orcid (str): The ORCID to look up.

    Returns:
        dict: Maps each role title to a dict with the keys ``name``,
        ``organization``, ``start_year``, ``end_year``, ``city``, ``region``,
        ``country``, ``address``, ``lat`` and ``lng``.  Records sharing a role
        title overwrite one another (the last one wins).  Empty when nothing
        usable was found.
    """

    def _year(date):
        """Return the year of an ORCID date entry, or None if it is absent."""
        # NOTE(review): presumably ORCID leaves a date empty for unknown or
        # ongoing periods; treat that as "no year" rather than an error.
        try:
            return date["year"]["value"]
        except (KeyError, TypeError):
            return None

    result = get_orcid_data(orcid, "educations")
    info_dict = {}

    # A response without "affiliation-group" (presumably an error payload)
    # simply yields no records.
    for affiliation in result.get("affiliation-group") or []:
        try:
            summary = affiliation["summaries"][0]["education-summary"]
            name = summary["source"]["source-name"]["value"]
            role = summary["role-title"]
            organization = summary["organization"]["name"]
            start_year = _year(summary.get("start-date"))
            end_year = _year(summary.get("end-date"))

            location = summary["organization"]["address"]
            city = location["city"]
            region = location["region"]
            country = location["country"]
            # Leave out the parts that are missing, e.g. no region.
            address = ", ".join(part for part in (city, region, country) if part)

            coords = geemap.geocode(address)[0]

            info_dict[role] = {
                "name": name,
                "organization": organization,
                "start_year": start_year,
                "end_year": end_year,
                "city": city,
                "region": region,
                "country": country,
                "address": address,
                "lat": coords.lat,
                "lng": coords.lng,
            }
        except Exception:
            # Deliberately broad to stay best-effort, but scoped to a single
            # record so one bad entry no longer discards all the later ones.
            continue

    return info_dict
92
+
93
+
94
+ def app():
95
+
96
+ dsl = st.session_state["dsl"]
97
+ st.title("Retrieve ORCID Data")
98
+ m = geemap.Map(center=(20, 0), zoom=2, ee_initialize=False)
99
+
100
+ row1_col1, row1_col2 = st.columns(2)
101
+
102
+ with row1_col1:
103
+ name = st.text_input("Enter a researcher name", "")
104
+
105
+ if name:
106
+ orcids = dsl.search_orcid_by_name(name, return_list=True)
107
+ with row1_col2:
108
+ if orcids is not None:
109
+ selected = st.selectbox("Select an ORCID", orcids)
110
+ else:
111
+ selected = None
112
+ st.write("No ORCID found.")
113
+
114
+ # orcids = ["0000-0001-5437-4073", "0000-0001-6157-5519"]
115
+ # if st.session_state.get("orcids", []) is not None:
116
+ # orcids = orcids + st.session_state.get("orcids", [])
117
+ # selected_orcid = st.selectbox("Select an ORCID:", orcids)
118
+
119
+ # with row1_col2:
120
+ # orcid = st.text_input("Enter an ORCID:", selected_orcid)
121
+
122
+ row2_col1, row2_col2 = st.columns([1, 1])
123
+
124
+ if selected is not None:
125
+ orcid = selected.split("|")[1].strip()
126
+ education_data = get_education_data(orcid)
127
+ roles = list(education_data.keys())
128
+
129
+ for role in roles:
130
+ popup = f"<b>Name: </b>{education_data[role]['name']}<br><b>Organization: </b>{education_data[role]['organization']}<br><b>Degree: </b>{role}"
131
+ marker = folium.Marker(
132
+ [education_data[role]["lat"], education_data[role]["lng"]],
133
+ popup=popup,
134
+ )
135
+ marker.add_to(m)
136
+
137
+ with row2_col1:
138
+ markdown = f"""ORCID URL: <https://orcid.org/{orcid}>"""
139
+ st.markdown(markdown)
140
+ if len(education_data) > 0:
141
+ st.write("Education:")
142
+ st.write(education_data)
143
+ else:
144
+ st.write("No education data found.")
145
+ with row2_col2:
146
+ m.to_streamlit()
apps/organization.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import scholarpy
3
+ import pandas as pd
4
+ import streamlit as st
5
+ import leafmap.foliumap as leafmap
6
+ import plotly.express as px
7
+
8
+ if "dsl" not in st.session_state:
9
+ st.session_state["dsl"] = scholarpy.Dsl()
10
+
11
+
12
+ def app():
13
+
14
+ st.title("Search Organizations")
15
+ dsl = st.session_state["dsl"]
16
+ row1_col1, row1_col2 = st.columns([1, 1])
17
+
18
+ with row1_col1:
19
+ name = st.text_input("Enter an organization name:", "")
20
+
21
+ if name:
22
+ orgs = dsl.search_org_by_name(
23
+ name, exact_match=False, return_list=True)
24
+
25
+ if orgs is not None:
26
+ with row1_col1:
27
+ selected_org = st.selectbox("Select a organization id:", orgs)
28
+ org_id = selected_org.split("|")[0].strip()
29
+
30
+ id_info = dsl.search_org_by_id(org_id)
31
+
32
+ info_df = scholarpy.json_to_df(id_info, transpose=True)
33
+ info_df.rename(
34
+ columns={info_df.columns[0]: "Type",
35
+ info_df.columns[1]: "Value"},
36
+ inplace=True,
37
+ )
38
+ with row1_col1:
39
+ st.header("Organization Information")
40
+ if not info_df.empty:
41
+ st.dataframe(info_df)
42
+ leafmap.st_download_button(
43
+ "Download data", info_df, csv_sep="\t"
44
+ )
45
+ else:
46
+ st.text("No information found")
47
+
48
+ with row1_col2:
49
+ years = st.slider(
50
+ "Select the start and end year:", 1950, 2030, (1980, 2021))
51
+ st.header("Publications by year")
52
+
53
+ pubs, fig = dsl.org_pubs_annual_stats(
54
+ org_id, start_year=years[0], end_year=years[1], return_plot=True)
55
+
56
+ st.text(
57
+ f'Total number of publications: {pubs["count"].sum():,}')
58
+
59
+ if fig is not None:
60
+ st.plotly_chart(fig)
61
+
62
+ leafmap.st_download_button(
63
+ "Download data",
64
+ pubs,
65
+ file_name="data.csv",
66
+ csv_sep="\t",
67
+ )
68
+ else:
69
+ st.text("No publications found")
70
+
71
+ with row1_col1:
72
+ st.header("Top funders")
73
+
74
+ funder_count = st.slider(
75
+ "Select the number of funders:", 1, 100, 20)
76
+
77
+ funders, fig = dsl.org_grant_funders(
78
+ org_id, limit=funder_count, return_plot=True)
79
+ st.text(
80
+ f'Total funding amount: ${funders["funding"].sum():,}')
81
+ if fig is not None:
82
+ st.plotly_chart(fig)
83
+ leafmap.st_download_button(
84
+ "Download data",
85
+ funders,
86
+ file_name="data.csv",
87
+ csv_sep="\t",
88
+ )
89
+ else:
90
+ st.text("No funders found")
91
+
92
+ with row1_col2:
93
+ st.header("The number of grants by year")
94
+ grants, fig_count, fig_amount = dsl.org_grants_annual_stats(
95
+ org_id, start_year=years[0], end_year=years[1], return_plot=True)
96
+
97
+ st.plotly_chart(fig_count)
98
+ st.plotly_chart(fig_amount)
99
+ leafmap.st_download_button(
100
+ "Download data",
101
+ grants,
102
+ file_name="data.csv",
103
+ csv_sep="\t",
104
+ )
105
+
106
+ with row1_col1:
107
+ st.header("List of grants")
108
+ st.text("Only the first 1000 grants are shown")
109
+ result = dsl.search_grants_by_org(
110
+ org_id, start_year=years[0], end_year=years[1])
111
+ df = result.as_dataframe()
112
+ if not df.empty:
113
+ st.dataframe(df)
114
+ leafmap.st_download_button(
115
+ "Download data", df, file_name="data.csv", csv_sep="\t"
116
+ )
117
+
118
+ with row1_col1:
119
+ st.header("Publications most cited in last 2 years")
120
+ result = dsl.org_pubs_most_cited(org_id, recent=True, limit=100)
121
+ df = scholarpy.json_to_df(result, transpose=False)
122
+ if not df.empty:
123
+ st.dataframe(df)
124
+ leafmap.st_download_button(
125
+ "Download data", df, file_name="data.csv", csv_sep="\t"
126
+ )
127
+
128
+ with row1_col2:
129
+ st.header("Publications most cited - all time")
130
+ result = dsl.org_pubs_most_cited(org_id, recent=False, limit=100)
131
+ df = scholarpy.json_to_df(result, transpose=False)
132
+ if not df.empty:
133
+ st.dataframe(df)
134
+ leafmap.st_download_button(
135
+ "Download data", df, file_name="data.csv", csv_sep="\t"
136
+ )
137
+
138
+ df, area_fig, journal_fig = dsl.org_pubs_top_areas(org_id, return_plot=True)
139
+ if not df.empty:
140
+ with row1_col1:
141
+ st.header("Research areas of most cited publications")
142
+ st.plotly_chart(area_fig)
143
+ # leafmap.st_download_button(
144
+ # "Download data", df, file_name="data.csv", csv_sep="\t"
145
+ # )
146
+ with row1_col2:
147
+ st.header("Journals of most cited publications")
148
+ st.plotly_chart(journal_fig)
149
+ leafmap.st_download_button(
150
+ "Download data", df, file_name="data.csv", csv_sep="\t"
151
+ )
152
+ else:
153
+ st.text("No organizations found")
apps/publication.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import scholarpy
3
+ import pandas as pd
4
+ import streamlit as st
5
+ import leafmap.foliumap as leafmap
6
+ import plotly.express as px
7
+
8
# Create one scholarpy.Dsl client per Streamlit session and keep it in
# st.session_state so that script reruns reuse it instead of rebuilding it.
if "dsl" not in st.session_state:
    st.session_state["dsl"] = scholarpy.Dsl()
10
+
11
+
12
def app():
    """Render the "Search Publications" page.

    Lays out the search controls (keyword, exact-match flag, search scope,
    year range and result limit), runs ``dsl.search_pubs_by_keyword`` when a
    keyword is entered, and shows the resulting table, a per-journal summary
    and a box plot of the number of affiliation countries per year.
    """
    st.title("Search Publications")
    dsl = st.session_state["dsl"]

    (
        row1_col1,
        row1_col2,
        row1_col3,
        row1_col4,
        row1_col5,
    ) = st.columns([1, 0.7, 1, 1, 1])

    row2_col1, row2_col2, row2_col3, row2_col4, row2_col5 = st.columns(
        [1, 0.7, 1, 1, 1]
    )

    with row1_col1:
        keywords = st.text_input("Enter a keyword to search for")

    with row1_col2:
        exact_match = st.checkbox("Exact match", True)

    with row1_col3:
        scope = st.selectbox(
            "Select a search scope",
            [
                "authors",
                "concepts",
                "full_data",
                "full_data_exact",
                "title_abstract_only",
                "title_only",
            ],
            index=5,
        )

    with row1_col4:
        years = st.slider("Select the start and end year:", 1950, 2030, (1980, 2022))

    with row1_col5:
        limit = st.slider("Select the number of publications to return", 1, 1000, 100)

    if not keywords:
        return

    result = dsl.search_pubs_by_keyword(
        keywords,
        exact_match,
        scope,
        start_year=years[0],
        end_year=years[1],
        limit=limit,
    )

    df = scholarpy.json_to_df(result)
    # Guard BEFORE touching df: the original only checked for None after the
    # merge below, so a search without results crashed instead of reporting it.
    # NOTE(review): assumes scholarpy.json_to_df yields None or an empty frame
    # when nothing matches -- confirm against scholarpy.
    if df is None or df.empty:
        with row2_col1:
            st.text("No publications found")
        return

    # Attach the distinct affiliation countries of every publication.
    affiliations = result.as_dataframe_authors_affiliations()
    country_df = affiliations.groupby(['pub_id'])['aff_country'].unique()
    df = df.merge(country_df, left_on='id', right_on='pub_id')

    # Count the non-empty country entries per publication.
    df['country_count'] = [c[c.astype(bool)].size for c in df['aff_country']]

    # Computed before the optional journal filter so the summary table always
    # covers the whole result set.
    journal_counts = df["journal.title"].value_counts()

    # Never report more returned rows than the query matched in total.
    shown = min(limit, result.count_total)
    markdown = f"""
    Returned Publications: {shown} (total = {result.count_total})

    """

    with row2_col1:
        st.markdown(markdown)

    with row2_col2:
        filter_by_journal = st.checkbox("Filter by journal")

    if filter_by_journal:
        df["journal.title"] = df["journal.title"].astype(str)
        journals = df["journal.title"].unique()
        journals.sort()
        with row2_col3:
            journal = st.selectbox("Select a journal", journals)
            df = df[df["journal.title"] == journal]

    with row2_col4:
        st.write("")

    with row2_col5:
        st.write("")

    st.dataframe(df)
    leafmap.st_download_button("Download data", df, csv_sep="\t")

    summary = pd.DataFrame(
        {"Journal": journal_counts.index, "Count": journal_counts}
    ).reset_index(drop=True)
    markdown = f"""
    - Total number of journals: **{len(summary)}**
    """

    row3_col1, row3_col2 = st.columns([1, 1])

    with row3_col1:
        st.markdown(markdown)
        st.dataframe(summary)
        leafmap.st_download_button("Download data", summary, csv_sep="\t")

    with row3_col2:
        fig = px.box(df, x='year', y='country_count', title='Country Counts')
        st.plotly_chart(fig)
121
+
122
+
apps/researcher.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import scholarpy
3
+ import pandas as pd
4
+ import streamlit as st
5
+ import leafmap.foliumap as leafmap
6
+ import plotly.express as px
7
+
8
# Create one scholarpy.Dsl client per Streamlit session and keep it in
# st.session_state so that script reruns reuse it instead of rebuilding it.
if "dsl" not in st.session_state:
    st.session_state["dsl"] = scholarpy.Dsl()
10
+
11
+
12
@st.cache(allow_output_mutation=True)
def get_geonames():
    """Return the result of ``scholarpy.get_geonames()``.

    The call is wrapped in ``st.cache`` so Streamlit can reuse the value
    across reruns instead of fetching it again.
    """
    geonames = scholarpy.get_geonames()
    return geonames
15
+
16
+
17
def json_to_df(json_data, transpose=False):
    """Convert a query result into a flat DataFrame via a CSV round trip.

    Args:
        json_data: Object exposing ``as_dataframe()``.
        transpose: When True the frame is transposed first and its index is
            written out, so it comes back as the first column.

    Returns:
        The re-parsed DataFrame, or None when ``as_dataframe()`` is empty.
    """
    import io

    df = json_data.as_dataframe()
    if df.empty:
        return None

    if transpose:
        df = df.transpose()

    # The CSV round trip is kept on purpose: it re-infers dtypes and, for the
    # transposed case, turns the index into a regular column. An in-memory
    # buffer replaces the temp file, which leaked if to_csv/read_csv raised.
    buffer = io.StringIO()
    df.to_csv(buffer, index=transpose)
    buffer.seek(0)
    return pd.read_csv(buffer)
30
+
31
+
32
def annual_pubs(pubs, col="year"):
    """Count publications per distinct value of ``col``.

    Returns a DataFrame with columns ``year`` and ``publications`` ordered by
    ascending ``col`` value, or None when ``pubs`` is None.
    """
    if pubs is None:
        return None

    per_year = pubs[col].value_counts().sort_index()
    return pd.DataFrame(
        {"year": per_year.index, "publications": per_year.values}
    )
39
+
40
+
41
def annual_collaborators(pubs, col="year"):
    """Build a bar chart of the summed ``authors_count`` per ``col`` value.

    Args:
        pubs: Publications DataFrame with ``col`` and ``authors_count``
            columns, or None.
        col: Column to group by.

    Returns:
        A plotly express bar figure, or None when ``pubs`` is None.
    """
    if pubs is None:
        return None

    # Sum only the column that is plotted: summing every column of the frame
    # is wasted work and can fail on non-numeric columns.
    totals = pubs.groupby(col)["authors_count"].sum()
    df2 = pd.DataFrame({"year": totals.index, "collaborators": totals.values})
    return px.bar(
        df2,
        x="year",
        y="collaborators",
    )
55
+
56
+
57
def annual_citations(pubs, col="year"):
    """Build a bar chart of the summed ``times_cited`` per ``col`` value.

    Args:
        pubs: Publications DataFrame with ``col`` and ``times_cited``
            columns, or None.
        col: Column to group by.

    Returns:
        A plotly express bar figure, or None when ``pubs`` is None.
    """
    if pubs is None:
        return None

    # Sum only the column that is plotted: summing every column of the frame
    # is wasted work and can fail on non-numeric columns.
    totals = pubs.groupby(col)["times_cited"].sum()
    df2 = pd.DataFrame({"year": totals.index, "citations": totals.values})
    return px.bar(
        df2,
        x="year",
        y="citations",
    )
70
+
71
+
72
def the_H_function(sorted_citations_list, n=1):
    """Return the h-index for citation counts sorted in descending order.

    Walks the list while the citation count at (1-based) position ``p`` is
    at least ``p`` and returns the last position for which that held.

    Implemented iteratively: the previous recursive version copied the list
    tail on every step and hit Python's recursion limit for long lists.

    Args:
        sorted_citations_list: Citation counts, largest first.
        n: Rank assigned to the first element (kept for backward
            compatibility; callers normally leave the default of 1).

    Returns:
        The largest rank reached, or ``n - 1`` if even the first element
        is below ``n`` (0 for an empty list with the default ``n``).

    eg
    >>> the_H_function([10, 8, 5, 4, 3])
    4
    >>> the_H_function([25, 8, 5, 3, 3])
    3
    >>> the_H_function([1000, 20])
    2
    """
    rank = n
    for citations in sorted_citations_list:
        if citations < rank:
            break
        rank += 1
    return rank - 1
85
+
86
+
87
def app():
    """Render the "Search Researchers" page.

    Looks researchers up by name, lets the user pick one of the matches and
    then shows the researcher's details, yearly statistics, a map of
    collaborator institutions, collaborators, publications (with citation
    metrics and per-journal counts) and grants.
    """
    st.title("Search Researchers")
    dsl = st.session_state["dsl"]
    row1_col1, row1_col2 = st.columns([1, 1])

    with row1_col1:
        name = st.text_input("Enter a researcher name:", "")

    if name:

        ids, names = dsl.search_researcher_by_name(name, return_list=True)
        if ids.count_total > 0:
            with row1_col1:
                name = st.selectbox("Select a researcher id:", names)

            if name:
                # The id is the part after the "|" separator of the option.
                # NOTE(review): assumes every option contains "|" -- confirm
                # against dsl.search_researcher_by_name(return_list=True).
                researcher_id = name.split("|")[1].strip()
                id_info = dsl.search_researcher_by_id(
                    researcher_id, return_df=False
                )

                # json_to_df returns None for an empty result, so guard the
                # rename and the emptiness check instead of crashing.
                info_df = json_to_df(id_info, transpose=True)
                if info_df is not None:
                    info_df.rename(
                        columns={info_df.columns[0]: "Type",
                                 info_df.columns[1]: "Value"},
                        inplace=True,
                    )
                with row1_col1:
                    st.header("Researcher Information")
                    if info_df is not None and not info_df.empty:
                        st.dataframe(info_df)
                        leafmap.st_download_button(
                            "Download data", info_df, csv_sep="\t"
                        )
                    else:
                        st.text("No information found")

                    pubs = dsl.search_pubs_by_researcher_id(researcher_id)
                    df = json_to_df(pubs)
                    if df is not None:
                        df1, df2 = dsl.researcher_annual_stats(
                            pubs, geonames_df=get_geonames()
                        )
                        df3 = scholarpy.collaborator_locations(df2)

                        with row1_col2:
                            st.header("Researcher statistics")
                            columns = ["pubs", "collaborators",
                                       "institutions", "cities"]
                            selected_columns = st.multiselect(
                                "Select attributes to display:", columns, columns
                            )
                            if selected_columns:
                                fig = scholarpy.annual_stats_barplot(
                                    df1, selected_columns)
                                st.plotly_chart(fig)
                                leafmap.st_download_button(
                                    "Download data",
                                    df1,
                                    file_name="data.csv",
                                    csv_sep="\t",
                                )

                            st.header("Map of collaborator institutions")
                            markdown = f"""
                            - Total number of collaborator institutions: **{len(df3)}**
                            """
                            st.markdown(markdown)
                            m = leafmap.Map(
                                center=[0, 0],
                                zoom_start=1,
                                latlon_control=False,
                                draw_control=False,
                                measure_control=False,
                                locate_control=True,
                            )
                            m.add_points_from_xy(df3)
                            m.to_streamlit(height=420)
                            leafmap.st_download_button(
                                "Download data",
                                df3,
                                file_name="data.csv",
                                csv_sep="\t",
                            )

                            st.header("Publication counts with collaborators")
                            collaborators = dsl.search_researcher_collaborators(
                                researcher_id, pubs)
                            markdown = f"""
                            - Total number of collaborators: **{len(collaborators)}**
                            """
                            st.markdown(markdown)
                            st.dataframe(collaborators)
                            leafmap.st_download_button(
                                "Download data",
                                collaborators,
                                file_name="data.csv",
                                csv_sep="\t",
                            )
                    else:
                        st.text("No publications found")

                with row1_col1:
                    st.header("Publications")
                    if df is not None:
                        # the_H_function expects citations sorted descending.
                        citations = df["times_cited"].values.tolist()
                        citations.sort(reverse=True)
                        h_index = the_H_function(citations)
                        markdown = f"""
                        - Total number of publications: **{len(df)}**
                        - Total number of citations: **{df["times_cited"].sum()}**
                        - i10-index: **{len(df[df["times_cited"]>=10])}**
                        - h-index: **{h_index}**
                        """
                        st.markdown(markdown)
                        st.dataframe(df)
                        leafmap.st_download_button(
                            "Download data", df, file_name="data.csv", csv_sep="\t"
                        )

                        if "journal.title" in df.columns:
                            st.header("Publication counts by journal")
                            journals = df["journal.title"].value_counts()
                            summary = pd.DataFrame(
                                {"Journal": journals.index, "Count": journals}
                            ).reset_index(drop=True)
                            markdown = f"""
                            - Total number of journals: **{len(summary)}**
                            """
                            st.markdown(markdown)
                            st.dataframe(summary)
                            leafmap.st_download_button(
                                "Download data",
                                summary,
                                file_name="data.csv",
                                csv_sep="\t",
                            )
                        else:
                            st.text("No journal publications")

                    else:
                        st.text("No publications found")

                    grants = dsl.search_grants_by_researcher(researcher_id)
                    df = grants.as_dataframe()
                    if not df.empty:
                        st.header("Grants")
                        st.dataframe(df)
                        leafmap.st_download_button(
                            "Download data", df, file_name="data.csv", csv_sep="\t"
                        )
        else:
            st.text("No results found.")
data/journals.json ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "All Categories": {
3
+ "Nature": "jour.1018957",
4
+ "The New England Journal of Medicine": "jour.1014075",
5
+ "Science": "jour.1346339",
6
+ "IEEE/CVF Conference on Computer Vision and Pattern Recognition": null,
7
+ "The Lancet": "jour.1077219",
8
+ "Advanced Materials": "jour.1129018",
9
+ "Cell": "jour.1019114",
10
+ "Nature Communications": null,
11
+ "Chemical Reviews": "jour.1077147",
12
+ "International Conference on Learning Representations": null,
13
+ "JAMA": "jour.1081531",
14
+ "Neural Information Processing Systems": null,
15
+ "Proceedings of the National Academy of Sciences": null,
16
+ "Journal of the American Chemical Society": null,
17
+ "Angewandte Chemie": null,
18
+ "Chemical Society Reviews": null,
19
+ "Nucleic Acids Research": null,
20
+ "Renewable and Sustainable Energy Reviews": null,
21
+ "Journal of Clinical Oncology": null,
22
+ "Physical Review Letters": null,
23
+ "Advanced Energy Materials": null,
24
+ "Nature Medicine": null,
25
+ "International Conference on Machine Learning": null,
26
+ "Energy & Environmental Science": null,
27
+ "ACS Nano": null,
28
+ "Scientific Reports": null,
29
+ "European Conference on Computer Vision": null,
30
+ "The Lancet Oncology": null,
31
+ "Advanced Functional Materials": null,
32
+ "PLoS ONE": null,
33
+ "IEEE/CVF International Conference on Computer Vision": null,
34
+ "Nature Genetics": null,
35
+ "Journal of Cleaner Production": null,
36
+ "Nature Materials": null,
37
+ "Science of The Total Environment": null,
38
+ "Circulation": "jour.1009570",
39
+ "BMJ": "jour.1017377",
40
+ "Journal of the American College of Cardiology": null,
41
+ "Applied Catalysis B: Environmental": null,
42
+ "Science Advances": null,
43
+ "Nano Letters": null,
44
+ "Nature Energy": null,
45
+ "ACS Applied Materials & Interfaces": null,
46
+ "Journal of Materials Chemistry A": null,
47
+ "IEEE Access": null,
48
+ "Nature Biotechnology": null,
49
+ "Nano Energy": null,
50
+ "Nature Methods": null,
51
+ "Nature Nanotechnology": null,
52
+ "Cochrane Database of Systematic Reviews": null,
53
+ "The Astrophysical Journal": null,
54
+ "The Lancet Infectious Diseases": null,
55
+ "Applied Energy": null,
56
+ "European Heart Journal": null,
57
+ "Blood": "jour.1085025",
58
+ "American Economic Review": null,
59
+ "Immunity": "jour.1112054",
60
+ "Meeting of the Association for Computational Linguistics (ACL)": null,
61
+ "AAAI Conference on Artificial Intelligence": null,
62
+ "Gastroenterology": "jour.1017616",
63
+ "Neuron": "jour.1098485",
64
+ "Journal of High Energy Physics": null,
65
+ "IEEE Communications Surveys & Tutorials": null,
66
+ "Nature Neuroscience": null,
67
+ "Computers in Human Behavior": null,
68
+ "Chemical engineering journal": null,
69
+ "ACS Catalysis": null,
70
+ "Nature Reviews. Molecular Cell Biology": null,
71
+ "International Journal of Molecular Sciences": null,
72
+ "IEEE Transactions on Pattern Analysis and Machine Intelligence": null,
73
+ "Environmental Science & Technology": null,
74
+ "Monthly Notices of the Royal Astronomical Society": null,
75
+ "Cell Metabolism": null,
76
+ "Nature Physics": null,
77
+ "Physical Review D": null,
78
+ "Accounts of Chemical Research": null,
79
+ "Nature Photonics": null,
80
+ "Nature Climate Change": null,
81
+ "Chemistry of Materials": null,
82
+ "Molecular Cell": null,
83
+ "Clinical Infectious Diseases": null,
84
+ "Morbidity and Mortality Weekly Report": null,
85
+ "Nature Reviews Immunology": null,
86
+ "Gut": "jour.1077125",
87
+ "Annals of Oncology": null,
88
+ "Cell Reports": null,
89
+ "Journal of Business Research": null,
90
+ "Clinical Cancer Research": null,
91
+ "Frontiers in Microbiology": null,
92
+ "Journal of Hepatology": null,
93
+ "eLife": "jour.1046517",
94
+ "Bioinformatics": "jour.1345383",
95
+ "The Journal of Clinical Investigation": null,
96
+ "Science Translational Medicine": null,
97
+ "Water Research": null,
98
+ "Frontiers in Immunology": null,
99
+ "Small": "jour.1034711",
100
+ "Nature Immunology": null,
101
+ "JAMA Oncology": null,
102
+ "The Lancet Neurology": null
103
+ },
104
+ "Business, Economics & Management": {
105
+ "American Economic Review": "jour.1056580",
106
+ "Journal of Business Research": "jour.1028262",
107
+ "Tourism Management": null,
108
+ "Journal of Business Ethics": null,
109
+ "Journal of Financial Economics": null,
110
+ "The Quarterly Journal of Economics": null,
111
+ "The Review of Financial Studies": null,
112
+ "Technological Forecasting and Social Change": null,
113
+ "International Journal of Information Management": null,
114
+ "Management Science": null,
115
+ "Journal of Political Economy": null,
116
+ "International Journal of Production Economics": null,
117
+ "The Journal of Finance": null,
118
+ "Journal of Management": null,
119
+ "Strategic Management Journal": null,
120
+ "World Development": null,
121
+ "Journal of Retailing and Consumer Services": null,
122
+ "Academy of Management Journal": null,
123
+ "International Journal of Project Management": null,
124
+ "Energy Economics": null
125
+ },
126
+ "Chemical & Material Sciences": {
127
+ "Advanced Materials": "jour.1129018",
128
+ "Chemical Reviews": null,
129
+ "Journal of the American Chemical Society": null,
130
+ "Angewandte Chemie": null,
131
+ "Chemical Society Reviews": null,
132
+ "Advanced Energy Materials": null,
133
+ "Energy & Environmental Science": null,
134
+ "ACS Nano": null,
135
+ "Advanced Functional Materials": null,
136
+ "Nature Materials": null,
137
+ "Applied Catalysis B: Environmental": null,
138
+ "Nano Letters": null,
139
+ "Nature Energy": null,
140
+ "ACS Applied Materials & Interfaces": null,
141
+ "Journal of Materials Chemistry A": null,
142
+ "Nano Energy": null,
143
+ "Nature Nanotechnology": null,
144
+ "Chemical engineering journal": null,
145
+ "ACS Catalysis": null,
146
+ "Accounts of Chemical Research": null
147
+ },
148
+ "Engineering & Computer Science": {
149
+ "IEEE/CVF Conference on Computer Vision and Pattern Recognition": null,
150
+ "Advanced Materials": "jour.1129018",
151
+ "International Conference on Learning Representations": null,
152
+ "Neural Information Processing Systems": null,
153
+ "Renewable and Sustainable Energy Reviews": null,
154
+ "Advanced Energy Materials": null,
155
+ "International Conference on Machine Learning": null,
156
+ "Energy & Environmental Science": null,
157
+ "ACS Nano": null,
158
+ "European Conference on Computer Vision": null,
159
+ "Advanced Functional Materials": null,
160
+ "IEEE/CVF International Conference on Computer Vision": null,
161
+ "Journal of Cleaner Production": null,
162
+ "Nature Materials": null,
163
+ "Applied Catalysis B: Environmental": null,
164
+ "Nano Letters": null,
165
+ "Nature Energy": null,
166
+ "ACS Applied Materials & Interfaces": null,
167
+ "Journal of Materials Chemistry A": null,
168
+ "IEEE Access": null
169
+ },
170
+ "Health & Medical Sciences": {
171
+ "The New England Journal of Medicine": "jour.1014075",
172
+ "The Lancet": "jour.1077219",
173
+ "Cell": "jour.1019114",
174
+ "JAMA": "jour.1081531",
175
+ "Proceedings of the National Academy of Sciences": null,
176
+ "Journal of Clinical Oncology": null,
177
+ "Nature Medicine": null,
178
+ "The Lancet Oncology": null,
179
+ "PLoS ONE": null,
180
+ "Nature Genetics": null,
181
+ "Circulation": "jour.1009570",
182
+ "BMJ": "jour.1017377",
183
+ "Journal of the American College of Cardiology": null,
184
+ "Cochrane Database of Systematic Reviews": null,
185
+ "The Lancet Infectious Diseases": null,
186
+ "European Heart Journal": null,
187
+ "Blood": "jour.1085025",
188
+ "Immunity": "jour.1112054",
189
+ "Gastroenterology": "jour.1017616",
190
+ "Neuron": "jour.1098485"
191
+ },
192
+ "Humanities, Literature & Arts": {
193
+ "Digital Journalism": null,
194
+ "Journal of Communication": null,
195
+ "Journalism Studies": null,
196
+ "International Journal of Communication": null,
197
+ "Journalism": "jour.1138763",
198
+ "System": "jour.1137434",
199
+ "The Modern Language Journal": null,
200
+ "Media, Culture & Society": null,
201
+ "Synthese": "jour.1284232",
202
+ "Political Communication": null,
203
+ "Applied Linguistics": null,
204
+ "Language Learning": null,
205
+ "Public Opinion Quarterly": null,
206
+ "TESOL Quarterly": null,
207
+ "Journalism Practice": null,
208
+ "Feminist Media Studies": null,
209
+ "Studies in Second Language Acquisition": null,
210
+ "English Language Teaching": null,
211
+ "Language Teaching": null,
212
+ "Race Ethnicity and Education": null
213
+ },
214
+ "Life Sciences & Earth Sciences": {
215
+ "Nature": "jour.1018957",
216
+ "Science": "jour.1346339",
217
+ "Cell": "jour.1019114",
218
+ "Nature Communications": null,
219
+ "Proceedings of the National Academy of Sciences": null,
220
+ "Nucleic Acids Research": null,
221
+ "Scientific Reports": null,
222
+ "PLoS ONE": null,
223
+ "Nature Genetics": null,
224
+ "Science of The Total Environment": null,
225
+ "Science Advances": null,
226
+ "Nature Biotechnology": null,
227
+ "Nature Methods": null,
228
+ "Neuron": "jour.1098485",
229
+ "Nature Reviews. Molecular Cell Biology": null,
230
+ "International Journal of Molecular Sciences": null,
231
+ "Environmental Science & Technology": null,
232
+ "Cell Metabolism": null,
233
+ "Nature Climate Change": null,
234
+ "Molecular Cell": null
235
+ },
236
+ "Physics & Mathematics": {
237
+ "Physical Review Letters": null,
238
+ "The Astrophysical Journal": null,
239
+ "Journal of High Energy Physics": null,
240
+ "Monthly Notices of the Royal Astronomical Society": null,
241
+ "Nature Physics": null,
242
+ "Physical Review D": null,
243
+ "Nature Photonics": null,
244
+ "Physical Review B": null,
245
+ "Physical Review X": null,
246
+ "Astronomy & Astrophysics": null,
247
+ "The European Physical Journal C": null,
248
+ "Journal of Molecular Liquids": null,
249
+ "IEEE Transactions on Automatic Control": null,
250
+ "International Journal of Heat and Mass Transfer": null,
251
+ "Physics Letters B": null,
252
+ "IEEE Transactions on Geoscience and Remote Sensing": null,
253
+ "Reviews of Modern Physics": null,
254
+ "IEEE Transactions on Signal Processing": null,
255
+ "Geophysical Research Letters": null,
256
+ "Optica": "jour.1050828"
257
+ },
258
+ "Social Sciences": {
259
+ "Journal of Business Ethics": null,
260
+ "Computers & Education": null,
261
+ "Research Policy": null,
262
+ "New Media & Society": null,
263
+ "American Journal of Public Health": null,
264
+ "Global Environmental Change": null,
265
+ "Nature Human Behaviour": null,
266
+ "Health Affairs": null,
267
+ "Social Science & Medicine": null,
268
+ "Teaching and Teacher Education": null,
269
+ "Energy Research & Social Science": null,
270
+ "Information, Communication & Society": null,
271
+ "Land Use Policy": null,
272
+ "Academic Medicine": null,
273
+ "Studies in Higher Education": null,
274
+ "American Journal of Political Science": null,
275
+ "Review of Educational Research": null,
276
+ "Annals of Tourism Research": null,
277
+ "Cities": "jour.1027483",
278
+ "Business Strategy and the Environment": null
279
+ }
280
+ }
data/journals.xlsx ADDED
Binary file (25.6 kB). View file
 
multiapp.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Frameworks for running multiple Streamlit applications as a single app.
2
+ """
3
+ import streamlit as st
4
+
5
+ # app_state = st.experimental_get_query_params()
6
+ # app_state = {k: v[0] if isinstance(v, list) else v for k, v in app_state.items()} # fetch the first item in each query string as we don't have multiple values for each query string key in this example
7
+
8
+
9
class MultiApp:
    """Framework for combining multiple streamlit applications.
    Usage:
        def foo():
            st.title("Hello Foo")
        def bar():
            st.title("Hello Bar")
        app = MultiApp()
        app.add_app("Foo", foo)
        app.add_app("Bar", bar)
        app.run()
    It is also possible to keep each application in a separate file.
        import foo
        import bar
        app = MultiApp()
        app.add_app("Foo", foo.app)
        app.add_app("Bar", bar.app)
        app.run()
    """

    def __init__(self):
        # Registered apps in insertion order; each entry is
        # {"title": str, "function": callable}.
        self.apps = []

    def add_app(self, title, func):
        """Adds a new application.
        Parameters
        ----------
        title:
            title of the app. Appears in the radio list in the sidebar.
        func:
            the python function to render this app.
        """
        self.apps.append({"title": title, "function": func})

    @staticmethod
    def _default_index(titles, app_state):
        """Return the index of the page named in ``app_state``.

        Falls back to 0 when no ``page`` entry is present or when it does
        not match any registered title (e.g. a stale or hand-edited URL),
        instead of raising ValueError.
        """
        page = app_state.get("page")
        return titles.index(page) if page in titles else 0

    def run(self):
        """Render the sidebar navigation and run the selected application."""
        app_state = st.experimental_get_query_params()
        # Fetch the first item in each query string as we don't have multiple
        # values for each query string key in this example.
        app_state = {
            k: v[0] if isinstance(v, list) else v for k, v in app_state.items()
        }

        titles = [a["title"] for a in self.apps]
        functions = [a["function"] for a in self.apps]
        default_radio = self._default_index(titles, app_state)

        st.sidebar.title("Navigation")

        title = st.sidebar.radio("Go To", titles, index=default_radio, key="radio")

        # Mirror the current selection back into the URL query string.
        app_state["page"] = st.session_state.radio

        st.experimental_set_query_params(**app_state)
        functions[titles.index(title)]()

        st.sidebar.title("About")
        st.sidebar.info(
            """
            The web app URL: <https://scholar.gishub.org>. If you have any questions regarding this web app, please contact [Qiusheng Wu](https://wetlands.io) (qwu18@utk.edu).
            """
        )
packages.txt ADDED
File without changes
postBuild ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# enable nbserverproxy
jupyter serverextension enable --sys-prefix nbserverproxy
# install streamlit_call.py into site-packages so it can be loaded as a
# server extension (the prefix is quoted so a path containing spaces is not
# split; the python* glob is left unquoted on purpose so it still expands)
mv streamlit_call.py "${NB_PYTHON_PREFIX}"/lib/python*/site-packages/
# enable streamlit extension
jupyter serverextension enable --sys-prefix streamlit_call
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dimcli
2
+ geopandas
3
+ # jupyter-server-proxy
4
+ # keplergl
5
+ # nbserverproxy
6
+ openpyxl
7
+ streamlit
8
+ streamlit-option-menu
9
+ leafmap
10
+ scholarpy
11
+ geemap
12
+ # git+https://github.com/giswqs/leafmap
13
+ # git+https://github.com/giswqs/scholarpy
14
+ # git+https://github.com/giswqs/geemap
setup.sh ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
# Write the Streamlit server configuration, binding to the port given in $PORT.
mkdir -p ~/.streamlit/
# A here-document is used instead of `echo "...\n..."`: whether echo expands
# "\n" depends on the shell, and where it does not the file ends up as a
# single line containing literal backslash-n sequences (invalid TOML).
cat > ~/.streamlit/config.toml <<EOF
[server]
headless = true
port = $PORT
enableCORS = false

EOF
streamlit_app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import scholarpy
import streamlit as st
from streamlit_option_menu import option_menu
from apps import grant, home, google, journal, orcid, organization, publication, researcher

st.set_page_config(page_title="Scholar Web App",
                   page_icon="chart_with_upwards_trend",
                   layout="wide")

# Registry of pages, keyed by the lower-case name accepted in the "?page="
# query parameter. Each entry holds the menu title, the menu icon and the
# module whose app() function renders the page.
# More icons can be found here: https://icons.getbootstrap.com

apps = {
    "home": {"title": "Home", "icon": "house", "module": home},
    "grant": {"title": "Grant", "icon": "coin", "module": grant},
    "journal": {"title": "Journal", "icon": "journals", "module": journal},
    "publication": {"title": "Publication", "icon": "journal", "module": publication},
    "researcher": {"title": "Researcher", "icon": "person-circle", "module": researcher},
    "orcid": {"title": "ORCID", "icon": "person-square", "module": orcid},
    "organization": {"title": "Organization", "icon": "building", "module": organization},
    "google": {"title": "Google Scholar", "icon": "google", "module": google},
}

keys = list(apps)
titles = [app["title"] for app in apps.values()]
icons = [app["icon"] for app in apps.values()]
params = st.experimental_get_query_params()

# Resolve "?page=..." case-insensitively against both the registry keys and
# the menu titles. The original lower-cased the parameter and then looked it
# up in the capitalised titles, which raised ValueError for every value.
# Unknown pages fall back to the first entry.
requested = (params.get("page") or [""])[0].lower()
lowered_titles = [title.lower() for title in titles]
if requested in keys:
    default_index = keys.index(requested)
elif requested in lowered_titles:
    default_index = lowered_titles.index(requested)
else:
    default_index = 0

with st.sidebar:
    selected = option_menu(
        "Main Menu",
        options=titles,
        icons=icons,
        menu_icon="cast",
        default_index=default_index,
    )

    st.sidebar.info(
        """
        **Web App URL:**
        <https://scholar.streamlit.app>

        **Contact:**
        - [Qiusheng Wu](https://geography.utk.edu/about-us/faculty/dr-qiusheng-wu)
        """
    )
    # NOTE(review): assumed to belong to the sidebar block -- confirm.
    st.image("https://i.imgur.com/2WhANKg.png")

if "dsl" not in st.session_state:
    st.session_state["dsl"] = scholarpy.Dsl()

# Dispatch through the registry instead of eval() on a constructed string.
for app in apps.values():
    if app["title"] == selected:
        app["module"].app()
        break
streamlit_call.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from subprocess import Popen
2
+
3
+
4
def load_jupyter_server_extension(nbapp):
    """Start ``streamlit run streamlit_app.py`` as a child process.

    ``nbapp`` is accepted but not used. The process is started with ``Popen``
    and is not waited on.
    """
    command = [
        "streamlit",
        "run",
        "streamlit_app.py",
        "--browser.serverAddress=0.0.0.0",
        "--server.enableCORS=False",
    ]
    Popen(command)