Spaces:
Running
Running
Upload 2 files
Browse files- OpenAlex4NodeXL.R +266 -0
- code.js +12 -0
OpenAlex4NodeXL.R
ADDED
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#' Build a NodeXL-style author/publication edge list from an OpenAlex search
#'
#' Searches OpenAlex works by title keywords (restricted to works cited more
#' than 50 times, sorted by citation count in descending order) and flattens
#' the result into one row per (author, work) pair. Rows containing any
#' missing value are dropped.
#'
#' @param keywords Character vector passed to the `title.search` filter.
#' @param pub_start_date Passed to `from_publication_date`.
#' @param pub_end_date Passed to `to_publication_date`.
#' @return A data frame with the columns `Vertex1` (author), `Vertex2`
#'   (publication), `Vertex1 Position`, `Vertex1 Affiliation`,
#'   `Vertex1 Institution`, `Vertex1 Institution Country`, `Vertex2 Abstract`,
#'   `Vertex2 Type`, `Vertex2 Publication Year`, `Vertex2 Cited By Count`,
#'   `Vertex2 Referenced Works`, `Vertex2 Host Organization` and `Vertex2 SO`.
#'   A zero-row data frame with the same column names is returned when the
#'   search yields no works or no authors.
OpenAlex4NodeXL <- function(keywords, pub_start_date, pub_end_date) {
  # final column order expected by the NodeXL flat file csv format
  edge_columns <- c(
    "Vertex1",
    "Vertex2",
    "Vertex1 Position",
    "Vertex1 Affiliation",
    "Vertex1 Institution",
    "Vertex1 Institution Country",
    "Vertex2 Abstract",
    "Vertex2 Type",
    "Vertex2 Publication Year",
    "Vertex2 Cited By Count",
    "Vertex2 Referenced Works",
    "Vertex2 Host Organization",
    "Vertex2 SO"
  )

  # zero-row result with the regular column names, used when nothing is found
  empty_edges <- function() {
    out <- as.data.frame(
      matrix(character(0), nrow = 0, ncol = length(edge_columns)),
      stringsAsFactors = FALSE
    )
    colnames(out) <- edge_columns
    out
  }

  # create search engine function
  search_engine <- function(keywords, pub_start_date, pub_end_date) {
    # load software libraries (attached on purpose: callers rely on them)
    suppressPackageStartupMessages(library(openalexR))
    suppressPackageStartupMessages(library(tidyverse))

    # set options
    options(openalexR.mailto = "youremail@email.com") # replace with your email address

    # search engine
    oa_fetch(
      entity = "works",
      title.search = c(keywords),
      cited_by_count = ">50",
      from_publication_date = pub_start_date,
      to_publication_date = pub_end_date,
      options = list(sort = "cited_by_count:desc"),
      verbose = FALSE
    )
  }

  # fetch data from openalex.org api
  search_data <- search_engine(keywords, pub_start_date, pub_end_date)

  # nothing found (NULL, empty list or zero-row table): nothing to flatten
  if (NROW(search_data) == 0) {
    return(empty_edges())
  }

  # one author table per work
  author_tables <- search_data[["author"]]
  if (is.null(author_tables)) {
    stop("OpenAlex result has no `author` column", call. = FALSE)
  }

  # number of entries each work contributes; NROW() also covers works whose
  # author entry is not a data frame (e.g. a single NA), so the publication
  # attributes below stay aligned with the author attributes
  authors_length <- vapply(
    author_tables, NROW, integer(1),
    USE.NAMES = FALSE
  )
  if (sum(authors_length) == 0) {
    return(empty_edges())
  }

  # column `idx` of every author table, concatenated over all works
  # NOTE(review): positional indexing assumes the author table layout of the
  # openalexR version this was written against -- confirm after upgrades
  author_field <- function(idx) {
    do.call(c, lapply(author_tables, function(tbl) tbl[idx][[1]]))
  }

  # work-level column repeated once per author of that work
  per_author <- function(column) {
    values <- search_data[[column]]
    if (is.null(values)) {
      stop("OpenAlex result has no `", column, "` column", call. = FALSE)
    }
    rep(values, times = authors_length)
  }

  # author attributes and (atomic) publication attributes
  authorAtt_df <- data.frame(
    "Vertex1" = author_field(2),                      # authors
    "Vertex2" = per_author("display_name"),           # publication
    "Vertex1 Position" = author_field(4),             # author position
    "Vertex1 Affiliation" = author_field(7),          # author affiliation
    "Vertex1 Institution" = author_field(10),         # institution type
    "Vertex1 Institution Country" = author_field(9),  # institution country code
    "Vertex2 Abstract" = per_author("ab"),
    "Vertex2 Type" = per_author("type"),
    "Vertex2 Publication Year" = per_author("publication_year"),
    "Vertex2 Cited By Count" = per_author("cited_by_count"),
    "Vertex2 Host Organization" = per_author("host_organization"),
    "Vertex2 SO" = per_author("so"),
    check.names = FALSE
  )

  # referenced works is a list column, so it is attached separately
  authorAtt_df$`Vertex2 Referenced Works` <- per_author("referenced_works")

  # rearrange columns for NodeXL flat file csv format
  authorAtt_df <- authorAtt_df[edge_columns]

  # filter out missing values from the data frame
  authorAtt_df <- na.omit(authorAtt_df)

  # convert list column into character column (space separated)
  authorAtt_df$`Vertex2 Referenced Works` <- vapply(
    authorAtt_df$`Vertex2 Referenced Works`,
    paste,
    character(1),
    collapse = " ",
    USE.NAMES = FALSE
  )

  authorAtt_df
}
|
256 |
+
|
257 |
+
# test software program
|
258 |
+
# mydata <- OpenAlex4NodeXL(
|
259 |
+
# keywords = c("software", "information"),
|
260 |
+
# pub_start_date = "2019-01-01",
|
261 |
+
# pub_end_date = "2023-09-30"
|
262 |
+
# )
|
263 |
+
|
264 |
+
#
|
265 |
+
# view returned data
|
266 |
+
# mydata |> view()
|
code.js
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// Let the "Enter" key submit the search by clicking the "query" button.
document.addEventListener("keydown", function (event) {
  // only react to "Enter", and not while an IME composition is being confirmed
  if (event.key !== "Enter" || event.isComposing) {
    return;
  }

  var queryButton = document.getElementById("query");
  // no query button on the page: leave the default Enter behaviour untouched
  if (!queryButton) {
    return;
  }

  // cancel the default action if any
  event.preventDefault();
  // trigger the button element with a click
  queryButton.click();
});
|