Ifeanyi committed on
Commit
fddf30f
1 Parent(s): f8371b1

Upload 2 files

Browse files
Files changed (2) hide show
  1. OpenAlex4NodeXL.R +266 -0
  2. code.js +12 -0
OpenAlex4NodeXL.R ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#' Build a NodeXL author-publication edge list from an OpenAlex title search
#'
#' Queries OpenAlex (through `openalexR::oa_fetch()`) for works whose title
#' matches `keywords`, that have more than 50 citations and that were
#' published between `pub_start_date` and `pub_end_date`, sorted by citation
#' count (descending). The result is flattened to one row per
#' (author, work) pair; rows with a missing value in any column are dropped.
#'
#' @param keywords Search terms handed to the `title.search` filter.
#' @param pub_start_date Lower publication-date bound
#'   (`from_publication_date`), e.g. "2019-01-01".
#' @param pub_end_date Upper publication-date bound (`to_publication_date`),
#'   e.g. "2023-09-30".
#' @return A data frame with the columns `Vertex1`, `Vertex2`,
#'   `Vertex1 Position`, `Vertex1 Affiliation`, `Vertex1 Institution`,
#'   `Vertex1 Institution Country`, `Vertex2 Abstract`, `Vertex2 Type`,
#'   `Vertex2 Publication Year`, `Vertex2 Cited By Count`,
#'   `Vertex2 Referenced Works` (ids pasted together with single spaces),
#'   `Vertex2 Host Organization` and `Vertex2 SO`. When the search yields no
#'   usable author rows, a zero-row data frame with these columns (all of
#'   type character) is returned instead of an error.
OpenAlex4NodeXL <- function(keywords, pub_start_date, pub_end_date) {
  # Positions of the per-author fields inside each element of
  # `search_data$author`, keyed by the output column they feed.
  # NOTE(review): the lookup is positional, exactly as in the original code;
  # presumably 2 = author name, 4 = author position, 7 = affiliation,
  # 9 = institution country code, 10 = institution type -- confirm against
  # the installed openalexR version.
  author_field_index <- c(
    "Vertex1" = 2L,
    "Vertex1 Position" = 4L,
    "Vertex1 Affiliation" = 7L,
    "Vertex1 Institution" = 10L,
    "Vertex1 Institution Country" = 9L
  )

  # Work-level columns of the search result, keyed by output column.
  work_field_name <- c(
    "Vertex2" = "display_name",
    "Vertex2 Abstract" = "ab",
    "Vertex2 Type" = "type",
    "Vertex2 Publication Year" = "publication_year",
    "Vertex2 Cited By Count" = "cited_by_count",
    "Vertex2 Referenced Works" = "referenced_works",
    "Vertex2 Host Organization" = "host_organization",
    "Vertex2 SO" = "so"
  )

  # Column order of the NodeXL flat-file layout.
  edge_columns <- c(
    "Vertex1",
    "Vertex2",
    "Vertex1 Position",
    "Vertex1 Affiliation",
    "Vertex1 Institution",
    "Vertex1 Institution Country",
    "Vertex2 Abstract",
    "Vertex2 Type",
    "Vertex2 Publication Year",
    "Vertex2 Cited By Count",
    "Vertex2 Referenced Works",
    "Vertex2 Host Organization",
    "Vertex2 SO"
  )

  # Assemble a data frame from a named list of equally long columns without
  # mangling the column names that contain spaces.
  as_edge_frame <- function(columns) {
    do.call(
      data.frame,
      c(
        columns[edge_columns],
        list(
          row.names = NULL,
          check.names = FALSE,
          stringsAsFactors = FALSE
        )
      )
    )
  }

  # Zero-row result used whenever there is nothing to report.
  empty_edge_list <- function() {
    blank <- rep(list(character(0)), length(edge_columns))
    names(blank) <- edge_columns
    as_edge_frame(blank)
  }

  # Run the OpenAlex query.
  search_engine <- function(keywords, pub_start_date, pub_end_date) {
    # Both packages are attached as the original code did. This function no
    # longer needs tidyverse itself, but callers may rely on it being
    # attached after the call.
    suppressPackageStartupMessages(library(openalexR))
    suppressPackageStartupMessages(library(tidyverse))

    # Only fall back to the placeholder address when the user has not
    # configured one; an existing setting is left untouched.
    if (is.null(getOption("openalexR.mailto"))) {
      # replace with your email address
      options(openalexR.mailto = "youremail@email.com")
    }

    oa_fetch(
      entity = "works",
      title.search = keywords,
      cited_by_count = ">50",
      from_publication_date = pub_start_date,
      to_publication_date = pub_end_date,
      options = list(sort = "cited_by_count:desc"),
      verbose = FALSE
    )
  }

  # fetch data from openalex.org api
  search_data <- search_engine(keywords, pub_start_date, pub_end_date)

  # Nothing came back (NULL or a zero-row table): `1:nrow()` style loops
  # would fail here, so return an empty edge list instead.
  if (!is.data.frame(search_data) || nrow(search_data) == 0L) {
    return(empty_edge_list())
  }

  # Fail early, with a readable message, when the result lacks a column this
  # function reads.
  required <- c("author", unname(work_field_name))
  absent <- setdiff(required, names(search_data))
  if (length(absent) > 0L) {
    stop(
      "OpenAlex result is missing expected column(s): ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # One author table per work. Entries that are not data frames (for example
  # a bare NA) carry no author rows; they are treated as zero authors so the
  # per-work counts always stay aligned with the rows of `search_data`.
  author_tables <- lapply(search_data[["author"]], function(tbl) {
    if (is.data.frame(tbl)) tbl else NULL
  })
  authors_per_work <- vapply(author_tables, NROW, integer(1))

  if (sum(authors_per_work) == 0L) {
    return(empty_edge_list())
  }

  # Concatenate one author field across all works.
  author_field <- function(index) {
    per_work <- lapply(author_tables, function(tbl) {
      if (is.null(tbl)) NULL else tbl[[index]]
    })
    unlist(per_work, use.names = FALSE)
  }

  # Repeat one work-level field once per author of that work.
  work_field <- function(column) {
    values <- search_data[[column]]
    if (column == "referenced_works") {
      # Collapse each work's references into a single string so the column
      # is plain character rather than a list column.
      values <- vapply(
        values,
        function(ids) paste(ids, collapse = " "),
        character(1),
        USE.NAMES = FALSE
      )
    }
    rep(values, times = authors_per_work)
  }

  columns <- c(
    lapply(author_field_index, author_field),
    lapply(work_field_name, work_field)
  )

  # filter out missing values from the data frame
  stats::na.omit(as_edge_frame(columns))
}
256
+
257
+ # test software program
258
+ # mydata <- OpenAlex4NodeXL(
259
+ # keywords = c("software", "information"),
260
+ # pub_start_date = "2019-01-01",
261
+ # pub_end_date = "2023-09-30"
262
+ # )
263
+
264
+ #
265
+ # view returned data
266
+ # mydata |> view()
code.js ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Pressing "Enter" anywhere in the document triggers a click on the element
// with id "query".
document.addEventListener("keydown", function (event) {
  // React only to a genuine Enter press: skip auto-repeat events from a
  // held-down key and Enter presses that merely confirm an IME composition.
  if (event.key !== "Enter" || event.repeat || event.isComposing) {
    return;
  }

  // The button may not exist (yet); in that case leave the key press alone
  // instead of cancelling it and then failing on a null reference.
  var queryButton = document.getElementById("query");
  if (!queryButton) {
    return;
  }

  // cancel the default action if any
  event.preventDefault();
  // trigger the button element with a click
  queryButton.click();
});