-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGsInt.R
More file actions
60 lines (49 loc) · 1.9 KB
/
GsInt.R
File metadata and controls
60 lines (49 loc) · 1.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Parse one Google Scholar author-search results page into a data frame.
#
# page: an HTML document as returned by read_html().
# Returns a data frame with the character columns `authors`, `aff` and
# `cited`; `cited` has every non-digit character stripped from the
# "cited by" text.
gs_parse_authors <- function(page) {
  results <- page %>% html_nodes("#gsc_sa_ccl")
  authors <- results %>% html_nodes(".gs_ai_name") %>% html_text()
  aff <- results %>% html_nodes(".gs_ai_aff") %>% html_text()
  cited_raw <- results %>% html_nodes(".gs_ai_cby") %>% html_text()
  cited <- gsub("\\D", "", cited_raw)
  # NOTE(review): the three vectors are matched purely by position. If a
  # profile lacks one of the fields, cbind() recycles the shorter vector and
  # rows can end up misaligned -- confirm against real pages.
  as.data.frame(cbind(authors, aff, cited))
}

# Read the paging parameters for the following results page.
#
# page: an HTML document as returned by read_html().
# Returns list(after_author = <chr>, astart = <chr>), or NULL when the page
# has no usable "next" button.
gs_next_page_params <- function(page) {
  onclick <- page %>%
    html_nodes("#gsc_authors_bottom_pag") %>%
    html_nodes(".gsc_pgn") %>%
    html_nodes("button") %>%
    html_attr("onclick")
  # The second pager button is the one used to move forward.
  if (length(onclick) < 2L || is.na(onclick[2])) {
    return(NULL)
  }
  next_click <- onclick[2]
  # The handler embeds the target URL with "=" written as the text "x3d"
  # (optionally preceded by a backslash). Look parameters up by name rather
  # than by position so the whole value is captured regardless of its length
  # or of how many other parameters the URL carries.
  pick <- function(name, value_pattern) {
    pattern <- paste0(name, "\\\\?x3d(", value_pattern, ")")
    hit <- regmatches(next_click, regexec(pattern, next_click))[[1]]
    if (length(hit) == 2L) hit[2] else NA_character_
  }
  after_author <- pick("after_author", "[[:alnum:]_-]+")
  astart <- pick("astart", "[0-9]+")
  if (is.na(after_author) || is.na(astart)) {
    return(NULL)
  }
  list(after_author = after_author, astart = astart)
}

GsIntLap <- function(int, n) {
  # Crawl Google Scholar for scholars who list a given field label.
  #
  # int: the field label to search for (a single string); it is pasted into
  #      the request URL as-is.
  # n:   how many scholars are wanted. Results come in pages of 10, so n is
  #      rounded up to whole pages (n = 15 fetches 2 pages); the result is
  #      not truncated to exactly n rows.
  #
  # Returns a data.table with the columns `authors`, `aff` and `cited`, one
  # row per scholar found. Fewer rows than requested are returned when the
  # search runs out of result pages.
  # Stops with an error on invalid arguments or when a required package
  # cannot be loaded.
  if (!is.atomic(int) || length(int) != 1L || is.na(int) ||
      !nzchar(as.character(int))) {
    stop("'int' must be a single non-empty field label", call. = FALSE)
  }
  if (!is.numeric(n) || length(n) != 1L || !is.finite(n) || n < 0) {
    stop("'n' must be a single non-negative number", call. = FALSE)
  }

  # Attach the same packages as before, but fail loudly when one that this
  # function actually calls into is unavailable.
  pkgs <- c("xml2", "rvest", "dplyr", "ggplot2", "stringr", "data.table")
  attached <- vapply(pkgs, require, logical(1), character.only = TRUE)
  needed <- c("xml2", "rvest", "data.table")
  missing_pkgs <- needed[!attached[needed]]
  if (length(missing_pkgs) > 0) {
    stop(
      "required package(s) not available: ",
      paste(missing_pkgs, collapse = ", "),
      call. = FALSE
    )
  }

  n_pages <- ceiling(n / 10)
  if (n_pages == 0) {
    # Nothing requested: skip the network entirely.
    return(data.table(
      authors = character(),
      aff = character(),
      cited = character()
    ))
  }

  page_url <- paste0(
    "https://scholar.google.co.kr/citations?hl=ko&view_op=search_authors&mauthors=label%3A",
    int,
    "&btnG="
  )
  pages <- vector("list", n_pages)
  # A plain loop is used on purpose: each request depends on the paging token
  # found on the previous page, so that state must carry across iterations.
  for (i in seq_len(n_pages)) {
    # NOTE(review): 'gb2312' is kept from the original code; confirm it is
    # the encoding these pages are actually served in.
    page <- read_html(page_url, encoding = "gb2312")
    pages[[i]] <- gs_parse_authors(page)
    if (i == n_pages) {
      break
    }
    next_params <- gs_next_page_params(page)
    if (is.null(next_params)) {
      # No further results page: return what has been collected so far.
      break
    }
    page_url <- paste0(
      "https://scholar.google.co.kr/citations?view_op=search_authors&hl=ko&mauthors=label:",
      int,
      "&after_author=",
      next_params$after_author,
      "&astart=",
      next_params$astart
    )
  }

  fetched <- !vapply(pages, is.null, logical(1))
  rbindlist(pages[fetched])
}