Skip to content

Commit

Permalink
Run git ls-remote in batches of 1000
Browse files: browse the repository at this point in the history
  • Loading branch information
jeroen committed Nov 25, 2024
1 parent e0f7216 commit d2a834e
Showing 1 changed file with 21 additions and 3 deletions.
24 changes: 21 additions & 3 deletions R/update.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ update_submodules <- function(path = '.', skip = '.registry'){
withr::local_dir(path)
repo <- gert::git_open(path)
submodules <- gert::git_submodule_list(repo = repo)
submodules$upstream <- remote_heads_many(submodules$url, submodules$branch)
submodules$upstream <- remote_heads_in_batches(submodules$url, submodules$branch)
for(i in seq_len(nrow(submodules))){
info <- as.list(submodules[i,])
if(info$path %in% skip) next
Expand All @@ -46,7 +46,7 @@ submodules_up_to_date <- function(skip_broken = TRUE, path = '.'){
withr::local_dir(path)
repo <- gert::git_open(path)
submodules <- gert::git_submodule_list(repo = repo)
submodules$upstream <- remote_heads_many(submodules$url, submodules$branch)
submodules$upstream <- remote_heads_in_batches(submodules$url, submodules$branch)
isok <- which(submodules$upstream == submodules$head)
fine <- submodules$path[isok]
broken <- submodules[is.na(submodules$upstream),]
Expand Down Expand Up @@ -129,7 +129,7 @@ parse_raw_gitpack <- function(buf){
}

remote_heads_many <- function(repos, refs = NULL, verbose = TRUE){
pool <- curl::multi_set(multiplex = TRUE) # use default pool
pool <- curl::new_pool(multiplex = TRUE, host_con = 1L)
len <- length(repos)
out <- rep(NA_character_, len)
completed <- 0
Expand Down Expand Up @@ -164,6 +164,24 @@ remote_heads_many <- function(repos, refs = NULL, verbose = TRUE){
}, fail = message, pool = pool)
})
curl::multi_run(pool = pool)
rm(pool); gc() #close connection
cat("\n", file = stderr())
out
}

# Look up remote heads for many repositories, one batch at a time.
#
# GitHub does not like too many requests at once, so the repositories are split
# into groups of at most `batch_size`, every group is handed to
# remote_heads_many(), and we wait `wait` seconds between two groups.
#
# Arguments:
#   repos       vector of repositories, forwarded to remote_heads_many()
#   refs        optional vector parallel to `repos`, forwarded to
#               remote_heads_many(); NULL is forwarded as NULL
#   batch_size  maximum number of repositories per batch (at least 1)
#   wait        number of seconds to sleep between two batches
#
# Returns a vector with one element per entry of `repos`, in the same order as
# `repos`. It starts out as NA_character_ and is filled with whatever
# remote_heads_many() returns for each batch.
remote_heads_in_batches <- function(repos, refs = NULL, batch_size = 1000, wait = 60){
  stopifnot(
    is.numeric(batch_size), length(batch_size) == 1, !is.na(batch_size),
    batch_size >= 1,
    is.numeric(wait), length(wait) == 1, !is.na(wait), wait >= 0
  )
  n <- length(repos)
  # Same NA type that remote_heads_many() uses for its own result vector, so an
  # empty input yields character(0) rather than logical(0)
  output <- rep(NA_character_, n)
  if(n == 0) return(output)
  ngroups <- ceiling(n / floor(batch_size))
  # Repositories are still assigned to batches in random order, but the group
  # labels are dealt out evenly first, so no batch is empty and no batch is
  # larger than batch_size. Indexing with sample.int() avoids the special case
  # of sample() for a length-one numeric argument.
  batch <- rep_len(seq_len(ngroups), n)[sample.int(n)]
  for(group in seq_len(ngroups)) {
    sx <- batch == group
    if(group > 1) {
      message("Done! Waiting for a bit for the next batch...")
      Sys.sleep(wait)
    }
    message("Starting batch: ", group)
    output[sx] <- remote_heads_many(repos[sx], refs[sx])
  }
  output
}

0 comments on commit d2a834e

Please sign in to comment.