Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions R/EngineGraphQLGitLab.R
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,39 @@ EngineGraphQLGitLab <- R6::R6Class(
return(full_repos_list)
},

# Per-repo fallback: first list project paths with a minimal
# query, then fetch each repo individually with a lightweight
# query (no languages / issueStatusCounts).
get_repos_from_org_per_repo = function(org, verbose = TRUE) {
  # Fetch a group's repositories one by one: list the project paths with
  # the minimal query, then request each project with the lightweight
  # per-repo query.
  #
  # @param org Group full path handed to the path-listing helper.
  # @param verbose Passed to the GraphQL calls; when TRUE a warning is
  #   printed for every project that could not be fetched.
  # @return A list of `list(node = <project data>)` entries, one per
  #   fetched project; an empty list when no paths were listed; or a
  #   `graphql_error` object when the listing failed, or when nothing
  #   could be fetched and at least one request returned a `graphql_error`.
  full_paths <- private$get_repo_paths_from_org(
    org = org,
    verbose = verbose
  )
  if (inherits(full_paths, "graphql_error")) {
    return(full_paths)
  }
  if (length(full_paths) == 0) {
    return(list())
  }
  repos_list <- vector("list", length(full_paths))
  last_error <- NULL
  for (i in seq_along(full_paths)) {
    full_path <- full_paths[[i]]
    response <- self$gql_response(
      gql_query = self$gql_query$repo_by_fullpath_light(),
      vars = list("fullPath" = full_path),
      verbose = verbose
    )
    if (inherits(response, "graphql_error")) {
      # Remember the error so a total failure can be reported to the caller.
      last_error <- response
    } else if (!is.null(response$data$project)) {
      repos_list[[i]] <- list(node = response$data$project)
      next
    }
    if (verbose) {
      cli::cli_alert_warning("Failed to fetch repo: {full_path}")
    }
  }
  # Slots of projects that could not be fetched are still NULL; drop them.
  repos_list <- purrr::compact(repos_list)
  # Skipping individual projects is best-effort, but if nothing at all was
  # fetched because of GraphQL errors, surface the error instead of an
  # empty list so the caller can fall back rather than report "no repos".
  if (length(repos_list) == 0 && !is.null(last_error)) {
    return(last_error)
  }
  repos_list
},

prepare_repos_table = function(repos_list, org) {
if (length(repos_list) > 0) {
repos_table <- purrr::map(repos_list, function(repo) {
Expand Down Expand Up @@ -514,6 +547,38 @@ EngineGraphQLGitLab <- R6::R6Class(
}
),
private = list(
get_repo_paths_from_org = function(org, verbose = TRUE) {
  # Page through the minimal projects query and gather every project's
  # `fullPath`.
  #
  # @param org Value sent as the `org` query variable.
  # @param verbose Passed through to `self$gql_response()`.
  # @return The collected paths flattened with `unlist()` (`NULL` when
  #   nothing was collected), or the `graphql_error` response of the first
  #   page request that failed.
  collected <- list()
  cursor <- ""
  repeat {
    response <- self$gql_response(
      gql_query = self$gql_query$repos_by_org_minimal(),
      vars = list(
        "org" = org,
        "repo_cursor" = cursor
      ),
      verbose = verbose
    )
    if (inherits(response, "graphql_error")) {
      return(response)
    }
    projects <- response$data$group$projects
    page_paths <- purrr::map_chr(
      projects$edges,
      function(edge) edge$node$fullPath
    )
    collected <- append(collected, as.list(page_paths))
    # Stop when the API reports no further page, omits the flag, or the
    # current page came back empty.
    has_more <- projects$pageInfo$hasNextPage
    if (is.null(has_more) || length(page_paths) == 0) {
      has_more <- FALSE
    }
    if (!has_more) {
      break
    }
    cursor <- projects$pageInfo$endCursor
  }
  unlist(collected)
},

get_repos_page = function(org = NULL,
projects_ids = NULL,
type = "organization",
Expand Down
53 changes: 53 additions & 0 deletions R/GQLQueryGitLab.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,29 @@ GQLQueryGitLab <- R6::R6Class("GQLQueryGitLab",
}')
},

# Minimal query to list project paths in a group without
# expensive fields (languages, issues). Used as the first step
# of the per-repo fallback when repos_by_org fails.
repos_by_org_minimal = function() {
  # Return the GraphQL query text that pages through a group's projects
  # (100 per page, after `$repo_cursor`) and selects only each project's
  # `fullPath`, together with the connection's `count` and `pageInfo`.
  minimal_query <- paste0('
query GetRepoPathsByOrg($org: ID! $repo_cursor: String!) {
group(fullPath: $org) {
projects(first: 100 after: $repo_cursor) {
count
pageInfo {
hasNextPage
endCursor
}
edges {
node {
fullPath
}
}
}
}
}')
  minimal_query
},

repo_by_fullpath = function() {
paste0('
query GetRepoByFullPath($fullPath: ID!) {
Expand All @@ -82,6 +105,36 @@ GQLQueryGitLab <- R6::R6Class("GQLQueryGitLab",
}')
},

# Lightweight per-repo query that omits languages and
# issueStatusCounts to avoid complexity limits.
repo_by_fullpath_light = function() {
  # Return the GraphQL query text for a single project looked up by
  # `$fullPath`. It selects identifiers, repository root ref and last
  # commit sha, star and fork counts, timestamps, namespace path and web
  # URL; it does not request languages or issueStatusCounts.
  paste0('
query GetRepoByFullPathLight($fullPath: ID!) {
project(fullPath: $fullPath) {
repo_id: id
repo_name: name
repo_path: path
repo_fullpath: fullPath
... on Project {
repository {
rootRef
lastCommit {
sha
}
}
}
stars: starCount
forks: forksCount
created_at: createdAt
last_activity_at: lastActivityAt
namespace {
path: fullPath
}
repo_url: webUrl
}
}')
},

issues_from_repo = function(issues_cursor = "") {
paste0('
query getIssuesFromRepo ($fullPath: ID!) {
Expand Down
107 changes: 100 additions & 7 deletions R/GitHostGitLab.R
Original file line number Diff line number Diff line change
Expand Up @@ -333,16 +333,27 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab",
engine_used <- graphql_engine
if (inherits(repos_from_org, "graphql_error")) {
if (verbose) {
cli::cli_alert_info("Switching to REST API...")
cli::cli_alert_info("Switching to per-repo GraphQL queries...")
}
rest_engine <- private$engines$rest
repos_from_org <- rest_engine$get_repos_from_org(
org = url_encode(org),
output = "raw",
repos_from_org <- graphql_engine$get_repos_from_org_per_repo(
org = url_decode(org),
verbose = verbose
)
engine_used <- rest_engine
} else {
if (inherits(repos_from_org, "graphql_error")) {
if (verbose) {
cli::cli_alert_info("Switching to REST API...")
}
rest_engine <- private$engines$rest
repos_from_org <- rest_engine$get_repos_from_org(
org = url_encode(org),
output = "raw",
verbose = verbose
)
engine_used <- rest_engine
}
}
if (!inherits(repos_from_org, "graphql_error") &&
engine_used == graphql_engine) {
repos_from_org <- purrr::map(repos_from_org, function(repos_data) {
repos_data$path <- repos_data$node$repo_path
repos_data
Expand Down Expand Up @@ -565,6 +576,88 @@ GitHostGitLab <- R6::R6Class("GitHostGitLab",
return(repos_table)
},

# Override parent to add a per-repo GraphQL fallback before REST.
# Fallback order: repos_by_org query -> per-repo GraphQL -> REST.
get_repos_from_orgs = function(add_languages, verbose, progress) {
  # Pull repositories for every owner in `private$orgs`, trying engines in
  # order: bulk GraphQL query -> per-repo GraphQL -> REST.
  #
  # The per-repo step is attempted only for owners of type "organization":
  # it lists projects through the `group(fullPath:)` query, which has
  # nothing to list for other owner types and would yield an empty result
  # instead of an error, so REST would never be tried for them.
  #
  # @param add_languages Forwarded to the REST engine's
  #   `get_repos_from_org()`.
  # @param verbose When TRUE, progress and fallback messages are printed.
  # @param progress Forwarded to `set_progress_bar()`.
  # @return The per-owner repository tables row-bound with
  #   `purrr::list_rbind()`; nothing (invisible NULL) when the searching
  #   scope contains neither "all" nor "org".
  if (any(c("all", "org") %in% private$searching_scope)) {
    graphql_engine <- private$engines$graphql

    # Shape raw GraphQL repository nodes into the owner's repos table;
    # shared by the bulk and per-repo GraphQL paths.
    graphql_repos_table <- function(repos_from_org, org) {
      if (length(repos_from_org) == 0) {
        return(NULL)
      }
      repos_from_org |>
        graphql_engine$prepare_repos_table(
          org = unclass(url_decode(org))
        ) |>
        dplyr::filter(organization == unclass(url_decode(org)))
    }

    # Last resort: pull and shape the owner's repositories through REST.
    rest_repos_table <- function(org) {
      if (verbose) {
        cli::cli_alert_info("Switching to REST API")
        show_message(
          host = private$host_name,
          engine = "rest",
          scope = org,
          information = paste0("Pulling repositories ", cli_icons$repo)
        )
      }
      rest_engine <- private$engines$rest
      rest_engine$get_repos_from_org(
        org = org,
        add_languages = add_languages,
        output = "full_table",
        verbose = verbose
      ) |>
        rest_engine$prepare_repos_table(
          org = org
        )
    }

    gitstats_map(private$orgs, function(org) {
      owner_type <- attr(org, "type") %||% "organization"
      if (!private$scan_all && verbose) {
        show_message(
          host = private$host_name,
          engine = "graphql",
          scope = url_decode(org),
          information = paste0("Pulling repositories ", cli_icons$repo)
        )
      }
      repos_from_org <- graphql_engine$get_repos_from_org(
        org = url_decode(org),
        owner_type = owner_type,
        verbose = verbose
      )
      if (inherits(repos_from_org, "graphql_error") &&
        identical(owner_type, "organization")) {
        if (verbose) {
          cli::cli_alert_info("Switching to per-repo GraphQL queries...")
          show_message(
            host = private$host_name,
            engine = "graphql",
            scope = url_decode(org),
            information = paste0(
              "Pulling repositories per-repo ", cli_icons$repo
            )
          )
        }
        repos_from_org <- graphql_engine$get_repos_from_org_per_repo(
          org = url_decode(org),
          verbose = verbose
        )
      }
      # Still an error after the GraphQL attempts: use REST.
      if (inherits(repos_from_org, "graphql_error")) {
        return(rest_repos_table(org))
      }
      graphql_repos_table(repos_from_org, org)
    }, .progress = set_progress_bar(progress, private)) |>
      purrr::list_rbind()
  }
},

# Override parent to query repos directly by fullpath instead of
# fetching all repos from org/user and filtering client-side.
# The parent's approach is slow for GitLab because the repos_by_user
Expand Down
106 changes: 0 additions & 106 deletions tests/testthat/_snaps/01-get_repos-GitLab.md

This file was deleted.

Loading