diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll index 7348bfc699bb..e8584f57adf3 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll @@ -352,8 +352,7 @@ private Expr getUltimateReceiver(MethodCall call) { } // A call to `find`, `where`, etc. that may return active record model object(s) -private class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode -{ +class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode { private ActiveRecordModelClass cls; ActiveRecordModelFinderCall() { diff --git a/ruby/ql/src/queries/performance/CouldBeAsync.qhelp b/ruby/ql/src/queries/performance/CouldBeAsync.qhelp new file mode 100644 index 000000000000..280ac776799c --- /dev/null +++ b/ruby/ql/src/queries/performance/CouldBeAsync.qhelp @@ -0,0 +1,24 @@ + + + + +

+When an AcriveRecord query is executed synchronously, the application is blocked until the query completes. This can lead to poor performance, especially when the query is slow or when many queries are executed in sequence. This query identifies ActiveRecord queries that could be executed asynchronously to improve performance. +Specifically, this query identifies ActiveRecord queries that are executed in a loop. If each query is independent of the others, the queries could be executed in parallel by using the built-in Ruby load_async method. +In those cases, where the query includes a pluck call, the query could be executed asynchronously by using the async_pluck method. +

+
+ +

If possible, split the loop into two. The first creates a map of promises resolving to the async query results. The second runs throuhg this map, waiting on each promise, and does whatever the original loop did with the result of the query.

+
+ +

The following (suboptimal) example code executes a series of ActiveRecord queries in a loop. The queries are independent of each other, so they could be executed in parallel to improve performance.

+ +

To be able to fetch the necessary information asynchronously, we first pull it out into its own (implicit) loop: + +

We can now use the async_pluck method to execute the queries in parallel.

+ +
+
\ No newline at end of file diff --git a/ruby/ql/src/queries/performance/CouldBeAsync.ql b/ruby/ql/src/queries/performance/CouldBeAsync.ql new file mode 100644 index 000000000000..b4e4ea686d96 --- /dev/null +++ b/ruby/ql/src/queries/performance/CouldBeAsync.ql @@ -0,0 +1,93 @@ +/** + * @name Could be async + * @description Use `ActiveRecord::Relation#load_async` to load records asynchronously. + * @kind problem + * @problem.severity info + * @precision high + * @id rb/could-be-async + * @tags performance + */ + +import ruby +private import codeql.ruby.AST +import codeql.ruby.Concepts +import codeql.ruby.frameworks.ActiveRecord +private import codeql.ruby.TaintTracking + +string loopMethodName() { + result in [ + "each", "reverse_each", "map", "map!", "foreach", "flat_map", "in_batches", "one?", "all?", + "collect", "collect!", "select", "select!", "reject", "reject!" + ] +} + +class LoopingCall extends DataFlow::CallNode { + DataFlow::CallableNode loopBlock; + + LoopingCall() { + this.getMethodName() = loopMethodName() and loopBlock = this.getBlock().asCallable() + } + + DataFlow::CallableNode getLoopBlock() { result = loopBlock } +} + +predicate happensInLoop(LoopingCall loop, DataFlow::CallNode e) { + loop.getLoopBlock().asCallableAstNode() = e.asExpr().getScope() +} + +predicate happensInOuterLoop(LoopingCall outerLoop, DataFlow::CallNode e) { + exists(LoopingCall innerLoop | + happensInLoop(outerLoop, innerLoop) and + happensInLoop(innerLoop, e) + ) +} + +predicate happensInInnermostLoop(LoopingCall loop, DataFlow::CallNode e) { + happensInLoop(loop, e) and + not happensInOuterLoop(loop, e) +} + +private class PluckCall extends ActiveRecordInstanceMethodCall { + PluckCall() { this.getMethodName() in ["pluck"] } + + ActiveRecordInstance chaines() { result = getChain(this) } +} + +private ActiveRecordInstance getChain(ActiveRecordInstanceMethodCall c) { + result = c.getInstance() + or + result = getChain(c.getInstance()) +} + +// The ActiveRecord instance is used to potentially control the loop +predicate usedInLoopControlGuard(ActiveRecordInstance ar, DataFlow::Node guard) { + TaintTracking::localTaint(ar, guard) and + guard = guardForLoopControl(_, _) +} + +// A guard for controlling the loop +DataFlow::Node guardForLoopControl(ConditionalExpr cond, Stmt control) { + result.asExpr().getAstNode() = cond.getCondition().getAChild*() and + ( + control.(MethodCall).getMethodName() = "raise" + or + control instanceof NextStmt + ) and + control = cond.getBranch(_).getAChild() +} + +from LoopingCall loop, DataFlow::CallNode call, string message +where + not call.getLocation().getFile().getAbsolutePath().matches("%test%") and + not call = any(PluckCall p).chaines() and + not usedInLoopControlGuard(call, _) and + happensInInnermostLoop(loop, call) and + ( + call instanceof ActiveRecordModelFinderCall and + not call.getMethodName() in ["new", "create"] and + message = "could be chained with load_async" + or + call instanceof PluckCall and + message = "could be async_pluck" + ) +select call, "This call happens inside $@, and " + message, loop, "this loop" diff --git a/ruby/ql/src/queries/performance/examples/async_pluck.rb b/ruby/ql/src/queries/performance/examples/async_pluck.rb new file mode 100644 index 000000000000..daedac75db6b --- /dev/null +++ b/ruby/ql/src/queries/performance/examples/async_pluck.rb @@ -0,0 +1,16 @@ +require 'async_pluck' + +# Preload User data in parallel +user_data = User.where(login: repo_names_by_owner.keys).async_pluck(:login, :id, :type).to_h do |login, id, type| + [login, { id: id, type: type == "User" ? "USER" : "ORGANIZATION" }] +end + +repo_names_by_owner.each do |owner_slug, repo_names| + owner_info = user_data[owner_slug] + owner_id = owner_info[:id] + owner_type = owner_info[:type] + rel_conditions = { owner_id: owner_id, name: repo_names } + + nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg + nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg +end \ No newline at end of file diff --git a/ruby/ql/src/queries/performance/examples/preload.rb b/ruby/ql/src/queries/performance/examples/preload.rb new file mode 100644 index 000000000000..19c0af583f7f --- /dev/null +++ b/ruby/ql/src/queries/performance/examples/preload.rb @@ -0,0 +1,14 @@ +# Preload User data +user_data = User.where(login: repo_names_by_owner.keys).pluck(:login, :id, :type).to_h do |login, id, type| + [login, { id: id, type: type == "User" ? "USER" : "ORGANIZATION" }] + end + + repo_names_by_owner.each do |owner_slug, repo_names| + owner_info = user_data[owner_slug] + owner_id = owner_info[:id] + owner_type = owner_info[:type] + rel_conditions = { owner_id: owner_id, name: repo_names } + + nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg + nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg + end \ No newline at end of file diff --git a/ruby/ql/src/queries/performance/examples/straight_loop.rb b/ruby/ql/src/queries/performance/examples/straight_loop.rb new file mode 100644 index 000000000000..4c64d3bb84f2 --- /dev/null +++ b/ruby/ql/src/queries/performance/examples/straight_loop.rb @@ -0,0 +1,8 @@ +repo_names_by_owner.map do |owner_slug, repo_names| + owner_id, owner_type = User.where(login: owner_slug).pluck(:id, :type).first + owner_type = owner_type == "User" ? "USER" : "ORGANIZATION" + rel_conditions = { owner_id: owner_id, name: repo_names } + + nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg + nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg + end \ No newline at end of file