Skip to content

Commit

Permalink
Merge pull request #5198 from edwintorok/master
Browse files Browse the repository at this point in the history
CP-43755: parallelise PAM logins
  • Loading branch information
edwintorok authored Oct 12, 2023
2 parents 41773b4 + c5b0970 commit 239b358
Show file tree
Hide file tree
Showing 17 changed files with 194 additions and 12 deletions.
2 changes: 1 addition & 1 deletion ocaml/auth/dune
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
(names xa_auth xa_auth_stubs)
)
(name pam)
(c_library_flags -lpam)
(c_library_flags -lpam -lcrypt)
(wrapped false)
)

29 changes: 27 additions & 2 deletions ocaml/auth/xa_auth_stubs.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*/
/*
*/

/* must be at the beginning, it affects defines in other headers that cannot be reenabled later */
#define _GNU_SOURCE

#include <unistd.h>
#include <stdlib.h>
Expand Down Expand Up @@ -70,6 +71,30 @@ CAMLprim value stub_XA_mh_chpasswd(value username, value new_password){
CAMLreturn(ret);
}

#include <crypt.h>
/* 'constructor' attribute will ensure this function gets call early during program startup. */
void __attribute__((constructor)) stub_XA_workaround(void)
{
struct crypt_data data;
memset(&data, 0, sizeof(data));

/* Initialize and load crypt library used for password hashing.
This library is loaded and initialized at [pam_authenticate] time and not at [pam_start].
If it detects a race condition (multiple threads with their own PAM contexts trying to call 'crypt_r'),
then it does [sleep 1] as can be seen in this call trace:
[pam_authenticate -> crypt_r -> __sha512_crypt_r -> freebl_InitVector -> freebl_RunLoaderOnce -> sleep].
As a workaround link with 'libcrypt' and call 'crypt_r' with a setting that will make it take tha sha512 route
to ensure that the library gets initialized while we're still single-threaded and stays loaded and initialized.
'$6$' is the setting for sha512 according to crypt(5).
Upstream has switched to using libxcrypt instead which doesn't have these problems, when we switch then
this workaround can be dropped.
*/
crypt_r("", "$6$", &data);
}

/*
* Local variables:
* mode: C
Expand Down
2 changes: 1 addition & 1 deletion ocaml/idl/datamodel_common.ml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ open Datamodel_roles
to leave a gap for potential hotfixes needing to increment the schema version.*)
let schema_major_vsn = 5

let schema_minor_vsn = 766
let schema_minor_vsn = 767

(* Historical schema versions just in case this is useful later *)
let rio_schema_major_vsn = 5
Expand Down
10 changes: 10 additions & 0 deletions ocaml/idl/datamodel_lifecycle.ml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ let prototyped_of_field = function
Some "23.9.0"
| "pool", "telemetry_uuid" ->
Some "23.9.0"
| "pool", "ext_auth_max_threads" ->
Some "23.26.0-next"
| "pool", "local_auth_max_threads" ->
Some "23.26.0-next"
| "pool", "coordinator_bias" ->
Some "22.37.0"
| "pool", "migration_compression" ->
Expand Down Expand Up @@ -91,8 +95,14 @@ let prototyped_of_message = function
Some "22.26.0"
| "VTPM", "create" ->
Some "22.26.0"
| "host", "apply_recommended_guidances" ->
Some "23.18.0"
| "host", "set_https_only" ->
Some "22.27.0"
| "pool", "set_ext_auth_max_threads" ->
Some "23.26.0-next"
| "pool", "set_local_auth_max_threads" ->
Some "23.26.0-next"
| "pool", "set_update_sync_enabled" ->
Some "23.18.0"
| "pool", "configure_update_sync" ->
Expand Down
18 changes: 18 additions & 0 deletions ocaml/idl/datamodel_pool.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1103,6 +1103,16 @@ let set_update_sync_enabled =
]
~allowed_roles:_R_POOL_OP ()

let set_local_auth_max_threads =
call ~flags:[`Session] ~name:"set_local_auth_max_threads" ~lifecycle:[]
~params:[(Ref _pool, "self", "The pool"); (Int, "value", "The new maximum")]
~allowed_roles:_R_POOL_OP ()

let set_ext_auth_max_threads =
call ~flags:[`Session] ~name:"set_ext_auth_max_threads" ~lifecycle:[]
~params:[(Ref _pool, "self", "The pool"); (Int, "value", "The new maximum")]
~allowed_roles:_R_POOL_OP ()

(** A pool class *)
let t =
create_obj ~in_db:true ~in_product_since:rel_rio ~in_oss_since:None
Expand Down Expand Up @@ -1189,6 +1199,8 @@ let t =
; reset_telemetry_uuid
; configure_update_sync
; set_update_sync_enabled
; set_local_auth_max_threads
; set_ext_auth_max_threads
]
~contents:
([uid ~in_oss_since:None _pool]
Expand Down Expand Up @@ -1419,6 +1431,12 @@ let t =
"coordinator_bias"
"true if bias against pool master when scheduling vms is enabled, \
false otherwise"
; field ~qualifier:StaticRO ~ty:Int ~default_value:(Some (VInt 8L))
~lifecycle:[] "local_auth_max_threads"
"Maximum number of threads to use for PAM authentication"
; field ~qualifier:StaticRO ~ty:Int ~default_value:(Some (VInt 1L))
~lifecycle:[] "ext_auth_max_threads"
"Maximum number of threads to use for external (AD) authentication"
; field ~lifecycle:[] ~qualifier:DynamicRO ~ty:(Ref _secret)
~default_value:(Some (VRef null_ref)) "telemetry_uuid"
"The UUID of the pool for identification of telemetry data"
Expand Down
2 changes: 1 addition & 1 deletion ocaml/idl/dune
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
; use the binary promoted file from the source dir (not the build dir) that has
; the correct version number embedded
(rule
(deps gen_lifecycle.exe)
(deps gen_lifecycle.exe (universe))
(action (with-stdout-to datamodel_lifecycle.ml.generated (system %{project_root}/../../ocaml/idl/gen_lifecycle.exe))))

; 'diff' handles promotion too, see https://dune.readthedocs.io/en/stable/concepts.html?highlight=diffing#diffing-and-promotion
Expand Down
2 changes: 1 addition & 1 deletion ocaml/idl/schematest.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ let hash x = Digest.string x |> Digest.to_hex

(* BEWARE: if this changes, check that schema has been bumped accordingly in
ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *)
let last_known_schema_hash = "f4d0ee4f27d7fc0377add334197f6cd8"
let last_known_schema_hash = "95077aed35b715c362c7cf98902de578"

let current_schema_hash : string =
let open Datamodel_types in
Expand Down
3 changes: 2 additions & 1 deletion ocaml/tests/common/test_common.ml
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,8 @@ let make_pool ~__context ~master ?(name_label = "") ?(name_description = "")
~repository_proxy_url ~repository_proxy_username ~repository_proxy_password
~migration_compression ~coordinator_bias ~telemetry_uuid
~telemetry_frequency ~telemetry_next_collection ~last_update_sync
~update_sync_frequency ~update_sync_day ~update_sync_enabled ;
~local_auth_max_threads:8L ~ext_auth_max_threads:8L ~update_sync_frequency
~update_sync_day ~update_sync_enabled ;
pool_ref

let default_sm_features =
Expand Down
3 changes: 2 additions & 1 deletion ocaml/xapi/dbsync_master.ml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ let create_pool_record ~__context =
~telemetry_next_collection:Xapi_stdext_date.Date.epoch
~last_update_sync:Xapi_stdext_date.Date.epoch
~update_sync_frequency:`weekly ~update_sync_day:0L
~update_sync_enabled:false
~update_sync_enabled:false ~local_auth_max_threads:8L
~ext_auth_max_threads:1L

let set_master_ip ~__context =
let ip =
Expand Down
52 changes: 52 additions & 0 deletions ocaml/xapi/locking_helpers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -223,3 +223,55 @@ module Named_mutex = struct
finally f (fun () -> Thread_state.released r)
)
end

module Semaphore = struct
type t = {
name: string
; sem: Semaphore.Counting.t
; max_lock: Mutex.t
; mutable max: int
}

let create name =
let max = 1 in
{name; sem= Semaphore.Counting.make max; max_lock= Mutex.create (); max}

let execute (x : t) f =
Semaphore.Counting.acquire x.sem ;
let finally () = Semaphore.Counting.release x.sem in
Fun.protect ~finally f

let set_max t n =
if n < 1 then
Fmt.invalid_arg
"The semaphore '%s' must have at least 1 resource available, \
requested: %d"
t.name n ;
(* ensure only 1 thread attempts to modify the maximum at a time, this is a slow path *)
with_lock t.max_lock @@ fun () ->
D.debug
"Setting semaphore '%s' to have at most %d resource (current max: %d)"
t.name n t.max ;

(* requested to decrease maximum, this might block *)
while t.max > n do
if not (Semaphore.Counting.try_acquire t.sem) then (
D.debug
"Semaphore '%s' has >%d resources in use, waiting until some of them \
are released"
t.name n ;
(* may block *)
Semaphore.Counting.acquire t.sem
) ;
t.max <- t.max - 1
done ;

(* requested to increase maximum, this doesn't block *)
while t.max < n do
(* doesn't block, semaphores can also be acquired and released from any thread *)
Semaphore.Counting.release t.sem ;
t.max <- t.max + 1
done ;
D.debug "Semaphore '%s' updated to have %d resources (%d in use)" t.name n
(Semaphore.Counting.get_value t.sem)
end
24 changes: 24 additions & 0 deletions ocaml/xapi/locking_helpers.mli
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,27 @@ module Named_mutex : sig

val execute : t -> (unit -> 'a) -> 'a
end

module Semaphore : sig
(** a semaphore that allows at most N operations to proceed at a time *)
type t

val create : string -> t
(** [create name] creates a semaphore with an initial count of 1.
@see {!set_max} *)

val execute : t -> (unit -> 'a) -> 'a
(** [execute sem f] executes [f] after acquiring the [sem]aphore.
Releases the semaphore on all codepaths. *)

val set_max : t -> int -> unit
(** [set_max sem n] sets the semaphore's maximum count to [n].
Once all threads that are inside {!execute} release the semaphore then the semaphore will accept
at most [n] {!execute} calls in parallel until it blocks.
Increasing a semaphore's count is done immediately,
whereas decreasing the count may block until sufficient number of threads release the semaphore.
It is safe to call this function in parallel.
*)
end
12 changes: 12 additions & 0 deletions ocaml/xapi/message_forwarding.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1130,6 +1130,18 @@ functor
(pool_uuid ~__context self)
value ;
Local.Pool.set_update_sync_enabled ~__context ~self ~value

let set_local_auth_max_threads ~__context ~self ~value =
info "%s: pool='%s' value='%Ld'" __FUNCTION__
(pool_uuid ~__context self)
value ;
Local.Pool.set_local_auth_max_threads ~__context ~self ~value

let set_ext_auth_max_threads ~__context ~self ~value =
info "%s: pool='%s' value='%Ld'" __FUNCTION__
(pool_uuid ~__context self)
value ;
Local.Pool.set_ext_auth_max_threads ~__context ~self ~value
end

module VM = struct
Expand Down
13 changes: 13 additions & 0 deletions ocaml/xapi/xapi.ml
Original file line number Diff line number Diff line change
Expand Up @@ -886,7 +886,16 @@ let server_init () =
has just started up we want to wait forever for the master to appear. (See CA-25481) *)
let initial_connection_timeout = !Master_connection.connection_timeout in
Master_connection.connection_timeout := -1. ;

(* never timeout *)
let initialize_auth_semaphores ~__context =
let pool = Helpers.get_pool ~__context in
Xapi_session.set_local_auth_max_threads
(Db.Pool.get_local_auth_max_threads ~__context ~self:pool) ;
Xapi_session.set_ext_auth_max_threads
(Db.Pool.get_ext_auth_max_threads ~__context ~self:pool)
in

let call_extauth_hook_script_after_xapi_initialize ~__context =
(* CP-709 *)
(* in each initialization of xapi, extauth_hook script must be called in case this host was *)
Expand Down Expand Up @@ -1390,6 +1399,10 @@ let server_init () =
, []
, fun () -> Storage_migrate.killall ~dbg:"xapi init"
)
; ( "Initialize threaded authentication"
, [Startup.NoExnRaising]
, fun () -> initialize_auth_semaphores ~__context
)
; (* Start the external authentification plugin *)
( "Calling extauth_hook_script_before_xapi_initialize"
, [Startup.NoExnRaising]
Expand Down
6 changes: 6 additions & 0 deletions ocaml/xapi/xapi_pool.ml
Original file line number Diff line number Diff line change
Expand Up @@ -3669,3 +3669,9 @@ let configure_update_sync ~__context ~self ~update_sync_frequency
let set_update_sync_enabled ~__context ~self ~value =
Pool_periodic_update_sync.set_enabled ~__context ~value ;
Db.Pool.set_update_sync_enabled ~__context ~self ~value

let set_local_auth_max_threads ~__context:_ ~self:_ ~value =
Xapi_session.set_local_auth_max_threads value

let set_ext_auth_max_threads ~__context:_ ~self:_ ~value =
Xapi_session.set_ext_auth_max_threads value
6 changes: 6 additions & 0 deletions ocaml/xapi/xapi_pool.mli
Original file line number Diff line number Diff line change
Expand Up @@ -408,3 +408,9 @@ val configure_update_sync :

val set_update_sync_enabled :
__context:Context.t -> self:API.ref_pool -> value:bool -> unit

val set_local_auth_max_threads :
__context:Context.t -> self:API.ref_pool -> value:int64 -> unit

val set_ext_auth_max_threads :
__context:Context.t -> self:API.ref_pool -> value:int64 -> unit
18 changes: 14 additions & 4 deletions ocaml/xapi/xapi_session.ml
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,17 @@ let local_superuser = "root"

let xapi_internal_originator = "xapi"

let serialize_auth = Mutex.create ()
let throttle_auth_internal = Locking_helpers.Semaphore.create "Internal auth"

let throttle_auth_external = Locking_helpers.Semaphore.create "External auth"

let with_throttle = Locking_helpers.Semaphore.execute

let set_local_auth_max_threads n =
Locking_helpers.Semaphore.set_max throttle_auth_internal @@ Int64.to_int n

let set_ext_auth_max_threads n =
Locking_helpers.Semaphore.set_max throttle_auth_external @@ Int64.to_int n

let wipe_string_contents str =
for i = 0 to Bytes.length str - 1 do
Expand All @@ -272,13 +282,13 @@ let wipe_params_after_fn params fn =
with e -> wipe params ; raise e

let do_external_auth uname pwd =
with_lock serialize_auth (fun () ->
with_throttle throttle_auth_external (fun () ->
(Ext_auth.d ()).authenticate_username_password uname
(Bytes.unsafe_to_string pwd)
)

let do_local_auth uname pwd =
with_lock serialize_auth (fun () ->
with_throttle throttle_auth_internal (fun () ->
try Pam.authenticate uname (Bytes.unsafe_to_string pwd)
with Failure msg ->
raise
Expand All @@ -288,7 +298,7 @@ let do_local_auth uname pwd =
)

let do_local_change_password uname newpwd =
with_lock serialize_auth (fun () ->
with_throttle throttle_auth_internal (fun () ->
Pam.change_password uname (Bytes.unsafe_to_string newpwd)
)

Expand Down
4 changes: 4 additions & 0 deletions ocaml/xapi/xapi_session.mli
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,7 @@ val get_failed_login_stats : unit -> string option

val get_total_sessions : unit -> Int64.t
(** Retrieves the amount of sessions opened since the last time xapi was started *)

val set_local_auth_max_threads : int64 -> unit

val set_ext_auth_max_threads : int64 -> unit

0 comments on commit 239b358

Please sign in to comment.