-
Notifications
You must be signed in to change notification settings - Fork 65
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5228 from GeorgianaElena/maap-cluster
maap: new cluster
- Loading branch information
Showing
9 changed files
with
452 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
name: maap | ||
provider: aws # https://916098889494.signin.aws.amazon.com/console | ||
aws: | ||
key: enc-deployer-credentials.secret.json | ||
clusterType: eks | ||
clusterName: maap | ||
region: us-west-2 | ||
billing: | ||
paid_by_us: false | ||
support: | ||
helm_chart_values_files: | ||
- support.values.yaml | ||
- enc-support.secret.values.yaml | ||
hubs: | ||
[] | ||
# Uncomment the lines below once the support infrastructure has been deployed and | ||
# you are ready to add the first hub | ||
|
||
# - name: staging | ||
# # Tip: consider changing this to something more human friendly | ||
# display_name: "maap - staging" | ||
# domain: staging.maap.2i2c.cloud | ||
# helm_chart: basehub | ||
# helm_chart_values_files: | ||
# - common.values.yaml | ||
# - staging.values.yaml | ||
# - enc-staging.secret.values.yaml | ||
|
||
# - name: prod | ||
# # Tip: consider changing this to something more human friendly | ||
# display_name: "maap - prod" | ||
# domain: prod.maap.2i2c.cloud | ||
# helm_chart: basehub | ||
# helm_chart_values_files: | ||
# - common.values.yaml | ||
# - prod.values.yaml | ||
# - enc-prod.secret.values.yaml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
{ | ||
"AccessKey": { | ||
"AccessKeyId": "ENC[AES256_GCM,data:JMiFl1UnzusCQNlEOBsYvHa+9Uo=,iv:CC0kCAIAbQXtJE4aWfvXd63FWVSuO9To2L8aKkHRgo4=,tag:r2ZlXvm+UtsVyim0WI0M9Q==,type:str]", | ||
"SecretAccessKey": "ENC[AES256_GCM,data:w6Agme4BM109uRDH2CXIp9ffqeD6xXe/Rw6ed2X8uN42CecK1vamNQ==,iv:7eEROA5OrThNMgq9dsHeVyFFsSUbksmt1kA0f5dBDXA=,tag:5UD9cGGNEKvw20Cril4evw==,type:str]", | ||
"UserName": "ENC[AES256_GCM,data:GcAK1BJTZVmJGoVxeRb4zErA7RA371Y=,iv:6udAmDeSfJ2DO8j+/aINVF4PSjhQs+j5BxBSA2llB9Y=,tag:zYLlltSLTCH01wxrr5mffg==,type:str]" | ||
}, | ||
"sops": { | ||
"kms": null, | ||
"gcp_kms": [ | ||
{ | ||
"resource_id": "projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs", | ||
"created_at": "2024-12-04T12:21:40Z", | ||
"enc": "CiUA4OM7eOtAu8gt5nq+Tr+m64LsqMU7YruHfYzFWFswrGfKO5SgEkkAnGhyNghFbi9rWO0BUsWs199nUCTeQOOebtO8KFEMrbH5bejuZDyjRar2fU3WyUKxlBRuywgZySqZgJ9Ut+LDL+c2LdWZD+Qz" | ||
} | ||
], | ||
"azure_kv": null, | ||
"hc_vault": null, | ||
"age": null, | ||
"lastmodified": "2024-12-04T12:21:41Z", | ||
"mac": "ENC[AES256_GCM,data:kuyRynza4+RG2CGJyYQgUqjLAEZiCrjRvTpR/ciO0yKoRhFzykkbg12J/1y4M4eqlsezvUfyqE+EUtsBaISH1mg8nIuchHi6sRz9XAjQeLX3cwrEPlItH7sUjjGOTbRhcHna+zXVoM2q6gxIpEdNaNq/vPtAKs9TGCRRkw1NfSQ=,iv:RvP7hU6/6kJOBStTO5FEACDPwDA5tBYvjEptdGDRcOA=,tag:as4VS4owv5yZ2c0s+lbZ8A==,type:str]", | ||
"pgp": null, | ||
"unencrypted_suffix": "_unencrypted", | ||
"version": "3.8.1" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
prometheusIngressAuthSecret: | ||
username: ENC[AES256_GCM,data:1Fs5zwh1wn4/8KWnSoswC/KiW/1jw8CJxUSnOLne6KRI1W9uftsJt43FmRdzQMqsiadc291Jo74/YWBFBC1khw==,iv:ouHNVDQcyfsHQ7zj144fVEfqQX7oIez0uLmCDeO47dw=,tag:MxKMSNP+DVTBdQbBRIxA+Q==,type:str] | ||
password: ENC[AES256_GCM,data:qtItFIiARguwpejHWHBDSoKOl4uilmXgEkC4nBonqqWoCkMBHBDFCAr7qbH+fwep+1+yNUkuDXKJE6l0zp/gqw==,iv:8Pcbr2lulRPc0wPYOtgLez2lBLa+PKfxmd/SA75VLpY=,tag:mzZukJ3yv+IPxxDO22O9Sg==,type:str] | ||
sops: | ||
kms: [] | ||
gcp_kms: | ||
- resource_id: projects/two-eye-two-see/locations/global/keyRings/sops-keys/cryptoKeys/similar-hubs | ||
created_at: "2024-12-04T11:17:12Z" | ||
enc: CiUA4OM7eInxKKOnVMfm7f3ZEMUF8+vdF7TSx3WQo65HugraH6wMEkkAnGhyNpIACP7jUyAu/WPOXEmSwhwAXVaQGCMbgWbeuh0A+qvSUieMHE53t/VCgGa5n0Dnitr/jqchmhNaJQfs4GyoxgF3RbAp | ||
azure_kv: [] | ||
hc_vault: [] | ||
age: [] | ||
lastmodified: "2024-12-04T11:17:12Z" | ||
mac: ENC[AES256_GCM,data:9hrfgDF4tkpynItWcIkFTIGF8GRxeCXm0vcdMwcuNAx4E/vC/WMKxES3LFK2ygNzSljKZ3C76F3ipHjEioognquZQoEZWF22tAcJHFfc1VGa9iR6Dh22z4X33UcEZFELXBDJUPI01YWEOybqx74Khd13Yo8ht61vnUsDEbvEPTY=,iv:EwWG5H90WIEoX1T46DDaSvascSafppbtRvQPW9byerY=,tag:wDIatpNvUyHBzLSqzhabkQ==,type:str] | ||
pgp: [] | ||
unencrypted_suffix: _unencrypted | ||
version: 3.8.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
prometheusIngressAuthSecret: | ||
enabled: true | ||
|
||
prometheus: | ||
server: | ||
ingress: | ||
enabled: true | ||
hosts: | ||
- prometheus.maap.2i2c.cloud | ||
tls: | ||
- secretName: prometheus-tls | ||
hosts: | ||
- prometheus.maap.2i2c.cloud | ||
|
||
grafana: | ||
grafana.ini: | ||
server: | ||
root_url: https://grafana.maap.2i2c.cloud/ | ||
auth.github: | ||
enabled: true | ||
allowed_organizations: 2i2c-org | ||
ingress: | ||
hosts: | ||
- grafana.maap.2i2c.cloud | ||
tls: | ||
- secretName: grafana-tls | ||
hosts: | ||
- grafana.maap.2i2c.cloud | ||
|
||
aws-ce-grafana-backend: | ||
enabled: true | ||
envBasedConfig: | ||
clusterName: maap | ||
serviceAccount: | ||
annotations: | ||
eks.amazonaws.com/role-arn: arn:aws:iam::916098889494:role/aws_ce_grafana_backend_iam_role | ||
|
||
cluster-autoscaler: | ||
enabled: true | ||
autoDiscovery: | ||
clusterName: maap | ||
awsRegion: us-west-2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,262 @@ | ||
/* | ||
This file is a jsonnet template of an eksctl cluster configuration file, | ||
which is used with the eksctl CLI to both initialize and update an AWS EKS | ||
based cluster. | ||
This file was in turn generated from eksctl/template.jsonnet, which is | ||
worth comparing against to track changes over time. | ||
To use jsonnet to generate an eksctl configuration file from this, do: | ||
jsonnet maap.jsonnet > maap.eksctl.yaml | ||
References: | ||
- https://eksctl.io/usage/schema/ | ||
*/ | ||
local ng = import "./libsonnet/nodegroup.jsonnet"; | ||
|
||
// place all cluster nodes here | ||
local clusterRegion = "us-west-2"; | ||
local masterAzs = ["us-west-2a", "us-west-2b", "us-west-2c"]; | ||
local nodeAz = "us-west-2a"; | ||
|
||
// Node definitions for notebook nodes. Config here is merged | ||
// with our notebook node definition. | ||
// A `node.kubernetes.io/instance-type label is added, so pods | ||
// can request a particular kind of node with a nodeSelector | ||
local notebookNodes = [ | ||
// staging | ||
{ | ||
instanceType: "r5.xlarge", | ||
namePrefix: "nb-staging", | ||
labels+: { "2i2c/hub-name": "staging" }, | ||
tags+: { "2i2c:hub-name": "staging" }, | ||
}, | ||
{ | ||
instanceType: "r5.4xlarge", | ||
namePrefix: "nb-staging", | ||
labels+: { "2i2c/hub-name": "staging" }, | ||
tags+: { "2i2c:hub-name": "staging" }, | ||
}, | ||
{ | ||
instanceType: "r5.16xlarge", | ||
namePrefix: "nb-staging", | ||
labels+: { "2i2c/hub-name": "staging" }, | ||
tags+: { "2i2c:hub-name": "staging" }, | ||
}, | ||
// prod | ||
{ | ||
instanceType: "r5.xlarge", | ||
namePrefix: "nb-prod", | ||
labels+: { "2i2c/hub-name": "prod" }, | ||
tags+: { "2i2c:hub-name": "prod" }, | ||
}, | ||
{ | ||
instanceType: "r5.4xlarge", | ||
namePrefix: "nb-prod", | ||
labels+: { "2i2c/hub-name": "prod" }, | ||
tags+: { "2i2c:hub-name": "prod" }, | ||
}, | ||
{ | ||
instanceType: "r5.16xlarge", | ||
namePrefix: "nb-prod", | ||
labels+: { "2i2c/hub-name": "prod" }, | ||
tags+: { "2i2c:hub-name": "prod" }, | ||
}, | ||
// gpus | ||
{ | ||
instanceType: "g4dn.xlarge", | ||
namePrefix: "gpu-staging", | ||
labels+: { "2i2c/hub-name": "staging" }, | ||
tags+: { | ||
"2i2c:hub-name": "staging", | ||
"k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1" | ||
}, | ||
taints+: { | ||
"nvidia.com/gpu": "present:NoSchedule" | ||
}, | ||
// Allow provisioning GPUs across all AZs, to prevent situation where all | ||
// GPUs in a single AZ are in use and no new nodes can be spawned | ||
availabilityZones: masterAzs, | ||
}, | ||
{ | ||
instanceType: "g4dn.xlarge", | ||
namePrefix: "gpu-prod", | ||
labels+: { "2i2c/hub-name": "prod" }, | ||
tags+: { | ||
"2i2c:hub-name": "prod", | ||
"k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1" | ||
}, | ||
taints+: { | ||
"nvidia.com/gpu": "present:NoSchedule" | ||
}, | ||
// Allow provisioning GPUs across all AZs, to prevent situation where all | ||
// GPUs in a single AZ are in use and no new nodes can be spawned | ||
availabilityZones: masterAzs, | ||
}, | ||
]; | ||
|
||
local daskNodes = [ | ||
// Node definitions for dask worker nodes. Config here is merged | ||
// with our dask worker node definition, which uses spot instances. | ||
// A `node.kubernetes.io/instance-type label is set to the name of the | ||
// *first* item in instanceDistribution.instanceTypes, to match | ||
// what we do with notebook nodes. Pods can request a particular | ||
// kind of node with a nodeSelector | ||
// | ||
// A not yet fully established policy is being developed about using a single | ||
// node pool, see https://github.com/2i2c-org/infrastructure/issues/2687. | ||
// | ||
{ | ||
namePrefix: "dask-staging", | ||
labels+: { "2i2c/hub-name": "staging" }, | ||
tags+: { "2i2c:hub-name": "staging" }, | ||
instancesDistribution+: { instanceTypes: ["r5.4xlarge"] } | ||
}, | ||
{ | ||
namePrefix: "dask-prod", | ||
labels+: { "2i2c/hub-name": "prod" }, | ||
tags+: { "2i2c:hub-name": "prod" }, | ||
instancesDistribution+: { instanceTypes: ["r5.4xlarge"] } | ||
}, | ||
]; | ||
|
||
|
||
{ | ||
apiVersion: 'eksctl.io/v1alpha5', | ||
kind: 'ClusterConfig', | ||
metadata+: { | ||
name: "maap", | ||
region: clusterRegion, | ||
version: "1.30", | ||
tags+: { | ||
"ManagedBy": "2i2c", | ||
"2i2c.org/cluster-name": $.metadata.name, | ||
}, | ||
}, | ||
availabilityZones: masterAzs, | ||
iam: { | ||
withOIDC: true, | ||
}, | ||
// If you add an addon to this config, run the create addon command. | ||
// | ||
// eksctl create addon --config-file=maap.eksctl.yaml | ||
// | ||
addons: [ | ||
{ version: "latest", tags: $.metadata.tags } + addon | ||
for addon in | ||
[ | ||
{ name: "coredns" }, | ||
{ name: "kube-proxy" }, | ||
{ | ||
// vpc-cni is a Amazon maintained container networking interface | ||
// (CNI), where a CNI is required for k8s networking. The aws-node | ||
// DaemonSet in kube-system stems from installing this. | ||
// | ||
// Related docs: https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/network-plugins/ | ||
// https://docs.aws.amazon.com/eks/latest/userguide/managing-vpc-cni.html | ||
// | ||
name: "vpc-cni", | ||
attachPolicyARNs: ["arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"], | ||
# FIXME: enabling network policy enforcement didn't work as of | ||
# August 2024, what's wrong isn't clear. | ||
# | ||
# configurationValues ref: https://github.com/aws/amazon-vpc-cni-k8s/blob/HEAD/charts/aws-vpc-cni/values.yaml | ||
configurationValues: ||| | ||
enableNetworkPolicy: "false" | ||
|||, | ||
}, | ||
{ | ||
// aws-ebs-csi-driver ensures that our PVCs are bound to PVs that | ||
// couple to AWS EBS based storage, without it expect to see pods | ||
// mounting a PVC failing to schedule and PVC resources that are | ||
// unbound. | ||
// | ||
// Related docs: https://docs.aws.amazon.com/eks/latest/userguide/managing-ebs-csi.html | ||
// | ||
name: "aws-ebs-csi-driver", | ||
wellKnownPolicies: { | ||
ebsCSIController: true, | ||
}, | ||
# configurationValues ref: https://github.com/kubernetes-sigs/aws-ebs-csi-driver/blob/HEAD/charts/aws-ebs-csi-driver/values.yaml | ||
configurationValues: ||| | ||
defaultStorageClass: | ||
enabled: true | ||
|||, | ||
}, | ||
] | ||
], | ||
nodeGroups: [ | ||
n + {clusterName: $.metadata.name} for n in | ||
[ | ||
ng + { | ||
namePrefix: 'core', | ||
nameSuffix: 'a', | ||
nameIncludeInstanceType: false, | ||
availabilityZones: [nodeAz], | ||
ssh: { | ||
publicKeyPath: 'ssh-keys/maap.key.pub' | ||
}, | ||
instanceType: "r5.xlarge", | ||
minSize: 1, | ||
maxSize: 6, | ||
labels+: { | ||
"hub.jupyter.org/node-purpose": "core", | ||
"k8s.dask.org/node-purpose": "core", | ||
}, | ||
tags+: { | ||
"2i2c:node-purpose": "core" | ||
}, | ||
}, | ||
] + [ | ||
ng + { | ||
namePrefix: 'nb', | ||
availabilityZones: [nodeAz], | ||
minSize: 0, | ||
maxSize: 500, | ||
instanceType: n.instanceType, | ||
ssh: { | ||
publicKeyPath: 'ssh-keys/maap.key.pub' | ||
}, | ||
labels+: { | ||
"hub.jupyter.org/node-purpose": "user", | ||
"k8s.dask.org/node-purpose": "scheduler" | ||
}, | ||
taints+: { | ||
"hub.jupyter.org_dedicated": "user:NoSchedule", | ||
"hub.jupyter.org/dedicated": "user:NoSchedule", | ||
}, | ||
tags+: { | ||
"2i2c:node-purpose": "user" | ||
}, | ||
} + n for n in notebookNodes | ||
] + ( if daskNodes != null then | ||
[ | ||
ng + { | ||
namePrefix: 'dask', | ||
availabilityZones: [nodeAz], | ||
minSize: 0, | ||
maxSize: 500, | ||
ssh: { | ||
publicKeyPath: 'ssh-keys/maap.key.pub' | ||
}, | ||
labels+: { | ||
"k8s.dask.org/node-purpose": "worker" | ||
}, | ||
taints+: { | ||
"k8s.dask.org_dedicated" : "worker:NoSchedule", | ||
"k8s.dask.org/dedicated" : "worker:NoSchedule", | ||
}, | ||
tags+: { | ||
"2i2c:node-purpose": "worker" | ||
}, | ||
instancesDistribution+: { | ||
onDemandBaseCapacity: 0, | ||
onDemandPercentageAboveBaseCapacity: 0, | ||
spotAllocationStrategy: "capacity-optimized", | ||
}, | ||
} + n for n in daskNodes | ||
] else [] | ||
) | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIEKAnc9uvG/u94tT0iBOzgpcIbtzYqn18Mrm0MGGscJc [email protected] |
Oops, something went wrong.