diff --git a/pgdb01-cirrus/templates/alert-email.yaml b/pgdb01-cirrus/templates/alert-email.yaml new file mode 100644 index 0000000..51150d3 --- /dev/null +++ b/pgdb01-cirrus/templates/alert-email.yaml @@ -0,0 +1,34 @@ +apiVersion: monitoring.coreos.com/v1alpha1 +kind: AlertmanagerConfig +metadata: + name: gdex-app-team + namespace: rda + labels: + alertmanagerConfig: gdex + namespace: rda +spec: + route: + receiver: gdex-app-team + groupBy: + - alertname + groupWait: 10s + groupInterval: 1m + repeatInterval: 60m + matchers: + - name: namespace + value: rda + matchType: "=" + routes: + - receiver: "null" + matchers: + - name: alertname + value: InfoInhibitor + matchType: "=" + + receivers: + - name: gdex-app-team + emailConfigs: + - to: decs-info@ucar.edu + from: alertmanager@k8s.ucar.edu + smarthost: vdir.ucar.edu:25 + - name: "null" \ No newline at end of file diff --git a/pgdb01-cirrus/templates/alert-rule.yaml b/pgdb01-cirrus/templates/alert-rule.yaml new file mode 100644 index 0000000..15ed310 --- /dev/null +++ b/pgdb01-cirrus/templates/alert-rule.yaml @@ -0,0 +1,48 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: gdex-pg-replication-alerts + namespace: rda + labels: + team: gdex-app-team + release: kube-prometheus-stack +spec: + groups: + - name: pg.replication + interval: 60s + rules: + - alert: PGReplicationLagHigh + expr: | + cnpg_pg_replication_lag{namespace="rda"} > 100 + for: 15m + labels: + severity: warning + team: gdex-app-team + namespace: rda + annotations: + summary: "PostgresDB replication lag high on {{`{{ $labels.pod }}`}}" + description: "Replication lag is {{`{{ $value }}`}} WAL segments behind on {{`{{ $labels.pod }}`}} in cluster {{`{{ $labels.cluster }}`}}" + + - alert: PGReplicationBroken + expr: | + cnpg_pg_replication_streaming_replicas{namespace="rda"} == 0 + for: 5m + labels: + severity: critical + team: gdex-app-team + namespace: rda + annotations: + summary: "PostgresDB cluster replication broken for 
{{`{{ $labels.cluster }}`}}" + description: "Cluster {{`{{ $labels.cluster }}`}} has no streaming replicas. Replication may be broken." + + - alert: PGClusterNotHealthy + expr: | + cnpg_collector_up{namespace="rda"} == 0 + for: 5m + labels: + severity: critical + team: gdex-app-team + namespace: rda + annotations: + summary: "PostgresDB cluster {{`{{ $labels.cluster }}`}} is not healthy" + description: "The Postgres exporter for cluster {{`{{ $labels.cluster }}`}} is down, indicating cluster health issues" \ No newline at end of file diff --git a/pgdb01-cirrus/templates/backups_external_secret.yaml b/pgdb01-cirrus/templates/backups_external_secret.yaml new file mode 100644 index 0000000..f7dd7e6 --- /dev/null +++ b/pgdb01-cirrus/templates/backups_external_secret.yaml @@ -0,0 +1,25 @@ +{{- if .Values.db.backups.s3.enabled }} +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: backup-s3-creds + namespace: {{ .Release.Namespace }} +spec: + data: + - remoteRef: + key: {{ .Values.db.backups.s3.secretPath }} + property: access_key + secretKey: access_key + - remoteRef: + key: {{ .Values.db.backups.s3.secretPath }} + property: secret_key + secretKey: secret_key + refreshInterval: 1h + secretStoreRef: + kind: SecretStore + name: rda-ro + target: + creationPolicy: Owner + deletionPolicy: Retain + name: {{ .Values.db.backups.s3.secretName }} +{{- end -}} diff --git a/pgdb01-cirrus/templates/postgres_cluster.yaml b/pgdb01-cirrus/templates/postgres_cluster.yaml index 49242ca..de0defb 100644 --- a/pgdb01-cirrus/templates/postgres_cluster.yaml +++ b/pgdb01-cirrus/templates/postgres_cluster.yaml @@ -10,6 +10,34 @@ spec: instances: {{ .Values.db.instances }} storage: size: {{ .Values.db.size }} + resources: + limits: + cpu: {{ .Values.db.resource.limits.cpu | quote }} + memory: {{ .Values.db.resource.limits.memory }} + + {{- if .Values.db.backups }} + backup: + {{- if .Values.db.backups.volumeSnapshot }} + volumeSnapshot: + className: {{ 
.Values.db.backups.volumeSnapshot.snapshotClassName }} + {{- end }} + {{- if .Values.db.backups.s3.enabled }} + barmanObjectStore: + wal: + compression: bzip2 + data: + compression: bzip2 + destinationPath: {{ .Values.db.backups.s3.destinationPath }} + endpointURL: {{ .Values.db.backups.s3.endpointURL }} + s3Credentials: + accessKeyId: + name: {{ .Values.db.backups.s3.secretName }} + key: access_key + secretAccessKey: + name: {{ .Values.db.backups.s3.secretName }} + key: secret_key + {{- end }} + {{- end }} # Add TLS certificates for encrypted communication certificates: @@ -22,19 +50,14 @@ spec: # Configure postgres superuser from su_external_secret superuserSecret: name: "{{ .Values.db.name }}-superuser" - - # Allow outside hosts to connect to the database + postgresql: parameters: # Connection settings - listen_addresses: "*" - port: "5432" max_connections: "500" # SSL Configuration - ssl: "on" ssl_ciphers: "HIGH:!aNULL" - ssl_prefer_server_ciphers: "on" ssl_min_protocol_version: "TLSv1.3" # Memory settings @@ -55,23 +78,16 @@ spec: min_wal_size: "1GB" # Replication settings - max_wal_senders: "3" - max_replication_slots: "3" - wal_keep_size: "256" - max_slot_wal_keep_size: "-1" - hot_standby: "on" + max_wal_senders: "6" + max_replication_slots: "6" + wal_keep_size: "1024MB" + max_slot_wal_keep_size: "100GB" max_standby_archive_delay: "-1" max_standby_streaming_delay: "-1" # Logging settings - log_destination: "stderr" logging_collector: "on" - log_directory: "log" - log_filename: "postgresql-%Y-%m-%d_%H%M%S.log" - log_file_mode: "0644" log_rotation_age: "0" - log_rotation_size: "1GB" - log_truncate_on_rotation: "off" log_min_duration_statement: "120000" log_line_prefix: "%t %a [%p] " log_timezone: "America/Denver" @@ -87,7 +103,7 @@ spec: # Lock management max_locks_per_transaction: "1024" - + pg_hba: # Local connections with md5 authentication - local all root md5 @@ -107,4 +123,7 @@ spec: - host replication all 127.0.0.1/32 md5 # Remote replication - - host 
replication all 128.117.0.0/16 trust \ No newline at end of file + - host replication all 128.117.0.0/16 trust + + # Remote replication from Kubernetes pod network + - host replication all 10.0.0.0/16 trust diff --git a/pgdb01-cirrus/templates/su_external_secret.yaml b/pgdb01-cirrus/templates/su_external_secret.yaml index 8ceb3bf..3567a2e 100644 --- a/pgdb01-cirrus/templates/su_external_secret.yaml +++ b/pgdb01-cirrus/templates/su_external_secret.yaml @@ -1,4 +1,4 @@ -apiVersion: external-secrets.io/v1beta1 +apiVersion: external-secrets.io/v1 kind: ExternalSecret metadata: name: {{ .Values.db.name }}-superuser-esos @@ -6,11 +6,10 @@ metadata: spec: refreshInterval: 1h secretStoreRef: - name: user-ro + name: rda-ro kind: SecretStore target: name: {{ .Values.db.name }}-superuser - type: kubernetes.io/basic-auth data: - secretKey: username remoteRef: diff --git a/pgdb01-cirrus/values.yaml b/pgdb01-cirrus/values.yaml index 3bae528..0d63122 100644 --- a/pgdb01-cirrus/values.yaml +++ b/pgdb01-cirrus/values.yaml @@ -1,9 +1,18 @@ db: name: pgdb01 group: pgdb01 - instances: 3 - size: 5000Gi + instances: 2 + size: 8000Gi superUser: usernameKey: username passwordKey: password - secretPath: gdex/pgdb01 \ No newline at end of file + secretPath: gdex/pgdb01 + resource: + limits: + cpu: '16' + memory: 128Gi + + backups: + enabled: false + s3: + enabled: false diff --git a/pgdb02-cirrus/templates/alert-email.yaml b/pgdb02-cirrus/templates/alert-email.yaml new file mode 100644 index 0000000..cd5933c --- /dev/null +++ b/pgdb02-cirrus/templates/alert-email.yaml @@ -0,0 +1,34 @@ +apiVersion: monitoring.coreos.com/v1alpha1 +kind: AlertmanagerConfig +metadata: + name: gdex-app-team + namespace: rda + labels: + alertmanagerConfig: gdex + namespace: rda +spec: + route: + receiver: gdex-app-team + groupBy: + - alertname + groupWait: 10s + groupInterval: 1m + repeatInterval: 60m + matchers: + - name: namespace + value: rda + matchType: "=" + routes: + - receiver: "null" + matchers: + - 
name: alertname + value: InfoInhibitor + matchType: "=" + + receivers: + - name: gdex-app-team + emailConfigs: + - to: decs-info@ucar.edu + from: alertmanager@k8s.ucar.edu + smarthost: vdir.ucar.edu:25 + - name: "null" \ No newline at end of file diff --git a/pgdb02-cirrus/templates/alert-rule.yaml b/pgdb02-cirrus/templates/alert-rule.yaml new file mode 100644 index 0000000..5357e24 --- /dev/null +++ b/pgdb02-cirrus/templates/alert-rule.yaml @@ -0,0 +1,48 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: gdex-pg-replication-alerts + namespace: rda + labels: + team: gdex-app-team + release: kube-prometheus-stack +spec: + groups: + - name: pg.replication + interval: 60s + rules: + - alert: PGReplicationLagHigh + expr: | + cnpg_pg_replication_lag{namespace="rda"} > 100 + for: 15m + labels: + severity: warning + team: gdex-app-team + namespace: rda + annotations: + summary: "PostgresDB replication lag high on {{`{{ $labels.pod }}`}}" + description: "Replication lag is {{`{{ $value }}`}} WAL segments behind on {{`{{ $labels.pod }}`}} in cluster {{`{{ $labels.cluster }}`}}" + + - alert: PGReplicationBroken + expr: | + cnpg_pg_replication_streaming_replicas{namespace="rda", cluster="pgdb03"} == 0 + for: 5m + labels: + severity: critical + team: gdex-app-team + namespace: rda + annotations: + summary: "PostgresDB cluster replication broken for {{`{{ $labels.cluster }}`}}" + description: "Cluster {{`{{ $labels.cluster }}`}} has no streaming replicas. Replication may be broken." 
+ + - alert: PGClusterNotHealthy + expr: | + cnpg_collector_up{namespace="rda"} == 0 + for: 5m + labels: + severity: critical + team: gdex-app-team + namespace: rda + annotations: + summary: "PostgresDB cluster {{`{{ $labels.cluster }}`}} is not healthy" + description: "The Postgres exporter for cluster {{`{{ $labels.cluster }}`}} is down, indicating cluster health issues" \ No newline at end of file diff --git a/pgdb02-cirrus/templates/backup_compressor.yaml b/pgdb02-cirrus/templates/backup_compressor.yaml new file mode 100644 index 0000000..6f34df4 --- /dev/null +++ b/pgdb02-cirrus/templates/backup_compressor.yaml @@ -0,0 +1,40 @@ +{{- if .Values.db.backups.s3.enabled }} +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ .Values.db.name }}-s3-backup-compressor +spec: + schedule: "0 12 * * *" + jobTemplate: + spec: + template: + spec: + restartPolicy: OnFailure + containers: + - name: compressor + image: hub.k8s.ucar.edu/khrpcek/backup-compressor:kmh + command: ["/multi_compress.sh"] + imagePullPolicy: Always + env: + - name: BASEDIR + value: "{{ .Values.db.backups.s3.destinationPath }}/{{ .Values.db.name }}/base/" + - name: AWS_ENDPOINT_URL + value: "{{ .Values.db.backups.s3.endpointURL }}" + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: {{ .Values.db.backups.s3.secretName }} + key: access_key + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.db.backups.s3.secretName }} + key: secret_key + resources: + requests: + memory: 12Gi + cpu: 30 + limits: + memory: 16Gi + cpu: 32 +{{- end }} diff --git a/pgdb02-cirrus/templates/backups_external_secret.yaml b/pgdb02-cirrus/templates/backups_external_secret.yaml new file mode 100644 index 0000000..74dc9f8 --- /dev/null +++ b/pgdb02-cirrus/templates/backups_external_secret.yaml @@ -0,0 +1,25 @@ +{{- if .Values.db.backups.s3.enabled }} +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: {{ .Values.db.name }}-backup-s3-creds + namespace: {{ 
.Release.Namespace }} +spec: + data: + - remoteRef: + key: {{ .Values.db.backups.s3.secretPath }} + property: access_key + secretKey: access_key + - remoteRef: + key: {{ .Values.db.backups.s3.secretPath }} + property: secret_key + secretKey: secret_key + refreshInterval: 1h + secretStoreRef: + kind: SecretStore + name: rda-ro + target: + creationPolicy: Owner + deletionPolicy: Retain + name: {{ .Values.db.backups.s3.secretName }} +{{- end }} \ No newline at end of file diff --git a/pgdb02-cirrus/templates/objectstore.yaml b/pgdb02-cirrus/templates/objectstore.yaml new file mode 100644 index 0000000..0ef5d9c --- /dev/null +++ b/pgdb02-cirrus/templates/objectstore.yaml @@ -0,0 +1,20 @@ +{{- if .Values.db.backups.s3.enabled }} +apiVersion: barmancloud.cnpg.io/v1 +kind: ObjectStore +metadata: + name: boreas +spec: + retentionPolicy: "14d" + configuration: + destinationPath: {{ .Values.db.backups.s3.destinationPath }} + endpointURL: {{ .Values.db.backups.s3.endpointURL }} + s3Credentials: + accessKeyId: + name: {{ .Values.db.backups.s3.secretName }} + key: access_key + secretAccessKey: + name: {{ .Values.db.backups.s3.secretName }} + key: secret_key + wal: + compression: bzip2 +{{- end }} \ No newline at end of file diff --git a/pgdb02-cirrus/templates/postgres_cluster.yaml b/pgdb02-cirrus/templates/postgres_cluster.yaml index 09a38cc..8cac490 100644 --- a/pgdb02-cirrus/templates/postgres_cluster.yaml +++ b/pgdb02-cirrus/templates/postgres_cluster.yaml @@ -7,20 +7,58 @@ metadata: app: {{ .Values.db.name }} group: {{ .Values.db.group }} spec: + imageName: ghcr.io/cloudnative-pg/postgresql:17.4 instances: {{ .Values.db.instances }} storage: size: {{ .Values.db.size }} + resources: + limits: + cpu: {{ .Values.db.resource.limits.cpu | quote }} + memory: {{ .Values.db.resource.limits.memory }} + + backup: + {{- if .Values.db.backups.volumeSnapshot.enabled }} + volumeSnapshot: + className: {{ .Values.db.backups.volumeSnapshot.snapshotClassName }} + {{- end }} + + # Keep 8 
weekly backups + retentionPolicy: "8w" + + {{- if .Values.db.backups.s3.enabled }} + plugins: + - enabled: true + isWALArchiver: false + name: barman-cloud.cloudnative-pg.io + parameters: + barmanObjectName: boreas + {{- end }} + + bootstrap: + pg_basebackup: + source: pgdb01-external + + monitoring: + enablePodMonitor: true + replica: + enabled: true + source: pgdb01-external + + externalClusters: + - name: pgdb01-external + connectionParameters: + host: pgdb01.k8s.ucar.edu + user: "repl" + sslmode: prefer + password: + name: {{ .Values.db.name }}-superuser + key: replication-password # Add TLS certificates for encrypted communication certificates: serverTLSSecret: {{ .Values.db.name }}-server-cert serverCASecret: {{ .Values.db.name }}-server-cert - # Allow outside hosts to connect to the database - postgresql: - pg_hba: - - "host all all 0.0.0.0/0 md5" - # Enable superuser access enableSuperuserAccess: true @@ -32,14 +70,10 @@ spec: postgresql: parameters: # Connection settings - listen_addresses: "*" - port: "5432" max_connections: "500" # SSL Configuration - ssl: "on" ssl_ciphers: "HIGH:!aNULL" - ssl_prefer_server_ciphers: "on" ssl_min_protocol_version: "TLSv1.3" # Memory settings @@ -64,19 +98,12 @@ spec: max_replication_slots: "3" wal_keep_size: "256" max_slot_wal_keep_size: "-1" - hot_standby: "on" max_standby_archive_delay: "-1" max_standby_streaming_delay: "-1" # Logging settings - log_destination: "stderr" logging_collector: "on" - log_directory: "log" - log_filename: "postgresql-%Y-%m-%d_%H%M%S.log" - log_file_mode: "0644" log_rotation_age: "0" - log_rotation_size: "1GB" - log_truncate_on_rotation: "off" log_min_duration_statement: "120000" log_line_prefix: "%t %a [%p] " log_timezone: "America/Denver" @@ -92,7 +119,7 @@ spec: # Lock management max_locks_per_transaction: "1024" - + pg_hba: # Local connections with md5 authentication - local all root md5 @@ -112,4 +139,4 @@ spec: - host replication all 127.0.0.1/32 md5 # Remote replication - - host 
replication all 128.117.0.0/16 trust \ No newline at end of file + - host replication all 128.117.0.0/16 trust diff --git a/pgdb02-cirrus/templates/s3-backup.yaml b/pgdb02-cirrus/templates/s3-backup.yaml new file mode 100644 index 0000000..62bb677 --- /dev/null +++ b/pgdb02-cirrus/templates/s3-backup.yaml @@ -0,0 +1,22 @@ +{{- if .Values.db.backups.s3.enabled }} +apiVersion: postgresql.cnpg.io/v1 +kind: ScheduledBackup +metadata: + name: {{ .Values.db.name }}-s3-backup + namespace: {{ .Release.Namespace }} + labels: + app: {{ .Values.db.name }} +spec: + # Schedule (sec min hour dom month dow): 05:00 on every 3rd day of the month; roughly midnight local time if the operator clock is UTC (TODO confirm) + schedule: "0 0 5 */3 * *" + + backupOwnerReference: self + + cluster: + name: {{ .Values.db.name }} + + target: primary + method: plugin + pluginConfiguration: + name: barman-cloud.cloudnative-pg.io +{{- end }} \ No newline at end of file diff --git a/pgdb02-cirrus/templates/su_external_secret.yaml b/pgdb02-cirrus/templates/su_external_secret.yaml index 8ceb3bf..f66ebc9 100644 --- a/pgdb02-cirrus/templates/su_external_secret.yaml +++ b/pgdb02-cirrus/templates/su_external_secret.yaml @@ -1,4 +1,4 @@ -apiVersion: external-secrets.io/v1beta1 +apiVersion: external-secrets.io/v1 kind: ExternalSecret metadata: name: {{ .Values.db.name }}-superuser-esos @@ -6,11 +6,10 @@ metadata: spec: refreshInterval: 1h secretStoreRef: - name: user-ro + name: rda-ro kind: SecretStore target: name: {{ .Values.db.name }}-superuser - type: kubernetes.io/basic-auth data: - secretKey: username remoteRef: @@ -20,4 +19,12 @@ spec: remoteRef: key: {{ .Values.db.superUser.secretPath }} property: {{ .Values.db.superUser.passwordKey }} + - secretKey: replication-username + remoteRef: + key: {{ .Values.db.superUser.secretPath }} + property: {{ .Values.db.superUser.replicationUserKey }} + - secretKey: replication-password + remoteRef: + key: {{ .Values.db.superUser.secretPath }} + property: {{ .Values.db.superUser.replicationPassKey }} \ No newline at end of file diff --git
a/pgdb02-cirrus/templates/weekly-backup.yaml b/pgdb02-cirrus/templates/weekly-backup.yaml new file mode 100644 index 0000000..8825106 --- /dev/null +++ b/pgdb02-cirrus/templates/weekly-backup.yaml @@ -0,0 +1,20 @@ +{{- if .Values.db.backups.volumeSnapshot.enabled }} +apiVersion: postgresql.cnpg.io/v1 +kind: ScheduledBackup +metadata: + name: {{ .Values.db.name }}-weekly-backup + namespace: {{ .Release.Namespace }} + labels: + app: {{ .Values.db.name }} +spec: + # Schedule: Every Friday at 11:00 PM + schedule: "0 0 23 * * 5" + + backupOwnerReference: self + + cluster: + name: {{ .Values.db.name }} + + target: primary + method: volumeSnapshot +{{- end }} \ No newline at end of file diff --git a/pgdb02-cirrus/values.yaml b/pgdb02-cirrus/values.yaml index 12a3629..9fec517 100644 --- a/pgdb02-cirrus/values.yaml +++ b/pgdb02-cirrus/values.yaml @@ -1,9 +1,25 @@ db: name: pgdb02 group: pgdb02 - instances: 3 - size: 5000Gi + instances: 1 + size: 5001Gi superUser: usernameKey: username passwordKey: password - secretPath: gdex/pgdb02 \ No newline at end of file + replicationUserKey: repuser + replicationPassKey: reppass + secretPath: gdex/pgdb02 + resource: + limits: + cpu: '16' + memory: 128Gi + backups: + volumeSnapshot: + enabled: true + snapshotClassName: csi-rbdplugin-snapclass + s3: + enabled: false + secretName: pgdb02-backup-s3-creds + endpointURL: https://boreas.hpc.ucar.edu:6443 + destinationPath: s3://gdex + secretPath: gdex/boreas diff --git a/pgdb03-cirrus/templates/backups_external_secret.yaml b/pgdb03-cirrus/templates/backups_external_secret.yaml new file mode 100644 index 0000000..74dc9f8 --- /dev/null +++ b/pgdb03-cirrus/templates/backups_external_secret.yaml @@ -0,0 +1,25 @@ +{{- if .Values.db.backups.s3.enabled }} +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: {{ .Values.db.name }}-backup-s3-creds + namespace: {{ .Release.Namespace }} +spec: + data: + - remoteRef: + key: {{ .Values.db.backups.s3.secretPath }} + property: 
access_key + secretKey: access_key + - remoteRef: + key: {{ .Values.db.backups.s3.secretPath }} + property: secret_key + secretKey: secret_key + refreshInterval: 1h + secretStoreRef: + kind: SecretStore + name: rda-ro + target: + creationPolicy: Owner + deletionPolicy: Retain + name: {{ .Values.db.backups.s3.secretName }} +{{- end }} \ No newline at end of file diff --git a/pgdb03-cirrus/templates/postgres_cluster.yaml b/pgdb03-cirrus/templates/postgres_cluster.yaml index 09a38cc..56de9d8 100644 --- a/pgdb03-cirrus/templates/postgres_cluster.yaml +++ b/pgdb03-cirrus/templates/postgres_cluster.yaml @@ -10,17 +10,43 @@ spec: instances: {{ .Values.db.instances }} storage: size: {{ .Values.db.size }} + resources: + limits: + cpu: {{ .Values.db.resource.limits.cpu | quote }} + memory: {{ .Values.db.resource.limits.memory }} + + {{- if .Values.db.backups }} + backup: + # Keep 8 weekly backups + retentionPolicy: "8w" + target: "prefer-standby" + {{- if .Values.db.backups.volumeSnapshot.enabled }} + volumeSnapshot: # rendered only when db.backups.volumeSnapshot.enabled is true + className: {{ .Values.db.backups.volumeSnapshot.snapshotClassName }} + {{- end }} + {{- if .Values.db.backups.s3.enabled }} + barmanObjectStore: + wal: + compression: gzip + data: + compression: gzip + destinationPath: {{ .Values.db.backups.s3.destinationPath }} + endpointURL: {{ .Values.db.backups.s3.endpointURL }} + s3Credentials: + accessKeyId: + name: {{ .Values.db.backups.s3.secretName }} + key: access_key + secretAccessKey: + name: {{ .Values.db.backups.s3.secretName }} + key: secret_key + {{- end }} + {{- end }} # Add TLS certificates for encrypted communication certificates: serverTLSSecret: {{ .Values.db.name }}-server-cert serverCASecret: {{ .Values.db.name }}-server-cert - # Allow outside hosts to connect to the database - postgresql: - pg_hba: - - "host all all 0.0.0.0/0 md5" - # Enable superuser access enableSuperuserAccess: true @@ -32,21 +58,17 @@ spec: postgresql: parameters: # Connection settings - listen_addresses: "*" - port: "5432" max_connections:
"500" # SSL Configuration - ssl: "on" ssl_ciphers: "HIGH:!aNULL" - ssl_prefer_server_ciphers: "on" ssl_min_protocol_version: "TLSv1.3" # Memory settings shared_buffers: "32GB" temp_buffers: "64MB" - work_mem: "32MB" - maintenance_work_mem: "128MB" + work_mem: "64MB" + maintenance_work_mem: "512MB" dynamic_shared_memory_type: "posix" # Resource limits @@ -56,27 +78,20 @@ spec: wal_level: "replica" checkpoint_timeout: "15min" checkpoint_completion_target: "0.9" - max_wal_size: "20GB" - min_wal_size: "1GB" + max_wal_size: "16GB" + min_wal_size: "512MB" # Replication settings - max_wal_senders: "3" - max_replication_slots: "3" - wal_keep_size: "256" + max_wal_senders: "6" + max_replication_slots: "6" + wal_keep_size: "4GB" max_slot_wal_keep_size: "-1" - hot_standby: "on" max_standby_archive_delay: "-1" max_standby_streaming_delay: "-1" # Logging settings - log_destination: "stderr" logging_collector: "on" - log_directory: "log" - log_filename: "postgresql-%Y-%m-%d_%H%M%S.log" - log_file_mode: "0644" log_rotation_age: "0" - log_rotation_size: "1GB" - log_truncate_on_rotation: "off" log_min_duration_statement: "120000" log_line_prefix: "%t %a [%p] " log_timezone: "America/Denver" diff --git a/pgdb03-cirrus/templates/s3_backup.yaml b/pgdb03-cirrus/templates/s3_backup.yaml new file mode 100644 index 0000000..02f7b25 --- /dev/null +++ b/pgdb03-cirrus/templates/s3_backup.yaml @@ -0,0 +1,11 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: ScheduledBackup +metadata: + name: {{ .Values.db.name }}-backup + namespace: {{ .Release.Namespace }} +spec: + schedule: "0 0 2 * * *" # daily at 2am + backupOwnerReference: self + cluster: + name: {{ .Values.db.name }} + method: barmanObjectStore \ No newline at end of file diff --git a/pgdb03-cirrus/templates/su_external_secret.yaml b/pgdb03-cirrus/templates/su_external_secret.yaml index 8ceb3bf..3567a2e 100644 --- a/pgdb03-cirrus/templates/su_external_secret.yaml +++ b/pgdb03-cirrus/templates/su_external_secret.yaml @@ -1,4 +1,4 @@ 
-apiVersion: external-secrets.io/v1beta1 +apiVersion: external-secrets.io/v1 kind: ExternalSecret metadata: name: {{ .Values.db.name }}-superuser-esos @@ -6,11 +6,10 @@ metadata: spec: refreshInterval: 1h secretStoreRef: - name: user-ro + name: rda-ro kind: SecretStore target: name: {{ .Values.db.name }}-superuser - type: kubernetes.io/basic-auth data: - secretKey: username remoteRef: diff --git a/pgdb03-cirrus/values.yaml b/pgdb03-cirrus/values.yaml index 2c17abc..c6c3b13 100644 --- a/pgdb03-cirrus/values.yaml +++ b/pgdb03-cirrus/values.yaml @@ -1,9 +1,23 @@ db: name: pgdb03 group: pgdb03 - instances: 3 - size: 7000Gi + instances: 2 + size: 9000Gi superUser: usernameKey: username passwordKey: password - secretPath: gdex/pgdb03 \ No newline at end of file + secretPath: gdex/pgdb03 + resource: + limits: + cpu: '16' + memory: 128Gi + backups: + enabled: false + volumeSnapshot: + enabled: false + s3: + enabled: true + secretName: pgdb03-backup-s3-creds + endpointURL: https://boreas.hpc.ucar.edu:6443 + destinationPath: s3://gdex + secretPath: gdex/boreas \ No newline at end of file diff --git a/pgdb04-cirrus/.helmignore b/pgdb04-cirrus/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/pgdb04-cirrus/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/pgdb04-cirrus/Chart.yaml b/pgdb04-cirrus/Chart.yaml new file mode 100644 index 0000000..39aaa9a --- /dev/null +++ b/pgdb04-cirrus/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: CloudNative-PostgreSQL Template +description: A CloudnativePG Helm chart template for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. 
+# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "0.1.0" diff --git a/pgdb04-cirrus/README.md b/pgdb04-cirrus/README.md new file mode 100644 index 0000000..43d678e --- /dev/null +++ b/pgdb04-cirrus/README.md @@ -0,0 +1,28 @@ +# postgres-helm +A chart for deploying a PostgreSQL database cluster to the CISL cloud with Helm. This requires a superuser and regular user username and password to access the PostgreSQL database to be stored in bao.k8s.ucar.edu so they can be injected into the required containers appropriately. + +```{note} +Information required to create a Helm chart for your web application: +1. A name for the database. This will be used as a hostname to connect to via .k8s.ucar.edu +2. The number of PostgreSQL servers to run in the database cluster +3. The size of the volume to mount to the database and a unique name for the volume. +4. Secret information to access the database. This should be stored in bao.k8s.ucar.edu. An example of what a path would look like is, /database01.
Under that path use the keys postgresuser and postgrespass to store the username and password for the DB securely. +``` + +## Update values.yaml file +In the `postgres-helm/` directory is a file named `values.yaml` which contains all the specific details for your application. You need to update the following values to be unique for your deployment: + + - `#DATABASE_NAME` : The name of the database. + - `#DATABASE_APP_GROUP` : The group of applications to run the database with. If it's a standalone DB this can just be the DB name. + - `#DATABASE_CLUSTER_MEMBERS` : The number of PostgreSQL database servers running for the cluster + - `#DATABASE_SIZE` : How large to make the database in Gi. + - `#SU_USERNAME_SECRET_KEY` : The superuser username key as designated in bao.k8s.ucar.edu. + - `#SU_PASSWORD_SECRET_KEY` : The superuser password key as designated in bao.k8s.ucar.edu. + - `#SU_SECRET_PATH` : The superuser secret path designated in bao.k8s.ucar.edu. + - `#APP_USERNAME_SECRET_KEY` : The database username key to query in bao.k8s.ucar.edu in order to get the username value. + - `#APP_PASSWORD_SECRET_KEY` : The database password key to query in bao.k8s.ucar.edu in order to get the password value. + - `#APP_USER_SECRET_PATH` : The path in bao.k8s.ucar.edu where the DB secrets are stored. + + +## Update Chart.yaml +The Chart.yaml file is mostly used to describe your application and keep track of what versions you are on and running. 
\ No newline at end of file diff --git a/pgdb04-cirrus/templates/cert.yaml b/pgdb04-cirrus/templates/cert.yaml new file mode 100644 index 0000000..ec7310f --- /dev/null +++ b/pgdb04-cirrus/templates/cert.yaml @@ -0,0 +1,30 @@ +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: {{ .Values.db.name }}-selfsigned-issuer +spec: + selfSigned: {} +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ .Values.db.name }}-server-cert +spec: + secretName: {{ .Values.db.name }}-server-cert + usages: + - server auth + dnsNames: + - "{{ .Values.db.name }}.k8s.ucar.edu" + - {{ .Values.db.name }}-rw + - {{ .Values.db.name }}-rw.{{ .Release.Namespace }} + - {{ .Values.db.name }}-rw.{{ .Release.Namespace }}.svc + - {{ .Values.db.name }}-r + - {{ .Values.db.name }}-r.{{ .Release.Namespace }} + - {{ .Values.db.name }}-r.{{ .Release.Namespace }}.svc + - {{ .Values.db.name }}-ro + - {{ .Values.db.name }}-ro.{{ .Release.Namespace }} + - {{ .Values.db.name }}-ro.{{ .Release.Namespace }}.svc + issuerRef: + name: {{ .Values.db.name }}-selfsigned-issuer + kind: Issuer + group: cert-manager.io \ No newline at end of file diff --git a/pgdb04-cirrus/templates/pg_service.yaml b/pgdb04-cirrus/templates/pg_service.yaml new file mode 100644 index 0000000..68ae506 --- /dev/null +++ b/pgdb04-cirrus/templates/pg_service.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ .Values.db.name }}-service + namespace: {{ .Release.Namespace }} + labels: + cirrus/lb: internal + annotations: + external-dns.alpha.kubernetes.io/hostname: "{{ .Values.db.name }}.k8s.ucar.edu" + external-dns.alpha.kubernetes.io/ttl: "300" +spec: + type: LoadBalancer + selector: + cnpg.io/cluster: {{ .Values.db.name }} + cnpg.io/instanceRole: primary + ports: + - port: 5432 + targetPort: 5432 + protocol: TCP \ No newline at end of file diff --git a/pgdb04-cirrus/templates/postgres_cluster.yaml b/pgdb04-cirrus/templates/postgres_cluster.yaml new file mode 100644 index 
0000000..d3aee41 --- /dev/null +++ b/pgdb04-cirrus/templates/postgres_cluster.yaml @@ -0,0 +1,123 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: {{ .Values.db.name }} + namespace: {{ .Release.Namespace }} + labels: + app: {{ .Values.db.name }} + group: {{ .Values.db.group }} +spec: + instances: {{ .Values.db.instances }} + storage: + size: {{ .Values.db.size }} + resources: + limits: + cpu: {{ .Values.db.resource.limits.cpu | quote }} + memory: {{ .Values.db.resource.limits.memory }} + + monitoring: + enablePodMonitor: true + + bootstrap: + pg_basebackup: + source: pgdb03-external + replica: + enabled: true + source: pgdb03-external + + externalClusters: + - name: pgdb03-external + connectionParameters: + host: pgdb03.k8s.ucar.edu + user: "rep1" + sslmode: prefer + password: + name: {{ .Values.db.name }}-superuser + key: replication-password + + # Add TLS certificates for encrypted communication + certificates: + serverTLSSecret: {{ .Values.db.name }}-server-cert + serverCASecret: {{ .Values.db.name }}-server-cert + + # Enable superuser access + enableSuperuserAccess: true + + # Configure postgres superuser from su_external_secret + superuserSecret: + name: "{{ .Values.db.name }}-superuser" + + # Allow outside hosts to connect to the database + postgresql: + parameters: + # Connection settings + max_connections: "500" + + # SSL Configuration + ssl_ciphers: "HIGH:!aNULL" + ssl_min_protocol_version: "TLSv1.3" + + # Memory settings + shared_buffers: "32GB" + temp_buffers: "64MB" + work_mem: "64MB" + maintenance_work_mem: "512MB" + dynamic_shared_memory_type: "posix" + + # Resource limits + max_files_per_process: "2000" + + # WAL settings + wal_level: "replica" + checkpoint_timeout: "15min" + checkpoint_completion_target: "0.9" + max_wal_size: "16GB" + min_wal_size: "512MB" + + # Replication settings + max_wal_senders: "3" + max_replication_slots: "3" + wal_keep_size: "4GB" + max_slot_wal_keep_size: "-1" + max_standby_archive_delay: "-1" + 
max_standby_streaming_delay: "-1" + + # Logging settings + logging_collector: "on" + log_rotation_age: "0" + log_min_duration_statement: "120000" + log_line_prefix: "%t %a [%p] " + log_timezone: "America/Denver" + + # Locale and timezone settings + datestyle: "iso, mdy" + timezone: "America/Denver" + lc_messages: "en_US.UTF-8" + lc_monetary: "en_US.UTF-8" + lc_numeric: "en_US.UTF-8" + lc_time: "en_US.UTF-8" + default_text_search_config: "pg_catalog.english" + + # Lock management + max_locks_per_transaction: "1024" + + pg_hba: + # Local connections with md5 authentication + - local all root md5 + - local all all md5 + + # IPv4 local connections with md5 + - host all all 127.0.0.1/32 md5 + + # IPv6 local connections with md5 + - host all all ::1/128 md5 + + # IPv4 remote connections for UCAR network + - host all all 128.117.0.0/16 md5 + + # Replication connections + - local replication all md5 + - host replication all 127.0.0.1/32 md5 + + # Remote replication + - host replication all 128.117.0.0/16 trust \ No newline at end of file diff --git a/pgdb04-cirrus/templates/su_external_secret.yaml b/pgdb04-cirrus/templates/su_external_secret.yaml new file mode 100644 index 0000000..f66ebc9 --- /dev/null +++ b/pgdb04-cirrus/templates/su_external_secret.yaml @@ -0,0 +1,30 @@ +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: {{ .Values.db.name }}-superuser-esos + namespace: {{ .Release.Namespace }} +spec: + refreshInterval: 1h + secretStoreRef: + name: rda-ro + kind: SecretStore + target: + name: {{ .Values.db.name }}-superuser + data: + - secretKey: username + remoteRef: + key: {{ .Values.db.superUser.secretPath }} + property: {{ .Values.db.superUser.usernameKey }} + - secretKey: password + remoteRef: + key: {{ .Values.db.superUser.secretPath }} + property: {{ .Values.db.superUser.passwordKey }} + - secretKey: replication-username + remoteRef: + key: {{ .Values.db.superUser.secretPath }} + property: {{ .Values.db.superUser.replicationUserKey }} + - 
secretKey: replication-password + remoteRef: + key: {{ .Values.db.superUser.secretPath }} + property: {{ .Values.db.superUser.replicationPassKey }} + \ No newline at end of file diff --git a/pgdb04-cirrus/values.yaml b/pgdb04-cirrus/values.yaml new file mode 100644 index 0000000..9a06f83 --- /dev/null +++ b/pgdb04-cirrus/values.yaml @@ -0,0 +1,15 @@ +db: + name: pgdb04 + group: pgdb04 + instances: 2 + size: 9000Gi + superUser: + usernameKey: username + passwordKey: password + replicationUserKey: repuser + replicationPassKey: reppass + secretPath: gdex/pgdb04 + resource: + limits: + cpu: '16' + memory: 128Gi \ No newline at end of file