Public Information

2025-01-24 16:18:47 +01:00
commit 0bd2038c86
449 changed files with 108655 additions and 0 deletions

samples/ha/README.md Normal file

@@ -0,0 +1,78 @@
# High Availability
To gain a higher level of availability for your Instance, you can

- create more Kubernetes cluster nodes
- create more replicas of the *nscale* and *nplus* components
- distribute those replicas across multiple nodes using anti-affinities

Install the sample like this:
```
helm install \
  --values samples/ha/values.yaml \
--values samples/environment/demo.yaml \
sample-ha nplus/nplus-instance
```
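After the install, you can do a quick sanity check that the replicas really end up on different nodes and that the disruption budgets exist. The commands below are plain `kubectl`; the release and namespace names depend on how and where you installed the sample:

```
# the NODE column should show the replicas of a component on different nodes
kubectl get pods -o wide
# budgets for the components that set minReplicaCount / minReplicaCountType
kubectl get poddisruptionbudgets
```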
The essence of the values file is this:
- We use three (3) *nscale Server Application Layer* instances, two dedicated to user access, one dedicated to jobs
  - if the jobs node fails, the user nodes take over the jobs (handled by priority)
  - if one of the user nodes fails, the other one handles the load
  - Kubernetes takes care of restarting nodes should that happen
- All components run with two replicas
  - Pod anti-affinities handle the distribution across nodes
- Any administration component only connects to the jobs nappl, leaving the user nodes to the users
- PodDisruptionBudgets are defined for the crucial components. They are set via `minReplicaCount` for the components that can support multiple replicas, and via `minReplicaCountType` for the **first** ReplicaSet of the components that do not support replicas, in this case nstla. A sketch of a rendered budget follows after the excerpt below.
```
web:
replicaCount: 2
minReplicaCount: 1
rs:
replicaCount: 2
minReplicaCount: 1
ilm:
replicaCount: 2
minReplicaCount: 1
cmis:
replicaCount: 2
minReplicaCount: 1
webdav:
replicaCount: 2
minReplicaCount: 1
nstla:
minReplicaCountType: 1
administrator:
nappl:
host: "{{ .component.prefix }}nappljobs.{{ .Release.Namespace }}"
waitFor:
- "-service {{ .component.prefix }}nappljobs.{{ .Release.Namespace }}.svc.cluster.local:{{ .this.nappl.port }} -timeout 600"
pam:
nappl:
host: "{{ .component.prefix }}nappljobs.{{ .Release.Namespace }}"
waitFor:
- "-service {{ .component.prefix }}nappljobs.{{ .Release.Namespace }}.svc.cluster.local:{{ .this.nappl.port }} -timeout 600"
nappl:
replicaCount: 2
minReplicaCount: 1
jobs: false
waitFor:
- "-service {{ .component.prefix }}nappljobs.{{ .Release.Namespace }}.svc.cluster.local:{{ .this.nappl.port }} -timeout 600"
nappljobs:
replicaCount: 1
jobs: true
disableSessionReplication: true
ingress:
enabled: false
snc:
enabled: true
waitFor:
- "-service {{ .component.prefix }}database.{{ .Release.Namespace }}.svc.cluster.local:5432 -timeout 600"
application:
nstl:
host: "{{ .component.prefix }}nstl-cluster.{{ .Release.Namespace }}"
nappl:
host: "{{ .component.prefix }}nappljobs.{{ .Release.Namespace }}"
```
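For orientation, this is roughly what a rendered PodDisruptionBudget for the web component could look like, assuming the chart turns `minReplicaCount` into `minAvailable`. This is a minimal sketch only; the actual names, labels and selectors come from the chart templates:

```
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: sample-ha-web            # hypothetical name, the chart templates decide the real one
spec:
  minAvailable: 1                # from minReplicaCount: 1
  selector:
    matchLabels:
      app: sample-ha-web         # hypothetical label set by the chart
```

With two replicas and `minAvailable: 1`, a node drain can always evict one pod while the other keeps serving, which is exactly the behaviour described in the list above.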

samples/ha/build.sh Executable file

@@ -0,0 +1,58 @@
#!/bin/bash
#
# This sample script builds the example as described. It is also used to build the test environment in our lab,
# so it should be well tested.
#
# Make sure the script fails immediately if anything goes wrong
set -e
# -- Environment variables:
# CHARTS:       The path to the chart source code
# DEST:         The path to the build destination
# SAMPLES:      The path to the samples directory
# KUBE_CONTEXT: The name of the kube context, used to build this sample depending on where you run it against.
#               You might have different environments such as lab, dev, qa, prod, demo, local, ...
# SAMPLE and NAME (the directory of the sample and the .Release.Name) are set below.
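#
# Example invocation (the paths and the context name are placeholders for this sketch,
# adjust them to your checkout, build destination and environment):
#
#   CHARTS=./charts DEST=./build SAMPLES=./samples KUBE_CONTEXT=lab samples/ha/build.sh
#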
# Set the Variables
SAMPLE="ha"
NAME="sample-$SAMPLE"
# Check if we have the source code available
if [ ! -d "$CHARTS" ]; then
  echo "ERROR Building $SAMPLE example: The chart sources folder is not set. Please make sure to run this script with the full source code available"
  exit 1
fi
if [ ! -d "$DEST" ]; then
  echo "ERROR Building $SAMPLE example: DEST folder not found."
  exit 1
fi
if [ ! -d "$CHARTS/instance" ]; then
  echo "ERROR Building $SAMPLE example: Chart sources in $CHARTS/instance not found. Are you running this script as a subscriber?"
  exit 1
fi
# Output what is happening
echo "Building $NAME"
# Create the manifest
mkdir -p $DEST/instance
helm template --debug \
--values $SAMPLES/ha/values.yaml \
--values $SAMPLES/hid/values.yaml \
--values $SAMPLES/application/empty.yaml \
--values $SAMPLES/environment/$KUBE_CONTEXT.yaml \
--values $SAMPLES/resources/$KUBE_CONTEXT.yaml \
$NAME $CHARTS/instance > $DEST/instance/$SAMPLE.yaml
# Create the Argo manifest
mkdir -p $DEST/instance-argo
helm template --debug \
--values $SAMPLES/ha/values.yaml \
--values $SAMPLES/hid/values.yaml \
--values $SAMPLES/application/empty.yaml \
--values $SAMPLES/environment/$KUBE_CONTEXT.yaml \
--values $SAMPLES/resources/$KUBE_CONTEXT.yaml \
$NAME-argo $CHARTS/instance-argo > $DEST/instance-argo/$SAMPLE-argo.yaml

samples/ha/values.yaml Normal file

@@ -0,0 +1,126 @@
components:
nappl: true
nappljobs: true
web: true
mon: true
rs: true
ilm: true
erpproxy: true
erpcmis: true
cmis: true
database: true
nstl: false
nstla: true
nstlb: true
pipeliner: false
application: true
administrator: true
webdav: true
rms: false
pam: true
web:
replicaCount: 2
minReplicaCount: 1
rs:
replicaCount: 2
minReplicaCount: 1
ilm:
replicaCount: 2
minReplicaCount: 1
erpproxy:
replicaCount: 2
minReplicaCount: 1
erpcmis:
replicaCount: 2
minReplicaCount: 1
cmis:
replicaCount: 2
minReplicaCount: 1
webdav:
replicaCount: 2
minReplicaCount: 1
administrator:
nappl:
host: "{{ .component.prefix }}nappljobs.{{ .Release.Namespace }}"
waitFor:
- "-service {{ .component.prefix }}nappljobs.{{ .Release.Namespace }}.svc.cluster.local:{{ .this.nappl.port }} -timeout 600"
pam:
nappl:
host: "{{ .component.prefix }}nappljobs.{{ .Release.Namespace }}"
waitFor:
- "-service {{ .component.prefix }}nappljobs.{{ .Release.Namespace }}.svc.cluster.local:{{ .this.nappl.port }} -timeout 600"
nappl:
replicaCount: 2
minReplicaCount: 1
jobs: false
waitFor:
- "-service {{ .component.prefix }}nappljobs.{{ .Release.Namespace }}.svc.cluster.local:{{ .this.nappl.port }} -timeout 600"
nappljobs:
replicaCount: 1
jobs: true
disableSessionReplication: true
ingress:
enabled: false
snc:
enabled: true
waitFor:
- "-service {{ .component.prefix }}database.{{ .Release.Namespace }}.svc.cluster.local:5432 -timeout 600"
application:
nstl:
host: "{{ .component.prefix }}nstl-cluster.{{ .Release.Namespace }}"
nappl:
host: "{{ .component.prefix }}nappljobs.{{ .Release.Namespace }}"
waitFor:
- "-service {{ .component.prefix }}nappljobs.{{ .Release.Namespace }}.svc.cluster.local:{{ .this.nappl.port }} -timeout 1800"
nstla:
minReplicaCountType: 1
accounting: true
logForwarder:
- name: Accounting
path: "/opt/ceyoniq/nscale-server/storage-layer/accounting/*.csv"
serverID: 4711
env:
NSTL_REMOTESERVER_MAINTAINCONNECTION: 1
NSTL_REMOTESERVER_SERVERID: 4712
NSTL_REMOTESERVER_ADDRESS: "nstlb"
NSTL_REMOTESERVER_NAME: "nstla"
NSTL_REMOTESERVER_USERNAME: "admin"
NSTL_REMOTESERVER_PASSWORD: "admin"
NSTL_REMOTESERVER_MAXCONNECTIONS: 10
NSTL_REMOTESERVER_MAXARCCONNECTIONS: 1
NSTL_REMOTESERVER_FORWARDDELETEJOBS: 0
NSTL_REMOTESERVER_ACCEPTRETRIEVAL: 1
NSTL_REMOTESERVER_ACCEPTDOCS: 1
NSTL_REMOTESERVER_ACCEPTDOCSWITHTHISSERVERID: 1
NSTL_REMOTESERVER_PERMANENTMIGRATION: 1
nstlb:
accounting: true
logForwarder:
- name: Accounting
path: "/opt/ceyoniq/nscale-server/storage-layer/accounting/*.csv"
serverID: 4712
env:
NSTL_REMOTESERVER_MAINTAINCONNECTION: 1
NSTL_REMOTESERVER_SERVERID: 4711
NSTL_REMOTESERVER_ADDRESS: "nstla"
NSTL_REMOTESERVER_NAME: "nstla"
NSTL_REMOTESERVER_USERNAME: "admin"
NSTL_REMOTESERVER_PASSWORD: "admin"
NSTL_REMOTESERVER_MAXCONNECTIONS: 10
NSTL_REMOTESERVER_MAXARCCONNECTIONS: 1
NSTL_REMOTESERVER_FORWARDDELETEJOBS: 0
NSTL_REMOTESERVER_ACCEPTRETRIEVAL: 1
NSTL_REMOTESERVER_ACCEPTDOCS: 1
NSTL_REMOTESERVER_ACCEPTDOCSWITHTHISSERVERID: 1
NSTL_REMOTESERVER_PERMANENTMIGRATION: 1