
Create cluster

gcloud container \
clusters create-auto "stage"\
--project "powr-staging" \
--region "us-east1" \
--release-channel "regular" \
--enable-private-nodes \
--network "projects/powr-staging/global/networks/default" \
--subnetwork "projects/powr-staging/regions/us-east1/subnetworks/default" \
--cluster-ipv4-cidr "/17" \
--services-ipv4-cidr "/22"
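
Once the cluster is up, a quick sanity check (the same get-credentials pattern used later in this doc):

gcloud container clusters get-credentials stage \
--region us-east1 \
--project powr-staging \
&& kubectl get nodes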

Alpha cluster

gcloud beta container \
--project "powr-staging" \
clusters create "alpha" \
--region "us-west1" \
--no-enable-basic-auth \
--cluster-version "1.31.1-gke.1846000" \
--release-channel "regular" --machine-type "c2-standard-4" \
--image-type "COS_CONTAINERD" --disk-type "pd-balanced" \
--disk-size "100" \
--metadata disable-legacy-endpoints=true \
--scopes \
"https://www.googleapis.com/auth/devstorage.read_only",\
"https://www.googleapis.com/auth/logging.write",\
"https://www.googleapis.com/auth/monitoring",\
"https://www.googleapis.com/auth/servicecontrol",\
"https://www.googleapis.com/auth/service.management.readonly",\
"https://www.googleapis.com/auth/trace.append"\
--spot --num-nodes "1"
--logging=SYSTEM,WORKLOAD \
--monitoring=SYSTEM,STORAGE,POD,DEPLOYMENT,STATEFULSET,DAEMONSET,HPA,CADVISOR,KUBELET
--enable-ip-alias \
--network "projects/powr-staging/global/networks/default" \
--subnetwork "projects/powr-staging/regions/us-west1/subnetworks/default" \
--no-enable-intra-node-visibility \
--default-max-pods-per-node "110" \
--enable-dns-access \
--enable-ip-access \
--security-posture=standard \
--workload-vulnerability-scanning=disabled \
--no-enable-master-authorized-networks \
--no-enable-google-cloud-access \
--addons HorizontalPodAutoscaling,HttpLoadBalancing,NodeLocalDNS,GcePersistentDiskCsiDriver \
--enable-autoupgrade \
--enable-autorepair \
--max-surge-upgrade 1 \
--max-unavailable-upgrade 0 \
--binauthz-evaluation-mode=DISABLED \
--enable-managed-prometheus \
--workload-pool "powr-staging.svc.id.goog" \
--enable-shielded-nodes \
--enable-image-streaming \
--placement-type=COMPACT \
--node-locations "us-west1-a"

Create TLS secret with wildcard certs

kubectl create secret \
tls powr-staging-wildcard-tls \
--namespace ingress \
--key ~/Downloads/k.powr-staging.priv \
--cert ~/Downloads/k.powr-staging.pem

Create dockerconfig secret

kubectl create secret docker-registry secret-tiger-docker \
--docker-email=tiger@acme.example \
--docker-username=tiger \
--docker-password=pass1234 \
--docker-server=my-registry.example:5000

For reference, the Helm-managed dockerconfigjson secret used by the powr release looks like this in the cluster:

apiVersion: v1
data:
  .dockerconfigjson: eyJhdXRocyI6eyJnaGNyLmlvIjp7ImF1dGgiOiJjRzkzY205d2N6cG5hSEJmT0ZKaGJHMXFaWGRYUTBOQ01FOUxSWE5uUmtOV1VFTktSREZSZEV4cU1UaHpRVlJRIn19fQ==
kind: Secret
metadata:
  annotations:
    meta.helm.sh/release-name: powr
    meta.helm.sh/release-namespace: staging
  creationTimestamp: "2022-05-13T11:27:45Z"
  labels:
    app.kubernetes.io/instance: powr
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: powr
    app.kubernetes.io/version: 1.0.0
    helm.sh/chart: powr-0.1.0
  name: powr-ghcr
  namespace: staging
  resourceVersion: "130781373"
  uid: 26315e97-01a1-497b-8469-04032d50e1fe
type: kubernetes.io/dockerconfigjson
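
To double-check what a dockerconfigjson secret actually contains, decode it from the cluster (a hedged example using the powr-ghcr secret above; base64 -d assumes GNU coreutils):

kubectl get secret powr-ghcr \
--namespace staging \
--output 'jsonpath={.data.\.dockerconfigjson}' | base64 -d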

Install ingress

Create static external IP (EIP)

gcloud compute addresses create powr-alpha-eip --project=powr-staging --region=us-east1
helm repo add nginx-stable https://helm.nginx.com/stable
helm repo update
# --set controller.service.loadBalancerIP="34.148.19.42" \
helm install ingress \
nginx-stable/nginx-ingress \
--set controller.service.loadBalancerIP="34.75.110.121" \
--set controller.enableSnippets=true \
--namespace ingress \
--create-namespace \
--set controller.wildcardTLS.secret=ingress/powr-alpha-wildcard-tls --dry-run
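
To apply for real, drop --dry-run, then confirm the controller Service received its external IP (a minimal check, not part of the original install command):

helm status ingress --namespace ingress
kubectl get svc --namespace ingress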

# to uninstall, run the following
helm uninstall ingress --namespace ingress

If you add the secret later, upgrade the existing release:

    --set controller.service.loadBalancerIP="34.139.48.130" #staging2\
--set controller.service.loadBalancerIP="34.148.19.42" #staging\
--set controller.service.loadBalancerIP="34.75.110.121" #alpha\
--set controller.config.entries.client-max-body-size='"200m"' \
helm upgrade ingress \
nginx-stable/nginx-ingress \
--namespace ingress \
--set controller.enableSnippets=true \
--set controller.service.loadBalancerIP="34.139.48.130" \
--set controller.wildcardTLS.secret=ingress/powr-staging-wildcard-tls \
--set controller.autoscaling.enabled=true \
--set controller.replicaCount=2 \
--set controller.config.entries.worker-connections='"2048"' \
--set controller.config.entries.client-max-body-size='"200m"' \
--set controller.enableLatencyMetrics=true \
--set prometheus.create=true --dry-run

helm upgrade ingress \
nginx-stable/nginx-ingress \
--namespace ingress \
--set controller.enableSnippets=true \
--set controller.service.loadBalancerIP="34.75.110.121" \
--set controller.wildcardTLS.secret=ingress/powr-alpha-wildcard-tls \
--set controller.autoscaling.enabled=true \
--set controller.replicaCount=2 \
--set controller.config.entries.worker-connections='"2048"' \
--set controller.config.entries.client-max-body-size='"200m"' \
--set controller.config.entries.proxy-buffer-size='"256k"' \
--set controller.config.entries.proxy-buffers='"4 512k"' \
--set controller.config.entries.proxy-busy-buffers-size='"512k"' \
--set controller.enableLatencyMetrics=true \
--set prometheus.create=true --dry-run

Nginx safe upgrade (controller.kind is deployment or daemonset; deployment is used here)

helm upgrade nginx-ingress oci://ghcr.io/nginxinc/charts/nginx-ingress \
--version 1.2.0 \
--namespace ingress \
--set controller.kind=deployment \
--set controller.nginxplus=false \
--set controller.image.pullPolicy=Always \
--set serviceNameOverride="nginx-ingress-nginx-ingress" \
--set controller.name="" \
--set fullnameOverride="nginx-ingress-nginx-ingress" -f .helm/values.staging.yaml --dry-run

AWS Ingress setup

helm install ingress nginx-stable/nginx-ingress \
--namespace ingress \
--set controller.service.type=LoadBalancer \
--set controller.service.annotations."service\.beta\.kubernetes\.io/aws-load-balancer-type"="nlb" \
--set controller.enableSnippets=true \
--set controller.wildcardTLS.secret=ingress/powr-staging-wildcard-tls --dry-run
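
On AWS the LoadBalancer Service gets an NLB DNS name instead of a static IP; a hedged way to read it back, assuming the controller Service is the only Service in the ingress namespace:

kubectl get svc --namespace ingress \
--output 'jsonpath={.items[0].status.loadBalancer.ingress[0].hostname}'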

Helm manual deploy

# without sha-service lookups (pinned / latest image tags)
export environment=staging GH_SERVICE_TOKEN=$GH_SERVICE_TOKEN GH_SERVICE_USER=$GH_SERVICE_USER \
SHA_SERVICE_SHA=sha-7fa6afa \
WEB_SHA=latest \
SIDEKIQ_SHA=latest \
COUNTER_SHA=latest \
OUTLET_SHA=latest; helm upgrade powr .helm \
--namespace ${environment} \
--cleanup-on-fail \
--create-namespace \
--install \
--timeout 420s \
--values .helm/values.${environment}.yaml \
--set ghcr.password=${GH_SERVICE_TOKEN} \
--set ghcr.username=${GH_SERVICE_USER} \
--set sha-service.app.image=ghcr.io/powrful/main/sha-service:${SHA_SERVICE_SHA} \
--set outlet.app.image=ghcr.io/powrful/${environment}/outlet:${OUTLET_SHA} \
--set counter.app.image=ghcr.io/powrful/${environment}/counter:${COUNTER_SHA} \
--set sidekiq.app.image=ghcr.io/powrful/${environment}/sidekiq:${SIDEKIQ_SHA} \
--set web.app.image=ghcr.io/powrful/${environment}/web:${WEB_SHA} --dry-run

# with sha-service lookups (image tags resolved via the sha-service API)
export environment=staging GH_SERVICE_TOKEN=$GH_SERVICE_TOKEN GH_SERVICE_USER=$GH_SERVICE_USER \
SHA_SERVICE_SHA=$(curl sha.api.powr-staging.io/api/v1/powrful/main/sha-service) \
WEB_SHA=$(curl sha.api.powr-staging.io/api/v1/powrful/${environment}/web) \
SIDEKIQ_SHA=$(curl sha.api.powr-staging.io/api/v1/powrful/${environment}/sidekiq) \
COUNTER_SHA=$(curl sha.api.powr-staging.io/api/v1/powrful/${environment}/counter) \
OUTLET_SHA=$(curl sha.api.powr-staging.io/api/v1/powrful/${environment}/outlet); helm upgrade powr .helm \
--namespace ${environment} \
--cleanup-on-fail \
--create-namespace \
--install \
--timeout 420s \
--values .helm/values.${environment}.yaml \
--set ghcr.password=${GH_SERVICE_TOKEN} \
--set ghcr.username=${GH_SERVICE_USER} \
--set sha-service.app.image=ghcr.io/powrful/main/sha-service:${SHA_SERVICE_SHA} \
--set outlet.app.image=ghcr.io/powrful/${environment}/outlet:${OUTLET_SHA} \
--set counter.app.image=ghcr.io/powrful/${environment}/counter:${COUNTER_SHA} \
--set sidekiq.app.image=ghcr.io/powrful/${environment}/sidekiq:${SIDEKIQ_SHA} \
--set web.app.image=ghcr.io/powrful/${environment}/web:${WEB_SHA} --dry-run

# elegant deploy command
export ENVI=staging && \
SHA_BASE="https://sha-service.powr-staging.io/api/v1/powrful" && \
helm upgrade powr .helm \
--namespace $ENVI \
--cleanup-on-fail \
--create-namespace \
--install \
--timeout 420s \
--values .helm/values${ENVI:+.$ENVI}.yaml \
--set ghcr.password=$GH_SERVICE_TOKEN \
--set ghcr.username=$GH_SERVICE_USER \
--set pgbouncer.env.pgbDatabaseUrl=$DB_URL \
--set cms.app.image=ghcr.io/powrful/staging/cms:$(curl -s $SHA_BASE/staging/cms) \
--set shaservice.app.image=ghcr.io/powrful/main/sha-service:$(curl -s $SHA_BASE/main/sha-service) \
--set outlet.app.image=ghcr.io/powrful/$ENVI/outlet:$(curl -s $SHA_BASE/$ENVI/outlet) \
--set counter.app.image=ghcr.io/powrful/$ENVI/counter:$(curl -s $SHA_BASE/$ENVI/counter) \
--set sidekiq.app.image=ghcr.io/powrful/$ENVI/web:$(curl -s $SHA_BASE/$ENVI/web) \
--set web.app.image=ghcr.io/powrful/$ENVI/web:$(curl -s $SHA_BASE/$ENVI/web) \
--set canaryWeb.app.image=ghcr.io/powrful/$ENVI/web:$(curl -s "$SHA_BASE/$ENVI/web?get=1") \
--set hipaapi.app.image=ghcr.io/powrful/$ENVI/hipaapi:$(curl -s $SHA_BASE/$ENVI/hipaapi) --dry-run
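
After a real (non --dry-run) deploy, the release history and the values Helm actually resolved can be inspected; a hedged example for the staging namespace:

helm history powr --namespace staging
helm get values powr --namespace staging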

export environment=staging GH_SERVICE_TOKEN=$GH_SERVICE_TOKEN GH_SERVICE_USER=$GH_SERVICE_USER \
WEB_SHA=$(curl sha.api.powr-staging.io/api/v1/powrful/${environment}/web) \
SIDEKIQ_SHA=$(curl sha.api.powr-staging.io/api/v1/powrful/${environment}/sidekiq) \
COUNTER_SHA=$(curl sha.api.powr-staging.io/api/v1/powrful/${environment}/counter) \
OUTLET_SHA=$(curl sha.api.powr-staging.io/api/v1/powrful/${environment}/outlet); helm upgrade powr .helm \
--namespace ${environment} \
--cleanup-on-fail \
--create-namespace \
--install \
--timeout 420s \
--values .helm/values.${environment}.yaml \
--set ghcr.password=${GH_SERVICE_TOKEN} \
--set ghcr.username=${GH_SERVICE_USER} \
--set outlet.app.image=ghcr.io/powrful/${environment}/outlet:${OUTLET_SHA} \
--set counter.app.image=ghcr.io/powrful/${environment}/counter:${COUNTER_SHA} \
--set sidekiq.app.image=ghcr.io/powrful/${environment}/sidekiq:${SIDEKIQ_SHA} \
--set web.app.image=ghcr.io/powrful/${environment}/web:${WEB_SHA} --dry-run

export environment=alpha GH_SERVICE_TOKEN=$GH_SERVICE_TOKEN GH_SERVICE_USER=$GH_SERVICE_USER \
WEB_SHA=$(curl sha.api.powr-staging.io/api/v1/powrful/${environment}/web) \
SIDEKIQ_SHA=$(curl sha.api.powr-staging.io/api/v1/powrful/${environment}/sidekiq) \
COUNTER_SHA=$(curl sha.api.powr-staging.io/api/v1/powrful/${environment}/counter) \
OUTLET_SHA=$(curl sha.api.powr-staging.io/api/v1/powrful/${environment}/outlet); \
helm upgrade powr .helm \
--namespace ${environment} \
--cleanup-on-fail \
--create-namespace \
--install \
--timeout 420s \
--values .helm/values.${environment}.yaml \
--set ghcr.password=${GH_SERVICE_TOKEN} \
--set ghcr.username=xcfw \
--set outlet.app.image=ghcr.io/powrful/${environment}/outlet:${OUTLET_SHA} \
--set counter.app.image=ghcr.io/powrful/${environment}/counter:${COUNTER_SHA} \
--set sidekiq.app.image=ghcr.io/powrful/${environment}/sidekiq:${SIDEKIQ_SHA} \
--set web.app.image=ghcr.io/powrful/${environment}/web:${WEB_SHA} --dry-run
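
If a deploy misbehaves, the release can be rolled back to its previous revision; a minimal sketch for the alpha namespace used above:

# pass an explicit revision number (see helm history) to target a specific release
helm rollback powr --namespace alpha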

Upgrade ingress version safely:

# Alpha
helm upgrade ingress \
oci://ghcr.io/nginxinc/charts/nginx-ingress \
--version 1.4.1 \
--namespace ingress \
--set controller.kind=deployment \
--set controller.nginxplus=false \
--set controller.image.pullPolicy=Always \
--set serviceNameOverride="nginx-ingress-nginx-ingress" \
--set controller.name="" \
--set fullnameOverride="nginx-ingress-nginx-ingress" \
--set controller.enableSnippets=true \
--set controller.service.loadBalancerIP="34.75.110.121" \
--set controller.wildcardTLS.secret=ingress/powr-alpha-wildcard-tls \
--set controller.autoscaling.enabled=true \
--set controller.replicaCount=2 \
--set controller.config.entries.worker-connections='"2048"' \
--set controller.config.entries.client-max-body-size='"200m"' \
--set controller.config.entries.proxy-buffer-size='"256k"' \
--set controller.config.entries.proxy-buffers='"4 512k"' \
--set controller.config.entries.proxy-busy-buffers-size='"512k"' \
--set controller.enableLatencyMetrics=true \
--set prometheus.create=true

# Staging
helm upgrade ingress oci://ghcr.io/nginxinc/charts/nginx-ingress \
--version 1.2.0 \
--namespace ingress \
--set controller.kind=deployment \
--set controller.nginxplus=false \
--set controller.image.pullPolicy=Always \
--set serviceNameOverride="nginx-ingress-nginx-ingress" \
--set controller.name="" \
--set fullnameOverride="nginx-ingress-nginx-ingress" -f .helm/values.alpha.yaml

Another approach for NLB

helm upgrade ingress nginx-stable/nginx-ingress \
--namespace ingress \
--set controller.service.externalTrafficPolicy=Local \
--set controller.setAsDefaultIngress=true \
--set controller.service.type=LoadBalancer \
--set controller.service.annotations."service\.beta\.kubernetes\.io/aws-load-balancer-type"="nlb" \
--set controller.service.annotations."service\.beta\.kubernetes\.io/aws-load-balancer-proxy-protocol"="*" \
--set controller.service.annotations."service\.beta\.kubernetes\.io/aws-load-balancer-backend-protocol"="tcp" \
--set controller.enableSnippets=true \
--set controller.wildcardTLS.secret=ingress/powr-staging-wildcard-tls --dry-run

After terraform apply on alpha:

cluster_endpoint = "https://7A0F8F8DD10C99CAA399BB60472A86DB.gr7.us-east-1.eks.amazonaws.com"
cluster_id = "powr-alpha-cluster"
cluster_name = "powr-alpha-cluster"
cluster_security_group_id = "sg-0f12585e1a419d7f8"
region = "us-east-1"

AWS health checks

Health check settings:
Protocol: HTTP
Path: /healthz
Port: 32373
Healthy threshold: 2 consecutive health check successes
Unhealthy threshold: 2 consecutive health check failures
Timeout: 6 seconds
Interval: 10 seconds
Success codes: 200-399
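
If this NLB is managed by the AWS Load Balancer Controller, the same settings can typically be pinned as Service annotations rather than edited in the AWS console; a hedged sketch where <ingress-controller-svc> is a placeholder for the controller's Service name and the values come from the table above:

kubectl annotate service <ingress-controller-svc> --namespace ingress \
service.beta.kubernetes.io/aws-load-balancer-healthcheck-protocol=HTTP \
service.beta.kubernetes.io/aws-load-balancer-healthcheck-path=/healthz \
service.beta.kubernetes.io/aws-load-balancer-healthcheck-healthy-threshold=2 \
service.beta.kubernetes.io/aws-load-balancer-healthcheck-unhealthy-threshold=2 \
service.beta.kubernetes.io/aws-load-balancer-healthcheck-timeout=6 \
service.beta.kubernetes.io/aws-load-balancer-healthcheck-interval=10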

Edit TLS wildcard secret

gcloud container \
clusters \
get-credentials stage \
--region us-east1 \
--project powr-staging \
&& kubectl edit secret \
powr-staging-wildcard-tls \
--namespace ingress
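
To replace the certificate non-interactively instead of hand-editing the base64 data, the create command can be piped through apply; new.key and new.pem are placeholders for the renewed files:

kubectl create secret tls powr-staging-wildcard-tls \
--namespace ingress \
--key new.key \
--cert new.pem \
--dry-run=client --output yaml | kubectl apply -f -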

Gcloud NodeLocal DNSCache

gcloud container clusters update main --region=us-east1 \
--update-addons=NodeLocalDNS=ENABLED

Running the update prompts with:

Default change: During creation of nodepools or autoscaling configuration changes for cluster versions greater than 1.24.1-gke.800 a default location policy is applied. For Spot and PVM it defaults to ANY, and for all other VM kinds a BALANCED policy is used. To change the default values use the `--location-policy` flag.
Enabling/Disabling NodeLocal DNSCache causes a re-creation of all cluster nodes at versions 1.15 or above. This operation is long-running and will block other operations on the cluster
(including delete) until it has run to completion.

Do you want to continue (Y/n)?

Create a Cloud Router

gcloud compute routers create stage-router \
--network default \
--region us-east1

Add a configuration to the router

Source subnets & IP ranges
default: 10.142.0.0/20 (Primary)10.43.0.0/22 (Secondary: gke-stage-auto-services-b86b963a)10.42.128.0/17 (Secondary: gke-stage-auto-pods-b86b963a)

gcloud CLI:

gcloud compute routers nats create stage-nat-config \
--router-region us-east1 \
--router stage-router \
--nat-all-subnet-ip-ranges \
--auto-allocate-nat-external-ips
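
A quick way to confirm the NAT configuration landed on the router (the status output should include the NAT section):

gcloud compute routers get-status stage-router \
--region us-east1 \
--project powr-staging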

Firewall

gcloud compute \
firewall-rules create \
allow-all-incoming-http-s-on-all-pods \
--project=powr-prod \
--direction=INGRESS \
--priority=9999 \
--network=default \
--action=ALLOW \
--rules=tcp:80,tcp:443,tcp:5432,udp:443,udp:5432 \
--source-ranges=0.0.0.0/0

Delete all review apps

kubectl get namespaces --no-headers=true | grep -oP 'review-\d+' | xargs kubectl delete namespaces

Alternative command

kubectl get namespaces | rg "review-... " | awk '{print $1}' | xargs -l1 -- sh -c 'kubectl delete namespace "$1"' --

Deploy GCP Memorystore Redis

gcloud redis instances create powr-alpha-redis \
--project=powr-staging \
--tier=basic \
--size=1 \
--region=us-east1 \
--redis-version=redis_6_x \
--network=projects/powr-staging/global/networks/default \
--connect-mode=PRIVATE_SERVICE_ACCESS \
--reserved-ip-range=default-ip-range \
--maintenance-window-day=MONDAY \
--maintenance-window-hour=8 \
--persistence-mode=RDB \
--rdb-snapshot-period=1h

Staging Redis

gcloud redis instances create powr-staging-redis \
--project=powr-staging \
--tier=basic \
--size=1 \
--region=us-east1 \
--redis-version=redis_6_x \
--network=projects/powr-staging/global/networks/default \
--connect-mode=PRIVATE_SERVICE_ACCESS \
--reserved-ip-range=default-ip-range \
--maintenance-window-day=FRIDAY \
--maintenance-window-hour=21 \
--persistence-mode=RDB \
--rdb-snapshot-period=1h

Production Redis

gcloud redis instances create powr-prod-redis \
--project=powr-prod \
--tier=standard \
--size=16 \
--region=us-east1 \
--redis-version=redis_6_x \
--network=projects/powr-prod/global/networks/default \
--read-replicas-mode=READ_REPLICAS_ENABLED \
--replica-count=2 \
--connect-mode=PRIVATE_SERVICE_ACCESS \
--display-name="powr-prod-redis" \
--maintenance-window-day=SUNDAY \
--maintenance-window-hour=8 \
--persistence-mode=RDB \
--rdb-snapshot-period=1h
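
Once an instance is ready, its private host and port can be read back for the app configuration; a hedged example for the staging instance:

gcloud redis instances describe powr-staging-redis \
--region=us-east1 \
--project=powr-staging \
--format="value(host,port)"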

Gcloud VM instance SSH

gcloud compute ssh --zone "us-east1-b" "ruby-dev-vm"  --project "powr-staging"

CloudSQL dump and restore

Dump

pg_dump $DB_URL --format=custom --no-owner --no-acl > powr_outlet_prod.dmp

Restore

pg_restore -O -c -d $DB_URL powr_outlet_prod.dmp --verbose

PG and Redis dump and restore commands

# redis dump
upstash-redis-dump -db 0 -host $UPSTASHHOST -port $UPSTASHPORT -pass $UPSTASHPASS > upstashsidekiq.dump

# before restoring, fix the dump in vim (:set ff=dos) and change the namespace
# redis restore
redis-cli -u redis://10.88.177.6:6379 --pipe < upstashsidekiq.dump

# counter dump
pg_dump $COUNTERDB --format=custom --no-owner --no-acl > prodcounter.dmp

# outlet dump
pg_dump $OUTLETDB --format=custom --no-owner --no-acl > prodoutlet.dmp

# counter restore
pg_restore -O -c -d $GCOUNTERDB prodcounter.dmp --verbose

# outlet restore
pg_restore -O -c -d $GOUTLETDB prodoutlet.dmp --verbose
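
A couple of hedged sanity checks after restoring (same connection variables and Redis endpoint as above):

# list tables in the restored outlet database
psql $GOUTLETDB -c '\dt'

# key count in the target Redis
redis-cli -u redis://10.88.177.6:6379 dbsize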