Commit 60925554 by Théo Zimmermann

Initial commit: document how to set up Coder on Kubernetes on OpenStack.

.envrc 0 → 100644
use flake
.gitignore 0 → 100644
openrc
.direnv
.ipynb_checkpoints
result
*.tar.zst
*.zip
*~
Coder-management.ipynb 0 → 100644 (diff collapsed)
Dockerfile 0 → 100644
FROM inf110-workspace-base:latest
# Install the coq-lsp extension for code-server
RUN code-server --install-extension ejgallego.coq-lsp
# Install the Jupyter / Python extensions for code-server
RUN code-server --install-extension ms-toolsai.jupyter
RUN code-server --install-extension ms-python.python
# Suppress error message when running git pull
RUN git config --global --add safe.directory /home/coder/tp
# Add code-server user settings
COPY user-settings.json /home/coder/.local/share/code-server/User/settings.json
README.md 0 → 100644
## Installation process
This document describes the installation process of Coder for the INF110 labs on the R2 OpenStack cluster.
It should be possible to adapt this to other use cases or OpenStack clusters with minor modifications.
### Create the cluster template
```bash
source openrc # done automatically by the Nix dev shell; make sure this file exists
export KEYPAIR= # your keypair
openstack coe cluster template create \
--coe kubernetes \
--image coreos-35 \
--external-network $(openstack network show provider -f json | jq -r .id) \
--keypair $KEYPAIR \
--network-driver flannel \
--volume-driver cinder \
--dns-nameserver 137.194.2.16 \
--flavor m2.xlarge \
--master-flavor m1.medium \
--docker-storage-driver overlay2 \
--labels 'auto_healing_enabled=true,auto_scaling_enabled=true,min_node_count=1,max_node_count=100,boot_volume_type=LVM-NVME,etcd_volume_type=LVM-NVME,docker_volume_type=LVM-NVME' \
--registry-enabled \
--insecure-registry '10.0.0.99:5000' \
--server-type vm \
--floating-ip-disabled \
coder
```
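You can inspect the resulting template with:
```bash
openstack coe cluster template show coder
```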
### Create the Kubernetes cluster
```bash
openstack coe cluster create --cluster-template coder --node-count 1 inf110
```
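Cluster creation takes a while; you can poll its status until it reaches `CREATE_COMPLETE`:
```bash
watch openstack coe cluster show inf110 -f value -c status
```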
It can happen that the cluster creation hangs. In that case, log in to the master node and check if it reports that the `etcd` service failed. If so, restart the `etcd` service and the cluster creation should continue.
```bash
sudo systemctl restart etcd
```
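If you do not already have a shell on the master, something like the following should locate it (a sketch; at this stage the jump host set up later does not exist yet, so adapt the connectivity to your environment):
```bash
# Hypothetical lookup; assumes the master address is reachable from your machine
export MASTER_IP=$(openstack coe cluster show inf110 -f json | jq -r '.master_addresses[0]')
ssh core@$MASTER_IP sudo systemctl status etcd
```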
### Create a VM for the local Docker registry
This registry will be used to store the Docker image of the Coder workspace.
```bash
openstack server create \
--flavor smi-worker \
--image coreos-35 \
--no-network \
--security-group default \
--security-group registry \
--availability-zone nova \
--key-name $KEYPAIR \
inf110-registry
```
Because we have set a fixed IP for the Docker insecure registry in the cluster template (`10.0.0.99:5000`), we need to assign this IP to the VM.
```bash
openstack server add fixed ip \
--fixed-ip-address 10.0.0.99 \
inf110-registry \
inf110
```
### Create a VM for the reverse proxy
The reverse proxy will be used to access Coder via HTTPS.
```bash
openstack server create \
--flavor smi-worker \
--image ubuntu-jammy-19-09-2023 \
--network inf110 \
--security-group default \
--security-group ssh_icmp \
--security-group HTTP_HTTPS \
--availability-zone nova \
--key-name $KEYPAIR \
inf110-reverse-proxy
```
Because we have a DNS entry for the reverse proxy, we need to assign a specific floating IP (`137.194.210.143`) to it.
```bash
openstack server add floating ip \
inf110-reverse-proxy \
137.194.210.143
```
### Install the Docker registry
Launch the Docker registry, backed by Swift, on the registry VM.
```bash
ssh -J ubuntu@137.194.210.143 core@10.0.0.99 sudo docker run -d \
-p 5000:5000 \
-e REGISTRY_STORAGE=swift \
-e REGISTRY_STORAGE_SWIFT_CONTAINER=docker_registry \
-e REGISTRY_STORAGE_SWIFT_AUTHURL="$OS_AUTH_URL" \
-e REGISTRY_STORAGE_SWIFT_USERNAME="$OS_USERNAME" \
-e REGISTRY_STORAGE_SWIFT_PASSWORD="$OS_PASSWORD" \
-e REGISTRY_STORAGE_SWIFT_REGION="$OS_REGION_NAME" \
-e REGISTRY_STORAGE_SWIFT_DOMAIN="$OS_USER_DOMAIN_NAME" \
--restart=always \
--name registry \
registry:2
```
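To check that the registry came up, you can query the Docker Registry HTTP API v2 from the registry VM (a quick sanity check; the repository list should be empty at this point):
```bash
ssh -J ubuntu@137.194.210.143 core@10.0.0.99 curl -s http://localhost:5000/v2/_catalog
# {"repositories":[]}
```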
### Build the Coder workspace image
This part requires both Nix and Docker.
```bash
docker load -i $(nix build .#dockerContainers.inf110-workspace --print-out-paths)
docker build -t 10.0.0.99:5000/inf110-workspace:latest . # tag with the registry address so the image can be pushed as-is after loading
docker save 10.0.0.99:5000/inf110-workspace:latest -o inf110-workspace.tar
# Unpacking a zstd-compressed tarball is not supported by Docker on CoreOS 35
# and compressing with gzip or xz is too slow.
#docker save 10.0.0.99:5000/inf110-workspace:latest | zstd > inf110-workspace.tar.zst
```
### Push the Coder workspace image to the insecure Docker registry
Since the Docker registry is not accessible publicly, we use `scp` to copy the image to a node in the cluster and push it from there.
```bash
export NODE_IP=$(openstack coe cluster show inf110 -f json | jq -r '.node_addresses[0]')
scp -J ubuntu@137.194.210.143 inf110-workspace.tar core@$NODE_IP:/tmp
ssh -J ubuntu@137.194.210.143 core@$NODE_IP
sudo -i
docker load -i /tmp/inf110-workspace.tar
docker push 10.0.0.99:5000/inf110-workspace:latest
rm /tmp/inf110-workspace.tar
```
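Still on the node, you can confirm that the push succeeded via the registry's `tags/list` endpoint:
```bash
curl -s http://10.0.0.99:5000/v2/inf110-workspace/tags/list
# {"name":"inf110-workspace","tags":["latest"]}
```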
### Expose the Kubernetes dashboard
For this, we need to log in to the master node.
```bash
export MASTER_IP=$(openstack coe cluster show inf110 -f json | jq -r '.master_addresses[0]')
ssh -J ubuntu@137.194.210.143 core@$MASTER_IP
sudo -i
kubectl expose deployment kubernetes-dashboard --type=LoadBalancer --name=kube-dashboard -n kube-system
kubectl get svc -n kube-system # get the external IP of the dashboard
kubectl -n kube-system describe secret $(kubectl -n kube-system get secret | grep admin-token | awk '{print $1}') # get the admin token to log into the dashboard
```
### Install Coder
Copy the `postgresql-pvc.yml`, `postgresql-values.yml`, and `coder-values.yml` files to the master node.
```bash
scp -J ubuntu@137.194.210.143 postgresql-pvc.yml postgresql-values.yml coder-values.yml core@$MASTER_IP:/tmp
```
Once the files are copied, log in to the master node and install Coder.
```bash
ssh -J ubuntu@137.194.210.143 core@$MASTER_IP
sudo -i
rpm-ostree install helm # no reboot required
kubectl create namespace coder
kubectl apply -f /tmp/postgresql-pvc.yml -n coder
helm repo add bitnami https://charts.bitnami.com/bitnami
helm install coder-db bitnami/postgresql -n coder -f /tmp/postgresql-values.yml --version 13.0.0 # latest chart version compatible with Helm 3.2, the version available on CoreOS 35
kubectl create secret generic coder-db-url -n coder --from-literal=url="postgres://coder:coder@coder-db-postgresql.coder.svc.cluster.local:5432/coder?sslmode=disable"
helm repo add coder-v2 https://helm.coder.com/v2
helm install coder coder-v2/coder --namespace coder --values /tmp/coder-values.yml --version 2.13.5 # Latest stable version
```
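To verify that the installation succeeded, check that the pods come up:
```bash
kubectl get pods -n coder # the coder and coder-db-postgresql pods should reach the Running state
```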
A public IP is created for the Coder load balancer, but it is not needed, since Coder will be accessed via HTTPS through the reverse proxy. We can therefore remove the public IP.
### Install the reverse proxy
Retrieve the private IP of the Coder load balancer.
```bash
openstack loadbalancer list -f json | jq -r \
'.[] | select(.name | contains("coder")) | .vip_address'
```
(This can also be done via the OpenStack dashboard, where the public IP can be removed as well.)
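For reference, a possible CLI equivalent for releasing the public IP (replace `<FLOATING_IP>` with the address shown by `openstack floating ip list`):
```bash
openstack floating ip unset --port <FLOATING_IP> # detach it from the load balancer port
openstack floating ip delete <FLOATING_IP>       # release the address back to the pool
```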
Install the reverse proxy on the reverse proxy VM.
```bash
ssh ubuntu@137.194.210.143
sudo -i
apt install nginx python3-certbot-nginx
certbot --nginx -d tp-inf110.r2.enst.fr
```
Edit the `/etc/nginx/sites-available/default` file (`vim /etc/nginx/sites-available/default`) and replace the `location /` block in the SSL server block with the following:
```nginx
location / {
proxy_pass http://10.0.0.XXX;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header Host $host;
}
```
replacing `10.0.0.XXX` with the private IP of the Coder load balancer.
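Check the configuration before restarting:
```bash
nginx -t
```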
Restart the Nginx service.
```bash
systemctl restart nginx
```
## Coder management
Access Coder via the reverse proxy (https://tp-inf110.r2.enst.fr) and create the admin user.
Go to https://tp-inf110.r2.enst.fr/settings/tokens and create a new token, which will be used to manage Coder via its API in the `Coder-management.ipynb` notebook.
Add its value to the `openrc` file:
```bash
export CODER_TOKEN= # the token
```
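To check that the token works, you can query the Coder API directly (a sketch using httpie, which is available in the dev shell; the Coder v2 API expects the token in the `Coder-Session-Token` header):
```bash
http https://tp-inf110.r2.enst.fr/api/v2/users/me "Coder-Session-Token:$CODER_TOKEN"
```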
Launch `jupyter lab`, open the `Coder-management.ipynb` notebook, and follow the instructions there.
## Troubleshooting
### Cluster auto-scaling issues
If the cluster is in the "Update failed" state and the underlying issue (e.g., an exceeded quota) has been resolved, the cluster state can be restored with the following command:
```bash
openstack coe cluster resize inf110 $(openstack coe cluster show inf110 -f json | jq .node_count)
```
The above is a request to resize to the current (known) node count. If the current count is incorrect, you can specify a different count.
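You can then watch the cluster return to a healthy state:
```bash
openstack coe cluster show inf110 -f value -c status # should eventually report UPDATE_COMPLETE
```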
coder-values.yml 0 → 100644
coder:
env:
- name: CODER_PG_CONNECTION_URL
valueFrom:
secretKeyRef:
name: coder-db-url
key: url
- name: CODER_ACCESS_URL
value: "https://tp-inf110.r2.enst.fr"
flake.lock 0 → 100644
{
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1725816686,
"narHash": "sha256-0Kq2MkQ/sQX1rhWJ/ySBBQlBJBUK8mPMDcuDhhdBkSU=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "add0443ee587a0c44f22793b8c8649a0dbc3bb00",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixpkgs-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}
flake.nix 0 → 100644
{
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
};
outputs = { self, nixpkgs }:
let pkgs = nixpkgs.legacyPackages.x86_64-linux;
in
{
devShell.x86_64-linux =
pkgs.mkShell {
buildInputs = with pkgs; [
(python311.withPackages
(ps: with ps; [
# We need to make sure that all the Python programs are installed
# with the same version of Python to avoid overriding PYTHONPATH
jupyter
python-openstackclient
python-magnumclient
python-octaviaclient
httpie
]))
pwgen
jq
];
shellHook = ''
source openrc
'';
};
dockerContainers.inf110-workspace =
pkgs.dockerTools.buildLayeredImage {
name = "inf110-workspace-base";
tag = "latest";
# Update to the latest version of the image with the output of:
# $ nix run nixpkgs#nix-prefetch-docker -- --image-name codercom/code-server --image-tag latest --arch amd64 --os linux
fromImage = pkgs.dockerTools.pullImage {
imageName = "codercom/code-server";
imageDigest = "sha256:f32cfa5e7cc768f60969f1d2183fc4c7aa093b80b93c365535e83f79ccf004f2";
sha256 = "0aa297aqrhfi0c3wybd9wld95axsn84g0caprzqrihdfxzh9bw91";
finalImageName = "codercom/code-server";
finalImageTag = "latest";
};
config = {
User = "coder";
Env = [
"LANG=en_US.UTF-8"
"HOME=/home/coder"
];
};
contents = with pkgs; buildEnv {
name = "copyToRoot";
paths = [
bashInteractive
coq_8_19
coqPackages_8_19.coq-lsp
dockerTools.binSh
(python3.withPackages (ps: with ps; [ jupyter-client ipykernel ]))
];
pathsToLink = [ "/bin" ];
};
};
};
}
README.md (Coder workspace template) 0 → 100644
---
display_name: TP d'INF110
description: Espace de travail pour les TP d'INF110
icon: ../../../site/static/emojis/1f4d0.png
maintainer_github: coder
verified: true
tags: []
---
# Workspace for the INF110 labs
This template is used to create the workspaces for the INF110 labs.
A workspace has already been created for you, and the lab files have been copied into it. Do not try to create a new workspace, as the lab files would not be present in it.
main.tf 0 → 100644
terraform {
required_providers {
coder = {
source = "coder/coder"
}
kubernetes = {
source = "hashicorp/kubernetes"
}
}
}
provider "coder" {
}
variable "use_kubeconfig" {
type = bool
description = <<-EOF
Use host kubeconfig? (true/false)
Set this to false if the Coder host is itself running as a Pod on the same
Kubernetes cluster as you are deploying workspaces to.
Set this to true if the Coder host is running outside the Kubernetes cluster
for workspaces. A valid "~/.kube/config" must be present on the Coder host.
EOF
default = false
}
variable "namespace" {
type = string
description = "The Kubernetes namespace to create workspaces in (must exist prior to creating workspaces). If the Coder host is itself running as a Pod on the same Kubernetes cluster as you are deploying workspaces to, set this to the same namespace."
default = "coder"
}
variable "cpu" {
type = string
description = "The number of CPU cores"
default = "2"
}
variable "memory" {
type = string
description = "The amount of memory in GB"
default = "2"
}
variable "disk_size" {
type = string
description = "The size of the persistent disk in GB"
default = "5"
}
provider "kubernetes" {
# Authenticate via ~/.kube/config or a Coder-specific ServiceAccount, depending on admin preferences
config_path = var.use_kubeconfig == true ? "~/.kube/config" : null
}
data "coder_workspace" "me" {}
data "coder_workspace_owner" "me" {}
resource "coder_agent" "inf110" {
os = "linux"
arch = "amd64"
startup_script_behavior = "blocking"
startup_script = <<-EOT
set -e
# run code-server
code-server --auth none --port 13337 >/tmp/code-server.log 2>&1 &
EOT
# The following metadata blocks are optional. They are used to display
# information about your workspace in the dashboard. You can remove them
# if you don't want to display any information.
# For basic resources, you can use the `coder stat` command.
# If you need more control, you can write your own script.
metadata {
display_name = "CPU Usage"
key = "0_cpu_usage"
script = "coder stat cpu"
interval = 10
timeout = 1
}
metadata {
display_name = "RAM Usage"
key = "1_ram_usage"
script = "coder stat mem"
interval = 10
timeout = 1
}
metadata {
display_name = "Disk Usage"
key = "2_disk_usage"
script = "coder stat disk --path $${HOME}/tp"
interval = 60
timeout = 1
}
metadata {
display_name = "CPU Usage (Host)"
key = "4_cpu_usage_host"
script = "coder stat cpu --host"
interval = 10
timeout = 1
}
metadata {
display_name = "Memory Usage (Host)"
key = "5_mem_usage_host"
script = "coder stat mem --host"
interval = 10
timeout = 1
}
metadata {
display_name = "Load Average (Host)"
key = "6_load_host"
# get load avg scaled by number of cores
script = <<EOT
echo "`cat /proc/loadavg | awk '{ print $1 }'` `nproc`" | awk '{ printf "%0.2f", $1/$2 }'
EOT
interval = 60
timeout = 1
}
display_apps {
# We disable access to all apps other than code-server
vscode = false
vscode_insiders = false
web_terminal = false
ssh_helper = false
port_forwarding_helper = false
}
}
# code-server
resource "coder_app" "code-server" {
agent_id = coder_agent.inf110.id
slug = "code-server"
display_name = "Cliquez ici pour démarrer VS Code"
icon = "/icon/code.svg"
url = "http://localhost:13337?folder=/home/coder/tp"
subdomain = false
share = "owner"
healthcheck {
url = "http://localhost:13337/healthz"
interval = 3
threshold = 10
}
}
resource "kubernetes_persistent_volume_claim" "tp_inf110_folder" {
metadata {
name = "coder-pvc-${lower(data.coder_workspace_owner.me.name)}-tp-inf110-${lower(data.coder_workspace.me.name)}"
namespace = var.namespace
labels = {
"app.kubernetes.io/name" = "coder-pvc"
"app.kubernetes.io/instance" = "coder-pvc-${lower(data.coder_workspace_owner.me.name)}-tp-inf110-${lower(data.coder_workspace.me.name)}"
"app.kubernetes.io/part-of" = "coder"
//Coder-specific labels.
"com.coder.resource" = "true"
"com.coder.workspace.id" = data.coder_workspace.me.id
"com.coder.workspace.name" = data.coder_workspace.me.name
"com.coder.user.id" = data.coder_workspace_owner.me.id
"com.coder.user.username" = data.coder_workspace_owner.me.name
}
annotations = {
"com.coder.user.email" = data.coder_workspace_owner.me.email
}
}
wait_until_bound = false
spec {
access_modes = ["ReadWriteOnce"]
resources {
requests = {
storage = "${var.disk_size}Gi"
}
}
# This storage class was created during the Kubernetes cluster setup
# with the `postgresql-pvc.yml` file.
storage_class_name = "csi-sc-cinderplugin-lvm-nvme"
}
}
resource "kubernetes_deployment" "main" {
count = data.coder_workspace.me.start_count
depends_on = [
kubernetes_persistent_volume_claim.tp_inf110_folder
]
wait_for_rollout = false
metadata {
name = "coder-${lower(data.coder_workspace_owner.me.name)}-tp-inf110-${lower(data.coder_workspace.me.name)}"
namespace = var.namespace
labels = {
"app.kubernetes.io/name" = "coder-workspace"
"app.kubernetes.io/instance" = "coder-workspace-${lower(data.coder_workspace_owner.me.name)}-tp-inf110-${lower(data.coder_workspace.me.name)}"
"app.kubernetes.io/part-of" = "coder"
"com.coder.resource" = "true"
"com.coder.workspace.id" = data.coder_workspace.me.id
"com.coder.workspace.name" = data.coder_workspace.me.name
"com.coder.user.id" = data.coder_workspace_owner.me.id
"com.coder.user.username" = data.coder_workspace_owner.me.name
}
annotations = {
"com.coder.user.email" = data.coder_workspace_owner.me.email
}
}
spec {
replicas = 1
selector {
match_labels = {
"app.kubernetes.io/name" = "coder-workspace"
}
}
strategy {
type = "Recreate"
}
template {
metadata {
labels = {
"app.kubernetes.io/name" = "coder-workspace"
}
}
spec {
security_context {
run_as_user = 1000
fs_group = 1000
}
container {
name = "dev"
# We pull the image from a local Docker registry inside the Kubernetes cluster network
image = "10.0.0.99:5000/inf110-workspace:latest"
image_pull_policy = "Always"
command = ["sh", "-c", coder_agent.inf110.init_script]
security_context {
run_as_user = "1000"
}
env {
name = "CODER_AGENT_TOKEN"
value = coder_agent.inf110.token
}
resources {
# We make sure to request enough resources to avoid overloading nodes
# when all students are working in their workspaces at the same time
requests = {
"cpu" = "1"
"memory" = "${var.memory}Gi"
}
limits = {
"cpu" = "${var.cpu}"
"memory" = "${var.memory}Gi"
}
}
volume_mount {
# We mount the persistent volume claim to the /home/coder/tp directory
# and not to the /home/coder directory, because the latter contains
# the pre-installed code-server configuration and extensions.
mount_path = "/home/coder/tp"
name = "tp"
read_only = false
}
}
volume {
name = "tp"
persistent_volume_claim {
claim_name = kubernetes_persistent_volume_claim.tp_inf110_folder.metadata.0.name
read_only = false
}
}
}
}
}
}
postgresql-pvc.yml 0 → 100644
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: csi-sc-cinderplugin-lvm-nvme
parameters:
type: LVM-NVME
provisioner: cinder.csi.openstack.org
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: coder-db
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 10Gi
storageClassName: csi-sc-cinderplugin-lvm-nvme
postgresql-values.yml 0 → 100644
# define default database user, name, and password for PostgreSQL deployment
auth:
enablePostgresUser: true
username: "coder"
password: "coder"
database: "coder"
# The PostgreSQL Helm chart deployment will use the pre-created coder-db PVC
primary:
persistence:
enabled: true
existingClaim: "coder-db"
user-settings.json 0 → 100644
{
"workbench.startupEditor": "readme"
}