cleanup and descriptive README

This commit is contained in:
Tobias_Speer 2025-07-23 11:27:10 +02:00
parent ed40c478e9
commit 44e8e027e4
24 changed files with 1605 additions and 2 deletions

View file

@ -0,0 +1,13 @@
---
# Register NVIDIA's CUDA apt repository by installing the cuda-keyring
# package directly from the vendor download server. The URL is assembled
# from host facts: lower-cased distribution name followed by the
# distribution version with the dots removed, then the machine architecture.
- name: Install a nvidia cuda repo from a deb
  vars:
    cuda_keyring_repo_base: "https://developer.download.nvidia.com/compute/cuda/repos"
    cuda_keyring_repo_os: "{{ ansible_facts['distribution'] | lower }}{{ ansible_facts['distribution_version'] | replace('.', '') }}"
    cuda_keyring_repo_arch: "{{ ansible_facts['machine'] }}"
  ansible.builtin.apt:
    update_cache: true
    state: present
    deb: "{{ cuda_keyring_repo_base }}/{{ cuda_keyring_repo_os }}/{{ cuda_keyring_repo_arch }}/cuda-keyring_1.1-1_all.deb"

# Install the cuda package from the repository registered above.
# `async: 1200` gives the task up to 1200 seconds to finish.
- name: Install cuda
  async: 1200
  ansible.builtin.apt:
    update_cache: true
    state: present
    name: cuda

View file

@ -0,0 +1,3 @@
---
# Dispatch to the task file named after the lower-cased
# `distribution_file_variety` fact of the host.
# NOTE(review): the path starts with "roles/cuda/tasks/", so it is presumably
# resolved relative to the playbook directory — confirm plays are always run
# from the repository root.
- name: Install Cuda on host
  ansible.builtin.include_tasks:
    file: "roles/cuda/tasks/{{ ansible_facts['distribution_file_variety'] | lower }}.yml"

View file

@ -0,0 +1,9 @@
{
  "default-runtime": "nvidia",
  "runtimes": {
    "nvidia": {
      "path": "nvidia-container-runtime",
      "args": []
    }
  }
}

View file

@ -0,0 +1,102 @@
---
# Packages installed before the Docker apt repository is added.
# NOTE(review): python3-docker is presumably needed by the community.docker
# modules used elsewhere in this repository — confirm.
- name: Install docker dependencies
  ansible.builtin.apt:
    update_cache: true
    name:
      - apt-transport-https
      - ca-certificates
      - curl
      - gnupg-agent
      - software-properties-common
      - python3-docker

# Directory that receives the downloaded repository signing keys.
- name: Prepare keyrings directory
  ansible.builtin.file:
    state: directory
    path: /etc/apt/keyrings
    mode: "0755"
# Fetch the repository signing keys.
# The Docker key is stored as /etc/apt/keyrings/docker.asc, which the Docker
# repository entry references through `signed-by`. Both keys are additionally
# added to apt's trusted keys; the NVIDIA repository entry carries no
# `signed-by` and therefore depends on that.
# NOTE(review): the apt-key tooling behind ansible.builtin.apt_key is
# deprecated upstream — consider storing the NVIDIA key under
# /etc/apt/keyrings and using `signed-by` for its repository as well.
- name: Add Docker and Nvidia GPG apt Key
  block:
    - name: Get keys via download
      ansible.builtin.get_url:
        url: "{{ item.url }}"
        dest: "/etc/apt/keyrings/{{ item.keyfile }}"
        mode: "a+r"
      with_items:
        - url: https://download.docker.com/linux/ubuntu/gpg
          keyfile: docker.asc

    # No `keyring` option is set, so the keys go to apt's default trusted
    # keyring (an empty `keyring:` key would only pass a null value).
    - name: Add keys to apt key
      ansible.builtin.apt_key:
        url: "{{ item }}"
        state: present
      with_items:
        - https://download.docker.com/linux/ubuntu/gpg
        - https://nvidia.github.io/libnvidia-container/gpgkey
# Register the Docker CE and NVIDIA container toolkit apt repositories.
# Each item is the part of the sources line that follows the "deb " prefix.
- name: Add repos
  ansible.builtin.apt_repository:
    repo: "deb {{ item.repo }}"
    state: present
  with_items:
    - repo: "[arch=amd64 signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu {{ ansible_facts['distribution_release'] }} stable"
    - repo: "https://nvidia.github.io/libnvidia-container/stable/deb/amd64 /"

# The packages are handed to apt as one list so they are resolved and
# installed in a single transaction (and the cache is refreshed once)
# instead of once per loop iteration.
- name: Install docker
  ansible.builtin.apt:
    name:
      - docker-ce
      - docker-ce-cli
      - containerd.io
      - docker-buildx-plugin
      - nvidia-container-toolkit
    state: present
    update_cache: true
# Let nvidia-ctk register the NVIDIA runtime for Docker.
# The command gives no reliable "nothing changed" signal, so the task is
# explicitly reported as changed.
- name: Configure nvidia-container-toolkit
  ansible.builtin.command:
    cmd: nvidia-ctk runtime configure --runtime=docker
  register: nvidia_container_toolkit_configure
  changed_when: true

# Deployed BEFORE the restart below so the running daemon actually loads
# this configuration (including "default-runtime": "nvidia").
- name: Deploy daemon.json
  ansible.builtin.copy:
    src: daemon.json
    dest: /etc/docker/daemon.json
    mode: "0644"

# Restart both units so the runtime configuration above takes effect.
- name: Restart and enable docker-rootful service
  ansible.builtin.systemd:
    name: "{{ item }}"
    state: restarted
    enabled: true
  with_items:
    - docker.service
    - docker.socket

# Smoke test: run nvidia-smi inside a CUDA base image using the NVIDIA
# runtime. The task fails if the container exits with a non-zero status.
- name: Check nvidia-container-toolkit deployment
  block:
    # `command` is sufficient (no shell features are used) and the task
    # inherits privilege escalation from the play, so no `sudo` is needed.
    # The result is registered under its own name so the result of the
    # configure task above is not overwritten.
    - name: Deploy nvidia test container
      ansible.builtin.command:
        cmd: docker run --rm --runtime=nvidia --gpus all nvidia/cuda:11.6.2-base-ubuntu20.04 nvidia-smi
      register: nvidia_smi_check
      changed_when: false

    - name: Print result of the nvidia-smi test container
      ansible.builtin.debug:
        msg: "{{ nvidia_smi_check }}"
# Add the listed users to the docker group as a SUPPLEMENTARY group.
# `append: true` only applies to `groups`; combined with `group` (primary
# group) it has no effect and the primary group would be replaced instead.
- name: Prepare docker group
  ansible.builtin.user:
    name: "{{ item }}"
    groups: docker
    append: true
    state: present
  with_items:
    - blurringtool
# Final state: both Docker units are running and enabled at boot.
- name: Start and enable docker-rootful service
  loop:
    - docker.service
    - docker.socket
  ansible.builtin.systemd:
    enabled: true
    state: started
    name: "{{ item }}"

View file

@ -0,0 +1,7 @@
{
  "auths": {
    "rg.fr-par.scw.cloud": {
      "auth": "base64 hash of the creds"
    }
  }
}

View file

@ -0,0 +1,37 @@
---
# Download the blurit file to /usr/bin/blurit and mark it executable (0755).
- name: Download blurit
  ansible.builtin.get_url:
    dest: /usr/bin/blurit
    mode: "0755"
    url: https://cloud.wassa.io/s/Kuj5MDgqkMCdjRv/download
# Create the working directories below /home/blurringtool. Every entry names
# the directory and the value used for both its owner and its group.
# NOTE(review): "ceertificates" looks like a misspelling of "certificates" —
# confirm which directory name blurit actually reads before renaming it.
- name: Ensure directories
  ansible.builtin.file:
    state: directory
    path: "/home/blurringtool/{{ item.dir }}"
    owner: "{{ item.owner }}"
    group: "{{ item.owner }}"
    mode: "0755"
  with_items:
    - dir: ".config/blurit/ceertificates/"
      owner: blurringtool
    - dir: ".docker/"
      owner: blurringtool
    - dir: "blurit-op/input"
      owner: 1200
    - dir: "blurit-op/output"
      owner: 1200
# Copy configuration files into the blurringtool home directory.
# The file mode is taken from each item so that the Docker registry
# credentials file is deployed with its declared restrictive mode (0600);
# items without a `mode` fall back to 0644.
- name: Copy config file for blurit
  ansible.builtin.copy:
    src: "{{ item.src }}"
    dest: "/home/blurringtool/{{ item.dest }}"
    mode: "{{ item.mode | default('0644') }}"
    owner: blurringtool
    group: blurringtool
  with_items:
    - src: config.json
      dest: .docker/config.json
      mode: "0600"
# Pull every image listed in the `docker` variable for the amd64 platform.
# The task runs without privilege escalation and, through `async: 1800`,
# each pull gets up to 1800 seconds to finish.
- name: Pull all recommended Containers
  community.docker.docker_image_pull:
    platform: amd64
    name: "{{ item }}"
  with_items: "{{ docker }}"
  become: false
  async: 1800

View file

@ -0,0 +1,48 @@
---
# Every entry of `fstab` is a complete fstab line; its second
# whitespace-separated field is the mount point. The file module only
# creates the directory when it is missing, so no separate existence check
# is required. `split()` (no argument) tolerates tabs and repeated spaces
# between the fields.
- name: Create mount point
  ansible.builtin.file:
    path: "{{ item.split()[1] }}"
    state: directory
    mode: "0777"
  with_items: "{{ fstab }}"
# Make sure /etc/passwd-s3fs exists with mode 0600.
# Preserving the timestamps keeps `state: touch` from reporting a change on
# every run when the file already exists.
- name: Create passwd-s3fs file
  ansible.builtin.file:
    path: /etc/passwd-s3fs
    state: touch
    mode: "0600"
    access_time: preserve
    modification_time: preserve
# Add every entry of `fstab` verbatim as a line of /etc/fstab.
- name: Write s3fs mount options to fstab
  ansible.builtin.lineinfile:
    line: "{{ item }}"
    path: /etc/fstab
  with_items:
    - "{{ fstab }}"

# Delete every /etc/fstab line that matches the pattern below.
# NOTE(review): ".*rw,.*" matches any line containing "rw," — not only s3fs
# entries; confirm this broad match is intended.
- name: Remove line from fstab
  ansible.builtin.lineinfile:
    state: absent
    regexp: ".*rw,.*"
    path: /etc/fstab
# Write the s3fs credential lines from `passwd_s3fs` into /etc/passwd-s3fs.
# `no_log` keeps the credentials (the loop items) out of Ansible's output
# and logs.
- name: Write s3fs password to passwd-s3fs
  ansible.builtin.lineinfile:
    path: /etc/passwd-s3fs
    line: "{{ item }}"
    state: present
  with_items:
    - "{{ passwd_s3fs }}"
  no_log: true
# Mount every entry of `fstab`. Each entry is a complete fstab line whose
# first four fields are: source, mount point, filesystem type, options.
# The line is split once per item; `split()` without an argument accepts
# tabs and repeated spaces between fields, which are valid fstab separators.
- name: Mount s3-bucket
  vars:
    fstab_fields: "{{ item.split() }}"
  ansible.posix.mount:
    src: "{{ fstab_fields[0] }}"
    path: "{{ fstab_fields[1] }}"
    fstype: "{{ fstab_fields[2] }}"
    opts: "{{ fstab_fields[3] }}"
    state: mounted
  with_items: "{{ fstab }}"

View file

@ -0,0 +1,17 @@
---
# Reboot the host, wait until it is reachable again and restart Docker.
# NOTE(review): if this file is loaded as a handlers file, verify that the
# installed ansible-core version accepts a `block` in that position.
- name: Reboot host
  block:
    # Reboot and consider the host back once `uptime` runs successfully,
    # waiting at most 1200 seconds.
    - name: Rebooting
      register: reboot_result
      ansible.builtin.reboot:
        test_command: uptime
        reboot_timeout: 1200

    # Only runs when the reboot task reported a change.
    - name: Wait for host to come back online
      when: reboot_result is changed
      ansible.builtin.wait_for_connection:
        timeout: 1800
        delay: 10

    - name: Restart docker to kill Zombies
      ansible.builtin.systemd_service:
        state: restarted
        name: docker

View file

@ -0,0 +1,11 @@
---
# Refresh the apt cache and upgrade the installed packages. Any task in the
# block that reports a change notifies "Reboot host".
- name: Upgrade hosts
  notify: "Reboot host"
  block:
    - name: Update cache
      ansible.builtin.apt:
        update_cache: true

    # `upgrade` takes a string choice; quoting keeps YAML from turning the
    # value into a boolean.
    - name: Upgrade OS
      ansible.builtin.apt:
        upgrade: "yes"