Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 18 additions & 16 deletions roles/helper/pci/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,22 @@ N/A
Role Variables
--------------

| Name | Type | Default | Description |
|-----------------------------|--------|-----------|-------------------------------------------------------------------------------------|
| `pci_devices` | `list` | `[]` | PCI devices configuration. |
| `pci_devices[].excluded` | `bool` | `false` | Do not process matching PCI devices. |
| `pci_devices[].unguarded` | `bool` | `false` | Do not protect matching PCI devices (this may cause primary NIC connectivity loss). |
| `pci_devices[].unlisted` | `bool` | `true` | Do not pass matching PCI devices to OpenNebula. |
| `pci_devices[].virtual` | `bool` | `false` | Do not fail query on missing virtual devices (SR-IOV). |
| `pci_devices[].address` | `str` | undefined | Glob PCI devices by PCI or MAC address. |
| `pci_devices[].vendor` | `str` | `*` | Glob PCI devices by PCI Vendor (if address is undefined). |
| `pci_devices[].device` | `str` | `*` | Glob PCI devices by PCI Device (if address is undefined). |
| `pci_devices[].class` | `str` | `*` | Glob PCI devices by PCI Class (if address is undefined). |
| `pci_devices[].set_counter` | `str` | undefined | Reset the "set_counter" internal counter that can be used with set_name ("{3}"). |
| `pci_devices[].set_driver` | `str` | `omit` | Use driverctl to override driver (unless "omit"). |
| `pci_devices[].set_name` | `str` | `omit` | Rename device in udev (unless "omit"). |
| `pci_devices[].set_numvfs` | `str` | `0` | Enable Virtual Functions for SR-IOV capable devices (integer >= 0 or "max"). |
| Name | Type | Default | Description |
|-------------------------------|--------|-----------|-------------------------------------------------------------------------------------|
| `pci_devices` | `list` | `[]` | PCI devices configuration. |
| `pci_devices[].excluded` | `bool` | `false` | Do not process matching PCI devices. |
| `pci_devices[].unguarded` | `bool` | `false` | Do not protect matching PCI devices (this may cause primary NIC connectivity loss). |
| `pci_devices[].unlisted` | `bool` | `true` | Do not pass matching PCI devices to OpenNebula. |
| `pci_devices[].virtual` | `bool` | `false` | Do not fail query on missing virtual devices (SR-IOV). |
| `pci_devices[].address` | `str` | undefined | Glob PCI devices by PCI or MAC address. |
| `pci_devices[].vendor` | `str` | `*` | Glob PCI devices by PCI Vendor (if address is undefined). |
| `pci_devices[].device` | `str` | `*` | Glob PCI devices by PCI Device (if address is undefined). |
| `pci_devices[].class` | `str` | `*` | Glob PCI devices by PCI Class (if address is undefined). |
| `pci_devices[].set_counter` | `str` | undefined | Reset the "set_counter" internal counter that can be used with set_name ("{3}"). |
| `pci_devices[].set_driver` | `str` | `omit` | Use driverctl to override driver (unless "omit"). |
| `pci_devices[].set_name` | `str` | `omit` | Rename device in udev (unless "omit"). |
| `pci_devices[].set_numvfs` | `str` | `0` | Enable Virtual Functions for SR-IOV capable devices (integer >= 0 or "max"). |
| `pci_devices[].set_switchdev` | `bool` | `false` | Switch the eswitch of SR-IOV capable devices to switchdev mode (the mode is left untouched when `false`). |

Dependencies
------------
Expand Down Expand Up @@ -138,11 +139,12 @@ Example Playbook
- hosts: node
vars:
pci_devices:
# Enable all available VFs for all existing Mellanox PFs.
# Enable all available VFs for all existing Mellanox PFs and put the PFs into switchdev mode.
- vendor: "15b3"
device: "1015"
class: "0200"
set_numvfs: max
set_switchdev: true

# Rename all existing Mellanox VFs using custom counter (starting from 1), then pass them to OpenNebula.
- vendor: "15b3"
Expand Down
52 changes: 15 additions & 37 deletions roles/helper/pci/tasks/devices.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
ansible.builtin.package:
name: "{{ _common + _specific[ansible_os_family] }}"
vars:
_common: [bash, coreutils, driverctl, findutils, grep, pciutils]
_common: [bash, coreutils, driverctl, grep, pciutils]
_specific:
Debian: []
RedHat: []
Suse: []
Debian: [iproute2]
RedHat: [iproute]
Suse: [iproute2]
register: package
until: package is success
retries: 12
Expand Down Expand Up @@ -57,51 +57,25 @@
- when: lspci_devices | count > 0
block:
- name: Render sriov-enable service unit
ansible.builtin.copy:
ansible.builtin.template:
dest: "{{ item.dest }}"
src: "{{ item.src }}"
owner: 0
group: 0
mode: "{{ item.mode }}"
content: "{{ item.cmd }}"
loop:
- dest: /usr/local/sbin/sriov-manage.sh
src: sriov-manage.sh.jinja
mode: u=rwx,go=rx
cmd: |
#!/usr/bin/env bash
set -eu

# Split the input (e.g., 0000:27:00.0-4) into address and count
IFS='-' read -r PCI_ADDR VF_COUNT <<< "$1"

# Verify the device exists before writing
if [[ -d "/sys/bus/pci/devices/$PCI_ADDR" ]]; then
echo "Setting $VF_COUNT VFs on $PCI_ADDR"
echo "$VF_COUNT" > "/sys/bus/pci/devices/$PCI_ADDR/sriov_numvfs"
else
echo "Error: Device $PCI_ADDR not found" >&2
exit 1
fi
- dest: /etc/systemd/system/sriov-enable@.service
src: sriov-enable@.service.jinja
mode: u=rw,go=r
cmd: |
[Unit]
Description=Enable SR-IOV VFs on %I
After=network-pre.target

[Service]
Type=oneshot
RemainAfterExit=yes
# %I is replaced by the string after the @ in the command
ExecStart=/usr/local/sbin/sriov-manage.sh %i

[Install]
WantedBy=multi-user.target
register: copy_sriov_enable_service
register: template_sriov_enable_service

- name: Reload systemd
ansible.builtin.systemd_service:
daemon_reload: true
when: copy_sriov_enable_service is changed
when: template_sriov_enable_service is changed

- name: Override drivers (revert when needed)
ansible.builtin.shell:
Expand Down Expand Up @@ -141,7 +115,7 @@
file: "{{ role_path }}/tasks/query.yml"
when: shell_revert_drivers is changed

- name: (Re)Enable VFs
- name: Enable VFs
ansible.builtin.shell:
cmd: |
set -x -o errexit -o pipefail
Expand All @@ -156,7 +130,11 @@
{% endif %}
if [[ -n "$SRIOV_NUMVFS" ]]; then
ALL="$(systemctl show --all -P Id 'sriov-enable@{{ v.Slot }}-*.service' | grep -E -v '^\s*$')" ||:
{% if v.Set_switchdev == 'yes' %}
TO_ENABLE="sriov-enable@{{ v.Slot }}-$SRIOV_NUMVFS-switchdev.service"
{% else %}
TO_ENABLE="sriov-enable@{{ v.Slot }}-$SRIOV_NUMVFS.service"
{% endif %}
if [[ "$(head -n1 '/sys/bus/pci/devices/{{ v.Slot }}/sriov_numvfs')" == 0 ]]; then
# This handles the invalid case where VFs are no longer enabled but service is still active
TO_DISABLE="$ALL"
Expand Down
3 changes: 3 additions & 0 deletions roles/helper/pci/tasks/query.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
echo -e 'Set_driver:\t{{ v.set_driver | d('omit') }}'
echo -e 'Set_name:\t{{ v.set_name | d('omit') }}'
echo -e 'Set_numvfs:\t{{ v.set_numvfs | d(0) }}'
echo -e 'Set_switchdev:\t{{ v.set_switchdev | d(false) | bool | ternary('yes', 'no') }}'
echo -e 'Virtual:\t{{ v.virtual | d(false) | bool | ternary('yes', 'no') }}'
echo -e 'Unlisted:\t{{ v.unlisted | d(true) | bool | ternary('yes', 'no') }}'
echo -e 'Unguarded:\t{{ v.unguarded | d(false) | bool | ternary('yes', 'no') }}'
Expand Down Expand Up @@ -52,6 +53,7 @@
echo -e 'Set_driver:\t{{ v.set_driver | d('omit') }}'
echo -e 'Set_name:\t{{ v.set_name | d('omit') }}'
echo -e 'Set_numvfs:\t{{ v.set_numvfs | d(0) }}'
echo -e 'Set_switchdev:\t{{ v.set_switchdev | d(false) | bool | ternary('yes', 'no') }}'
echo -e 'Virtual:\t{{ v.virtual | d(false) | bool | ternary('yes', 'no') }}'
echo -e 'Unlisted:\t{{ v.unlisted | d(true) | bool | ternary('yes', 'no') }}'
echo -e 'Unguarded:\t{{ v.unguarded | d(false) | bool | ternary('yes', 'no') }}'
Expand Down Expand Up @@ -89,6 +91,7 @@
echo -e 'Set_driver:\t{{ v.set_driver | d('omit') }}'
echo -e 'Set_name:\t{{ v.set_name | d('omit') }}'
echo -e 'Set_numvfs:\t{{ v.set_numvfs | d(0) }}'
echo -e 'Set_switchdev:\t{{ v.set_switchdev | d(false) | bool | ternary('yes', 'no') }}'
echo -e 'Virtual:\t{{ v.virtual | d(false) | bool | ternary('yes', 'no') }}'
echo -e 'Unlisted:\t{{ v.unlisted | d(true) | bool | ternary('yes', 'no') }}'
echo -e 'Unguarded:\t{{ v.unguarded | d(false) | bool | ternary('yes', 'no') }}'
Expand Down
22 changes: 12 additions & 10 deletions roles/helper/pci/tasks/udev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,26 +31,28 @@
{{ _all | selectattr('IOMMUGroup', 'defined') }}

_vf_to_pf: >-
{{ shell_vf_to_pf_fn.stdout_lines | d([])
| map('split', ';')
| items2dict(key_name=0, value_name=1) }}
{{ shell_pf_vf_fn.stdout_lines | d([])
| map('split', ';')
| items2dict(key_name=1, value_name=0) }}

_vf_to_fn: >-
{{ shell_vf_to_pf_fn.stdout_lines | d([])
| map('split', ';')
| items2dict(key_name=0, value_name=2) }}
{{ shell_pf_vf_fn.stdout_lines | d([])
| map('split', ';')
| items2dict(key_name=1, value_name=2) }}
block:
- name: Scan /sys/bus/pci/devices/*/virtfn* (SR-IOV)
ansible.builtin.shell:
cmd: |
set -o errexit -o pipefail
set -o errexit -o pipefail; shopt -s nullglob
{% for v in _sriov_devices %}
find -P "/sys/bus/pci/devices/{{ v.Slot }}/" -maxdepth 1 -type l -name 'virtfn*' -printf '%l/%P\n' | while IFS='/' read -r _ VF FN; do
echo "$VF;{{ v.Slot }};${FN#virtfn}"
for VF_PATH in '/sys/bus/pci/devices/{{ v.Slot }}/virtfn'*; do
VF="$(basename "$(realpath "$VF_PATH")")"
FN="$(basename "$VF_PATH")"
echo "{{ v.Slot }};$VF;${FN#virtfn}"
done
{% endfor %}
executable: /bin/bash
register: shell_vf_to_pf_fn
register: shell_pf_vf_fn
changed_when: false
when: _sriov_devices | count > 0

Expand Down
13 changes: 13 additions & 0 deletions roles/helper/pci/templates/sriov-enable@.service.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# managed by one-deploy; vim:syn=systemd:
#
# Template unit: instantiate as sriov-enable@<PCI_ADDR>-<VF_COUNT>[-<OPTION>...].service
# (e.g. sriov-enable@0000:27:00.0-4-switchdev.service).
[Unit]
# Use %i (the instance name exactly as written after the "@"). %I would
# unescape it and turn every "-" separator into "/" (e.g. 0000:27:00.0/4).
Description=Enable SR-IOV VFs on %i
After=network-pre.target

[Service]
Type=oneshot
RemainAfterExit=yes
# %i is replaced by the string after the @ in the unit name; the script
# splits it on "-" into address, VF count and options.
ExecStart=/usr/local/sbin/sriov-manage.sh %i

[Install]
WantedBy=multi-user.target
64 changes: 64 additions & 0 deletions roles/helper/pci/templates/sriov-manage.sh.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/env bash
# managed by one-deploy; vim:syn=bash:
#
# Enable SR-IOV Virtual Functions (VFs) on one PCI device.
#
# Usage: sriov-manage.sh <PCI_ADDR>-<VF_COUNT>[-<OPTION>...]
#   e.g. sriov-manage.sh 0000:27:00.0-4
#        sriov-manage.sh 0000:27:00.0-4-switchdev
#
# The single dash-separated argument is split into the PCI address, the VF
# count written to the device's sriov_numvfs attribute, and optional flags.
#
# Options:
#   switchdev  Put the device's eswitch into switchdev mode using devlink.
#              VFs that already exist are unbound from their drivers first
#              and bound back to the same drivers afterwards.
#
# Exit status: non-zero when a required tool is missing, the argument is
# malformed, the device does not exist, an option is not recognized, or any
# command fails (errexit).

set -o errexit -o nounset -o pipefail; shopt -s nullglob

# Name the missing tool instead of letting errexit end the script silently.
for TOOL in basename devlink realpath; do
  if ! type -p "$TOOL" &>/dev/null; then
    echo "ERROR: Required command not found: $TOOL" >&2
    exit 1
  fi
done

if [[ $# -lt 1 ]]; then
  echo "Usage: $0 <PCI_ADDR>-<VF_COUNT>[-<OPTION>...]" >&2
  exit 1
fi

# Split the input (e.g., 0000:27:00.0-4-switchdev) into address, count and options
IFS='-' read -r PCI_ADDR VF_COUNT OPTIONS_RAW <<< "$1"
OPTIONS=()
if [[ -n "$OPTIONS_RAW" ]]; then
  IFS='-' read -ra OPTIONS <<< "$OPTIONS_RAW"
fi

# Reject incomplete input before touching the device: an empty address would
# pass the directory check below (".../devices//" exists), and an empty count
# would only fail after the options had already been applied.
if [[ -z "$PCI_ADDR" || -z "$VF_COUNT" ]]; then
  echo "ERROR: Malformed argument (expected <PCI_ADDR>-<VF_COUNT>[-<OPTION>...]): $1" >&2
  exit 1
fi

# Verify the device exists before writing
if ! [[ -d "/sys/bus/pci/devices/$PCI_ADDR/" ]]; then
  echo "ERROR: No such device: $PCI_ADDR" >&2
  exit 1
fi

# Handle extra options before writing.
# The ${OPTIONS[@]+...} form keeps nounset from aborting on an empty array
# with bash releases older than 4.4.
for OPT in ${OPTIONS[@]+"${OPTIONS[@]}"}; do
  case "$OPT" in
    switchdev)
      if ! ESWITCH_SHOW="$(devlink dev eswitch show "pci/$PCI_ADDR")"; then
        echo "ERROR: Not an eswitch device: $PCI_ADDR" >&2
        exit 1
      fi

      # Already in switchdev mode: skip the unbind/set/bind cycle.
      if [[ "$ESWITCH_SHOW" =~ (^|[[:space:]])mode[[:space:]]+switchdev([[:space:]]|$) ]]; then
        echo "WARNING: Nothing to do for: $OPT" >&2
        continue
      fi

      # Collect info about drivers in use (VF PCI address -> driver sysfs path)
      declare -A VF_DRIVER_MAP=()
      for VF_PATH in "/sys/bus/pci/devices/$PCI_ADDR/virtfn"*; do
        # VFs without a bound driver are skipped and not re-bound later.
        if ! VF_DRIVER="$(realpath -e "$VF_PATH/driver")"; then
          echo "WARNING: No driver found: $VF_PATH/driver" >&2
          continue
        fi
        VF_PCI_ADDR="$(basename "$(realpath "$VF_PATH")")"
        VF_DRIVER_MAP["$VF_PCI_ADDR"]="$VF_DRIVER"
      done

      for VF_PCI_ADDR in "${!VF_DRIVER_MAP[@]}"; do
        VF_DRIVER="${VF_DRIVER_MAP[$VF_PCI_ADDR]}"
        echo "Unbinding $VF_PCI_ADDR from $VF_DRIVER" >&2
        echo "$VF_PCI_ADDR" >"$VF_DRIVER/unbind"
      done

      echo "Enabling switchdev mode on $PCI_ADDR" >&2
      devlink dev eswitch set "pci/$PCI_ADDR" mode switchdev

      # Restore every VF to the driver it was using before the mode change.
      for VF_PCI_ADDR in "${!VF_DRIVER_MAP[@]}"; do
        VF_DRIVER="${VF_DRIVER_MAP[$VF_PCI_ADDR]}"
        echo "Binding $VF_PCI_ADDR to $VF_DRIVER" >&2
        echo "$VF_PCI_ADDR" >"$VF_DRIVER/bind"
      done
      ;;
    *)
      echo "ERROR: Unrecognized option: $OPT" >&2
      exit 1
      ;;
  esac
done

# NOTE(review): the kernel sysfs ABI says a non-zero VF count can only be
# changed after writing 0 first; this script writes the count directly, so
# confirm the caller only starts it when that holds.
echo "Setting $VF_COUNT VFs on $PCI_ADDR"
echo "$VF_COUNT" >"/sys/bus/pci/devices/$PCI_ADDR/sriov_numvfs"