#!/usr/bin/python3
#
# Copyright (C) 2020-2022 Canonical, Ltd.
# Author: Łukasz 'sil2100' Zemczak <lukasz.zemczak@canonical.com>
# Author: Lukas Märdian <slyon@ubuntu.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import json
import logging
import os
import subprocess
import typing
from typing import Dict, List, Optional, Set
from . import utils
from ..configmanager import ConfigurationError
import netplan
# PCIDevice class originates from mlnx_switchdev_mode/sriovify.py
# Copyright 2019 Canonical Ltd, Apache License, Version 2.0
# https://github.com/openstack-charmers/mlnx-switchdev-mode
class PCIDevice(object):
    """Thin wrapper around the sysfs and devlink views of one PCI device"""

    def __init__(self, pci_addr: str):
        """Create a handler for the device at the given PCI address

        :param pci_addr: PCI address of device
        :type: str
        """
        self.pci_addr = pci_addr

    @property
    def sys(self) -> str:
        """Root of the sysfs tree (subclasses may override it for testing)

        :return: full path to /sys filesystem
        :rtype: str
        """
        return "/sys"

    @property
    def path(self) -> str:
        """Directory of this PCI device below the sysfs root

        :return: full path to PCI device in /sys filesystem
        :rtype: str
        """
        return os.path.join(self.sys, "bus/pci/devices", self.pci_addr)

    def subpath(self, subpath: str) -> str:
        """Build the path of an entry inside this device's sysfs directory

        :param subpath: entry name (or relative path) below the device directory
        :type: str
        :return: self.path joined with subpath
        :rtype: str
        """
        return os.path.join(self.path, subpath)

    @property
    def driver(self) -> str:
        """Name of the kernel driver the device is bound to

        :return: basename of the 'driver' link target, or '' when the
                 'driver' entry does not exist
        :rtype: str
        """
        driver_link = self.subpath("driver")
        if not os.path.exists(driver_link):
            return ''
        return os.path.basename(os.readlink(driver_link))

    @property
    def bound(self) -> bool:
        """Tell whether the device currently has a 'driver' entry

        :return: whether device is bound to a kernel driver
        :rtype: bool
        """
        return os.path.exists(self.subpath("driver"))

    @property
    def is_pf(self) -> bool:
        """Tell whether the device exposes 'sriov_numvfs' (SR-IOV Physical Function)

        :return: whether device is a PF
        :rtype: bool
        """
        return os.path.exists(self.subpath("sriov_numvfs"))

    @property
    def is_vf(self) -> bool:
        """Tell whether the device exposes 'physfn' (SR-IOV Virtual Function)

        :return: whether device is a VF
        :rtype: bool
        """
        return os.path.exists(self.subpath("physfn"))

    @property
    def vf_addrs(self) -> list:
        """Collect the PCI addresses behind the virtfn0, virtfn1, ... links

        The links are followed in index order and the walk stops at the
        first index whose link is missing.

        :return: List of PCI addresses of Virtual Functions
        :rtype: list[str]
        """
        addresses = []
        index = 0
        while True:
            link = self.subpath(f"virtfn{index}")
            try:
                target = os.readlink(link)
            except FileNotFoundError:
                return addresses
            addresses.append(os.path.basename(target))
            index += 1

    @property
    def vfs(self) -> list:
        """Wrap every Virtual Function address in its own PCIDevice

        :return: List of PCI devices of Virtual Functions
        :rtype: list[PCIDevice]
        """
        return list(map(PCIDevice, self.vf_addrs))

    def devlink_set(self, obj_name: str, prop: str, value: str):
        """Run '/sbin/devlink dev <obj_name> set pci/<addr> <prop> <value>'

        A non-zero exit status surfaces as subprocess.CalledProcessError.

        :param obj_name: devlink object to set options on
        :type: str
        :param prop: property to set
        :type: str
        :param value: value to set for property
        :type: str
        """
        command = [
            "/sbin/devlink",
            "dev",
            obj_name,
            "set",
            f"pci/{self.pci_addr}",
            prop,
            value,
        ]
        subprocess.check_call(command)

    def devlink_eswitch_mode(self) -> str:
        """Ask devlink (JSON output) for the eswitch mode of this device

        :return: the eswitch mode or '__undetermined' if it can't be retrieved
        :rtype: str
        """
        device_key = f"pci/{self.pci_addr}"
        command = ["/sbin/devlink", "-j", "dev", "eswitch", "show", device_key]
        try:
            raw = subprocess.check_output(command, stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError:
            return '__undetermined'

        # With a mode available the document looks like
        #   {"dev":{"pci/0000:03:00.0":{"mode":"switchdev"}}}
        # and without one it is just
        #   {"dev":{}}
        devices = json.loads(raw).get("dev", {})
        return devices.get(device_key, {}).get('mode', '__undetermined')

    def __str__(self) -> str:
        """String representation of the object

        :return: PCI address of the device
        :rtype: str
        """
        return self.pci_addr
def bind_vfs(vfs: typing.Iterable[PCIDevice], driver):
    """
    Attach every VF that currently has no driver to the given kernel driver.

    For each VF whose 'bound' property is false, its PCI address is written
    to /sys/bus/pci/drivers/<driver>/bind. VFs that are already bound are
    left alone. Returns the list of VFs that were written.
    """
    newly_bound = []
    for device in vfs:
        if device.bound:
            continue
        with open(f"/sys/bus/pci/drivers/{driver}/bind", "wt") as sysfs_bind:
            sysfs_bind.write(device.pci_addr)
        newly_bound.append(device)
    return newly_bound
def unbind_vfs(vfs: typing.Iterable[PCIDevice], driver) -> typing.Iterable[PCIDevice]:
    """
    Detach every VF that currently has a driver from the given kernel driver.

    For each VF whose 'bound' property is true, its PCI address is written
    to /sys/bus/pci/drivers/<driver>/unbind. VFs without a driver are
    skipped. Returns the list of VFs that were written.
    """
    released = []
    for device in vfs:
        if not device.bound:
            continue
        with open(f"/sys/bus/pci/drivers/{driver}/unbind", "wt") as sysfs_unbind:
            sysfs_unbind.write(device.pci_addr)
        released.append(device)
    return released
def _interface_matches(netdef: netplan.NetDefinition, interface: str) -> bool:
    """
    Check one system interface against the match rules of a netdef.

    The interface name, its driver name and its MAC address (the latter two
    looked up through the utils helpers) are handed to the netdef matcher.
    """
    match_interface = netdef._match_interface
    driver_name = utils.get_interface_driver_name(interface)
    mac_address = utils.get_interface_macaddress(interface)
    return match_interface(
        iface_name=interface,
        iface_driver=driver_name,
        iface_mac=mac_address)
def _get_interface_name_for_netdef(netdef: netplan.NetDefinition) -> Optional[str]:
    """
    Try to match a netdef with the real system network interface.

    Resolution order:
      1. no match: stanza  -> the netdef id itself, if such an interface exists
      2. match: + set-name -> the set-name, if such an interface exists
         (the interface is assumed to have been renamed already)
      3. match: otherwise  -> the single system interface satisfying the match

    Returns the interface name, or None if nothing was found.
    Throws ConfigurationError if there is more than one match.
    """
    interfaces: List[str] = utils.get_interfaces()
    if not netdef._has_match:
        # no match field, assume entry name is the interface name
        return netdef.id if netdef.id in interfaces else None

    # now here it's a bit tricky
    set_name: str = netdef.set_name
    if set_name and set_name in interfaces:
        # if we had a match: stanza and set-name: this means we should
        # assume that, if found, the interface has already been
        # renamed - use the new name
        return set_name

    matches: Set[str] = set()
    # we walk through all the system interfaces to determine if there is
    # more than one matched interface
    for interface in interfaces:
        if not _interface_matches(netdef, interface):
            continue
        # we have a matching PF; record it first and only then count, so
        # that the second distinct match already triggers the error
        matches.add(interface)
        if len(matches) > 1:
            raise ConfigurationError('matched more than one interface for a PF device: %s' % netdef.id)
    return next(iter(matches), None)
def _get_pci_slot_name(netdev):
    """
    Read PCI slot name for given interface name

    The value is taken from the PCI_SLOT_NAME= line of
    /sys/class/net/<netdev>/device/uevent. If several such lines exist the
    last one wins; None is returned when there is no such line.
    Raises RuntimeError if the uevent file cannot be read.
    """
    uevent_path = os.path.join('/sys/class/net', netdev, 'device/uevent')
    pci_slot_name = None
    try:
        with open(uevent_path) as f:
            for line in f:
                line = line.strip()
                if line.startswith('PCI_SLOT_NAME='):
                    # split only on the first '=' so that the value is
                    # returned whole even if it contains '=' itself
                    pci_slot_name = line.split('=', 1)[1]
    except IOError as e:
        raise RuntimeError('failed parsing PCI slot name for %s: %s' % (netdev, str(e))) from e
    return pci_slot_name
def _get_physical_functions(np_state: netplan.State) -> Dict[str, str]:
    """
    Build a map from PF netplan id to the system interface it resolves to.

    A netdef counts as (or points at) a PF when
      * it carries an 'sriov' link: the link target is the PF,
      * it sets an embedded switch mode, or
      * its VF count is positive.
    Entries are only added when an interface could be resolved. A
    NetplanException raised while reading the VF count is re-raised as
    ConfigurationError.
    """
    physical = {}
    for netdef in np_state.ethernets.values():
        sriov_link = netdef.links.get('sriov')
        if sriov_link:
            # this netdef is a VF, the link target is its PF
            pf_iface = _get_interface_name_for_netdef(np_state[sriov_link.id])
            if pf_iface:
                physical[sriov_link.id] = pf_iface
            continue

        # An embedded_switch_mode key alone marks the netdef as a PF, which
        # allows changing the eswitch mode even when the PF has no VFs.
        if netdef._embedded_switch_mode:
            iface = _get_interface_name_for_netdef(netdef)
            if iface:
                physical[netdef.id] = iface

        # Any positive number of VFs also means this netdef is a PF
        try:
            vf_total = netdef._vf_count
        except netplan.NetplanException as e:
            raise ConfigurationError(str(e))
        if vf_total > 0:
            iface = _get_interface_name_for_netdef(netdef)
            if iface:
                physical[netdef.id] = iface
    return physical
def _get_vf_number_per_pf(np_state: netplan.State) -> Dict[str, int]:
    """
    Build a map from PF system interface name to the number of VFs it needs.

    Only netdefs with a positive netdef._vf_count that resolve to an existing
    interface are included. As noted by the original authors, _vf_count
    ultimately calls _netplan_state_get_vf_count_for_def from libnetplan,
    which returns MAX(sriov_explicit_vf_count, number of VF netdefs).
    A NetplanException raised while reading the count is re-raised as
    ConfigurationError.
    """
    per_pf = {}
    for netdef in np_state.ethernets.values():
        try:
            requested = netdef._vf_count
        except netplan.NetplanException as e:
            raise ConfigurationError(str(e))
        if not requested > 0:
            continue
        pf_iface = _get_interface_name_for_netdef(netdef)
        if pf_iface:
            per_pf[pf_iface] = requested
    return per_pf
def _get_virtual_functions(np_state: netplan.State) -> Set[str]:
    """
    Collect the netplan ids of all ethernet netdefs that are virtual functions.

    A netdef is kept when it has an 'sriov' link and the PF that link points
    to resolves to an interface present in the system.
    """
    return {
        netdef.id
        for netdef in np_state.ethernets.values()
        if (link := netdef.links.get('sriov'))
        and _get_interface_name_for_netdef(np_state[link.id])
    }
def set_numvfs_for_pf(pf, vf_count):
    """
    Allocate the required number of VFs for the selected PF.

    The count is validated against the hard limit of 256 and against the
    PF's sriov_totalvfs, then written to sriov_numvfs. If that write fails
    with EBUSY, the value is reset to 0 and written again.

    Returns True on success.
    Raises ConfigurationError if vf_count exceeds 256 or sriov_totalvfs, and
    RuntimeError if sriov_totalvfs cannot be read/parsed or sriov_numvfs
    cannot be written.
    """
    # function-scope import keeps this block self-contained
    import errno

    if vf_count > 256:
        raise ConfigurationError(
            'cannot allocate more VFs for PF %s than the SR-IOV maximum: %s > 256' % (pf, vf_count))

    devdir = os.path.join('/sys/class/net', pf, 'device')
    numvfs_path = os.path.join(devdir, 'sriov_numvfs')
    totalvfs_path = os.path.join(devdir, 'sriov_totalvfs')
    try:
        with open(totalvfs_path) as f:
            vf_max = int(f.read().strip())
    except IOError as e:
        raise RuntimeError('failed parsing sriov_totalvfs for %s: %s' % (pf, str(e))) from e
    except ValueError as e:
        raise RuntimeError('invalid sriov_totalvfs value for %s' % pf) from e

    if vf_count > vf_max:
        raise ConfigurationError(
            'cannot allocate more VFs for PF %s than supported: %s > %s (sriov_totalvfs)' % (pf, vf_count, vf_max))

    failure = 'failed setting sriov_numvfs to %s for %s: %s'
    try:
        with open(numvfs_path, 'w') as f:
            f.write(str(vf_count))
    except IOError as e:
        if e.errno != errno.EBUSY:
            raise RuntimeError(failure % (vf_count, pf, str(e))) from e
        logging.warning('device or resource busy while setting sriov_numvfs for %s, trying workaround' % pf)
        try:
            # doing this in two open/close sequences so that
            # it's as close to writing via shell as possible
            with open(numvfs_path, 'w') as f:
                f.write('0')
            with open(numvfs_path, 'w') as f:
                f.write(str(vf_count))
        except IOError as e_inner:
            # report the error of the retry, not the initial EBUSY
            raise RuntimeError(failure % (vf_count, pf, str(e_inner))) from e_inner
    return True
def perform_hardware_specific_quirks(pf):
    """
    Perform any hardware-specific quirks for the given SR-IOV device to make
    sure all the VF-count changes are applied.

    The vendor and device IDs are read from the PF's sysfs 'device' directory
    and combined as '<vendor>:<device>'; devices listed in quirk_devices would
    get special handling. The list is currently empty, so nothing is done
    beyond reading the IDs.

    Raises RuntimeError if the IDs cannot be read.
    """
    devdir = os.path.join('/sys/class/net', pf, 'device')
    try:
        # [2:] drops the first two characters of each value
        # (presumably the '0x' prefix of the sysfs hex notation)
        with open(os.path.join(devdir, 'vendor')) as f:
            vendor_id = f.read().strip()[2:]
        with open(os.path.join(devdir, 'device')) as f:
            device_id = f.read().strip()[2:]
    except IOError as e:
        raise RuntimeError('could not determine vendor and device ID of %s: %s' % (pf, str(e))) from e

    combined_id = ':'.join([vendor_id, device_id])
    quirk_devices = ()  # TODO: add entries to the list
    if combined_id in quirk_devices:  # pragma: nocover (empty quirk_devices)
        # some devices need special handling, so this is the place
        # Currently this part is empty, but has been added as a preemptive
        # measure, as apparently a lot of SR-IOV cards have issues with
        # dynamically allocating VFs. Some cards seem to require a full
        # kernel module reload cycle after changing the sriov_numvfs value
        # for the changes to come into effect.
        # Any identified card/vendor can then be special-cased here, if
        # needed.
        pass
def apply_vlan_filter_for_vf(pf, vf, vlan_name, vlan_id, prefix='/'):
    """
    Apply the hardware VLAN filtering for the selected VF.

    The VF index is found by comparing the target of the VF's 'device' link
    with the targets of the PF's virtfn<N> links; the filter is then set
    with 'ip link set dev <pf> vf <N> vlan <vlan_id>'.

    The prefix argument is here only for unit testing purposes.
    Raises RuntimeError if the VF index cannot be determined or if the
    'ip link set' command fails.
    """
    # this is more complicated, because to do this, we actually need to have
    # the vf index - just knowing the vf interface name is not enough
    virtfn_prefix = 'virtfn'
    vf_index = None
    vf_devdir = os.path.join(prefix, 'sys/class/net', vf, 'device')
    vf_dev_id = os.path.basename(os.readlink(vf_devdir))
    pf_devdir = os.path.join(prefix, 'sys/class/net', pf, 'device')
    for entry in os.listdir(pf_devdir):
        # only names that *start* with 'virtfn' carry the index as suffix
        if not entry.startswith(virtfn_prefix):
            continue
        dev_path = os.path.join(pf_devdir, entry)
        dev_id = os.path.basename(os.readlink(dev_path))
        if dev_id == vf_dev_id:
            vf_index = entry[len(virtfn_prefix):]
            break

    if not vf_index:
        raise RuntimeError(
            'could not determine the VF index for %s while configuring vlan %s' % (vf, vlan_name))

    # now, create the VLAN filter
    # TODO: would be best if we did this directly via python, without calling
    #       the iproute tooling
    try:
        subprocess.check_call(['ip', 'link', 'set',
                               'dev', pf,
                               'vf', vf_index,
                               'vlan', str(vlan_id)],
                              stdout=subprocess.DEVNULL,
                              stderr=subprocess.DEVNULL)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(
            'failed setting SR-IOV VLAN filter for vlan %s (ip link set command failed)' % vlan_name) from e
def apply_sriov_config(config_manager, rootdir='/'):
    """
    Go through all interfaces, identify which ones are SR-IOV VFs, create
    them and perform all other necessary setup.

    Steps, in order:
      1. parse the configuration and collect VF counts, VFs and PFs
      2. write the VF count of every PF, run the hardware quirks for the
         PFs that were changed and refresh the interface list
      3. map every VF netdef to a system interface
      4. for PFs requesting the 'switchdev' or 'legacy' eswitch mode while
         devlink reports a different one: unbind the VFs (failures are only
         logged), set the mode, and rebind the VFs unless the netdef asks
         to delay the rebind
      5. apply the hardware VLAN filter for every vlan netdef that has the
         SR-IOV vlan filter enabled

    config_manager must provide parse() and np_state. rootdir is accepted
    but not referenced anywhere in this function's body.

    Raises ConfigurationError when a VF netdef matches more than one
    interface or when one VF interface would get a second SR-IOV vlan;
    errors raised by the helpers that are called propagate unchanged.
    """
    config_manager.parse()
    interfaces = utils.get_interfaces()
    np_state = config_manager.np_state

    # for sr-iov devices, we identify VFs by them having a link: field
    # pointing to a PF. So let's browse through all ethernet devices,
    # find all that are VFs and count how many of those are linked to
    # particular PFs, as we need to then set the numvfs for each.
    vf_counts = _get_vf_number_per_pf(np_state)
    # we also store all matches between VF/PF netplan entry names and
    # interface that they're currently matching to
    vfs_set = _get_virtual_functions(np_state)
    pfs = _get_physical_functions(np_state)

    # setup the required number of VFs per PF
    # at the same time store which PFs got changed in case the NICs
    # require some special quirks for the VF number to change
    vf_count_changed = []
    if vf_counts:
        for pf, vf_count in vf_counts.items():
            if not set_numvfs_for_pf(pf, vf_count):
                continue

            vf_count_changed.append(pf)

    if vf_count_changed:
        # some cards need special treatment when we want to change the
        # number of enabled VFs
        for pf in vf_count_changed:
            perform_hardware_specific_quirks(pf)

        # also, since the VF number changed, the interfaces list also
        # changed, so we need to refresh it
        interfaces = utils.get_interfaces()

    # now in theory we should have all the new VFs set up and existing;
    # this is needed because we will have to now match the defined VF
    # entries to existing interfaces, otherwise we won't be able to set
    # filtered VLANs for those.
    # XXX: does matching those even make sense?
    # vfs maps a VF netplan id to the system interface name it resolved to
    vfs = {}
    for vf in vfs_set:
        netdef = np_state[vf]
        if netdef._has_match:
            # right now we only match by name, as I don't think matching per
            # driver and/or macaddress makes sense
            # TODO: print warning if other matches are provided
            for interface in interfaces:
                if netdef._match_interface(iface_name=interface):
                    if vf in vfs and vfs[vf]:
                        raise ConfigurationError('matched more than one interface for a VF device: %s' % vf)
                    vfs[vf] = interface
        else:
            # no match: stanza, the netplan id is taken as the interface name
            if vf in interfaces:
                vfs[vf] = vf

    # Walk the SR-IOV PFs and check if we need to change the eswitch mode
    for netdef_id, iface in pfs.items():
        netdef = np_state[netdef_id]
        eswitch_mode = netdef._embedded_switch_mode
        if eswitch_mode in ['switchdev', 'legacy']:
            pci_addr = _get_pci_slot_name(iface)
            pcidev = PCIDevice(pci_addr)
            current_eswitch_mode_system = pcidev.devlink_eswitch_mode()
            if eswitch_mode != current_eswitch_mode_system:
                if pcidev.is_pf:
                    logging.debug("Found VFs of {}: {}".format(pcidev, pcidev.vf_addrs))
                    if pcidev.vfs:
                        # best effort: a failed unbind is logged and the
                        # mode change below is still attempted
                        try:
                            unbind_vfs(pcidev.vfs, pcidev.driver)
                        except Exception as e:
                            logging.warning(f'Unbinding of VFs for {netdef_id} failed: {str(e)}')

                logging.debug(f'Changing eswitch mode from {current_eswitch_mode_system} to {eswitch_mode} for: {netdef_id}')
                pcidev.devlink_set('eswitch', 'mode', eswitch_mode)
                if pcidev.vfs:
                    # rebind right away unless the netdef asks to delay it
                    if not netdef._delay_virtual_functions_rebind:
                        bind_vfs(pcidev.vfs, pcidev.driver)

    # VF interfaces that already received a hardware VLAN filter in this run
    filtered_vlans_set = set()
    for vlan, netdef in np_state.vlans.items():
        # there is a special sriov vlan renderer that one can use to mark
        # a selected vlan to be done in hardware (VLAN filtering)
        if netdef._has_sriov_vlan_filter:
            # this only works for SR-IOV VF interfaces
            link = netdef.links.get('vlan')
            vlan_id = netdef._vlan_id
            vf = vfs.get(link.id)
            if not vf:
                # it is possible this is not an error, for instance when
                # the configuration has been defined 'for the future'
                # XXX: but maybe we should error out here as well?
                logging.warning(
                    'SR-IOV vlan defined for %s but link %s is either not a VF or has no matches' % (vlan, link.id))
                continue

            # get the parent pf interface
            # first we fetch the related vf netplan entry
            # and finally, get the matched pf interface
            pf = pfs.get(link.links.get('sriov').id)

            if vf in filtered_vlans_set:
                raise ConfigurationError(
                    'interface %s for netplan device %s (%s) already has an SR-IOV vlan defined' % (vf, link.id, vlan))

            # TODO: make sure that we don't apply the filter twice
            apply_vlan_filter_for_vf(pf, vf, vlan, vlan_id)
            filtered_vlans_set.add(vf)
|