From 30ccd51acdff1b06452bb690c7a283abd1fb2e99 Mon Sep 17 00:00:00 2001
From: Chad Smith <chad.smith@canonical.com>
Date: Tue, 29 Mar 2022 14:25:33 -0600
Subject: [PATCH] ds-identify: also discover LXD by presence from DMI
 board_name = LXD (#1311)

VMs will not start lxd-agent.service in systemd generator timeframe
which means /dev/lxd/sock will not exist yet on LXD VM.

For VM support, ds-identify will return DS_FOUND when
/sys/class/dmi/id/board_name == "LXD" which exists at
early boot regardless of LXD socket status.

Later, in cloud-init init-local boot stage, cloud-init will only
discover the LXDDatasource if the /dev/lxd/sock is active.
This allows consumers to disable the LXD datasource
behavior by running:
   lxc config set MACHINE_NAME security.devlxd=false
---
 doc/rtd/topics/datasources/lxd.rst            | 24 ++++++++---
 .../datasources/test_lxd_discovery.py         | 36 ++++++++++++++--
 tests/unittests/test_ds_identify.py           | 41 +++++++++++++------
 tools/ds-identify                             | 25 ++++++++++-
 4 files changed, 102 insertions(+), 24 deletions(-)

--- a/doc/rtd/topics/datasources/lxd.rst
+++ b/doc/rtd/topics/datasources/lxd.rst
@@ -20,18 +20,22 @@ The LXD socket device ``/dev/lxd/sock``
 when the instance configuration has ``security.devlxd=true`` (default).
 Disabling ``security.devlxd`` configuration setting at initial launch will
 ensure that cloud-init uses the :ref:`datasource_nocloud` datasource.
-Disabling ``security.devlxd`` ove the life of the container will result in
+Disabling ``security.devlxd`` over the life of the container will result in
 warnings from cloud-init and cloud-init will keep the originally detected LXD
 datasource.
 
-The LXD datasource provides cloud-init the opportunity to react to meta-data,
+The LXD datasource is detected as viable by ``ds-identify`` during systemd
+generator time when either ``/dev/lxd/sock`` exists or
+``/sys/class/dmi/id/board_name`` matches "LXD".
+
+The LXD datasource provides cloud-init the ability to react to meta-data,
 vendor-data, user-data and network-config changes and render the updated
 configuration across a system reboot.
 
-One can manipulate what meta-data, vendor-data or user-data is provided to
-the launched container using the LXD profiles or
-``lxc launch ... -c <key>="<value>"`` at initial container launch using one of
-the following keys:
+To modify what meta-data, vendor-data or user-data are provided to the
+launched container, use either LXD profiles or
+``lxc launch ... -c <key>="<value>"`` at initial container launch setting one
+of the following keys:
 
  - user.meta-data: YAML metadata which will be appended to base meta-data
  - user.vendor-data: YAML which overrides any meta-data values
@@ -44,6 +48,14 @@ the following keys:
    used by both `#template: jinja` #cloud-config templates and
    the `cloud-init query` command.
 
+Note: LXD version 4.22 introduced a new scope of config keys prefaced by
+``cloud-init.`` which are preferred above the related ``user.*`` keys:
+
+ - cloud-init.meta-data
+ - cloud-init.vendor-data
+ - cloud-init.network-config
+ - cloud-init.user-data
+
 
 By default, network configuration from this datasource will be:
 
--- a/tests/integration_tests/datasources/test_lxd_discovery.py
+++ b/tests/integration_tests/datasources/test_lxd_discovery.py
@@ -9,9 +9,34 @@ from tests.integration_tests.util import
 
 
 def _customize_envionment(client: IntegrationInstance):
+    # Assert our platform can detect LXD during systemd generator timeframe.
+    ds_id_log = client.execute("cat /run/cloud-init/ds-identify.log").stdout
+    assert "check for 'LXD' returned found" in ds_id_log
+
+    # At some point Jammy will fail this test. We want to be informed
+    # when Jammy images no longer ship NoCloud template files (LP: #1958460).
+    assert "check for 'NoCloud' returned found" in ds_id_log
+    if client.settings.PLATFORM == "lxd_vm":
+        # ds-identify runs at systemd generator time before /dev/lxd/sock.
+        # Assert we can read the expected artifact indicating LXD is viable.
+        result = client.execute("cat /sys/class/dmi/id/board_name")
+        if not result.ok:
+            raise AssertionError(
+                "Missing expected /sys/class/dmi/id/board_name"
+            )
+        if "LXD" != result.stdout:
+            raise AssertionError(f"DMI board_name is not LXD: {result.stdout}")
+
+    # Having multiple datasources prevents ds-identify from short-circuiting
+    # detection logic with a log like:
+    #     single entry in datasource_list (LXD) use that.
+    # Also, NoCloud is detected during init-local timeframe.
+
+    # If a race on VMs leaves /dev/lxd/sock not set up in init-local,
+    # cloud-init will fall back to NoCloud and fail this test.
     client.write_to_file(
-        "/etc/cloud/cloud.cfg.d/99-detect-lxd.cfg",
-        "datasource_list: [LXD]\n",
+        "/etc/cloud/cloud.cfg.d/99-detect-lxd-first.cfg",
+        "datasource_list: [LXD, NoCloud]\n",
     )
     client.execute("cloud-init clean --logs")
     client.restart()
@@ -24,9 +49,9 @@ def _customize_envionment(client: Integr
 @pytest.mark.ubuntu  # Because netplan
 def test_lxd_datasource_discovery(client: IntegrationInstance):
     """Test that DataSourceLXD is detected instead of NoCloud."""
+
     _customize_envionment(client)
-    nic_dev = "enp5s0" if client.settings.PLATFORM == "lxd_vm" else "eth0"
-    result = client.execute("cloud-init status --long")
+    result = client.execute("cloud-init status --wait --long")
     if not result.ok:
         raise AssertionError("cloud-init failed:\n%s", result.stderr)
     if "DataSourceLXD" not in result.stdout:
@@ -35,6 +60,9 @@ def test_lxd_datasource_discovery(client
         )
     netplan_yaml = client.execute("cat /etc/netplan/50-cloud-init.yaml")
     netplan_cfg = yaml.safe_load(netplan_yaml)
+
+    platform = client.settings.PLATFORM
+    nic_dev = "eth0" if platform == "lxd_container" else "enp5s0"
     assert {
         "network": {"ethernets": {nic_dev: {"dhcp4": True}}, "version": 2}
     } == netplan_cfg
--- a/tests/unittests/test_ds_identify.py
+++ b/tests/unittests/test_ds_identify.py
@@ -77,6 +77,7 @@ RC_FOUND = 0
 RC_NOT_FOUND = 1
 DS_NONE = "None"
 
+P_BOARD_NAME = "sys/class/dmi/id/board_name"
 P_CHASSIS_ASSET_TAG = "sys/class/dmi/id/chassis_asset_tag"
 P_PRODUCT_NAME = "sys/class/dmi/id/product_name"
 P_PRODUCT_SERIAL = "sys/class/dmi/id/product_serial"
@@ -101,8 +102,6 @@ MOCK_VIRT_IS_XEN = {"name": "detect_virt
 MOCK_UNAME_IS_PPC64 = {"name": "uname", "out": UNAME_PPC64EL, "ret": 0}
 MOCK_UNAME_IS_FREEBSD = {"name": "uname", "out": UNAME_FREEBSD, "ret": 0}
 
-DEFAULT_MOCKS = [MOCK_NOT_LXD_DATASOURCE]
-
 shell_true = 0
 shell_false = 1
 
@@ -119,6 +118,7 @@ class DsIdentifyBase(CiTestCase):
         self,
         rootd=None,
         mocks=None,
+        no_mocks=None,
         func="main",
         args=None,
         files=None,
@@ -165,7 +165,8 @@ class DsIdentifyBase(CiTestCase):
             return SHELL_MOCK_TMPL % ddata
 
         mocklines = []
-        defaults = [
+        default_mocks = [
+            MOCK_NOT_LXD_DATASOURCE,
             {"name": "detect_virt", "RET": "none", "ret": 1},
             {"name": "uname", "out": UNAME_MYSYS},
             {"name": "blkid", "out": BLKID_EFI_ROOT},
@@ -189,7 +190,9 @@ class DsIdentifyBase(CiTestCase):
         written = [d["name"] for d in mocks]
         for data in mocks:
             mocklines.append(write_mock(data))
-        for d in defaults:
+        for d in default_mocks:
+            if no_mocks and d["name"] in no_mocks:
+                continue
             if d["name"] not in written:
                 mocklines.append(write_mock(d))
 
@@ -221,6 +224,7 @@ class DsIdentifyBase(CiTestCase):
         # return output of self.call with a dict input like VALID_CFG[item]
         xwargs = {"rootd": rootd}
         passthrough = (
+            "no_mocks",  # named mocks to ignore
             "mocks",
             "func",
             "args",
@@ -233,14 +237,6 @@ class DsIdentifyBase(CiTestCase):
                 xwargs[k] = data[k]
             if k in kwargs:
                 xwargs[k] = kwargs[k]
-        if "mocks" not in xwargs:
-            xwargs["mocks"] = DEFAULT_MOCKS
-        else:
-            mocked_funcs = [m["name"] for m in xwargs["mocks"]]
-            for default_mock in DEFAULT_MOCKS:
-                if default_mock["name"] not in mocked_funcs:
-                    xwargs["mocks"].append(default_mock)
-
         return self.call(**xwargs)
 
     def _test_ds_found(self, name):
@@ -338,6 +334,14 @@ class TestDsIdentify(DsIdentifyBase):
         """Older gce compute instances must be identified by serial."""
         self._test_ds_found("GCE-serial")
 
+    def test_lxd_kvm(self):
+        """LXD KVM has race on absent /dev/lxd/sock. Use DMI board_name."""
+        self._test_ds_found("LXD-kvm")
+
+    def test_lxd_containers(self):
+        """LXD containers will have /dev/lxd/sock at generator time."""
+        self._test_ds_found("LXD")
+
     def test_config_drive(self):
         """ConfigDrive datasource has a disk with LABEL=config-2."""
         self._test_ds_found("ConfigDrive")
@@ -1020,6 +1024,19 @@ VALID_CFG = {
         "files": {P_PRODUCT_SERIAL: "GoogleCloud-8f2e88f\n"},
         "mocks": [MOCK_VIRT_IS_KVM],
     },
+    "LXD-kvm": {
+        "ds": "LXD",
+        "files": {P_BOARD_NAME: "LXD\n"},
+        # /dev/lxd/sock does not exist and KVM virt-type
+        "mocks": [{"name": "is_socket_file", "ret": 1}, MOCK_VIRT_IS_KVM],
+        "no_mocks": ["dscheck_LXD"],  # Don't default mock dscheck_LXD
+    },
+    "LXD": {
+        "ds": "LXD",
+        # /dev/lxd/sock exists
+        "mocks": [{"name": "is_socket_file", "ret": 0}],
+        "no_mocks": ["dscheck_LXD"],  # Don't default mock dscheck_LXD
+    },
     "NoCloud": {
         "ds": "NoCloud",
         "mocks": [
--- a/tools/ds-identify
+++ b/tools/ds-identify
@@ -96,6 +96,7 @@ DI_BLKID_EXPORT_OUT=""
 DI_GEOM_LABEL_STATUS_OUT=""
 DI_DEFAULT_POLICY="search,found=all,maybe=all,notfound=${DI_DISABLED}"
 DI_DEFAULT_POLICY_NO_DMI="search,found=all,maybe=all,notfound=${DI_ENABLED}"
+DI_DMI_BOARD_NAME=""
 DI_DMI_CHASSIS_ASSET_TAG=""
 DI_DMI_PRODUCT_NAME=""
 DI_DMI_SYS_VENDOR=""
@@ -460,6 +461,10 @@ is_container() {
     esac
 }
 
+is_socket_file() {  # return 0 if path $1 is a socket, else 1
+    [ -S "$1" ] && return 0 || return 1  # quoted: bare [ -S ] is true
+}
+
 read_kernel_cmdline() {
     cached "${DI_KERNEL_CMDLINE}" && return
     local cmdline="" fpath="${PATH_PROC_CMDLINE}"
@@ -477,6 +482,12 @@ read_kernel_cmdline() {
     DI_KERNEL_CMDLINE="$cmdline"
 }
 
+read_dmi_board_name() {  # populate DI_DMI_BOARD_NAME from DMI board_name
+    cached "${DI_DMI_BOARD_NAME}" && return
+    get_dmi_field board_name  # presumably leaves its result in $_RET
+    DI_DMI_BOARD_NAME="$_RET"
+}
+
 read_dmi_chassis_asset_tag() {
     cached "${DI_DMI_CHASSIS_ASSET_TAG}" && return
     get_dmi_field chassis_asset_tag
@@ -806,7 +817,16 @@ dscheck_MAAS() {
 # LXD datasource requires active /dev/lxd/sock
 # https://linuxcontainers.org/lxd/docs/master/dev-lxd
 dscheck_LXD() {
-    [ -S /dev/lxd/sock ] && return ${DS_FOUND} || return ${DS_NOT_FOUND}
+    if is_socket_file /dev/lxd/sock; then
+        return ${DS_FOUND}
+    fi
+    # On LXD KVM instances, /dev/lxd/sock is not yet set up by
+    # lxd-agent-loader's systemd lxd-agent.service.
+    # Rely on DMI board_name, which exists at early boot.
+    if [ "${DI_VIRT}" = "kvm" ]; then
+        [ "${DI_DMI_BOARD_NAME}" = "LXD" ] && return ${DS_FOUND}
+    fi
+    return ${DS_NOT_FOUND}
 }
 
 dscheck_NoCloud() {
@@ -1466,6 +1486,7 @@ collect_info() {
     read_config
     read_datasource_list
     read_dmi_sys_vendor
+    read_dmi_board_name
     read_dmi_chassis_asset_tag
     read_dmi_product_name
     read_dmi_product_serial
@@ -1482,7 +1503,7 @@ _print_info() {
     local n="" v="" vars=""
     vars="DMI_PRODUCT_NAME DMI_SYS_VENDOR DMI_PRODUCT_SERIAL"
     vars="$vars DMI_PRODUCT_UUID PID_1_PRODUCT_NAME DMI_CHASSIS_ASSET_TAG"
-    vars="$vars FS_LABELS ISO9660_DEVS KERNEL_CMDLINE VIRT"
+    vars="$vars DMI_BOARD_NAME FS_LABELS ISO9660_DEVS KERNEL_CMDLINE VIRT"
     vars="$vars UNAME_KERNEL_NAME UNAME_KERNEL_RELEASE UNAME_KERNEL_VERSION"
     vars="$vars UNAME_MACHINE UNAME_NODENAME UNAME_OPERATING_SYSTEM"
     vars="$vars DSNAME DSLIST"
