testcase.py 30.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
# -*- coding: utf-8 -*-
##############################################################################
#
# Copyright (c) 2018 Vifib SARL and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
##############################################################################

29 30
# pyright: strict

31
from __future__ import annotations
32
import contextlib
33
import fnmatch
34 35
import glob
import logging
36
import os
37
import shutil
38
import sqlite3
39
import unittest
40
import warnings
41

42
from urllib.parse import urlparse
43

44 45
from netaddr import valid_ipv6

46
from .utils import getPortFromPath
47
from .utils import ManagedResource
48 49 50 51

from ..slap.standalone import StandaloneSlapOS
from ..slap.standalone import SlapOSNodeCommandError
from ..slap.standalone import PathTooDeepError
52

53
from ..util import mkdir_p
54
from ..slap import ComputerPartition
55
from .check_software import checkSoftware
56

57 58
from ..proxy.db_version import DB_VERSION

59 60 61 62 63
from typing import (
  Callable,
  ClassVar,
  Dict,
  Iterable,
64
  Iterator,
65 66 67 68 69 70
  Mapping,
  Sequence,
  Tuple,
  Type,
  TypeVar,
)
71

72
ManagedResourceType = TypeVar("ManagedResourceType", bound=ManagedResource)


def _envFlag(variable_name: str) -> bool:
  """Read environment variable `variable_name` as an integer flag.

  An unset variable counts as ``0`` (disabled).
  """
  return bool(int(os.environ.get(variable_name, 0)))


# Both addresses are mandatory: the lookup raises `KeyError` when the
# variable is not defined.
IPV4_ADDRESS_DEFAULT: str = os.environ["SLAPOS_TEST_IPV4"]
IPV6_ADDRESS_DEFAULT: str = os.environ["SLAPOS_TEST_IPV6"]

# Optional switches, disabled unless set to a non-zero integer.
DEBUG_DEFAULT: bool = _envFlag("SLAPOS_TEST_DEBUG")
VERBOSE_DEFAULT: bool = _envFlag("SLAPOS_TEST_VERBOSE")
SKIP_SOFTWARE_CHECK_DEFAULT: bool = _envFlag("SLAPOS_TEST_SKIP_SOFTWARE_CHECK")
SKIP_SOFTWARE_REBUILD_DEFAULT: bool = _envFlag(
  "SLAPOS_TEST_SKIP_SOFTWARE_REBUILD"
)

# `os.pathsep`-separated list of directories; empty entries are ignored and
# a leading `~` is expanded.
SHARED_PART_LIST_DEFAULT: Sequence[str] = [
  os.path.expanduser(p)
  for p in filter(
    None,
    os.environ.get("SLAPOS_TEST_SHARED_PART_LIST", "").split(os.pathsep),
  )
]

# `None` when the variable is not defined.
SNAPSHOT_DIRECTORY_DEFAULT: str | None = os.environ.get(
  "SLAPOS_TEST_LOG_DIRECTORY"
)
99

100

101
def makeModuleSetUpAndTestCaseClass(
  software_url: str | os.PathLike[str],
  *,
  base_directory: str | None = None,
  ipv4_address: str = IPV4_ADDRESS_DEFAULT,
  ipv6_address: str = IPV6_ADDRESS_DEFAULT,
  debug: bool = DEBUG_DEFAULT,
  verbose: bool = VERBOSE_DEFAULT,
  skip_software_check: bool = SKIP_SOFTWARE_CHECK_DEFAULT,
  skip_software_rebuild: bool = SKIP_SOFTWARE_REBUILD_DEFAULT,
  shared_part_list: Iterable[str] = SHARED_PART_LIST_DEFAULT,
  snapshot_directory: str | None = SNAPSHOT_DIRECTORY_DEFAULT,
  software_id: str | None = None,
) -> Tuple[Callable[[], None], Type[SlapOSInstanceTestCase]]:
  """
  Build the `setUpModule` function and the base test case for `software_url`.

  Note:
    SlapOS and some of the services it runs rely on unix sockets, which cope
    badly with (sometimes very) deep paths. As a workaround, set the
    ``SLAPOS_TEST_WORKING_DIR`` environment variable to a short enough
    directory; the local slapos will then live there.
    Partition references are derived from the unittest class name, which can
    also produce long paths. A test class with a long name can define a
    shorter ``__partition_reference__`` attribute, which is then used as
    partition reference instead.
    See https://lab.nexedi.com/kirr/slapns for a solution to this problem.

  Args:
    software_url: URL or path of the software under test.
    base_directory: Base directory used for SlapOS. Defaults to the value of
      the ``SLAPOS_TEST_WORKING_DIR`` environment variable, or to ``.slapos``
      in the current directory when that variable is not defined.
    ipv4_address: IPv4 address used for the instance. Defaults to the value
      of the ``SLAPOS_TEST_IPV4`` environment variable.
    ipv6_address: IPv6 address used for the instance. Defaults to the value
      of the ``SLAPOS_TEST_IPV6`` environment variable.
    debug: Enable debugging mode, dropping into a debugger session when
      errors occur. Defaults to the ``SLAPOS_TEST_DEBUG`` environment
      variable when defined, otherwise disabled.
    verbose: ``True`` to make the test framework log the actions it takes on
      the console (console logging level ``DEBUG``). Defaults to the
      ``SLAPOS_TEST_VERBOSE`` environment variable when defined, otherwise
      disabled.
    skip_software_check: Skip the costly software checks. Defaults to the
      ``SLAPOS_TEST_SKIP_SOFTWARE_CHECK`` environment variable when defined,
      otherwise disabled.
    skip_software_rebuild: Skip the costly software builds. Defaults to the
      ``SLAPOS_TEST_SKIP_SOFTWARE_REBUILD`` environment variable when
      defined, otherwise disabled.
    shared_part_list: Extra paths searched for existing shared parts, in
      addition to the test class's own shared parts directory. Defaults to
      the ``SLAPOS_TEST_SHARED_PART_LIST`` environment variable when defined,
      a string of paths separated by colons (':').
    snapshot_directory: Directory where snapshot files (kept for later
      inspection) and logs are saved. When ``None`` or empty, logs go to
      ``base_directory`` and no snapshot is stored. Defaults to the
      ``SLAPOS_TEST_LOG_DIRECTORY`` environment variable when defined, and
      ``None`` otherwise.
    software_id: Short name of the software, used in logs and to name
      snapshots. Computed from the software URL by default; pass it
      explicitly to use distinct names for distinct kinds of tests, such as
      upgrade tests.

  Returns:
    A tuple of two items:
      - A function installing the software, to be used as `unittest`'s
        `setUpModule`.
      - A base class for test cases.

  Raises:
    RuntimeError: if `base_directory` is too deep for the standalone SlapOS.

  """
  software_url = os.fspath(software_url)

  if base_directory is None:
    fallback_directory = os.path.join(os.getcwd(), ".slapos")
    base_directory = os.path.realpath(
      os.environ.get("SLAPOS_TEST_WORKING_DIR", fallback_directory)
    )

  if not software_id:
    # the directory containing the software profile names the software
    url_path_segments = urlparse(software_url).path.split("/")
    software_id = url_path_segments[-2]

  # Everything is logged at DEBUG level in the log file ...
  log_file = os.path.join(snapshot_directory or base_directory, "testcase.log")
  logging.basicConfig(
    level=logging.DEBUG,
    format=f"%(asctime)s - {software_id} - %(name)s - %(levelname)s - %(message)s",
    filename=log_file,
  )
  # ... while the console only shows warnings, unless verbose is requested.
  root_logger = logging.getLogger()
  console_handler = logging.StreamHandler()
  console_level = logging.DEBUG if verbose else logging.WARNING
  console_handler.setLevel(console_level)
  root_logger.addHandler(console_handler)

  if debug:
    unittest.installHandler()

  # TODO: fail if already running ?
  try:
    standalone = StandaloneSlapOS(
      base_directory=base_directory,
      server_ip=ipv4_address,
      server_port=getPortFromPath(base_directory),
      shared_part_list=shared_part_list,
    )
  except PathTooDeepError:
    raise RuntimeError(
      f"base directory ( {base_directory} ) is too deep, try setting "
      f"SLAPOS_TEST_WORKING_DIR to a shallow enough directory",
    )

  class_namespace = {
    "slap": standalone,
    "getSoftwareURL": classmethod(lambda _cls: software_url),
    "software_id": software_id,
    "_debug": debug,
    "_skip_software_check": skip_software_check,
    "_skip_software_rebuild": skip_software_rebuild,
    "_ipv4_address": ipv4_address,
    "_ipv6_address": ipv6_address,
    "_base_directory": base_directory,
    "_test_file_snapshot_directory": snapshot_directory,
  }
  configured_class = type(
    f"SlapOSInstanceTestCase for {software_url}",
    (SlapOSInstanceTestCase,),
    class_namespace,
  )

  class SlapOSInstanceTestCase_(
    configured_class,
    SlapOSInstanceTestCase,
  ):
    # useless intermediate class so that editors provide completion anyway.
    pass

  def setUpModule() -> None:
    installSoftwareUrlList(configured_class, [software_url], debug=debug)

  return setUpModule, SlapOSInstanceTestCase_


262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
def installSoftwareUrlList(
  cls: Type[SlapOSInstanceTestCase],
  software_url_list: Sequence[str],
  max_retry: int = 10,
  debug: bool = False,
) -> None:
  """
  Supply and build softwares on the testing slapos, for use in `setUpModule`.

  Once built, the softwares are also verified with `checkSoftware`, unless
  the test case class is configured to skip software checks.

  Args:
    cls: The test case class used for the installation.
    software_url_list: List of URLs or paths to install.
    max_retry: Number of times that the installation will be retried if there
      is an error.
    debug: If set to ``True`` the software will not be automatically removed
      if there is an error during the installation process, in order to
      facilitate inspection during debug.

  Raises:
    BaseException: any error raised while installing or checking is re-raised
      after snapshots have been stored and `cls._cleanup` has run.

  """

  def _snapshotSoftwareFiles(name: str) -> None:
    # Every pattern is resolved before the first file is copied.
    matched_paths = [
      path
      for directory, pattern in (
        (cls._base_directory, "var/log/*"),  # pyright: ignore[reportPrivateUsage]
        (cls.slap.software_directory, "*/*.cfg"),
        (cls.slap.software_directory, "*/.installed.cfg"),
        (cls.slap.shared_directory, "*/*/.slapos.recipe.cmmi.signature"),
      )
      for path in glob.glob(os.path.join(directory, pattern))
    ]
    for path in matched_paths:
      cls._copySnapshot(path, name)  # pyright: ignore[reportPrivateUsage]

  try:
    cls.logger.debug("Starting SlapOS")
    cls.slap.start()
    for url in software_url_list:
      cls.logger.debug("Supplying %s", url)
      cls.slap.supply(url)

    cls.logger.debug("Waiting for slapos node software to build")
    rebuild_everything = not cls._skip_software_rebuild  # pyright: ignore[reportPrivateUsage]
    cls.slap.waitForSoftware(
      max_retry=max_retry,
      debug=debug,
      install_all=rebuild_everything,
    )
    _snapshotSoftwareFiles("setupModule")

    if cls._skip_software_check:  # pyright: ignore[reportPrivateUsage]
      cls.logger.debug("Software checks skipped")
    else:
      for url in software_url_list:
        cls.logger.debug("Checking software %s", url)
        checkSoftware(cls.slap, url)
        cls.logger.debug("Done checking software %s", url)

  except BaseException:
    _snapshotSoftwareFiles("setupModule failed installing software")
    if not debug:
      # Outside of debug mode, the failed softwares are removed again.
      cls.logger.exception("Error building software, removing")
      try:
        for url in software_url_list:
          cls.logger.debug("Removing %s", url)
          cls.slap.supply(url, state="destroyed")
        cls.logger.debug("Waiting for slapos node software to remove")
        cls.slap.waitForSoftware(max_retry=max_retry, debug=debug)
      except BaseException:
        cls.logger.exception("Error removing software")
        _snapshotSoftwareFiles("setupModule removing software")
    cls._cleanup("setupModule")  # pyright: ignore[reportPrivateUsage]
    raise
349 350 351


class SlapOSInstanceTestCase(unittest.TestCase):
  """
  Install one slapos instance.

  This test case requests one instance during `setUpClass` and destroys that
  instance during `tearDownClass`.

  Software Release URL, Instance Software Type and Instance Parameters
  can be defined on the class.

  All tests from the test class will run with the same instance.

  Note:
    This class is not supposed to be imported directly, but needs to be set up
    by calling makeModuleSetUpAndTestCaseClass.

  Attributes:
    computer_partition: The computer partition instance.
    computer_partition_root_path: The path of the instance root directory.
    computer_partition_ipv6_address: The IPv6 of the instance.
    instance_max_retry: Maximum retries for ``slapos node instance``.
    report_max_retry: Maximum retries for ``slapos node report``.
    partition_count: Number of partitions needed for this instance.
    default_partition_reference: Reference of the default requested partition.
    request_instance: Whether an instance needs to be requested for this test
        case.
    software_id: A short name of that software URL. E.g. helloworld instead of
        https://lab.nexedi.com/nexedi/slapos/raw/software/helloworld/software.cfg .
    logger: A logger for messages of the testing framework.
    slap: Standalone SlapOS instance.
  """

  instance_max_retry: ClassVar[int] = 20
  report_max_retry: ClassVar[int] = 20
  partition_count: ClassVar[int] = 10
  default_partition_reference: ClassVar[str] = "testing partition 0"
  request_instance: ClassVar[bool] = True
  software_id: ClassVar[str] = ""

  logger: ClassVar[logging.Logger] = logging.getLogger(__name__)

  # Dynamic members: declared here without a value. `slap` is provided by
  # makeModuleSetUpAndTestCaseClass and the `computer_partition*` attributes
  # are assigned by `_setUpClass` when an instance is requested.
  slap: ClassVar[StandaloneSlapOS]
  computer_partition: ClassVar[ComputerPartition]
  computer_partition_root_path: ClassVar[str]
  computer_partition_ipv6_address: ClassVar[str]

  # Private settings

  # Partition reference: use when default length is too long.
  # When it is not defined, `formatPartitions` falls back to the class name
  # followed by a dash.
  __partition_reference__: ClassVar[str]

  # True to enable debugging utilities.
  _debug: ClassVar[bool] = False

  # True to skip software checks.
  _skip_software_check: ClassVar[bool] = False

  # True to skip software rebuild.
  _skip_software_rebuild: ClassVar[bool] = False

  # The IPv4 address used by ``slapos node format``.
  _ipv4_address: ClassVar[str] = ""

  # The IPv6 address used by ``slapos node format``.
  _ipv6_address: ClassVar[str] = ""

  # Used resources, by name; filled by `getManagedResource` and emptied by
  # `_cleanup`.
  # NOTE(review): this dict is created once on this base class, so it is
  # shared by every subclass that does not rebind it.
  _resources: ClassVar[Dict[str, ManagedResource]] = {}

  # Instance parameters, set by `setUpClass` from `getInstanceParameterDict`.
  _instance_parameter_dict: ClassVar[Mapping[str, object]]

  # Base directory for standalone SlapOS.
  _base_directory: ClassVar[str] = ""

  # Directory to save snapshot files for inspections; no snapshot is stored
  # when it is empty or None.
  _test_file_snapshot_directory: ClassVar[str | None] = ""

  # Patterns of files to save for inspection, relative to instance directory.
  # NOTE(review): `_storePartitionSnapshot` matches these `fnmatch` patterns
  # against absolute paths, which works for the patterns starting with `*`;
  # confirm whether the bare "etc/" entry can ever match.
  _save_instance_file_pattern_list: ClassVar[Sequence[str]] = (
    "*/bin/*",
    "*/etc/*",
    "*/var/log/*",
    "*/srv/monitor/*",
    "*/srv/backup/logrotate/*",
    "*/.*log",
    "*/.*cfg",
    "*/*cfg",
    "etc/",
  )

443
  @classmethod
444 445 446 447 448 449 450
  def getManagedResource(
    cls,
    resource_name: str,
    resource_class: Type[ManagedResourceType],
  ) -> ManagedResourceType:
    """
    Get the managed resource for this name.
451

452
    If resource was not created yet, it is created and `open`. The
453 454
    resource will automatically be `close` at the end of the test
    class.
455 456 457 458 459 460 461 462 463 464 465

    Args:
      resource_name: The name of the resource.
      resource_class: The desired class of the resource. If the resource
        exists, but is not an instance of this class, an exception will be
        raised. Otherwise, if the resource does not exist, this class will be
        used to construct a new resource with that name.

    Returns:
      A resource with name ``resource_name`` and class ``resource_class``.

466 467 468 469 470 471 472 473 474 475 476
    """
    try:
      existing_resource = cls._resources[resource_name]
    except KeyError:
      resource = resource_class(cls, resource_name)
      cls._resources[resource_name] = resource
      resource.open()
      return resource
    else:
      if not isinstance(existing_resource, resource_class):
        raise ValueError(
477 478 479
          f"Resource {resource_name} is of unexpected "
          f"class {existing_resource}",
        )
480 481
      return existing_resource

482 483
  # Methods to be defined by subclasses.
  @classmethod
484 485 486
  def getSoftwareURL(cls) -> str:
    """
    Return URL of software release to request instance.
487 488 489

    This method will be defined when initialising the class
    with makeModuleSetUpAndTestCaseClass.
490 491 492 493

    Returns:
      URL of the software release to request.

494 495 496 497
    """
    raise NotImplementedError()

  @classmethod
498 499 500
  def getInstanceParameterDict(cls) -> Mapping[str, object]:
    """
    Return instance parameters.
501 502 503

    To be defined by subclasses if they need to request instance
    with specific parameters.
504 505 506 507

    Returns:
      A mapping with the parameters to be set in the instance.

508 509 510 511
    """
    return {}

  @classmethod
512 513 514
  def getInstanceSoftwareType(cls) -> str | None:
    """
    Return software type for instance, default None.
515 516 517

    To be defined by subclasses if they need to request instance with specific
    software type.
518 519 520 521

    Returns:
      Name of the software type, or `None` to use the default software type.

522
    """
523
    return None
524 525

  # Unittest methods
526
  @classmethod
527 528 529 530 531 532 533 534
  def waitForInstance(cls) -> None:
    """
    Wait for the instance to be ready.

    This method does retry several times until either the instance is ready or
    `cls.instance_max_retry` unsuccessful retries have been done.

    """
535 536 537 538 539 540 541 542 543 544 545 546
    # waitForInstance does not tolerate any error but with instances,
    # promises sometimes fail on first run, because services did not
    # have time to start.
    # To make debug usable, we tolerate instance_max_retry-1 errors and
    # only debug the last.
    if cls._debug and cls.instance_max_retry:
      try:
        cls.slap.waitForInstance(max_retry=cls.instance_max_retry - 1)
      except SlapOSNodeCommandError:
        cls.slap.waitForInstance(debug=True)
    else:
      cls.slap.waitForInstance(
547 548 549
        max_retry=cls.instance_max_retry,
        debug=cls._debug,
      )
550 551

  @classmethod
552 553
  def formatPartitions(cls) -> None:
    """Format the instance partitions."""
554
    cls.logger.debug(
555
      "Formatting to remove old partitions XXX should not be needed because we delete ..."
556 557 558 559
    )
    cls.slap.format(0, cls._ipv4_address, cls._ipv6_address)
    cls.logger.debug("Formatting with %s partitions", cls.partition_count)
    cls.slap.format(
560 561 562 563 564
      cls.partition_count,
      cls._ipv4_address,
      cls._ipv6_address,
      getattr(cls, "__partition_reference__", f"{cls.__name__}-"),
    )
565

566
  @classmethod
567
  def _setUpClass(cls) -> None:
568 569 570 571 572
    cls.slap.start()

    # (re)format partitions
    cls.formatPartitions()

573
    # request
574 575
    if cls.request_instance:
      cls.requestDefaultInstance()
576

577 578 579
      # slapos node instance
      cls.logger.debug("Waiting for instance")
      cls.waitForInstance()
580

581 582 583
      # expose some class attributes so that tests can use them:
      # the main ComputerPartition instance, to use getInstanceParameterDict
      cls.computer_partition = cls.requestDefaultInstance()
584

585 586
      # the path of the instance on the filesystem, for low level inspection
      cls.computer_partition_root_path = os.path.join(
587
        cls.slap._instance_root,  # pyright: ignore[reportPrivateUsage]
588 589
        cls.computer_partition.getId(),
      )
590

591 592 593 594
      # the ipv6 of the instance
      cls.computer_partition_ipv6_address = cls.getPartitionIPv6(
        cls.computer_partition.getId(),
      )
595

596
  @classmethod
597
  @contextlib.contextmanager
598
  def _snapshotManager(cls, snapshot_name: str) -> Iterator[None]:
599
    try:
600
      yield
601
    except BaseException:
602 603
      cls._storeSystemSnapshot(snapshot_name)
      cls._cleanup(snapshot_name)
604
      raise
605 606
    else:
      cls._storeSystemSnapshot(snapshot_name)
607 608 609

  @classmethod
  def setUpClass(cls):
610
    """Request an instance."""
611 612 613 614 615 616 617 618 619
    cls.logger.debug("Starting setUpClass %s", cls)
    cls._instance_parameter_dict = cls.getInstanceParameterDict()
    snapshot_name = "{}.{}.setUpClass".format(cls.__module__, cls.__name__)

    with cls._snapshotManager(snapshot_name):
      try:
        cls._setUpClass()
      except BaseException:
        cls.logger.exception("Error during setUpClass")
620
        cls.setUp = lambda self: self.fail("Setup Class failed.")
621
        raise
622
    cls.logger.debug("setUpClass done")
623 624 625

  @classmethod
  def tearDownClass(cls):
626 627
    """Tear down class, stop the processes and destroy instance."""
    cls._cleanup(f"{cls.__module__}.{cls.__name__}.tearDownClass")
628
    if not cls._debug:
629
      cls.logger.debug(
630 631 632 633 634 635 636 637 638
        "cleaning up slapos log files in %s",
        cls.slap._log_directory,  # pyright: ignore[reportPrivateUsage]
      )
      for log_file in glob.glob(
        os.path.join(
          cls.slap._log_directory,  # pyright: ignore[reportPrivateUsage]
          "*",
        )
      ):
639
        os.unlink(log_file)
640

641
  @classmethod
642 643 644 645 646 647 648 649 650
  def _storePartitionSnapshot(cls, name: str) -> None:
    """
    Store snapshot of partitions.

    This uses the definition from class attribute
    `_save_instance_file_pattern_list`.

    Args:
      name: Name of the snapshot.
651

652
    """
653
    # copy config and log files from partitions
654
    for dirpath, dirnames, filenames in os.walk(cls.slap.instance_directory):
655 656
      for dirname in list(dirnames):
        dirabspath = os.path.join(dirpath, dirname)
657 658
        if any(
          fnmatch.fnmatch(
659 660
            dirabspath,
            pattern,
661 662 663
          )
          for pattern in cls._save_instance_file_pattern_list
        ):
664 665 666 667 668
          cls._copySnapshot(dirabspath, name)
          # don't recurse, since _copySnapshot is already recursive
          dirnames.remove(dirname)
      for filename in filenames:
        fileabspath = os.path.join(dirpath, filename)
669 670
        if any(
          fnmatch.fnmatch(
671 672
            fileabspath,
            pattern,
673 674 675
          )
          for pattern in cls._save_instance_file_pattern_list
        ):
676
          cls._copySnapshot(fileabspath, name)
677

678
  @classmethod
679 680 681 682 683 684 685 686 687
  def _storeSystemSnapshot(cls, name: str) -> None:
    """
    Store a snapshot of standalone slapos and partitions.

    Does not include software log, because this is stored at the end of
    software installation and software log is large.

    Args:
      name: Name of the snapshot.
688 689 690

    """
    # copy log files from standalone
691 692
    for standalone_log in glob.glob(
      os.path.join(
693
        cls._base_directory,
694 695 696 697
        "var/log/*",
      )
    ):
      if not standalone_log.startswith("slapos-node-software.log"):
698 699
        cls._copySnapshot(standalone_log, name)
    # store slapproxy database
700 701 702 703
    cls._copySnapshot(
      cls.slap._proxy_database,  # pyright: ignore[reportPrivateUsage]
      name,
    )
704
    cls._storePartitionSnapshot(name)
705

706
  def tearDown(self):
707
    self._storePartitionSnapshot(self.id())
708 709

  @classmethod
  def _copySnapshot(cls, source_file_name: str, name: str) -> None:
    """
    Save a file, symbolic link or directory for later inspection.

    The path is made relative to the deepest directory shared with the
    slapos base directory, and the same directory structure is kept in
    the snapshot.

    When no snapshot directory is configured, warnings are emitted and
    nothing is copied.

    Args:
      source_file_name: The name of the file or directory to copy.
      name: Name of the snapshot.

    """
    if not cls._test_file_snapshot_directory:
      warnings.warn("No snapshot directory configured, skipping snapshot")
      warnings.warn(
        "Snapshot directory can be configured with SLAPOS_TEST_LOG_DIRECTORY environment"
      )
      return

    # Compare whole path components: a character-based common prefix would
    # treat `/base` as a parent of `/base-other/file`.
    try:
      common_path = os.path.commonpath(
        (source_file_name, cls._base_directory)
      )
    except ValueError:
      # absolute and relative paths are mixed: nothing in common
      common_path = ""
    if common_path:
      relative_path = os.path.relpath(source_file_name, common_path)
    else:
      # never let an absolute path discard the snapshot directory in the
      # join below
      relative_path = source_file_name.lstrip(os.sep)

    destination = os.path.join(
      cls._test_file_snapshot_directory,
      cls.software_id,
      name,
      relative_path,
    )
    destination_dirname = os.path.dirname(destination)
    mkdir_p(destination_dirname)
    if os.path.islink(source_file_name) and not os.path.exists(
      source_file_name
    ):
      # A broken symlink cannot be copied; record its target instead.
      cls.logger.debug(
        "copy broken symlink %s as %s",
        source_file_name,
        destination,
      )
      with open(destination, "w") as f:
        f.write(f"broken symink to {os.readlink(source_file_name)}\n")
    elif os.path.isfile(source_file_name):
      shutil.copy(source_file_name, destination)
    elif os.path.isdir(source_file_name):
      # we copy symlinks as symlinks, so that this does not fail when
      # we copy a directory containing broken symlinks.
      shutil.copytree(source_file_name, destination, symlinks=True)
760

761 762
  # implementation methods
  @classmethod
763 764 765 766
  def _cleanup(cls, snapshot_name: str) -> None:
    """
    Destroy all instances and stop subsystem.

767
    Catches and log all exceptions and take snapshot named `snapshot_name` + the failing step.
768 769 770 771

    Args:
      snapshot_name: Name of the snapshot that will be taken in case of exception.

772
    """
773 774 775 776 777 778
    for resource_name in list(cls._resources):
      cls.logger.debug("closing resource %s", resource_name)
      try:
        cls._resources.pop(resource_name).close()
      except:
        cls.logger.exception("Error closing resource %s", resource_name)
779
    try:
780 781
      if cls.request_instance and hasattr(cls, "_instance_parameter_dict"):
        cls.requestDefaultInstance(state="destroyed")
782 783
    except:
      cls.logger.exception("Error during request destruction")
784
      cls._storeSystemSnapshot(f"{snapshot_name}._cleanup request destroy")
785
    try:
786 787
      # To make debug usable, we tolerate report_max_retry-1 errors and
      # only debug the last.
788
      for _ in range(3):
789 790 791 792 793 794
        if cls._debug and cls.report_max_retry:
          try:
            cls.slap.waitForReport(max_retry=cls.report_max_retry - 1)
          except SlapOSNodeCommandError:
            cls.slap.waitForReport(debug=True)
        else:
795 796 797
          cls.slap.waitForReport(
            max_retry=cls.report_max_retry, debug=cls._debug
          )
798 799
    except:
      cls.logger.exception("Error during actual destruction")
800
      cls._storeSystemSnapshot(f"{snapshot_name}._cleanup waitForReport")
801
    leaked_partitions = [
802 803 804
      cp
      for cp in cls.slap.computer.getComputerPartitionList()
      if cp.getState() != "destroyed"
805 806 807
    ]
    if leaked_partitions:
      cls.logger.critical(
808 809 810
        "The following partitions were not cleaned up: %s",
        [cp.getId() for cp in leaked_partitions],
      )
811
      cls._storeSystemSnapshot(
812 813
        "{}._cleanup leaked_partitions".format(snapshot_name)
      )
814 815
      for cp in leaked_partitions:
        try:
816
          # XXX is this really the reference ?
817 818
          partition_reference = cp.getInstanceParameterDict()["instance_title"]
          assert isinstance(partition_reference, str)
819
          cls.slap.request(
820 821 822 823 824
            software_release=cp.getSoftwareRelease().getURI(),
            # software_type=cp.getType(), # TODO
            partition_reference=partition_reference,
            state="destroyed",
          )
825 826
        except:
          cls.logger.exception(
827 828
            "Error during request destruction of leaked partition",
          )
829
          cls._storeSystemSnapshot(
830 831
            f"{snapshot_name}._cleanup leaked_partitions request destruction",
          )
832
      try:
833 834
        # To make debug usable, we tolerate report_max_retry-1 errors and
        # only debug the last.
835
        for _ in range(3):
836 837 838 839 840 841
          if cls._debug and cls.report_max_retry:
            try:
              cls.slap.waitForReport(max_retry=cls.report_max_retry - 1)
            except SlapOSNodeCommandError:
              cls.slap.waitForReport(debug=True)
          else:
842 843 844 845
            cls.slap.waitForReport(
              max_retry=cls.report_max_retry,
              debug=cls._debug,
            )
846
      except:
847
        cls.logger.exception(
848 849
          "Error during leaked partitions actual destruction",
        )
850
        cls._storeSystemSnapshot(
851 852
          f"{snapshot_name}._cleanup leaked_partitions waitForReport",
        )
853 854 855 856
    try:
      cls.slap.stop()
    except:
      cls.logger.exception("Error during stop")
857
      cls._storeSystemSnapshot(f"{snapshot_name}._cleanup stop")
858
    leaked_supervisor_configs = glob.glob(
859 860 861 862 863
      os.path.join(
        cls.slap.instance_directory,
        "etc/supervisord.conf.d/*.conf",
      )
    )
864
    if leaked_supervisor_configs:
865 866 867
      for config in leaked_supervisor_configs:
        os.unlink(config)
      raise AssertionError(
868 869
        f"Test leaked supervisor configurations: {leaked_supervisor_configs}",
      )
870 871

  @classmethod
872 873 874 875
  def requestDefaultInstance(
    cls,
    state: str = "started",  # TODO: Change to enum/Literal when all code is Python 3.
  ) -> ComputerPartition:
876 877 878
    software_url = cls.getSoftwareURL()
    software_type = cls.getInstanceSoftwareType()
    cls.logger.debug(
879 880 881 882 883 884 885
      'requesting "%s" software:%s type:%r state:%s parameters:%s',
      cls.default_partition_reference,
      software_url,
      software_type,
      state,
      cls._instance_parameter_dict,
    )
886
    return cls.slap.request(
887 888 889 890 891 892
      software_release=software_url,
      software_type=software_type,
      partition_reference=cls.default_partition_reference,
      partition_parameter_kw=cls._instance_parameter_dict,
      state=state,
    )
893 894

  @classmethod
  def getPartitionId(cls, instance_name: str) -> str:
    """
    Get the id of the partition.

    The id is read from the ``var/proxy.db`` database of the base directory.

    Args:
      instance_name: Name of the instance.

    Returns:
      Id of the partition.

    Raises:
      IndexError: if no partition is recorded for ``instance_name``.

    """
    query = (
      f"SELECT reference FROM partition{DB_VERSION} "
      f"WHERE partition_reference=?"
    )
    database_path = os.path.join(
      cls._base_directory,
      "var/proxy.db",
    )
    # Using a sqlite3 connection as a context manager only manages the
    # transaction; `closing` is what actually closes the connection.
    with contextlib.closing(sqlite3.connect(database_path)) as db:
      row = db.execute(query, (instance_name,)).fetchone()
    if row is None:
      raise IndexError(
        f"no partition found for instance {instance_name!r}",
      )
    return row[0]

  @classmethod
  def getPartitionIPv6(cls, partition_id: str) -> str:
    """
    Get the IP address of the partition.

    The addresses are read from the ``var/proxy.db`` database of the base
    directory.

    Args:
      partition_id: Id of the partition.

    Returns:
      An IPv6 address in presentation (string) format.

    Raises:
      ValueError: if none of the addresses recorded for the partition is a
        valid IPv6 address.

    """
    query = (
      f"SELECT address FROM partition_network{DB_VERSION} "
      f"WHERE partition_reference=?"
    )
    database_path = os.path.join(
      cls._base_directory,
      "var/proxy.db",
    )
    # Using a sqlite3 connection as a context manager only manages the
    # transaction; `closing` is what actually closes the connection.
    with contextlib.closing(sqlite3.connect(database_path)) as db:
      rows = db.execute(query, (partition_id,)).fetchall()
    # do not assume the partition's IPv6 address is the second one,
    # instead find the first address that is IPv6
    for (address,) in rows:
      if valid_ipv6(address):
        return address

    raise ValueError("Missing IPv6 address")