diff --git a/.gitignore b/.gitignore index 9d6708b7e..11f282766 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ data.py vm_data.py /scripts/guests/windows/id_rsa.pub .envrc +pytest-logs/ diff --git a/conftest.py b/conftest.py index 903683048..84effd32a 100644 --- a/conftest.py +++ b/conftest.py @@ -32,7 +32,10 @@ from lib.pool import Pool from lib.sr import SR from lib.vm import VM, vm_cache_key_from_def +from lib.unixvm import UnixVM +from lib.windowsvm import WindowsVM from lib.xo import xo_cli +from lib.serial_console_logger import SerialConsoleLogger # Import package-scoped fixtures. Although we need to define them in a separate file so that we can # then import them in individual packages to fix the buggy package scope handling by pytest, we also @@ -813,6 +816,67 @@ def session_log_dir(): # postpone directory creation only if some test failed return log_dir +# Serial console logging fixtures + +@pytest.fixture(scope='module') +def unix_vm_with_serial_console(imported_vm: VM, host: Host) -> Generator[UnixVM, None, None]: + vm = imported_vm + + if not vm.is_running(): + vm.start() + vm.wait_for_os_booted() + + # TODO: to be replaced by a knock_port() once PR is merged + wait_for(lambda: not os.system(f"ping -c1 {vm.ip} > /dev/null 2>&1"), + "Wait for host up", timeout_secs=10 * 60, retry_delay_secs=10) + wait_for(lambda: not os.system(f"nc -zw5 {vm.ip} 22"), + "Wait for ssh up on host", timeout_secs=10 * 60, retry_delay_secs=5) + + # Auto-detect distro and convert to distro-specific class + vm = UnixVM.from_vm_auto_detect(vm) + logging.info(f"Serial console will be configured for {vm.__class__.__name__}") + vm.configure_serial_console() + + # Need a reboot for VM to take into account + vm.reboot() + vm.wait_for_os_booted() + + yield vm + +@pytest.fixture(scope='session') +def serial_console_logger_session(request, session_log_dir): + logger = SerialConsoleLogger(session_log_dir, scope="session") + yield logger + logger.cleanup() + +@pytest.fixture(scope='class') +def serial_console_logger_class(request, session_log_dir): + logger = SerialConsoleLogger(session_log_dir, scope="class") + yield logger + logger.cleanup() + +@pytest.fixture(scope='function', autouse=True) +def track_vms_for_session_logger(request, serial_console_logger_session): + _, test_vms = _introspect_test_fixtures(request) + logger = serial_console_logger_session + for vm in test_vms: + try: + logger.add_vm(vm) + except Exception: + pass + yield + +@pytest.fixture(scope='function', autouse=True) +def track_vms_for_class_logger(request, serial_console_logger_class): + _, test_vms = _introspect_test_fixtures(request) + logger = serial_console_logger_class + for vm in test_vms: + try: + logger.add_vm(vm) + except Exception: + pass + yield + # Helper function for immediate console capture def capture_vm_console(vm: VM, log_dir: str) -> str: diff --git a/jobs.py b/jobs.py index 5f60fa52e..addb2ab7c 100755 --- a/jobs.py +++ b/jobs.py @@ -454,7 +454,8 @@ ], "nb_pools": 1, "params": {}, - "paths": ["tests/misc/test_log_collection.py"], + "paths": ["tests/misc/test_log_collection.py", + "tests/misc/test_serial_console_logging.py"], } } diff --git a/lib/distros/__init__.py b/lib/distros/__init__.py new file mode 100644 index 000000000..115230b43 --- /dev/null +++ b/lib/distros/__init__.py @@ -0,0 +1,18 @@ + +from lib.distros.alma import AlmaVM +from lib.distros.alpine import AlpineVM +from lib.distros.centos import CentOSVM +from lib.distros.debian import DebianVM +from lib.distros.freebsd import FreeBsdVM +from lib.distros.opensuse import OpenSuseVM +from lib.distros.ubuntu import UbuntuVM + +__all__ = [ + 'AlpineVM', + 'AlmaVM', + 'CentOSVM', + 'DebianVM', + 'FreeBsdVM', + 'OpenSuseVM', + 'UbuntuVM', +] diff --git a/lib/distros/alma.py b/lib/distros/alma.py new file mode 100644 index 000000000..f525c90e7 --- /dev/null +++ b/lib/distros/alma.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from lib.unixvm import UnixVM + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from lib.host import Host + + +class AlmaVM(UnixVM): + + def __init__(self, uuid: str, host: Host): + super().__init__(uuid, host) + + def configure_serial_console(self): + self.ssh('export F=/etc/default/grub ; ' + '[ -f ${F} ] && ' + ' ( grep -q " console=ttyS" ${F} || ' + ' sed -i \'/^GRUB_CMDLINE_LINUX=/ { s/[ ]*quiet[ ]*/ / ; s/"$/ console=ttyS,115200"/ }\' ${F} ) && ' + ' grub2-mkconfig -o /boot/grub2/grub.cfg', + check=False) diff --git a/lib/distros/alpine.py b/lib/distros/alpine.py new file mode 100644 index 000000000..e1b268c39 --- /dev/null +++ b/lib/distros/alpine.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +from lib.unixvm import UnixVM + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from lib.host import Host + + +class AlpineVM(UnixVM): + + def __init__(self, uuid: str, host: 'Host'): + super().__init__(uuid, host) + + def configure_serial_console(self): + + # Try extlinux.conf first (common in Alpine) + self.ssh('export F=/boot/extlinux.conf ; ' + '[ -f ${F} ] && ' + ' ( grep -q "APPEND.*console=ttyS" ${F} || ' + ' sed -i "/APPEND.*root=/ { s/$/ console=ttyS,115200/; s/ quiet / /}" ${F} )', + check=False) + + # Also try GRUB (Alpine UEFI - alpine-mini) + self.ssh( + 'export F=/etc/default/grub ; ' + '[ -f ${F} ] && ' + ' ( grep -q " console=ttyS" ${F} || ' + ' sed -i \'/^GRUB_CMDLINE_LINUX_DEFAULT/ { s/ quiet / /; ' + ' s/ *"$/ console=ttyS,115200"/ }\' ${F} ) && ' + ' grub-mkconfig -o /boot/grub/grub.cfg', + check=False + ) diff --git a/lib/distros/centos.py b/lib/distros/centos.py new file mode 100644 index 000000000..29893bb9f --- /dev/null +++ b/lib/distros/centos.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +from lib.unixvm import UnixVM + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from lib.host import Host + + +class CentOSVM(UnixVM): + + def __init__(self, uuid: str, host: 'Host'): + super().__init__(uuid, host) + + def configure_serial_console(self): + self.ssh('sed -i "/^[[:space:]]*kernel / { s/ *quiet */ /; s/$/ console=ttyS,115200/ }" /boot/grub/grub.conf', + check=False) diff --git a/lib/distros/debian.py b/lib/distros/debian.py new file mode 100644 index 000000000..531249b48 --- /dev/null +++ b/lib/distros/debian.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from lib.unixvm import UnixVM + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from lib.host import Host + + +class DebianVM(UnixVM): + + def __init__(self, uuid: str, host: 'Host'): + super().__init__(uuid, host) + + def configure_serial_console(self): + self.ssh('echo GRUB_CMDLINE_LINUX_DEFAULT="console=ttyS,115200" > /etc/default/grub.d/console.cfg &&' + ' grub-mkconfig -o /boot/grub/grub.cfg', + check=False) diff --git a/lib/distros/freebsd.py b/lib/distros/freebsd.py new file mode 100644 index 000000000..188d69672 --- /dev/null +++ b/lib/distros/freebsd.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +import logging + +from lib.unixvm import UnixVM + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from lib.host import Host + + +class FreeBsdVM(UnixVM): + + def __init__(self, uuid: str, host: 'Host'): + super().__init__(uuid, host) + + def configure_serial_console(self): + """ TODO: Implement FreeBSD serial console configuration. """ + logging.warning("Serial console configuration for FreeBSD not yet implemented") diff --git a/lib/distros/opensuse.py b/lib/distros/opensuse.py new file mode 100644 index 000000000..f8bf8a20c --- /dev/null +++ b/lib/distros/opensuse.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from lib.unixvm import UnixVM + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from lib.host import Host + + +class OpenSuseVM(UnixVM): + + def __init__(self, uuid: str, host: 'Host'): + super().__init__(uuid, host) + + def configure_serial_console(self): + self.ssh('export F=/etc/default/grub ; ' + '[ -f ${F} ] && ' + ' ( grep -q " console=ttyS" ${F} || ' + ' sed -i \'/^GRUB_CMDLINE_LINUX_DEFAULT/ { ' + ' s/ *quiet */ /; s/ *splash=silent */ /; s/ *"$/ console=ttyS,115200"/ }\' ${F} ) && ' + ' grub2-mkconfig -o /boot/grub2/grub.cfg', + check=False) diff --git a/lib/distros/ubuntu.py b/lib/distros/ubuntu.py new file mode 100644 index 000000000..70e44c62c --- /dev/null +++ b/lib/distros/ubuntu.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from lib.unixvm import UnixVM + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from lib.host import Host + + +class UbuntuVM(UnixVM): + + def __init__(self, uuid: str, host: 'Host'): + super().__init__(uuid, host) + + def configure_serial_console(self): + self.ssh('echo GRUB_CMDLINE_LINUX_DEFAULT="console=ttyS,115200" > /etc/default/grub.d/console.cfg &&' + ' update-grub', + check=False) diff --git a/lib/serial_console_logger.py b/lib/serial_console_logger.py new file mode 100644 index 000000000..25347ef7d --- /dev/null +++ b/lib/serial_console_logger.py @@ -0,0 +1,311 @@ +""" +Serial Console Logger - Background logging of VM serial console output + +This module provides facilities to automatically log VM serial console output +during test execution via SSH subprocesses running `xl console`. +""" + +from __future__ import annotations + +import datetime +import logging +import os +import subprocess +import threading + +from typing import TYPE_CHECKING, Dict, Optional, Set + +if TYPE_CHECKING: + from lib.host import Host + from lib.vm import VM + + +class SerialConsoleLogger: + """ + Manages background SSH processes that log VMs serial console output. + Can log and monitor multiple VMs at a time. + + This class handles: + - Starting SSH subprocesses with `xl console` for each VM + - Tracking VM domids and detecting changes + - Background monitoring to detect domid changes during tests + - Writing console output to log files + """ + + def __init__(self, log_dir: str, scope: str = "session", + enable_monitoring: bool = True, monitor_interval: float = 5.0, + keep_log: bool = True): + + self.log_dir = log_dir + self.scope = scope + self.keep_log = keep_log + self.processes: Dict[str, subprocess.Popen | None] = {} # vm_uuid -> Popen + self.vm_domids: Dict[str, str] = {} # vm_uuid -> domid + self.vm_hosts: Dict[str, Host] = {} # vm_uuid -> Host + self.log_files: Dict[str, str] = {} # vm_uuid -> log_file_path + + # Background monitoring + self.enable_monitoring = enable_monitoring + self.monitor_interval = monitor_interval + self.monitor_thread: Optional[threading.Thread] = None + self.monitor_stop_event = threading.Event() + + logging.debug( + f"SerialConsoleLogger initialized (scope={scope}, log_dir={log_dir}, " + f"monitoring={enable_monitoring}, interval={monitor_interval}s)" + ) + + # Start monitoring if enabled + if self.enable_monitoring: + self.start_monitoring() + + def _get_log_filename(self, vm_uuid: str, host: Host) -> str: + timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + host_ip_safe = host.hostname_or_ip.replace(':', '_').replace('.', '_') + vm_uuid_short = vm_uuid[:8] + + filename = f"{host_ip_safe}_vm_{vm_uuid_short}_serial_{timestamp}.log" + return os.path.join(self.log_dir, filename) + + def _vm_param_get(self, vm_uuid: str, param: str): + host = self.vm_hosts.get(vm_uuid) + assert host + return host.xe( + 'vm-param-get', + {'uuid': vm_uuid, 'param-name': param}, + minimal=True + ) + + def add_vm(self, vm: VM, host: Optional[Host] = None) -> bool: + if vm.uuid in self.processes: + return True + + if host is None: + host = vm.host + + assert host is not None, "Host must be provided or VM must have a host" + + if not vm.is_running(): + return False + + domid = vm.param_get("dom-id") + if not domid or domid == '-1': + return False + + return self._start_logging(vm.uuid, host, domid) + + def remove_vm(self, vm: VM) -> None: + self._remove_vm_by_uuid(vm.uuid) + + # Background monitoring methods + + def start_monitoring(self) -> None: + if self.monitor_thread is not None: + return + + logging.debug(f"Starting background monitoring thread (interval={self.monitor_interval}s)") + self.monitor_thread = threading.Thread( + target=self._monitor_loop, + daemon=True, + name=f"SerialConsoleMonitor-{self.scope}" + ) + self.monitor_thread.start() + + def _monitor_loop(self) -> None: + while not self.monitor_stop_event.wait(self.monitor_interval): + # Get snapshot of current VMs being logged + vm_uuids = list(self.processes.keys()) + + for vm_uuid in vm_uuids: + try: + self._check_vm_state_by_uuid(vm_uuid) + except Exception as e: + # Ignore monitoring errors for individual VMs + logging.warning(f"something went wrong: {e}") + pass + + def _check_vm_state_by_uuid(self, vm_uuid: str): + host = self.vm_hosts.get(vm_uuid) + process = self.processes.get(vm_uuid) + tracked_domid = self.vm_domids.get(vm_uuid) + + logging.warning(f"check VM state: {host} {process} {tracked_domid}") + assert host + + # Check VM power state + power_state = self._vm_param_get(vm_uuid, 'power-state') + if power_state != 'running': + # VM not running, stop logging + logging.warning(f"stopping: state={power_state}") + self._remove_vm_by_uuid(vm_uuid) + return + + # If we don't have process here + if not process: + try: + current_domid = self._vm_param_get(vm_uuid, 'dom-id') + if current_domid and current_domid != '-1': + # VM is running, try to start logging + self._restart_logging_by_uuid(vm_uuid) + return + except Exception: + logging.warning("restart failed, retry later") + return + + try: + try: + process.poll() + except Exception: + logging.debug( + f"[monitor] Serial console process for VM {vm_uuid[:8]} " + f"terminated (exit code {process.returncode}), restarting" + ) + self._restart_logging_by_uuid(vm_uuid) + return True + + current_domid = self._vm_param_get(vm_uuid, 'dom-id') + + # Check if domid is -1 (VM doesn't exist or was destroyed) + if not current_domid: + # VM destroyed or halted (domid=-1), stop logging + self._remove_vm_by_uuid(vm_uuid) + return False + + # Both domid can be -1, if VM is not started yet + if current_domid != -1 and current_domid != tracked_domid: + logging.debug( + f"[monitor] VM {vm_uuid[:8]} domid changed from {tracked_domid} " + f"to {current_domid}, restarting serial console logging" + ) + return self._restart_logging_by_uuid(vm_uuid) + + return True + + except Exception: + return False + + def _restart_logging_by_uuid(self, vm_uuid: str) -> bool: + host = self.vm_hosts.get(vm_uuid) + if not host: + return False + + self._stop_logging(vm_uuid) + + try: + domid = self._vm_param_get(vm_uuid, 'dom-id') + except Exception: + return False + if domid == '-1' or not domid: + return False + + success = self._start_logging(vm_uuid, host, domid) + return success + + def _start_logging(self, vm_uuid: str, host: Host, domid: str) -> bool: + try: + os.makedirs(self.log_dir, exist_ok=True) + + ssh_cmd = [ + "ssh", "-t", + "-o", "StrictHostKeyChecking=no", + "-o", "UserKnownHostsFile=/dev/null", + "-o", "LogLevel=ERROR", + f"{host.user}@{host.hostname_or_ip}", + f"xl console -t serial {domid} 2> /dev/null" + ] + + # Fill dictionnaries before running command in case VM is not + # started yet + self.processes[vm_uuid] = None + self.vm_domids[vm_uuid] = domid + self.vm_hosts[vm_uuid] = host + + if self.keep_log and self.log_files.get(vm_uuid): + log_file = self.log_files[vm_uuid] + else: + log_file = self._get_log_filename(vm_uuid, self.vm_hosts[vm_uuid]) + self.log_files[vm_uuid] = log_file + + logging.debug(f"Starting serial console logging for VM {vm_uuid[:8]} (domid={domid}) -> {log_file}") + + with open(log_file, 'a') as log_fh: + process = subprocess.Popen( + ssh_cmd, + stdout=log_fh, + stderr=subprocess.STDOUT, + stdin=subprocess.DEVNULL + ) + self.processes[vm_uuid] = process + return True + + except Exception as e: + logging.warning(f"Failed to start process: {e}") + return False + + def _stop_logging(self, vm_uuid: str) -> bool: + if vm_uuid not in self.processes: + return True + + process = self.processes[vm_uuid] + log_file = self.log_files[vm_uuid] + + try: + assert process + + process.terminate() + try: + process.wait(timeout=5) + except subprocess.TimeoutExpired: + logging.warning(f"Process {process.pid} didn't terminate, force killing") + process.kill() + process.wait() + + logging.debug(f"Stopped serial console logging for VM {vm_uuid[:8]}, log: {log_file}") + self.processes[vm_uuid] = None + + if os.stat(log_file).st_size == 0: + os.unlink(log_file) + + return True + except Exception: + logging.warning("Exception received will stopping logging") + return False + + def _remove_vm_by_uuid(self, vm_uuid: str) -> None: + logging.warning(f"Remove VM: {vm_uuid}") + if self._stop_logging(vm_uuid): + + if vm_uuid in self.processes: + del self.processes[vm_uuid] + if vm_uuid in self.vm_domids: + del self.vm_domids[vm_uuid] + if vm_uuid in self.vm_hosts: + del self.vm_hosts[vm_uuid] + if vm_uuid in self.log_files: + del self.log_files[vm_uuid] + + def cleanup(self) -> None: + """Stop monitoring thread and all logging processes.""" + logging.warning("Monitor cleanup called") + + # Stop monitoring thread first + if self.monitor_thread is not None: + self.monitor_stop_event.set() + + # Wait for thread to finish (max 2 seconds) + self.monitor_thread.join(timeout=2.0) + + if self.monitor_thread.is_alive(): + logging.warning("Monitoring thread did not stop cleanly") + + self.monitor_thread = None + + # Make a copy of the list since we'll be modifying it + vm_uuids = list(self.processes.keys()) + + for vm_uuid in vm_uuids: + self._remove_vm_by_uuid(vm_uuid) + + def get_logged_vms(self) -> Set[str]: + """Return set of VM UUIDs currently being logged.""" + return set(self.processes.keys()) diff --git a/lib/unixvm.py b/lib/unixvm.py new file mode 100644 index 000000000..be7502a2f --- /dev/null +++ b/lib/unixvm.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +import logging + +from lib.vm import VM + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from lib.host import Host + + +class UnixVM(VM): + + def __init__(self, uuid: str, host: 'Host'): + super().__init__(uuid, host) + + @classmethod + def detect_distro(cls, vm: VM) -> str: + # Try VM's built-in distro() method first + distro = vm.distro() + if distro: + return str(distro).strip() + + # Fallback: read /etc/os-release or check for centos-release + result = vm.ssh( + '([ -e /etc/os-release ] && . /etc/os-release && echo $ID) ||' + ' ([ -e /etc/centos-release ] && echo centos)', + check=False + ) + if isinstance(result, bytes): + return result.decode('utf-8').strip() + return str(result).strip() + + @classmethod + def get_distro_class(cls, distro: str) -> type['UnixVM']: + # Import here to avoid circular dependencies + from lib.distros import ( + AlmaVM, + AlpineVM, + CentOSVM, + DebianVM, + FreeBsdVM, + OpenSuseVM, + UbuntuVM, + ) + + distro_map = { + 'alpine': AlpineVM, + 'almalinux': AlmaVM, + 'centos': CentOSVM, + 'debian': DebianVM, + 'freebsd': FreeBsdVM, + 'opensuse-leap': OpenSuseVM, + 'ubuntu': UbuntuVM, + } + + if distro not in distro_map: + raise ValueError(f"Unknown distro: {distro}") + + return distro_map[distro] + + @classmethod + def from_vm(cls, vm: VM) -> 'UnixVM': + instance = cls(vm.uuid, vm.host) + # Copy runtime state from the original VM + instance.ip = vm.ip + instance.previous_host = vm.previous_host + instance.is_windows = vm.is_windows + instance.is_uefi = vm.is_uefi + if hasattr(vm, 'vdis'): + instance.vdis = vm.vdis + return instance + + @classmethod + def from_vm_auto_detect(cls, vm: VM) -> 'UnixVM': + distro = cls.detect_distro(vm) + logging.debug(f"Detected distro: {distro}") + distro_class = cls.get_distro_class(distro) + return distro_class.from_vm(vm) + + def configure_serial_console(self) -> None: + raise NotImplementedError( + f"Serial console configuration not implemented for {self.__class__.__name__}" + ) diff --git a/lib/windowsvm.py b/lib/windowsvm.py new file mode 100644 index 000000000..e720e8e58 --- /dev/null +++ b/lib/windowsvm.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +from lib.vm import VM + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from lib.host import Host + + +class WindowsVM(VM): + + def __init__(self, uuid: str, host: 'Host'): + super().__init__(uuid, host) diff --git a/tests/misc/test_serial_console_logging.py b/tests/misc/test_serial_console_logging.py new file mode 100644 index 000000000..bae69404d --- /dev/null +++ b/tests/misc/test_serial_console_logging.py @@ -0,0 +1,49 @@ +""" +Tests demonstrating serial console logging for VMs. +""" + +import pytest + +from lib.unixvm import UnixVM + +@pytest.mark.small_vm +@pytest.mark.usefixtures("serial_console_logger_session") +class TestWithSessionLogging: + def test_vm_start_stop(self, unix_vm_with_serial_console: UnixVM): + vm = unix_vm_with_serial_console + + if not vm.is_running(): + vm.start() + else: + vm.reboot() + + vm.wait_for_os_booted() + vm.shutdown(verify=True) + + def test_vm_reboots(self, unix_vm_with_serial_console: UnixVM): + """ test domid tracking in case of reboot """ + vm = unix_vm_with_serial_console + + if not vm.is_running(): + vm.start() + + vm.wait_for_os_booted() + + # First reboot + vm.reboot(verify=True) + vm.wait_for_os_booted() + + # Second reboot + vm.reboot(verify=True) + vm.wait_for_os_booted() + + vm.shutdown(verify=True) + + +@pytest.mark.small_vm +@pytest.mark.usefixtures("serial_console_logger_class") +class TestWithClassLogging: + def test_suspend_resume(self, running_vm: UnixVM): + running_vm.suspend(verify=True) + running_vm.resume() + running_vm.wait_for_os_booted()