qemu/tests/functional/test_virtio_balloon.py
Daniel P. Berrangé 1456e90653 hw/virtio: reset virtio balloon stats on machine reset
When a machine is first booted, all virtio balloon stats are initialized
to their default value -1 (18446744073709551615 when represented as
unsigned).

They remain that way while the firmware is loading and during the early
phase of guest OS boot, until the virtio-balloon driver is activated.
Thereafter the reported stats reflect the guest OS activity.

When a machine reset is performed, however, the virtio-balloon stats are
left unchanged by QEMU, despite the guest OS no longer updating them,
nor indeed even still existing.

IOW, the mgmt app keeps getting stale stats until the guest OS starts
once more and loads the virtio-balloon driver (if ever). At that point
the app will see a discontinuity in the reported values as they suddenly
jump from the stale value to the new value. This jump is
indistinguishable from a valid data update.

While there is a "last-update" field to report on the freshness of
the stats, that does not unambiguously tell the mgmt app whether the
stats are still conceptually relevant to the current running workload.

It is more conceptually useful to reset the stats to their default
values on machine reset, given that the previous guest workload the
stats reflect no longer exists. The mgmt app can now clearly identify
that there is no stats information available from the currently
executing workload.

The 'last-update' time is also reset back to 0.

IOW, on every machine reset, the virtio stats are in the same clean
state they were in when the machine first powered on.

A functional test is added to validate this behaviour with a real
world guest OS.

Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
Message-Id: <20250204094202.2183262-1-berrange@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2025-02-21 07:18:42 -05:00

161 lines
5.6 KiB
Python
Executable file

#!/usr/bin/env python3
#
# virtio-balloon tests
#
# This work is licensed under the terms of the GNU GPL, version 2 or
# later. See the COPYING file in the top-level directory.
import time
from qemu_test import QemuSystemTest, Asset
from qemu_test import wait_for_console_pattern
from qemu_test import exec_command_and_wait_for_pattern
UNSET_STATS_VALUE = 18446744073709551615
class VirtioBalloonx86(QemuSystemTest):
    """Check virtio-balloon guest statistics across a machine reset.

    The test boots a Fedora 31 guest on a q35 machine with a
    virtio-balloon device and verifies that the 'guest-stats' QOM
    property:

      * holds only default values before any guest code has run,
      * holds real values once the guest has loaded the balloon driver,
      * is back to default values right after a 'system_reset'.
    """

    ASSET_KERNEL = Asset(
        ('https://archives.fedoraproject.org/pub/archive/fedora/linux/releases'
         '/31/Server/x86_64/os/images/pxeboot/vmlinuz'),
        'd4738d03dbbe083ca610d0821d0a8f1488bebbdccef54ce33e3adb35fda00129')

    ASSET_INITRD = Asset(
        ('https://archives.fedoraproject.org/pub/archive/fedora/linux/releases'
         '/31/Server/x86_64/os/images/pxeboot/initrd.img'),
        '277cd6c7adf77c7e63d73bbb2cded8ef9e2d3a2f100000e92ff1f8396513cd8b')

    ASSET_DISKIMAGE = Asset(
        ('https://archives.fedoraproject.org/pub/archive/fedora/linux/releases'
         '/31/Cloud/x86_64/images/Fedora-Cloud-Base-31-1.9.x86_64.qcow2'),
        'e3c1b309d9203604922d6e255c2c5d098a309c2d46215d8fc026954f3c5c27a0')

    DEFAULT_KERNEL_PARAMS = ('root=/dev/vda1 console=ttyS0 net.ifnames=0 '
                             'rd.rescue')

    def wait_for_console_pattern(self, success_message, vm=None):
        """Wait for success_message on the console, failing on a panic.

        Thin wrapper around the module-level wait_for_console_pattern()
        helper that always uses a kernel panic as the failure pattern.
        """
        # Inside a method body this name resolves to the function imported
        # from qemu_test, not to this method, so there is no recursion.
        wait_for_console_pattern(
            self,
            success_message,
            failure_message="Kernel panic - not syncing",
            vm=vm,
        )

    def mount_root(self):
        """Bring the guest to a point where the balloon driver is loaded.

        Waits for the emergency-mode shell (presumably triggered by
        'rd.rescue' on the kernel command line), mounts the root
        filesystem from /dev/vda1, chroots into it and loads the
        virtio-balloon module.
        """
        self.wait_for_console_pattern('Entering emergency mode.')
        prompt = '# '
        self.wait_for_console_pattern(prompt)
        for command in ('mount /dev/vda1 /sysroot',
                        'chroot /sysroot',
                        "modprobe virtio-balloon"):
            exec_command_and_wait_for_pattern(self, command, prompt)

    def _query_guest_stats(self):
        """Return the 'guest-stats' property of the balloon device via QMP."""
        return self.vm.qmp('qom-get',
                           {'path': '/machine/peripheral/balloon',
                            'property': 'guest-stats'})['return']

    def assert_initial_stats(self):
        """Assert that no guest has reported any balloon statistics.

        'last-update' must be 0 and every stat must still hold
        UNSET_STATS_VALUE.
        """
        ret = self._query_guest_stats()
        self.assertEqual(ret.get('last-update'), 0,
                         "'last-update' should be 0 before the guest "
                         "driver has reported any stats")
        stats = ret.get('stats')
        self.assertIsNotNone(stats, "'guest-stats' carries no 'stats' entry")
        for name, val in stats.items():
            self.assertEqual(val, UNSET_STATS_VALUE,
                             f"{name} should still be unset")

    def assert_running_stats(self, then):
        """Assert that the guest driver has reported fresh statistics.

        then -- time.time() value captured when the guest was (re)started;
                'last-update' must lie strictly between it and the
                current time.
        """
        ret = self._query_guest_stats()
        when = ret.get('last-update')
        now = time.time()
        self.assertGreater(when, then,
                           "'last-update' predates the guest start")
        self.assertLess(when, now, "'last-update' lies in the future")

        stats = ret.get('stats')
        self.assertIsNotNone(stats, "'guest-stats' carries no 'stats' entry")

        # Stats we expect this particular kernel to have set
        expected_set = {
            "stat-available-memory",
            "stat-disk-caches",
            "stat-free-memory",
            "stat-htlb-pgalloc",
            "stat-htlb-pgfail",
            "stat-major-faults",
            "stat-minor-faults",
            "stat-swap-in",
            "stat-swap-out",
            "stat-total-memory",
        }
        for name, val in stats.items():
            if name in expected_set:
                self.assertNotEqual(val, UNSET_STATS_VALUE,
                                    f"{name} should have been reported "
                                    "by the guest")
            else:
                self.assertEqual(val, UNSET_STATS_VALUE,
                                 f"{name} is not expected to be reported "
                                 "by this kernel")

    def test_virtio_balloon_stats(self):
        """Verify balloon stats before boot, while running, and after reset."""
        self.set_machine('q35')
        kernel_path = self.ASSET_KERNEL.fetch()
        initrd_path = self.ASSET_INITRD.fetch()
        diskimage_path = self.ASSET_DISKIMAGE.fetch()

        self.vm.set_console()
        # Start with CPUs paused so the stats can be inspected before any
        # guest code runs
        self.vm.add_args("-S")
        self.vm.add_args("-cpu", "max")
        self.vm.add_args("-m", "2G")
        # Slow down BIOS phase with boot menu, so that after a system
        # reset, we can reliably catch the clean stats again in BIOS
        # phase before the guest OS launches
        self.vm.add_args("-boot", "menu=on")
        self.vm.add_args("-machine", "q35,accel=kvm:tcg")
        self.vm.add_args("-device", "virtio-balloon,id=balloon")
        self.vm.add_args('-drive',
                         f'file={diskimage_path},if=none,id=drv0,snapshot=on')
        self.vm.add_args('-device', 'virtio-blk-pci,bus=pcie.0,' +
                         'drive=drv0,id=virtio-disk0,bootindex=1')

        self.vm.add_args(
            "-kernel",
            kernel_path,
            "-initrd",
            initrd_path,
            "-append",
            self.DEFAULT_KERNEL_PARAMS
        )
        self.vm.launch()

        # Poll stats at 100ms
        # NOTE(review): QEMU's virtio-balloon stats documentation appears
        # to describe this property in seconds, not milliseconds; confirm
        # which interval is actually intended here.
        self.vm.qmp('qom-set',
                    {'path': '/machine/peripheral/balloon',
                     'property': 'guest-stats-polling-interval',
                     'value': 100 })

        # We've not run any guest code yet, neither BIOS nor guest OS,
        # so stats should be all default values
        self.assert_initial_stats()

        self.vm.qmp('cont')

        then = time.time()
        self.mount_root()
        self.assert_running_stats(then)

        # Race window between these two commands, where we
        # rely on '-boot menu=on' to (hopefully) ensure we're
        # still executing the BIOS when QEMU processes the
        # 'stop', and thus have not loaded the virtio-balloon
        # driver in the guest
        self.vm.qmp('system_reset')
        self.vm.qmp('stop')

        # If the above assumption held, we're in BIOS now and
        # stats should be all back at their default values
        self.assert_initial_stats()
        self.vm.qmp('cont')

        then = time.time()
        self.mount_root()
        self.assert_running_stats(then)
# Allow running this test file directly as a script.
if __name__ == '__main__':
    QemuSystemTest.main()