From 388b849fb6c33882b481123568995a749a54f648 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 21 Oct 2024 18:07:16 +0200 Subject: [PATCH 01/49] stubs: avoid duplicate symbols in libqemuutil.a MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qapi_event_send_device_deleted is always included (together with the rest of QAPI) in libqemuutil.a if either system-mode emulation or tools are being built, and in that case the stub causes a duplicate symbol to appear in libqemuutil.a. Add the symbol only if events are not being requested. Cc: qemu-stable@nongnu.org Reviewed-by: Alex Bennée Tested-by: Alex Bennée Signed-off-by: Paolo Bonzini --- stubs/meson.build | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/stubs/meson.build b/stubs/meson.build index 772a3e817d..e91614a874 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -55,7 +55,12 @@ endif if have_user # Symbols that are used by hw/core. stub_ss.add(files('cpu-synchronize-state.c')) - stub_ss.add(files('qdev.c')) + + # Stubs for QAPI events. Those can always be included in the build, but + # they are not built at all for --disable-system --disable-tools builds. + if not (have_system or have_tools) + stub_ss.add(files('qdev.c')) + endif endif if have_system From 548de8f8dc291448df94b13fed5c57421c21edea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Tue, 15 Oct 2024 14:39:24 +0100 Subject: [PATCH 02/49] tests: add 'rust' and 'bindgen' to CI package list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although we're not enabling rust by default yet, we can still add rust and bindgen to the CI package list. This demonstrates that we're not accidentally triggering unexpected build behaviour merely from Rust being present. When we do dev work to enable rust by default, this will show we're building correctly on all platforms we target. Signed-off-by: Daniel P. Berrangé Link: https://lore.kernel.org/r/20241015133925.311587-2-berrange@redhat.com Signed-off-by: Paolo Bonzini --- .gitlab-ci.d/cirrus/freebsd-14.vars | 2 +- .gitlab-ci.d/cirrus/macos-14.vars | 2 +- .gitlab-ci.d/cirrus/macos-15.vars | 2 +- scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml | 2 ++ scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml | 2 ++ tests/docker/dockerfiles/alpine.docker | 2 ++ tests/docker/dockerfiles/centos9.docker | 2 ++ tests/docker/dockerfiles/debian-amd64-cross.docker | 3 +++ tests/docker/dockerfiles/debian-arm64-cross.docker | 3 +++ tests/docker/dockerfiles/debian-armhf-cross.docker | 3 +++ tests/docker/dockerfiles/debian-i686-cross.docker | 3 +++ tests/docker/dockerfiles/debian-mips64el-cross.docker | 3 +++ tests/docker/dockerfiles/debian-mipsel-cross.docker | 3 +++ tests/docker/dockerfiles/debian-ppc64el-cross.docker | 3 +++ tests/docker/dockerfiles/debian-s390x-cross.docker | 3 +++ tests/docker/dockerfiles/debian.docker | 2 ++ tests/docker/dockerfiles/fedora-rust-nightly.docker | 2 ++ tests/docker/dockerfiles/fedora-win64-cross.docker | 2 ++ tests/docker/dockerfiles/fedora.docker | 2 ++ tests/docker/dockerfiles/opensuse-leap.docker | 2 ++ tests/docker/dockerfiles/ubuntu2204.docker | 2 ++ tests/lcitool/projects/qemu.yml | 2 ++ tests/vm/generated/freebsd.json | 2 ++ 23 files changed, 51 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.d/cirrus/freebsd-14.vars b/.gitlab-ci.d/cirrus/freebsd-14.vars index c0655b21e9..044cec7c14 100644 --- a/.gitlab-ci.d/cirrus/freebsd-14.vars +++ b/.gitlab-ci.d/cirrus/freebsd-14.vars @@ -11,6 +11,6 @@ MAKE='/usr/local/bin/gmake' NINJA='/usr/local/bin/ninja' PACKAGING_COMMAND='pkg' PIP3='/usr/local/bin/pip-3.8' -PKGS='alsa-lib bash bison bzip2 ca_root_nss capstone4 ccache cmocka ctags curl cyrus-sasl dbus diffutils dtc flex fusefs-libs3 gettext git glib gmake gnutls gsed gtk-vnc gtk3 json-c libepoxy libffi libgcrypt libjpeg-turbo libnfs libslirp libspice-server libssh libtasn1 llvm lzo2 meson mtools ncurses nettle ninja opencv pixman pkgconf png py311-numpy py311-pillow py311-pip py311-pyyaml py311-sphinx py311-sphinx_rtd_theme py311-tomli python3 rpm2cpio sdl2 sdl2_image snappy sndio socat spice-protocol tesseract usbredir virglrenderer vte3 xorriso zstd' +PKGS='alsa-lib bash bison bzip2 ca_root_nss capstone4 ccache cmocka ctags curl cyrus-sasl dbus diffutils dtc flex fusefs-libs3 gettext git glib gmake gnutls gsed gtk-vnc gtk3 json-c libepoxy libffi libgcrypt libjpeg-turbo libnfs libslirp libspice-server libssh libtasn1 llvm lzo2 meson mtools ncurses nettle ninja opencv pixman pkgconf png py311-numpy py311-pillow py311-pip py311-pyyaml py311-sphinx py311-sphinx_rtd_theme py311-tomli python3 rpm2cpio rust rust-bindgen-cli sdl2 sdl2_image snappy sndio socat spice-protocol tesseract usbredir virglrenderer vte3 xorriso zstd' PYPI_PKGS='' PYTHON='/usr/local/bin/python3' diff --git a/.gitlab-ci.d/cirrus/macos-14.vars b/.gitlab-ci.d/cirrus/macos-14.vars index 24cfec3b89..1fd03d68b9 100644 --- a/.gitlab-ci.d/cirrus/macos-14.vars +++ b/.gitlab-ci.d/cirrus/macos-14.vars @@ -11,6 +11,6 @@ MAKE='/opt/homebrew/bin/gmake' NINJA='/opt/homebrew/bin/ninja' PACKAGING_COMMAND='brew' PIP3='/opt/homebrew/bin/pip3' -PKGS='bash bc bison bzip2 capstone ccache cmocka ctags curl dbus diffutils dtc flex gcovr gettext git glib gnu-sed gnutls gtk+3 gtk-vnc jemalloc jpeg-turbo json-c libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb llvm lzo make meson mtools ncurses nettle ninja pixman pkg-config python3 rpm2cpio sdl2 sdl2_image snappy socat sparse spice-protocol swtpm tesseract usbredir vde vte3 xorriso zlib zstd' +PKGS='bash bc bindgen bison bzip2 capstone ccache cmocka ctags curl dbus diffutils dtc flex gcovr gettext git glib gnu-sed gnutls gtk+3 gtk-vnc jemalloc jpeg-turbo json-c libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb llvm lzo make meson mtools ncurses nettle ninja pixman pkg-config python3 rpm2cpio rust sdl2 sdl2_image snappy socat sparse spice-protocol swtpm tesseract usbredir vde vte3 xorriso zlib zstd' PYPI_PKGS='PyYAML numpy pillow sphinx sphinx-rtd-theme tomli' PYTHON='/opt/homebrew/bin/python3' diff --git a/.gitlab-ci.d/cirrus/macos-15.vars b/.gitlab-ci.d/cirrus/macos-15.vars index 23b2c1d22f..b6b6d71d47 100644 --- a/.gitlab-ci.d/cirrus/macos-15.vars +++ b/.gitlab-ci.d/cirrus/macos-15.vars @@ -11,6 +11,6 @@ MAKE='/opt/homebrew/bin/gmake' NINJA='/opt/homebrew/bin/ninja' PACKAGING_COMMAND='brew' PIP3='/opt/homebrew/bin/pip3' -PKGS='bash bc bison bzip2 capstone ccache cmocka ctags curl dbus diffutils dtc flex gcovr gettext git glib gnu-sed gnutls gtk+3 gtk-vnc jemalloc jpeg-turbo json-c libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb llvm lzo make meson mtools ncurses nettle ninja pixman pkg-config python3 rpm2cpio sdl2 sdl2_image snappy socat sparse spice-protocol swtpm tesseract usbredir vde vte3 xorriso zlib zstd' +PKGS='bash bc bindgen bison bzip2 capstone ccache cmocka ctags curl dbus diffutils dtc flex gcovr gettext git glib gnu-sed gnutls gtk+3 gtk-vnc jemalloc jpeg-turbo json-c libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb llvm lzo make meson mtools ncurses nettle ninja pixman pkg-config python3 rpm2cpio rust sdl2 sdl2_image snappy socat sparse spice-protocol swtpm tesseract usbredir vde vte3 xorriso zlib zstd' PYPI_PKGS='PyYAML numpy pillow sphinx sphinx-rtd-theme tomli' PYTHON='/opt/homebrew/bin/python3' diff --git a/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml b/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml index 71a0f0c433..dd89ba1b3a 100644 --- a/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml +++ b/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml @@ -7,6 +7,7 @@ packages: - bash - bc + - bindgen - bison - bsdextrautils - bzip2 @@ -113,6 +114,7 @@ packages: - python3-venv - python3-yaml - rpm2cpio + - rustc - sed - socat - sparse diff --git a/scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml b/scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml index d8de967b18..74f14d8d0f 100644 --- a/scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml +++ b/scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml @@ -7,6 +7,7 @@ packages: - bash - bc + - bindgen - bison - bsdextrautils - bzip2 @@ -111,6 +112,7 @@ packages: - python3-venv - python3-yaml - rpm2cpio + - rustc - sed - socat - sparse diff --git a/tests/docker/dockerfiles/alpine.docker b/tests/docker/dockerfiles/alpine.docker index 54b9721997..9ce7b5280c 100644 --- a/tests/docker/dockerfiles/alpine.docker +++ b/tests/docker/dockerfiles/alpine.docker @@ -90,6 +90,8 @@ RUN apk update && \ py3-yaml \ python3 \ rpm2cpio \ + rust \ + rust-bindgen \ samurai \ sdl2-dev \ sdl2_image-dev \ diff --git a/tests/docker/dockerfiles/centos9.docker b/tests/docker/dockerfiles/centos9.docker index 0256865b9e..a9681c8a96 100644 --- a/tests/docker/dockerfiles/centos9.docker +++ b/tests/docker/dockerfiles/centos9.docker @@ -16,6 +16,7 @@ RUN dnf distro-sync -y && \ alsa-lib-devel \ bash \ bc \ + bindgen-cli \ bison \ brlapi-devel \ bzip2 \ @@ -102,6 +103,7 @@ RUN dnf distro-sync -y && \ python3-sphinx_rtd_theme \ python3-tomli \ rdma-core-devel \ + rust \ sed \ snappy-devel \ socat \ diff --git a/tests/docker/dockerfiles/debian-amd64-cross.docker b/tests/docker/dockerfiles/debian-amd64-cross.docker index 136c3a79a1..b86949b2e6 100644 --- a/tests/docker/dockerfiles/debian-amd64-cross.docker +++ b/tests/docker/dockerfiles/debian-amd64-cross.docker @@ -13,6 +13,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get install --no-install-recommends -y \ bash \ bc \ + bindgen \ bison \ bsdextrautils \ bzip2 \ @@ -53,6 +54,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ python3-venv \ python3-yaml \ rpm2cpio \ + rustc \ sed \ socat \ sparse \ @@ -170,6 +172,7 @@ endian = 'little'\n" > /usr/local/share/meson/cross/x86_64-linux-gnu && \ ENV ABI "x86_64-linux-gnu" ENV MESON_OPTS "--cross-file=x86_64-linux-gnu" +ENV RUST_TARGET "x86_64-unknown-linux-gnu" ENV QEMU_CONFIGURE_OPTS --cross-prefix=x86_64-linux-gnu- ENV DEF_TARGET_LIST x86_64-softmmu,x86_64-linux-user,i386-softmmu,i386-linux-user # As a final step configure the user (if env is defined) diff --git a/tests/docker/dockerfiles/debian-arm64-cross.docker b/tests/docker/dockerfiles/debian-arm64-cross.docker index 233f6ee1de..6878979112 100644 --- a/tests/docker/dockerfiles/debian-arm64-cross.docker +++ b/tests/docker/dockerfiles/debian-arm64-cross.docker @@ -13,6 +13,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get install --no-install-recommends -y \ bash \ bc \ + bindgen \ bison \ bsdextrautils \ bzip2 \ @@ -53,6 +54,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ python3-venv \ python3-yaml \ rpm2cpio \ + rustc \ sed \ socat \ sparse \ @@ -169,6 +171,7 @@ endian = 'little'\n" > /usr/local/share/meson/cross/aarch64-linux-gnu && \ ENV ABI "aarch64-linux-gnu" ENV MESON_OPTS "--cross-file=aarch64-linux-gnu" +ENV RUST_TARGET "aarch64-unknown-linux-gnu" ENV QEMU_CONFIGURE_OPTS --cross-prefix=aarch64-linux-gnu- ENV DEF_TARGET_LIST aarch64-softmmu,aarch64-linux-user # As a final step configure the user (if env is defined) diff --git a/tests/docker/dockerfiles/debian-armhf-cross.docker b/tests/docker/dockerfiles/debian-armhf-cross.docker index f26385e0b9..e38b8fcc41 100644 --- a/tests/docker/dockerfiles/debian-armhf-cross.docker +++ b/tests/docker/dockerfiles/debian-armhf-cross.docker @@ -13,6 +13,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get install --no-install-recommends -y \ bash \ bc \ + bindgen \ bison \ bsdextrautils \ bzip2 \ @@ -53,6 +54,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ python3-venv \ python3-yaml \ rpm2cpio \ + rustc \ sed \ socat \ sparse \ @@ -169,6 +171,7 @@ endian = 'little'\n" > /usr/local/share/meson/cross/arm-linux-gnueabihf && \ ENV ABI "arm-linux-gnueabihf" ENV MESON_OPTS "--cross-file=arm-linux-gnueabihf" +ENV RUST_TARGET "armv7-unknown-linux-gnueabihf" ENV QEMU_CONFIGURE_OPTS --cross-prefix=arm-linux-gnueabihf- ENV DEF_TARGET_LIST arm-softmmu,arm-linux-user # As a final step configure the user (if env is defined) diff --git a/tests/docker/dockerfiles/debian-i686-cross.docker b/tests/docker/dockerfiles/debian-i686-cross.docker index 2328ee1732..b4d0618739 100644 --- a/tests/docker/dockerfiles/debian-i686-cross.docker +++ b/tests/docker/dockerfiles/debian-i686-cross.docker @@ -13,6 +13,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get install --no-install-recommends -y \ bash \ bc \ + bindgen \ bison \ bsdextrautils \ bzip2 \ @@ -53,6 +54,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ python3-venv \ python3-yaml \ rpm2cpio \ + rustc \ sed \ socat \ sparse \ @@ -168,6 +170,7 @@ endian = 'little'\n" > /usr/local/share/meson/cross/i686-linux-gnu && \ ENV ABI "i686-linux-gnu" ENV MESON_OPTS "--cross-file=i686-linux-gnu" +ENV RUST_TARGET "i686-unknown-linux-gnu" ENV QEMU_CONFIGURE_OPTS --cross-prefix=i686-linux-gnu- ENV DEF_TARGET_LIST x86_64-softmmu,x86_64-linux-user,i386-softmmu,i386-linux-user # As a final step configure the user (if env is defined) diff --git a/tests/docker/dockerfiles/debian-mips64el-cross.docker b/tests/docker/dockerfiles/debian-mips64el-cross.docker index bfa96cb507..4f6c816b3f 100644 --- a/tests/docker/dockerfiles/debian-mips64el-cross.docker +++ b/tests/docker/dockerfiles/debian-mips64el-cross.docker @@ -13,6 +13,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get install --no-install-recommends -y \ bash \ bc \ + bindgen \ bison \ bsdextrautils \ bzip2 \ @@ -53,6 +54,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ python3-venv \ python3-yaml \ rpm2cpio \ + rustc \ sed \ socat \ sparse \ @@ -158,6 +160,7 @@ endian = 'little'\n" > /usr/local/share/meson/cross/mips64el-linux-gnuabi64 && \ ENV ABI "mips64el-linux-gnuabi64" ENV MESON_OPTS "--cross-file=mips64el-linux-gnuabi64" +ENV RUST_TARGET "mips64el-unknown-linux-gnuabi64" ENV QEMU_CONFIGURE_OPTS --cross-prefix=mips64el-linux-gnuabi64- ENV DEF_TARGET_LIST mips64el-softmmu,mips64el-linux-user # As a final step configure the user (if env is defined) diff --git a/tests/docker/dockerfiles/debian-mipsel-cross.docker b/tests/docker/dockerfiles/debian-mipsel-cross.docker index 4ac314e22e..a238526b4c 100644 --- a/tests/docker/dockerfiles/debian-mipsel-cross.docker +++ b/tests/docker/dockerfiles/debian-mipsel-cross.docker @@ -13,6 +13,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get install --no-install-recommends -y \ bash \ bc \ + bindgen \ bison \ bsdextrautils \ bzip2 \ @@ -53,6 +54,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ python3-venv \ python3-yaml \ rpm2cpio \ + rustc \ sed \ socat \ sparse \ @@ -166,6 +168,7 @@ endian = 'little'\n" > /usr/local/share/meson/cross/mipsel-linux-gnu && \ ENV ABI "mipsel-linux-gnu" ENV MESON_OPTS "--cross-file=mipsel-linux-gnu" +ENV RUST_TARGET "mipsel-unknown-linux-gnu" ENV QEMU_CONFIGURE_OPTS --cross-prefix=mipsel-linux-gnu- ENV DEF_TARGET_LIST mipsel-softmmu,mipsel-linux-user # As a final step configure the user (if env is defined) diff --git a/tests/docker/dockerfiles/debian-ppc64el-cross.docker b/tests/docker/dockerfiles/debian-ppc64el-cross.docker index 8c1dcec9cf..b6c6f5b7b0 100644 --- a/tests/docker/dockerfiles/debian-ppc64el-cross.docker +++ b/tests/docker/dockerfiles/debian-ppc64el-cross.docker @@ -13,6 +13,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get install --no-install-recommends -y \ bash \ bc \ + bindgen \ bison \ bsdextrautils \ bzip2 \ @@ -53,6 +54,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ python3-venv \ python3-yaml \ rpm2cpio \ + rustc \ sed \ socat \ sparse \ @@ -168,6 +170,7 @@ endian = 'little'\n" > /usr/local/share/meson/cross/powerpc64le-linux-gnu && \ ENV ABI "powerpc64le-linux-gnu" ENV MESON_OPTS "--cross-file=powerpc64le-linux-gnu" +ENV RUST_TARGET "powerpc64le-unknown-linux-gnu" ENV QEMU_CONFIGURE_OPTS --cross-prefix=powerpc64le-linux-gnu- ENV DEF_TARGET_LIST ppc64-softmmu,ppc64-linux-user # As a final step configure the user (if env is defined) diff --git a/tests/docker/dockerfiles/debian-s390x-cross.docker b/tests/docker/dockerfiles/debian-s390x-cross.docker index 72668e0315..14f169984e 100644 --- a/tests/docker/dockerfiles/debian-s390x-cross.docker +++ b/tests/docker/dockerfiles/debian-s390x-cross.docker @@ -13,6 +13,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get install --no-install-recommends -y \ bash \ bc \ + bindgen \ bison \ bsdextrautils \ bzip2 \ @@ -53,6 +54,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ python3-venv \ python3-yaml \ rpm2cpio \ + rustc \ sed \ socat \ sparse \ @@ -167,6 +169,7 @@ endian = 'big'\n" > /usr/local/share/meson/cross/s390x-linux-gnu && \ ENV ABI "s390x-linux-gnu" ENV MESON_OPTS "--cross-file=s390x-linux-gnu" +ENV RUST_TARGET "s390x-unknown-linux-gnu" ENV QEMU_CONFIGURE_OPTS --cross-prefix=s390x-linux-gnu- ENV DEF_TARGET_LIST s390x-softmmu,s390x-linux-user # As a final step configure the user (if env is defined) diff --git a/tests/docker/dockerfiles/debian.docker b/tests/docker/dockerfiles/debian.docker index 42bd0067d1..22b064cbf0 100644 --- a/tests/docker/dockerfiles/debian.docker +++ b/tests/docker/dockerfiles/debian.docker @@ -13,6 +13,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get install --no-install-recommends -y \ bash \ bc \ + bindgen \ bison \ bsdextrautils \ bzip2 \ @@ -120,6 +121,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ python3-venv \ python3-yaml \ rpm2cpio \ + rustc \ sed \ socat \ sparse \ diff --git a/tests/docker/dockerfiles/fedora-rust-nightly.docker b/tests/docker/dockerfiles/fedora-rust-nightly.docker index e642db163c..c2029cc4c6 100644 --- a/tests/docker/dockerfiles/fedora-rust-nightly.docker +++ b/tests/docker/dockerfiles/fedora-rust-nightly.docker @@ -23,6 +23,7 @@ exec "$@"\n' > /usr/bin/nosync && \ alsa-lib-devel \ bash \ bc \ + bindgen-cli \ bison \ brlapi-devel \ bzip2 \ @@ -113,6 +114,7 @@ exec "$@"\n' > /usr/bin/nosync && \ python3-sphinx_rtd_theme \ python3-zombie-imp \ rdma-core-devel \ + rust \ sed \ snappy-devel \ socat \ diff --git a/tests/docker/dockerfiles/fedora-win64-cross.docker b/tests/docker/dockerfiles/fedora-win64-cross.docker index 6b264d901f..3ba62b55ad 100644 --- a/tests/docker/dockerfiles/fedora-win64-cross.docker +++ b/tests/docker/dockerfiles/fedora-win64-cross.docker @@ -20,6 +20,7 @@ exec "$@"\n' > /usr/bin/nosync && \ nosync dnf install -y \ bash \ bc \ + bindgen-cli \ bison \ bzip2 \ ca-certificates \ @@ -53,6 +54,7 @@ exec "$@"\n' > /usr/bin/nosync && \ python3-sphinx \ python3-sphinx_rtd_theme \ python3-zombie-imp \ + rust \ sed \ socat \ sparse \ diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker index ecdefaff1a..1980421f6a 100644 --- a/tests/docker/dockerfiles/fedora.docker +++ b/tests/docker/dockerfiles/fedora.docker @@ -23,6 +23,7 @@ exec "$@"\n' > /usr/bin/nosync && \ alsa-lib-devel \ bash \ bc \ + bindgen-cli \ bison \ brlapi-devel \ bzip2 \ @@ -113,6 +114,7 @@ exec "$@"\n' > /usr/bin/nosync && \ python3-sphinx_rtd_theme \ python3-zombie-imp \ rdma-core-devel \ + rust \ sed \ snappy-devel \ socat \ diff --git a/tests/docker/dockerfiles/opensuse-leap.docker b/tests/docker/dockerfiles/opensuse-leap.docker index e359a4e5c1..53b9461030 100644 --- a/tests/docker/dockerfiles/opensuse-leap.docker +++ b/tests/docker/dockerfiles/opensuse-leap.docker @@ -96,6 +96,8 @@ RUN zypper update -y && \ python311-pip \ python311-setuptools \ rdma-core-devel \ + rust \ + rust-bindgen \ sed \ snappy-devel \ sndio-devel \ diff --git a/tests/docker/dockerfiles/ubuntu2204.docker b/tests/docker/dockerfiles/ubuntu2204.docker index 3a7de6a318..ce3aa39d4f 100644 --- a/tests/docker/dockerfiles/ubuntu2204.docker +++ b/tests/docker/dockerfiles/ubuntu2204.docker @@ -13,6 +13,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ eatmydata apt-get install --no-install-recommends -y \ bash \ bc \ + bindgen \ bison \ bsdextrautils \ bzip2 \ @@ -120,6 +121,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ python3-venv \ python3-yaml \ rpm2cpio \ + rustc \ sed \ socat \ sparse \ diff --git a/tests/lcitool/projects/qemu.yml b/tests/lcitool/projects/qemu.yml index 252e871f80..6852918642 100644 --- a/tests/lcitool/projects/qemu.yml +++ b/tests/lcitool/projects/qemu.yml @@ -3,6 +3,7 @@ packages: - alsa - bash - bc + - bindgen - bison - brlapi - bzip2 @@ -101,6 +102,7 @@ packages: - python3-tomli - python3-venv - rpm2cpio + - rust - sdl2 - sdl2-image - sed diff --git a/tests/vm/generated/freebsd.json b/tests/vm/generated/freebsd.json index 1eb2757c95..5da8d30bcd 100644 --- a/tests/vm/generated/freebsd.json +++ b/tests/vm/generated/freebsd.json @@ -61,6 +61,8 @@ "py311-tomli", "python3", "rpm2cpio", + "rust", + "rust-bindgen-cli", "sdl2", "sdl2_image", "snappy", From 15195de6a93438be99fdf9a90992c4228527130d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Tue, 15 Oct 2024 14:39:25 +0100 Subject: [PATCH 03/49] ci: enable rust in the Fedora system build job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We previously added a new job running Fedora with nightly rust toolchain. The standard rust toolchain distributed by Fedora is new enough, however, to let us enable a CI build with that too. Signed-off-by: Daniel P. Berrangé Link: https://lore.kernel.org/r/20241015133925.311587-3-berrange@redhat.com Signed-off-by: Paolo Bonzini --- .gitlab-ci.d/buildtest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml index f0cbdf1992..19ba5b9c81 100644 --- a/.gitlab-ci.d/buildtest.yml +++ b/.gitlab-ci.d/buildtest.yml @@ -115,7 +115,7 @@ build-system-fedora: job: amd64-fedora-container variables: IMAGE: fedora - CONFIGURE_ARGS: --disable-gcrypt --enable-nettle --enable-docs --enable-crypto-afalg + CONFIGURE_ARGS: --disable-gcrypt --enable-nettle --enable-docs --enable-crypto-afalg --enable-rust TARGETS: microblaze-softmmu mips-softmmu xtensa-softmmu m68k-softmmu riscv32-softmmu ppc-softmmu sparc64-softmmu MAKE_CHECK_ARGS: check-build From 14bde8cd7613753182baee636f216cb2d840a9e3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 21 Oct 2024 08:59:03 +0200 Subject: [PATCH 04/49] target/i386: fix CPUID check for LFENCE and SFENCE LFENCE and SFENCE were introduced with the original SSE instruction set; marking them incorrectly as cpuid(SSE2) causes failures for CPU models that lack SSE2, for example pentium3. Reported-by: Guenter Roeck Tested-by: Guenter Roeck Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- target/i386/tcg/decode-new.c.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index 1f19371646..48bf730cd3 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -345,9 +345,9 @@ static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, [1] = X86_OP_ENTRYw(RDxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3), [2] = X86_OP_ENTRYr(WRxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0), [3] = X86_OP_ENTRYr(WRxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0), - [5] = X86_OP_ENTRY0(LFENCE, cpuid(SSE2) p_00), + [5] = X86_OP_ENTRY0(LFENCE, cpuid(SSE) p_00), [6] = X86_OP_ENTRY0(MFENCE, cpuid(SSE2) p_00), - [7] = X86_OP_ENTRY0(SFENCE, cpuid(SSE2) p_00), + [7] = X86_OP_ENTRY0(SFENCE, cpuid(SSE) p_00), }; static const X86OpEntry group15_mem[8] = { From b57e4e785b408b59b82d834501b37a57b837d203 Mon Sep 17 00:00:00 2001 From: Pierrick Bouvier Date: Wed, 23 Oct 2024 00:39:14 -0700 Subject: [PATCH 05/49] scripts: remove erroneous file that breaks git clone on Windows This file was created by mistake in recent ed7667188 (9p: remove 'proxy' filesystem backend driver). When cloning the repository using native git for windows, we see this: Error: error: invalid path 'scripts/meson-buildoptions.' Error: The process 'C:\Program Files\Git\bin\git.exe' failed with exit code 128 Link: https://lore.kernel.org/r/20241023073914.895438-1-pierrick.bouvier@linaro.org Signed-off-by: Paolo Bonzini --- scripts/meson-buildoptions. | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 scripts/meson-buildoptions. diff --git a/scripts/meson-buildoptions. b/scripts/meson-buildoptions. deleted file mode 100644 index e69de29bb2..0000000000 From 0665b3f9925ee7041e6a8eee9d1deda59a726383 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 27 Oct 2024 14:04:10 +0100 Subject: [PATCH 06/49] configure: detect 64-bit MIPS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While right now 64-bit MIPS and 32-bit MIPS share the code in QEMU, Rust uses different rules for the target. Set $cpu correctly to either mips or mips64 (--cpu=mips64* is already accepted in the case statement that canonicalizes cpu/host_arch/linux_arch), and adjust the checks to account for the different between $cpu (which handles mips/mips64 separately) and $host_arch (which does not). Fixes: 1a6ef6ff624 ("configure, meson: detect Rust toolchain", 2024-10-11) Acked-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- configure | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/configure b/configure index 4a0159e183..f24940ca77 100755 --- a/configure +++ b/configure @@ -395,7 +395,11 @@ elif check_define _ARCH_PPC ; then cpu="ppc" fi elif check_define __mips__ ; then - cpu="mips" + if check_define __mips64 ; then + cpu="mips64" + else + cpu="mips" + fi elif check_define __s390__ ; then if check_define __s390x__ ; then cpu="s390x" @@ -1230,7 +1234,7 @@ EOF fi fi - case "$host_arch" in + case "$cpu" in arm) # e.g. arm-unknown-linux-gnueabi, arm-unknown-linux-gnueabihf write_c_skeleton @@ -1278,7 +1282,7 @@ EOF test "$rust_arch" = arm && test "$rust_os" != linux && rust_arch=armv7 ;; - mips|mips64) + mips) # preserve ISA version (mipsisa64r6 etc.) and include endianness rust_arch=${raw_cpu%el} test "$bigendian" = no && rust_arch=${rust_arch}el From 14ed29da419f2bfcf177af21348e0e3c643ac6b0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 27 Oct 2024 14:07:01 +0100 Subject: [PATCH 07/49] configure, meson: deprecate 32-bit MIPS The mipsel architecture is not available in Debian Trixie, and it will likely be a hard failure as soon as we drop support for the old Rust toolchain in Debian Bookworm. Prepare by deprecating 32-bit little endian MIPS in QEMU 9.2. Signed-off-by: Paolo Bonzini --- docs/about/build-platforms.rst | 2 +- docs/about/deprecated.rst | 12 ++++++++---- meson.build | 8 ++++++++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/docs/about/build-platforms.rst b/docs/about/build-platforms.rst index 8fd7da140a..b779eb5493 100644 --- a/docs/about/build-platforms.rst +++ b/docs/about/build-platforms.rst @@ -41,7 +41,7 @@ Those hosts are officially supported, with various accelerators: - Accelerators * - Arm - kvm (64 bit only), tcg, xen - * - MIPS (little endian only) + * - MIPS (64 bit little endian only) - kvm, tcg * - PPC - kvm, tcg diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 1e1e9f5f18..7c2be89325 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -164,15 +164,19 @@ property types. Host Architectures ------------------ -BE MIPS (since 7.2) -''''''''''''''''''' +Big endian MIPS since 7.2; 32-bit little endian MIPS since 9.2 +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' As Debian 10 ("Buster") moved into LTS the big endian 32 bit version of MIPS moved out of support making it hard to maintain our cross-compilation CI tests of the architecture. As we no longer have CI coverage support may bitrot away before the deprecation process -completes. The little endian variants of MIPS (both 32 and 64 bit) are -still a supported host architecture. +completes. + +Likewise, the little endian variant of 32 bit MIPS is not supported by +Debian 13 ("Trixie") and newer. + +64 bit little endian MIPS is still a supported host architecture. System emulation on 32-bit x86 hosts (since 8.0) '''''''''''''''''''''''''''''''''''''''''''''''' diff --git a/meson.build b/meson.build index f7d4517521..bae7a4370d 100644 --- a/meson.build +++ b/meson.build @@ -4720,6 +4720,14 @@ if host_arch == 'unknown' message('configure has succeeded and you can continue to build, but') message('QEMU will use a slow interpreter to emulate the target CPU.') endif +elif host_arch == 'mips' + message() + warning('DEPRECATED HOST CPU') + message() + message('Support for CPU host architecture ' + cpu + ' is going to be') + message('dropped as soon as the QEMU project stops supporting Debian 12') + message('("Bookworm"). Going forward, the QEMU project will not guarantee') + message('that QEMU will compile or work on this host CPU.') endif if not supported_oses.contains(host_os) From 3139ad088b52ab840d760d40962fc0db57c62d83 Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Tue, 22 Oct 2024 10:36:28 +0800 Subject: [PATCH 08/49] MAINTAINERS: Add myself as a reviewer of x86 general architecture support X86 architecture has always been a focus of my work. I would like to help to review more related patches. Signed-off-by: Zhao Liu Link: https://lore.kernel.org/r/20241022023628.1743686-1-zhao1.liu@intel.com Signed-off-by: Paolo Bonzini --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f48d9142b8..58c71f753a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -132,6 +132,7 @@ F: configs/targets/mips* X86 general architecture support M: Paolo Bonzini +R: Zhao Liu S: Maintained F: configs/devices/i386-softmmu/default.mak F: configs/targets/i386-softmmu.mak From 8aade934dfa6ef9a1ca20666078e7c2d19e56368 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 29 Oct 2024 10:43:16 +0100 Subject: [PATCH 09/49] accel: remove dead statement and useless assertion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ops is assigned again just below, and the result of the assignment must be non-NULL. Originally, the check for NULL was meant to be a check for the existence of the ops class: ops = ACCEL_OPS_CLASS(object_class_by_name(ops_name)); ... g_assert(ops != NULL); (where the ops assignment begot the one that I am removing); but this is meaningless now that oc is checked to be non-NULL before ops is assigned (commit 5141e9a23fc, "accel: abort if we fail to load the accelerator plugin", 2022-11-06). Reviewed-by: Alex Bennée Signed-off-by: Paolo Bonzini --- accel/accel-system.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/accel/accel-system.c b/accel/accel-system.c index f6c947dd82..61d689935e 100644 --- a/accel/accel-system.c +++ b/accel/accel-system.c @@ -73,19 +73,17 @@ void accel_system_init_ops_interfaces(AccelClass *ac) g_assert(ac_name != NULL); ops_name = g_strdup_printf("%s" ACCEL_OPS_SUFFIX, ac_name); - ops = ACCEL_OPS_CLASS(module_object_class_by_name(ops_name)); oc = module_object_class_by_name(ops_name); if (!oc) { error_report("fatal: could not load module for type '%s'", ops_name); exit(1); } g_free(ops_name); - ops = ACCEL_OPS_CLASS(oc); /* * all accelerators need to define ops, providing at least a mandatory * non-NULL create_vcpu_thread operation. */ - g_assert(ops != NULL); + ops = ACCEL_OPS_CLASS(oc); if (ops->ops_init) { ops->ops_init(ops); } From 855bdb6c8a60ae20043531dc965fcb1ed171d7d9 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Sun, 29 Sep 2024 04:57:47 -0400 Subject: [PATCH 10/49] i386/cpu: Drop the check of phys_bits in host_cpu_realizefn() The check of cpu->phys_bits to be in range between [32, TARGET_PHYS_ADDR_SPACE_BITS] in host_cpu_realizefn() is duplicated with check in x86_cpu_realizefn(). Since the ckeck in x86_cpu_realizefn() is called later and can cover all the x86 cases. Remove the one in host_cpu_realizefn(). Opportunistically adjust cpu->phys_bits directly in host_cpu_adjust_phys_bits(), which matches more with the function name. Signed-off-by: Xiaoyao Li Reviewed-by: Igor Mammedov Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20240929085747.2023198-1-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini --- target/i386/host-cpu.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c index 8b8bf5afec..03b9d1b169 100644 --- a/target/i386/host-cpu.c +++ b/target/i386/host-cpu.c @@ -42,7 +42,7 @@ static uint32_t host_cpu_phys_bits(void) return host_phys_bits; } -static uint32_t host_cpu_adjust_phys_bits(X86CPU *cpu) +static void host_cpu_adjust_phys_bits(X86CPU *cpu) { uint32_t host_phys_bits = host_cpu_phys_bits(); uint32_t phys_bits = cpu->phys_bits; @@ -66,7 +66,7 @@ static uint32_t host_cpu_adjust_phys_bits(X86CPU *cpu) } } - return phys_bits; + cpu->phys_bits = phys_bits; } bool host_cpu_realizefn(CPUState *cs, Error **errp) @@ -75,17 +75,7 @@ bool host_cpu_realizefn(CPUState *cs, Error **errp) CPUX86State *env = &cpu->env; if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { - uint32_t phys_bits = host_cpu_adjust_phys_bits(cpu); - - if (phys_bits && - (phys_bits > TARGET_PHYS_ADDR_SPACE_BITS || - phys_bits < 32)) { - error_setg(errp, "phys-bits should be between 32 and %u " - " (but is %u)", - TARGET_PHYS_ADDR_SPACE_BITS, phys_bits); - return false; - } - cpu->phys_bits = phys_bits; + host_cpu_adjust_phys_bits(cpu); } return true; } From 845b54efafa5c28040570dcb6d7f8f84d23e37f3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 29 Oct 2024 10:20:46 +0100 Subject: [PATCH 11/49] qom: remove unused function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function has been unused since commit 4fa28f23906 ("ppc/pnv: Instantiate cores separately", 2019-12-17). The idea was that you could use it to build an array of objects via pointer arithmetic, but no one is doing it anymore. Cc: Dr. David Alan Gilbert Reviewed-by: Daniel P. Berrangé Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- include/qom/object.h | 8 -------- qom/object.c | 8 -------- 2 files changed, 16 deletions(-) diff --git a/include/qom/object.h b/include/qom/object.h index 2af9854675..43c135984a 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -2032,14 +2032,6 @@ int object_child_foreach_recursive(Object *obj, */ Object *container_get(Object *root, const char *path); -/** - * object_type_get_instance_size: - * @typename: Name of the Type whose instance_size is required - * - * Returns the instance_size of the given @typename. - */ -size_t object_type_get_instance_size(const char *typename); - /** * object_property_help: * @name: the name of the property diff --git a/qom/object.c b/qom/object.c index 11424cf471..8b26941448 100644 --- a/qom/object.c +++ b/qom/object.c @@ -262,14 +262,6 @@ static size_t type_object_get_align(TypeImpl *ti) return 0; } -size_t object_type_get_instance_size(const char *typename) -{ - TypeImpl *type = type_get_by_name(typename); - - g_assert(type != NULL); - return type_object_get_size(type); -} - static bool type_is_ancestor(TypeImpl *type, TypeImpl *target_type) { assert(target_type); From b801e3cb2a7fd631a219222a8cbe9d554c906070 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 29 Oct 2024 10:45:51 +0100 Subject: [PATCH 12/49] qom: use object_new_with_class when possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A small optimization/code simplification, that also makes it clear that we won't look for a type in a not-loaded-yet module---the module will have been loaded by a call to module_object_class_by_name(), if present. Reviewed-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- hw/core/qdev.c | 5 +++-- qom/object_interfaces.c | 2 +- qom/qom-qmp-cmds.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/hw/core/qdev.c b/hw/core/qdev.c index db36f54d91..2f740fa55e 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -167,10 +167,11 @@ DeviceState *qdev_new(const char *name) DeviceState *qdev_try_new(const char *name) { - if (!module_object_class_by_name(name)) { + ObjectClass *oc = module_object_class_by_name(name); + if (!oc) { return NULL; } - return DEVICE(object_new(name)); + return DEVICE(object_new_with_class(oc)); } static QTAILQ_HEAD(, DeviceListener) device_listeners diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c index e0833c8bfe..1f2aa13306 100644 --- a/qom/object_interfaces.c +++ b/qom/object_interfaces.c @@ -108,7 +108,7 @@ Object *user_creatable_add_type(const char *type, const char *id, } assert(qdict); - obj = object_new(type); + obj = object_new_with_class(klass); object_set_properties_from_qdict(obj, qdict, v, &local_err); if (local_err) { goto out; diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c index e91a235347..69a8e17aa8 100644 --- a/qom/qom-qmp-cmds.c +++ b/qom/qom-qmp-cmds.c @@ -141,7 +141,7 @@ ObjectPropertyInfoList *qmp_device_list_properties(const char *typename, return NULL; } - obj = object_new(typename); + obj = object_new_with_class(klass); object_property_iter_init(&iter, obj); while ((prop = object_property_iter_next(&iter))) { From 144d80f69e9ee614bf7fb06ad586cef610cec0f7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 29 Oct 2024 10:31:30 +0100 Subject: [PATCH 13/49] qom: centralize module-loading functionality Put together the common code of object_initialize() and module_object_class_by_name() into a function that supports Error **. Rename the existing function type_get_by_name() to clarify that it will only look at defined types; this is often okay within object.c to look at the parents, but not outside it. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- qom/object.c | 72 ++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/qom/object.c b/qom/object.c index 8b26941448..29155c6463 100644 --- a/qom/object.c +++ b/qom/object.c @@ -195,7 +195,7 @@ void type_register_static_array(const TypeInfo *infos, int nr_infos) } } -static TypeImpl *type_get_by_name(const char *name) +static TypeImpl *type_get_by_name_noload(const char *name) { if (name == NULL) { return NULL; @@ -204,10 +204,32 @@ static TypeImpl *type_get_by_name(const char *name) return type_table_lookup(name); } +static TypeImpl *type_get_or_load_by_name(const char *name, Error **errp) +{ + TypeImpl *type = type_get_by_name_noload(name); + +#ifdef CONFIG_MODULES + if (!type) { + int rv = module_load_qom(name, errp); + if (rv > 0) { + type = type_get_by_name_noload(name); + } else { + error_prepend(errp, "could not load a module for type '%s'", name); + return NULL; + } + } +#endif + if (!type) { + error_setg(errp, "unknown type '%s'", name); + } + + return type; +} + static TypeImpl *type_get_parent(TypeImpl *type) { if (!type->parent_type && type->parent) { - type->parent_type = type_get_by_name(type->parent); + type->parent_type = type_get_by_name_noload(type->parent); if (!type->parent_type) { fprintf(stderr, "Type '%s' is missing its parent '%s'\n", type->name, type->parent); @@ -363,7 +385,7 @@ static void type_initialize(TypeImpl *ti) } for (i = 0; i < ti->num_interfaces; i++) { - TypeImpl *t = type_get_by_name(ti->interfaces[i].typename); + TypeImpl *t = type_get_by_name_noload(ti->interfaces[i].typename); if (!t) { error_report("missing interface '%s' for object '%s'", ti->interfaces[i].typename, parent->name); @@ -557,23 +579,7 @@ static void object_initialize_with_type(Object *obj, size_t size, TypeImpl *type void object_initialize(void *data, size_t size, const char *typename) { - TypeImpl *type = type_get_by_name(typename); - -#ifdef CONFIG_MODULES - if (!type) { - int rv = module_load_qom(typename, &error_fatal); - if (rv > 0) { - type = type_get_by_name(typename); - } else { - error_report("missing object type '%s'", typename); - exit(1); - } - } -#endif - if (!type) { - error_report("missing object type '%s'", typename); - abort(); - } + TypeImpl *type = type_get_or_load_by_name(typename, &error_fatal); object_initialize_with_type(data, size, type); } @@ -784,7 +790,7 @@ Object *object_new_with_class(ObjectClass *klass) Object *object_new(const char *typename) { - TypeImpl *ti = type_get_by_name(typename); + TypeImpl *ti = type_get_by_name_noload(typename); return object_new_with_type(ti); } @@ -957,7 +963,7 @@ ObjectClass *object_class_dynamic_cast(ObjectClass *class, return class; } - target_type = type_get_by_name(typename); + target_type = type_get_by_name_noload(typename); if (!target_type) { /* target class type unknown, so fail the cast */ return NULL; @@ -1055,7 +1061,7 @@ const char *object_class_get_name(ObjectClass *klass) ObjectClass *object_class_by_name(const char *typename) { - TypeImpl *type = type_get_by_name(typename); + TypeImpl *type = type_get_by_name_noload(typename); if (!type) { return NULL; @@ -1068,21 +1074,15 @@ ObjectClass *object_class_by_name(const char *typename) ObjectClass *module_object_class_by_name(const char *typename) { - ObjectClass *oc; + TypeImpl *type = type_get_or_load_by_name(typename, NULL); - oc = object_class_by_name(typename); -#ifdef CONFIG_MODULES - if (!oc) { - Error *local_err = NULL; - int rv = module_load_qom(typename, &local_err); - if (rv > 0) { - oc = object_class_by_name(typename); - } else if (rv < 0) { - error_report_err(local_err); - } + if (!type) { + return NULL; } -#endif - return oc; + + type_initialize(type); + + return type->class; } ObjectClass *object_class_get_parent(ObjectClass *class) From 02009a12bcd7927a968df7641eaa609b659b3470 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 29 Oct 2024 10:41:58 +0100 Subject: [PATCH 14/49] qom: let object_new use a module if the type is not present object_initialize() can use modules (it was added there because virtio-gpu-device is a child device of virtio-gpu-pci; commit 64f7aece8ea, "object_initialize: try module load", 2020-09-15). object_new() cannot; make things consistent. qdev_new() is now just a simple wrapper that returns DeviceState. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- hw/core/qdev.c | 16 ---------------- qom/object.c | 2 +- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 2f740fa55e..5f13111b77 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -146,22 +146,6 @@ bool qdev_set_parent_bus(DeviceState *dev, BusState *bus, Error **errp) DeviceState *qdev_new(const char *name) { - ObjectClass *oc = object_class_by_name(name); -#ifdef CONFIG_MODULES - if (!oc) { - int rv = module_load_qom(name, &error_fatal); - if (rv > 0) { - oc = object_class_by_name(name); - } else { - error_report("could not find a module for type '%s'", name); - exit(1); - } - } -#endif - if (!oc) { - error_report("unknown type '%s'", name); - abort(); - } return DEVICE(object_new(name)); } diff --git a/qom/object.c b/qom/object.c index 29155c6463..9edc06d391 100644 --- a/qom/object.c +++ b/qom/object.c @@ -790,7 +790,7 @@ Object *object_new_with_class(ObjectClass *klass) Object *object_new(const char *typename) { - TypeImpl *ti = type_get_by_name_noload(typename); + TypeImpl *ti = type_get_or_load_by_name(typename, &error_fatal); return object_new_with_type(ti); } From f41823e059d3460e116d1c29d8577649d48fcf33 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 29 Oct 2024 10:46:38 +0100 Subject: [PATCH 15/49] qom: allow user-creatable classes to be in modules There is no real reason to make user-creatable classes different from other backends in this respect. This also allows modularized character devices to be treated by qom-list-properties just like builtin ones. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- qom/object_interfaces.c | 2 +- qom/qom-qmp-cmds.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c index 1f2aa13306..1a6f29c053 100644 --- a/qom/object_interfaces.c +++ b/qom/object_interfaces.c @@ -90,7 +90,7 @@ Object *user_creatable_add_type(const char *type, const char *id, return NULL; } - klass = object_class_by_name(type); + klass = module_object_class_by_name(type); if (!klass) { error_setg(errp, "invalid object type: %s", type); return NULL; diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c index 69a8e17aa8..46e4562300 100644 --- a/qom/qom-qmp-cmds.c +++ b/qom/qom-qmp-cmds.c @@ -186,7 +186,7 @@ ObjectPropertyInfoList *qmp_qom_list_properties(const char *typename, ObjectPropertyIterator iter; ObjectPropertyInfoList *prop_list = NULL; - klass = object_class_by_name(typename); + klass = module_object_class_by_name(typename); if (klass == NULL) { error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, "Class '%s' not found", typename); From 9c882ad4dc96f658ff9f92b88b3749d0398e6fa2 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 24 Oct 2024 17:18:19 -0500 Subject: [PATCH 16/49] target/i386: Fix minor typo in NO_NESTED_DATA_BP feature bit Rename CPUID_8000_0021_EAX_No_NESTED_DATA_BP to CPUID_8000_0021_EAX_NO_NESTED_DATA_BP. No functional change intended. Signed-off-by: Babu Moger Link: https://lore.kernel.org/r/a6749acd125670d3930f4ca31736a91b1d965f2f.1729807947.git.babu.moger@amd.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 2 +- target/i386/cpu.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 1ff1af032e..94faff83cd 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5226,7 +5226,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD, .features[FEAT_8000_0021_EAX] = - CPUID_8000_0021_EAX_No_NESTED_DATA_BP | + CPUID_8000_0021_EAX_NO_NESTED_DATA_BP | CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | CPUID_8000_0021_EAX_AUTO_IBRS, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 74886d1580..9eb45faa65 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1014,7 +1014,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) /* Processor ignores nested data breakpoints */ -#define CPUID_8000_0021_EAX_No_NESTED_DATA_BP (1U << 0) +#define CPUID_8000_0021_EAX_NO_NESTED_DATA_BP (1U << 0) /* LFENCE is always serializing */ #define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) /* Null Selector Clears Base */ From 209b0ac12074341d0093985eb9ad3e7edb252ce5 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 24 Oct 2024 17:18:21 -0500 Subject: [PATCH 17/49] target/i386: Add PerfMonV2 feature bit CPUID leaf 0x80000022, i.e. ExtPerfMonAndDbg, advertises new performance monitoring features for AMD processors. Bit 0 of EAX indicates support for Performance Monitoring Version 2 (PerfMonV2) features. If found to be set during PMU initialization, the EBX bits can be used to determine the number of available counters for different PMUs. It also denotes the availability of global control and status registers. Add the required CPUID feature word and feature bit to allow guests to make use of the PerfMonV2 features. Signed-off-by: Sandipan Das Signed-off-by: Babu Moger Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/a96f00ee2637674c63c61e9fc4dee343ea818053.1729807947.git.babu.moger@amd.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 26 ++++++++++++++++++++++++++ target/i386/cpu.h | 4 ++++ 2 files changed, 30 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 94faff83cd..846d3a9ef9 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1227,6 +1227,22 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .tcg_features = 0, .unmigratable_flags = 0, }, + [FEAT_8000_0022_EAX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + "perfmon-v2", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { .eax = 0x80000022, .reg = R_EAX, }, + .tcg_features = 0, + .unmigratable_flags = 0, + }, [FEAT_XSAVE] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -7010,6 +7026,16 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *edx = 0; } break; + case 0x80000022: + *eax = *ebx = *ecx = *edx = 0; + /* AMD Extended Performance Monitoring and Debug */ + if (kvm_enabled() && cpu->enable_pmu && + (env->features[FEAT_8000_0022_EAX] & CPUID_8000_0022_EAX_PERFMON_V2)) { + *eax |= CPUID_8000_0022_EAX_PERFMON_V2; + *ebx |= kvm_arch_get_supported_cpuid(cs->kvm_state, index, count, + R_EBX) & 0xf; + } + break; case 0xC0000000: *eax = env->cpuid_xlevel2; *ebx = 0; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 9eb45faa65..e0dea1ba54 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -634,6 +634,7 @@ typedef enum FeatureWord { FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */ FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */ + FEAT_8000_0022_EAX, /* CPUID[8000_0022].EAX */ FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */ FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */ FEAT_KVM_HINTS, /* CPUID[4000_0001].EDX */ @@ -1022,6 +1023,9 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* Automatic IBRS */ #define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) +/* Performance Monitoring Version 2 */ +#define CPUID_8000_0022_EAX_PERFMON_V2 (1U << 0) + #define CPUID_XSAVE_XSAVEOPT (1U << 0) #define CPUID_XSAVE_XSAVEC (1U << 1) #define CPUID_XSAVE_XGETBV1 (1U << 2) From 2ec282b8eaaddf5c136f7566b5f61d80288a2065 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 24 Oct 2024 17:18:23 -0500 Subject: [PATCH 18/49] target/i386: Expose bits related to SRSO vulnerability Add following bits related Speculative Return Stack Overflow (SRSO). Guests can make use of these bits if supported. These bits are reported via CPUID Fn8000_0021_EAX. =================================================================== Bit Feature Description =================================================================== 27 SBPB Indicates support for the Selective Branch Predictor Barrier. 28 IBPB_BRTYPE MSR_PRED_CMD[IBPB] flushes all branch type predictions. 29 SRSO_NO Not vulnerable to SRSO. 30 SRSO_USER_KERNEL_NO Not vulnerable to SRSO at the user-kernel boundary. =================================================================== Link: https://www.amd.com/content/dam/amd/en/documents/corporate/cr/speculative-return-stack-overflow-whitepaper.pdf Link: https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/programmer-references/57238.zip Signed-off-by: Babu Moger Link: https://lore.kernel.org/r/dadbd70c38f4e165418d193918a3747bd715c5f4.1729807947.git.babu.moger@amd.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 2 +- target/i386/cpu.h | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 846d3a9ef9..8d4d3d9e3d 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1221,7 +1221,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "sbpb", - "ibpb-brtype", NULL, NULL, NULL, + "ibpb-brtype", "srso-no", "srso-user-kernel-no", NULL, }, .cpuid = { .eax = 0x80000021, .reg = R_EAX, }, .tcg_features = 0, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index e0dea1ba54..792518b62d 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1015,13 +1015,21 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) /* Processor ignores nested data breakpoints */ -#define CPUID_8000_0021_EAX_NO_NESTED_DATA_BP (1U << 0) +#define CPUID_8000_0021_EAX_NO_NESTED_DATA_BP (1U << 0) /* LFENCE is always serializing */ #define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) /* Null Selector Clears Base */ -#define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) +#define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) /* Automatic IBRS */ -#define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) +#define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) +/* Selective Branch Predictor Barrier */ +#define CPUID_8000_0021_EAX_SBPB (1U << 27) +/* IBPB includes branch type prediction flushing */ +#define CPUID_8000_0021_EAX_IBPB_BRTYPE (1U << 28) +/* Not vulnerable to Speculative Return Stack Overflow */ +#define CPUID_8000_0021_EAX_SRSO_NO (1U << 29) +/* Not vulnerable to SRSO at the user-kernel boundary */ +#define CPUID_8000_0021_EAX_SRSO_USER_KERNEL_NO (1U << 30) /* Performance Monitoring Version 2 */ #define CPUID_8000_0022_EAX_PERFMON_V2 (1U << 0) From 9c07a7af5da66f11a97f56ad1b21f3b12e138a67 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 24 Oct 2024 17:18:24 -0500 Subject: [PATCH 19/49] target/i386: Expose new feature bits in CPUID 8000_0021_EAX/EBX Newer AMD CPUs support ERAPS (Enhanced Return Address Prediction Security) feature that enables the auto-clear of RSB entries on a TLB flush, context switches and VMEXITs. The number of default RSP entries is reflected in RapSize. Add the feature bit and feature word to support these features. CPUID_Fn80000021_EAX Bits Feature Description 24 ERAPS: Indicates support for enhanced return address predictor security. CPUID_Fn80000021_EBX Bits Feature Description 31-24 Reserved 23:16 RapSize: Return Address Predictor size. RapSize x 8 is the minimum number of CALL instructions software needs to execute to flush the RAP. 15-00 MicrocodePatchSize. Read-only. Reports the size of the Microcode patch in 16-byte multiples. If 0, the size of the patch is at most 5568 (15C0h) bytes. Link: https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/programmer-references/57238.zip Signed-off-by: Babu Moger Link: https://lore.kernel.org/r/7c62371fe60af1e9bbd853f5f8e949bf2d908bd0.1729807947.git.babu.moger@amd.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 11 +++++++++-- target/i386/cpu.h | 9 +++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 8d4d3d9e3d..5886b44fcf 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1220,13 +1220,19 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "sbpb", + "eraps", NULL, NULL, "sbpb", "ibpb-brtype", "srso-no", "srso-user-kernel-no", NULL, }, .cpuid = { .eax = 0x80000021, .reg = R_EAX, }, .tcg_features = 0, .unmigratable_flags = 0, }, + [FEAT_8000_0021_EBX] = { + .type = CPUID_FEATURE_WORD, + .cpuid = { .eax = 0x80000021, .reg = R_EBX, }, + .tcg_features = 0, + .unmigratable_flags = 0, + }, [FEAT_8000_0022_EAX] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -7069,8 +7075,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } break; case 0x80000021: + *eax = *ebx = *ecx = *edx = 0; *eax = env->features[FEAT_8000_0021_EAX]; - *ebx = *ecx = *edx = 0; + *ebx = env->features[FEAT_8000_0021_EBX]; break; default: /* reserved values: zero */ diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 792518b62d..e2e10f55b2 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -634,6 +634,7 @@ typedef enum FeatureWord { FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */ FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */ + FEAT_8000_0021_EBX, /* CPUID[8000_0021].EBX */ FEAT_8000_0022_EAX, /* CPUID[8000_0022].EAX */ FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */ FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */ @@ -1022,6 +1023,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) /* Automatic IBRS */ #define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) +/* Enhanced Return Address Predictor Scurity */ +#define CPUID_8000_0021_EAX_ERAPS (1U << 24) /* Selective Branch Predictor Barrier */ #define CPUID_8000_0021_EAX_SBPB (1U << 27) /* IBPB includes branch type prediction flushing */ @@ -1031,6 +1034,12 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* Not vulnerable to SRSO at the user-kernel boundary */ #define CPUID_8000_0021_EAX_SRSO_USER_KERNEL_NO (1U << 30) +/* + * Return Address Predictor size. RapSize x 8 is the minimum number of + * CALL instructions software needs to execute to flush the RAP. + */ +#define CPUID_8000_0021_EBX_RAPSIZE (8U << 16) + /* Performance Monitoring Version 2 */ #define CPUID_8000_0022_EAX_PERFMON_V2 (1U << 0) From 7cac7aa7040a823c585f1578a38f28e83c8bf3e1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 30 Oct 2024 16:31:26 +0100 Subject: [PATCH 20/49] target/i386/hvf: fix handling of XSAVE-related CPUID bits The call to xgetbv() is passing the ecx value for cpuid function 0xD, index 0. The xgetbv call thus returns false (OSXSAVE is bit 27, which is well out of the range of CPUID[0xD,0].ECX) and eax is not modified. While fixing it, cache the whole computation of supported XCR0 bits since it will be used for more than just CPUID leaf 0xD. Furthermore, unsupported subleafs of CPUID 0xD (including all those corresponding to zero bits in host's XCR0) must be hidden; if OSXSAVE is not set at all, the whole of CPUID leaf 0xD plus the XSAVE bit must be hidden. Finally, unconditionally drop XSTATE_BNDREGS_MASK and XSTATE_BNDCSR_MASK; real hardware will only show them if the MPX bit is set in CPUID; this is never the case for hvf_get_supported_cpuid() because QEMU's Hypervisor.framework support does not handle the VMX fields related to MPX (even in the unlikely possibility that the host has MPX enabled). So hide those bits in the new cache_host_xcr0(). Cc: Phil Dennis-Jordan Signed-off-by: Paolo Bonzini --- host/include/i386/host/cpuinfo.h | 1 + target/i386/hvf/x86_cpuid.c | 56 +++++++++++++++++++------------- util/cpuinfo-i386.c | 1 + 3 files changed, 35 insertions(+), 23 deletions(-) diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h index 81771733ea..9541a64da6 100644 --- a/host/include/i386/host/cpuinfo.h +++ b/host/include/i386/host/cpuinfo.h @@ -9,6 +9,7 @@ /* Digested version of */ #define CPUINFO_ALWAYS (1u << 0) /* so cpuinfo is nonzero */ +#define CPUINFO_OSXSAVE (1u << 1) #define CPUINFO_MOVBE (1u << 2) #define CPUINFO_LZCNT (1u << 3) #define CPUINFO_POPCNT (1u << 4) diff --git a/target/i386/hvf/x86_cpuid.c b/target/i386/hvf/x86_cpuid.c index e56cd8411b..3a116548a3 100644 --- a/target/i386/hvf/x86_cpuid.c +++ b/target/i386/hvf/x86_cpuid.c @@ -21,28 +21,38 @@ */ #include "qemu/osdep.h" +#include "qemu/cpuid.h" +#include "host/cpuinfo.h" #include "cpu.h" #include "x86.h" #include "vmx.h" #include "sysemu/hvf.h" #include "hvf-i386.h" -static bool xgetbv(uint32_t cpuid_ecx, uint32_t idx, uint64_t *xcr) +static bool cached_xcr0; +static uint64_t supported_xcr0; + +static void cache_host_xcr0() { - uint32_t xcrl, xcrh; - - if (cpuid_ecx & CPUID_EXT_OSXSAVE) { - /* - * The xgetbv instruction is not available to older versions of - * the assembler, so we encode the instruction manually. - */ - asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcrl), "=d" (xcrh) : "c" (idx)); - - *xcr = (((uint64_t)xcrh) << 32) | xcrl; - return true; + if (cached_xcr0) { + return; } - return false; + if (cpuinfo & CPUINFO_OSXSAVE) { + uint64_t host_xcr0 = xgetbv_low(0); + + /* Only show xcr0 bits corresponding to usable features. */ + supported_xcr0 = host_xcr0 & (XSTATE_FP_MASK | + XSTATE_SSE_MASK | XSTATE_YMM_MASK | + XSTATE_OPMASK_MASK | XSTATE_ZMM_Hi256_MASK | + XSTATE_Hi16_ZMM_MASK); + if ((supported_xcr0 & (XSTATE_FP_MASK | XSTATE_SSE_MASK)) != + (XSTATE_FP_MASK | XSTATE_SSE_MASK)) { + supported_xcr0 = 0; + } + } + + cached_xcr0 = true; } uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, @@ -51,6 +61,7 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, uint64_t cap; uint32_t eax, ebx, ecx, edx; + cache_host_xcr0(); host_cpuid(func, idx, &eax, &ebx, &ecx, &edx); switch (func) { @@ -66,7 +77,8 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, ecx &= CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_MOVBE | - CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE | + CPUID_EXT_POPCNT | CPUID_EXT_AES | + (supported_xcr0 ? CPUID_EXT_XSAVE : 0) | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND; ecx |= CPUID_EXT_HYPERVISOR; break; @@ -107,16 +119,14 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, eax = 0; break; case 0xD: + if (!supported_xcr0 || + (idx > 1 && !(supported_xcr0 & (1 << idx)))) { + eax = ebx = ecx = edx = 0; + break; + } + if (idx == 0) { - uint64_t host_xcr0; - if (xgetbv(ecx, 0, &host_xcr0)) { - uint64_t supp_xcr0 = host_xcr0 & (XSTATE_FP_MASK | - XSTATE_SSE_MASK | XSTATE_YMM_MASK | - XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK | - XSTATE_OPMASK_MASK | XSTATE_ZMM_Hi256_MASK | - XSTATE_Hi16_ZMM_MASK); - eax &= supp_xcr0; - } + eax = supported_xcr0; } else if (idx == 1) { hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2, &cap); eax &= CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1; diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c index 90f92a42dc..c8c8a1b370 100644 --- a/util/cpuinfo-i386.c +++ b/util/cpuinfo-i386.c @@ -35,6 +35,7 @@ unsigned __attribute__((constructor)) cpuinfo_init(void) __cpuid(1, a, b, c, d); info |= (d & bit_SSE2 ? CPUINFO_SSE2 : 0); + info |= (c & bit_OSXSAVE ? CPUINFO_OSXSAVE : 0); info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0); info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0); info |= (c & bit_PCLMUL ? CPUINFO_PCLMUL : 0); From 1ac32dc8eaa23e913be6afc175b2b43bf2aa5fac Mon Sep 17 00:00:00 2001 From: Dorjoy Chowdhury Date: Wed, 9 Oct 2024 03:17:22 +0600 Subject: [PATCH 21/49] tests/lcitool: Update libvirt-ci and add libcbor dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit libcbor dependecy is necessary for adding virtio-nsm and nitro-enclave machine support in the following commits. libvirt-ci has already been updated with the dependency upstream and this commit updates libvirt-ci submodule in QEMU to latest upstream. Also the libcbor dependency has been added to tests/lcitool/projects/qemu.yml. Reviewed-by: Daniel P. Berrangé Signed-off-by: Dorjoy Chowdhury Reviewed-by: Alexander Graf Link: https://lore.kernel.org/r/20241008211727.49088-2-dorjoychy111@gmail.com Signed-off-by: Paolo Bonzini --- .gitlab-ci.d/cirrus/macos-14.vars | 2 +- .gitlab-ci.d/cirrus/macos-15.vars | 2 +- scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml | 1 + scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml | 1 + tests/docker/dockerfiles/alpine.docker | 1 + tests/docker/dockerfiles/debian-amd64-cross.docker | 1 + tests/docker/dockerfiles/debian-arm64-cross.docker | 1 + tests/docker/dockerfiles/debian-armhf-cross.docker | 1 + tests/docker/dockerfiles/debian-i686-cross.docker | 1 + tests/docker/dockerfiles/debian-mips64el-cross.docker | 1 + tests/docker/dockerfiles/debian-mipsel-cross.docker | 1 + tests/docker/dockerfiles/debian-ppc64el-cross.docker | 1 + tests/docker/dockerfiles/debian-s390x-cross.docker | 1 + tests/docker/dockerfiles/debian.docker | 1 + tests/docker/dockerfiles/fedora-rust-nightly.docker | 1 + tests/docker/dockerfiles/fedora.docker | 1 + tests/docker/dockerfiles/opensuse-leap.docker | 1 + tests/docker/dockerfiles/ubuntu2204.docker | 1 + tests/lcitool/projects/qemu.yml | 1 + 19 files changed, 19 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.d/cirrus/macos-14.vars b/.gitlab-ci.d/cirrus/macos-14.vars index 1fd03d68b9..25dff322e6 100644 --- a/.gitlab-ci.d/cirrus/macos-14.vars +++ b/.gitlab-ci.d/cirrus/macos-14.vars @@ -11,6 +11,6 @@ MAKE='/opt/homebrew/bin/gmake' NINJA='/opt/homebrew/bin/ninja' PACKAGING_COMMAND='brew' PIP3='/opt/homebrew/bin/pip3' -PKGS='bash bc bindgen bison bzip2 capstone ccache cmocka ctags curl dbus diffutils dtc flex gcovr gettext git glib gnu-sed gnutls gtk+3 gtk-vnc jemalloc jpeg-turbo json-c libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb llvm lzo make meson mtools ncurses nettle ninja pixman pkg-config python3 rpm2cpio rust sdl2 sdl2_image snappy socat sparse spice-protocol swtpm tesseract usbredir vde vte3 xorriso zlib zstd' +PKGS='bash bc bindgen bison bzip2 capstone ccache cmocka ctags curl dbus diffutils dtc flex gcovr gettext git glib gnu-sed gnutls gtk+3 gtk-vnc jemalloc jpeg-turbo json-c libcbor libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb llvm lzo make meson mtools ncurses nettle ninja pixman pkg-config python3 rpm2cpio rust sdl2 sdl2_image snappy socat sparse spice-protocol swtpm tesseract usbredir vde vte3 xorriso zlib zstd' PYPI_PKGS='PyYAML numpy pillow sphinx sphinx-rtd-theme tomli' PYTHON='/opt/homebrew/bin/python3' diff --git a/.gitlab-ci.d/cirrus/macos-15.vars b/.gitlab-ci.d/cirrus/macos-15.vars index b6b6d71d47..a72e9ed024 100644 --- a/.gitlab-ci.d/cirrus/macos-15.vars +++ b/.gitlab-ci.d/cirrus/macos-15.vars @@ -11,6 +11,6 @@ MAKE='/opt/homebrew/bin/gmake' NINJA='/opt/homebrew/bin/ninja' PACKAGING_COMMAND='brew' PIP3='/opt/homebrew/bin/pip3' -PKGS='bash bc bindgen bison bzip2 capstone ccache cmocka ctags curl dbus diffutils dtc flex gcovr gettext git glib gnu-sed gnutls gtk+3 gtk-vnc jemalloc jpeg-turbo json-c libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb llvm lzo make meson mtools ncurses nettle ninja pixman pkg-config python3 rpm2cpio rust sdl2 sdl2_image snappy socat sparse spice-protocol swtpm tesseract usbredir vde vte3 xorriso zlib zstd' +PKGS='bash bc bindgen bison bzip2 capstone ccache cmocka ctags curl dbus diffutils dtc flex gcovr gettext git glib gnu-sed gnutls gtk+3 gtk-vnc jemalloc jpeg-turbo json-c libcbor libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb llvm lzo make meson mtools ncurses nettle ninja pixman pkg-config python3 rpm2cpio rust sdl2 sdl2_image snappy socat sparse spice-protocol swtpm tesseract usbredir vde vte3 xorriso zlib zstd' PYPI_PKGS='PyYAML numpy pillow sphinx sphinx-rtd-theme tomli' PYTHON='/opt/homebrew/bin/python3' diff --git a/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml b/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml index dd89ba1b3a..f4647940f8 100644 --- a/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml +++ b/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml @@ -36,6 +36,7 @@ packages: - libcacard-dev - libcap-ng-dev - libcapstone-dev + - libcbor-dev - libcmocka-dev - libcurl4-gnutls-dev - libdaxctl-dev diff --git a/scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml b/scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml index 74f14d8d0f..fe2995c19b 100644 --- a/scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml +++ b/scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml @@ -36,6 +36,7 @@ packages: - libcacard-dev - libcap-ng-dev - libcapstone-dev + - libcbor-dev - libcmocka-dev - libcurl4-gnutls-dev - libdaxctl-dev diff --git a/tests/docker/dockerfiles/alpine.docker b/tests/docker/dockerfiles/alpine.docker index 9ce7b5280c..f87c40fbfe 100644 --- a/tests/docker/dockerfiles/alpine.docker +++ b/tests/docker/dockerfiles/alpine.docker @@ -45,6 +45,7 @@ RUN apk update && \ libaio-dev \ libbpf-dev \ libcap-ng-dev \ + libcbor-dev \ libdrm-dev \ libepoxy-dev \ libffi-dev \ diff --git a/tests/docker/dockerfiles/debian-amd64-cross.docker b/tests/docker/dockerfiles/debian-amd64-cross.docker index b86949b2e6..d3b58c3e90 100644 --- a/tests/docker/dockerfiles/debian-amd64-cross.docker +++ b/tests/docker/dockerfiles/debian-amd64-cross.docker @@ -94,6 +94,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ libcacard-dev:amd64 \ libcap-ng-dev:amd64 \ libcapstone-dev:amd64 \ + libcbor-dev:amd64 \ libcmocka-dev:amd64 \ libcurl4-gnutls-dev:amd64 \ libdaxctl-dev:amd64 \ diff --git a/tests/docker/dockerfiles/debian-arm64-cross.docker b/tests/docker/dockerfiles/debian-arm64-cross.docker index 6878979112..4a6785bf5b 100644 --- a/tests/docker/dockerfiles/debian-arm64-cross.docker +++ b/tests/docker/dockerfiles/debian-arm64-cross.docker @@ -94,6 +94,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ libcacard-dev:arm64 \ libcap-ng-dev:arm64 \ libcapstone-dev:arm64 \ + libcbor-dev:arm64 \ libcmocka-dev:arm64 \ libcurl4-gnutls-dev:arm64 \ libdaxctl-dev:arm64 \ diff --git a/tests/docker/dockerfiles/debian-armhf-cross.docker b/tests/docker/dockerfiles/debian-armhf-cross.docker index e38b8fcc41..52e8831326 100644 --- a/tests/docker/dockerfiles/debian-armhf-cross.docker +++ b/tests/docker/dockerfiles/debian-armhf-cross.docker @@ -94,6 +94,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ libcacard-dev:armhf \ libcap-ng-dev:armhf \ libcapstone-dev:armhf \ + libcbor-dev:armhf \ libcmocka-dev:armhf \ libcurl4-gnutls-dev:armhf \ libdaxctl-dev:armhf \ diff --git a/tests/docker/dockerfiles/debian-i686-cross.docker b/tests/docker/dockerfiles/debian-i686-cross.docker index b4d0618739..1326e8a5ca 100644 --- a/tests/docker/dockerfiles/debian-i686-cross.docker +++ b/tests/docker/dockerfiles/debian-i686-cross.docker @@ -94,6 +94,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ libcacard-dev:i386 \ libcap-ng-dev:i386 \ libcapstone-dev:i386 \ + libcbor-dev:i386 \ libcmocka-dev:i386 \ libcurl4-gnutls-dev:i386 \ libdaxctl-dev:i386 \ diff --git a/tests/docker/dockerfiles/debian-mips64el-cross.docker b/tests/docker/dockerfiles/debian-mips64el-cross.docker index 4f6c816b3f..0ba542112e 100644 --- a/tests/docker/dockerfiles/debian-mips64el-cross.docker +++ b/tests/docker/dockerfiles/debian-mips64el-cross.docker @@ -93,6 +93,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ libcacard-dev:mips64el \ libcap-ng-dev:mips64el \ libcapstone-dev:mips64el \ + libcbor-dev:mips64el \ libcmocka-dev:mips64el \ libcurl4-gnutls-dev:mips64el \ libdaxctl-dev:mips64el \ diff --git a/tests/docker/dockerfiles/debian-mipsel-cross.docker b/tests/docker/dockerfiles/debian-mipsel-cross.docker index a238526b4c..59b5d2655b 100644 --- a/tests/docker/dockerfiles/debian-mipsel-cross.docker +++ b/tests/docker/dockerfiles/debian-mipsel-cross.docker @@ -93,6 +93,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ libcacard-dev:mipsel \ libcap-ng-dev:mipsel \ libcapstone-dev:mipsel \ + libcbor-dev:mipsel \ libcmocka-dev:mipsel \ libcurl4-gnutls-dev:mipsel \ libdaxctl-dev:mipsel \ diff --git a/tests/docker/dockerfiles/debian-ppc64el-cross.docker b/tests/docker/dockerfiles/debian-ppc64el-cross.docker index b6c6f5b7b0..8680b35c5a 100644 --- a/tests/docker/dockerfiles/debian-ppc64el-cross.docker +++ b/tests/docker/dockerfiles/debian-ppc64el-cross.docker @@ -94,6 +94,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ libcacard-dev:ppc64el \ libcap-ng-dev:ppc64el \ libcapstone-dev:ppc64el \ + libcbor-dev:ppc64el \ libcmocka-dev:ppc64el \ libcurl4-gnutls-dev:ppc64el \ libdaxctl-dev:ppc64el \ diff --git a/tests/docker/dockerfiles/debian-s390x-cross.docker b/tests/docker/dockerfiles/debian-s390x-cross.docker index 14f169984e..384a2b425e 100644 --- a/tests/docker/dockerfiles/debian-s390x-cross.docker +++ b/tests/docker/dockerfiles/debian-s390x-cross.docker @@ -94,6 +94,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ libcacard-dev:s390x \ libcap-ng-dev:s390x \ libcapstone-dev:s390x \ + libcbor-dev:s390x \ libcmocka-dev:s390x \ libcurl4-gnutls-dev:s390x \ libdaxctl-dev:s390x \ diff --git a/tests/docker/dockerfiles/debian.docker b/tests/docker/dockerfiles/debian.docker index 22b064cbf0..505330a9e2 100644 --- a/tests/docker/dockerfiles/debian.docker +++ b/tests/docker/dockerfiles/debian.docker @@ -42,6 +42,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ libcacard-dev \ libcap-ng-dev \ libcapstone-dev \ + libcbor-dev \ libcmocka-dev \ libcurl4-gnutls-dev \ libdaxctl-dev \ diff --git a/tests/docker/dockerfiles/fedora-rust-nightly.docker b/tests/docker/dockerfiles/fedora-rust-nightly.docker index c2029cc4c6..9180c8b522 100644 --- a/tests/docker/dockerfiles/fedora-rust-nightly.docker +++ b/tests/docker/dockerfiles/fedora-rust-nightly.docker @@ -62,6 +62,7 @@ exec "$@"\n' > /usr/bin/nosync && \ libbpf-devel \ libcacard-devel \ libcap-ng-devel \ + libcbor-devel \ libcmocka-devel \ libcurl-devel \ libdrm-devel \ diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker index 1980421f6a..b64399af66 100644 --- a/tests/docker/dockerfiles/fedora.docker +++ b/tests/docker/dockerfiles/fedora.docker @@ -62,6 +62,7 @@ exec "$@"\n' > /usr/bin/nosync && \ libbpf-devel \ libcacard-devel \ libcap-ng-devel \ + libcbor-devel \ libcmocka-devel \ libcurl-devel \ libdrm-devel \ diff --git a/tests/docker/dockerfiles/opensuse-leap.docker b/tests/docker/dockerfiles/opensuse-leap.docker index 53b9461030..4d5fb3e3a1 100644 --- a/tests/docker/dockerfiles/opensuse-leap.docker +++ b/tests/docker/dockerfiles/opensuse-leap.docker @@ -47,6 +47,7 @@ RUN zypper update -y && \ libbz2-devel \ libcacard-devel \ libcap-ng-devel \ + libcbor-devel \ libcmocka-devel \ libcurl-devel \ libdrm-devel \ diff --git a/tests/docker/dockerfiles/ubuntu2204.docker b/tests/docker/dockerfiles/ubuntu2204.docker index ce3aa39d4f..94697bd036 100644 --- a/tests/docker/dockerfiles/ubuntu2204.docker +++ b/tests/docker/dockerfiles/ubuntu2204.docker @@ -42,6 +42,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ libcacard-dev \ libcap-ng-dev \ libcapstone-dev \ + libcbor-dev \ libcmocka-dev \ libcurl4-gnutls-dev \ libdaxctl-dev \ diff --git a/tests/lcitool/projects/qemu.yml b/tests/lcitool/projects/qemu.yml index 6852918642..80bcac0902 100644 --- a/tests/lcitool/projects/qemu.yml +++ b/tests/lcitool/projects/qemu.yml @@ -43,6 +43,7 @@ packages: - libc-static - libcacard - libcap-ng + - libcbor - libcurl - libdrm - libepoxy From bb154e3e0cc715605d915f0761e0cd7a4e64d1bd Mon Sep 17 00:00:00 2001 From: Dorjoy Chowdhury Date: Wed, 9 Oct 2024 03:17:23 +0600 Subject: [PATCH 22/49] device/virtio-nsm: Support for Nitro Secure Module device Nitro Secure Module (NSM)[1] device is used in AWS Nitro Enclaves[2] for stripped down TPM functionality like cryptographic attestation. The requests to and responses from NSM device are CBOR[3] encoded. This commit adds support for NSM device in QEMU. Although related to AWS Nitro Enclaves, the virito-nsm device is independent and can be used in other machine types as well. The libcbor[4] library has been used for the CBOR encoding and decoding functionalities. [1] https://lists.oasis-open.org/archives/virtio-comment/202310/msg00387.html [2] https://docs.aws.amazon.com/enclaves/latest/user/nitro-enclave.html [3] http://cbor.io/ [4] https://libcbor.readthedocs.io/en/latest/ Signed-off-by: Dorjoy Chowdhury Reviewed-by: Alexander Graf Link: https://lore.kernel.org/r/20241008211727.49088-3-dorjoychy111@gmail.com Signed-off-by: Paolo Bonzini --- Kconfig.host | 3 + MAINTAINERS | 10 + hw/virtio/Kconfig | 4 + hw/virtio/cbor-helpers.c | 321 ++++++ hw/virtio/meson.build | 2 + hw/virtio/virtio-nsm-pci.c | 73 ++ hw/virtio/virtio-nsm.c | 1732 ++++++++++++++++++++++++++++++ include/hw/virtio/cbor-helpers.h | 45 + include/hw/virtio/virtio-nsm.h | 49 + meson.build | 8 + meson_options.txt | 2 + scripts/meson-buildoptions.sh | 3 + 12 files changed, 2252 insertions(+) create mode 100644 hw/virtio/cbor-helpers.c create mode 100644 hw/virtio/virtio-nsm-pci.c create mode 100644 hw/virtio/virtio-nsm.c create mode 100644 include/hw/virtio/cbor-helpers.h create mode 100644 include/hw/virtio/virtio-nsm.h diff --git a/Kconfig.host b/Kconfig.host index 4ade7899d6..ccd84be60f 100644 --- a/Kconfig.host +++ b/Kconfig.host @@ -5,6 +5,9 @@ config LINUX bool +config LIBCBOR + bool + config OPENGL bool diff --git a/MAINTAINERS b/MAINTAINERS index 58c71f753a..649bd5ab1a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2293,6 +2293,16 @@ F: include/sysemu/rng*.h F: backends/rng*.c F: tests/qtest/virtio-rng-test.c +virtio-nsm +M: Alexander Graf +M: Dorjoy Chowdhury +S: Maintained +F: hw/virtio/cbor-helpers.c +F: hw/virtio/virtio-nsm.c +F: hw/virtio/virtio-nsm-pci.c +F: include/hw/virtio/cbor-helpers.h +F: include/hw/virtio/virtio-nsm.h + vhost-user-stubs M: Alex Bennée S: Maintained diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index 17595ff350..70c77e183d 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -6,6 +6,10 @@ config VIRTIO_RNG default y depends on VIRTIO +config VIRTIO_NSM + bool + depends on LIBCBOR && VIRTIO + config VIRTIO_IOMMU bool default y diff --git a/hw/virtio/cbor-helpers.c b/hw/virtio/cbor-helpers.c new file mode 100644 index 0000000000..49f55df399 --- /dev/null +++ b/hw/virtio/cbor-helpers.c @@ -0,0 +1,321 @@ +/* + * QEMU CBOR helpers + * + * Copyright (c) 2024 Dorjoy Chowdhury + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#include "hw/virtio/cbor-helpers.h" + +bool qemu_cbor_map_add(cbor_item_t *map, cbor_item_t *key, cbor_item_t *value) +{ + bool success = false; + struct cbor_pair pair = (struct cbor_pair) { + .key = cbor_move(key), + .value = cbor_move(value) + }; + + success = cbor_map_add(map, pair); + if (!success) { + cbor_incref(pair.key); + cbor_incref(pair.value); + } + + return success; +} + +bool qemu_cbor_array_push(cbor_item_t *array, cbor_item_t *value) +{ + bool success = false; + + success = cbor_array_push(array, cbor_move(value)); + if (!success) { + cbor_incref(value); + } + + return success; +} + +bool qemu_cbor_add_bool_to_map(cbor_item_t *map, const char *key, bool value) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = cbor_build_string(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_build_bool(value); + if (!value_cbor) { + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +bool qemu_cbor_add_uint8_to_map(cbor_item_t *map, const char *key, + uint8_t value) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = cbor_build_string(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_build_uint8(value); + if (!value_cbor) { + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +bool qemu_cbor_add_map_to_map(cbor_item_t *map, const char *key, + size_t nested_map_size, + cbor_item_t **nested_map) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = cbor_build_string(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_new_definite_map(nested_map_size); + if (!value_cbor) { + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + *nested_map = value_cbor; + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +bool qemu_cbor_add_bytestring_to_map(cbor_item_t *map, const char *key, + uint8_t *arr, size_t len) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = cbor_build_string(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_build_bytestring(arr, len); + if (!value_cbor) { + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +bool qemu_cbor_add_null_to_map(cbor_item_t *map, const char *key) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = cbor_build_string(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_new_null(); + if (!value_cbor) { + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +bool qemu_cbor_add_string_to_map(cbor_item_t *map, const char *key, + const char *value) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = cbor_build_string(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_build_string(value); + if (!value_cbor) { + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +bool qemu_cbor_add_uint8_array_to_map(cbor_item_t *map, const char *key, + uint8_t *arr, size_t len) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = cbor_build_string(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_new_definite_array(len); + if (!value_cbor) { + goto cleanup; + } + + for (int i = 0; i < len; ++i) { + cbor_item_t *tmp = cbor_build_uint8(arr[i]); + if (!tmp) { + goto cleanup; + } + if (!qemu_cbor_array_push(value_cbor, tmp)) { + cbor_decref(&tmp); + goto cleanup; + } + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +bool qemu_cbor_add_uint8_key_bytestring_to_map(cbor_item_t *map, uint8_t key, + uint8_t *buf, size_t len) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = cbor_build_uint8(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_build_bytestring(buf, len); + if (!value_cbor) { + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +bool qemu_cbor_add_uint64_to_map(cbor_item_t *map, const char *key, + uint64_t value) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = cbor_build_string(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_build_uint64(value); + if (!value_cbor) { + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 621fc65454..a5f9f7999d 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -54,6 +54,7 @@ specific_virtio_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem.c specific_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) +specific_virtio_ss.add(when: 'CONFIG_VIRTIO_NSM', if_true: [files('virtio-nsm.c', 'cbor-helpers.c'), libcbor]) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi.c')) specific_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_SCMI'], if_true: files('vhost-user-scmi-pci.c')) @@ -70,6 +71,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto-pc virtio_pci_ss.add(when: 'CONFIG_VIRTIO_INPUT_HOST', if_true: files('virtio-input-host-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_INPUT', if_true: files('virtio-input-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VIRTIO_NSM', if_true: [files('virtio-nsm-pci.c', 'cbor-helpers.c'), libcbor]) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_9P', if_true: files('virtio-9p-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SCSI', if_true: files('virtio-scsi-pci.c')) diff --git a/hw/virtio/virtio-nsm-pci.c b/hw/virtio/virtio-nsm-pci.c new file mode 100644 index 0000000000..dca797315a --- /dev/null +++ b/hw/virtio/virtio-nsm-pci.c @@ -0,0 +1,73 @@ +/* + * AWS Nitro Secure Module (NSM) device + * + * Copyright (c) 2024 Dorjoy Chowdhury + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#include "qemu/osdep.h" + +#include "hw/virtio/virtio-pci.h" +#include "hw/virtio/virtio-nsm.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "qom/object.h" + +typedef struct VirtIONsmPCI VirtIONsmPCI; + +#define TYPE_VIRTIO_NSM_PCI "virtio-nsm-pci-base" +DECLARE_INSTANCE_CHECKER(VirtIONsmPCI, VIRTIO_NSM_PCI, + TYPE_VIRTIO_NSM_PCI) + +struct VirtIONsmPCI { + VirtIOPCIProxy parent_obj; + VirtIONSM vdev; +}; + +static void virtio_nsm_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VirtIONsmPCI *vnsm = VIRTIO_NSM_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&vnsm->vdev); + + virtio_pci_force_virtio_1(vpci_dev); + + if (!qdev_realize(vdev, BUS(&vpci_dev->bus), errp)) { + return; + } +} + +static void virtio_nsm_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + + k->realize = virtio_nsm_pci_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); +} + +static void virtio_nsm_initfn(Object *obj) +{ + VirtIONsmPCI *dev = VIRTIO_NSM_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_NSM); +} + +static const VirtioPCIDeviceTypeInfo virtio_nsm_pci_info = { + .base_name = TYPE_VIRTIO_NSM_PCI, + .generic_name = "virtio-nsm-pci", + .instance_size = sizeof(VirtIONsmPCI), + .instance_init = virtio_nsm_initfn, + .class_init = virtio_nsm_pci_class_init, +}; + +static void virtio_nsm_pci_register(void) +{ + virtio_pci_types_register(&virtio_nsm_pci_info); +} + +type_init(virtio_nsm_pci_register) diff --git a/hw/virtio/virtio-nsm.c b/hw/virtio/virtio-nsm.c new file mode 100644 index 0000000000..a3db8eef3e --- /dev/null +++ b/hw/virtio/virtio-nsm.c @@ -0,0 +1,1732 @@ +/* + * AWS Nitro Secure Module (NSM) device + * + * Copyright (c) 2024 Dorjoy Chowdhury + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/iov.h" +#include "qemu/guest-random.h" +#include "qapi/error.h" + +#include "crypto/hash.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-nsm.h" +#include "hw/virtio/cbor-helpers.h" +#include "standard-headers/linux/virtio_ids.h" + +#define NSM_REQUEST_MAX_SIZE 0x1000 +#define NSM_RESPONSE_BUF_SIZE 0x3000 +#define NSM_RND_BUF_SIZE 256 + +enum NSMResponseTypes { + NSM_SUCCESS = 0, + NSM_INVALID_ARGUMENT = 1, + NSM_INVALID_INDEX = 2, + NSM_READONLY_INDEX = 3, + NSM_INVALID_OPERATION = 4, + NSM_BUFFER_TOO_SMALL = 5, + NSM_INPUT_TOO_LARGE = 6, + NSM_INTERNAL_ERROR = 7, +}; + +static const char *error_string(enum NSMResponseTypes type) +{ + const char *str; + switch (type) { + case NSM_INVALID_ARGUMENT: + str = "InvalidArgument"; + break; + case NSM_INVALID_INDEX: + str = "InvalidIndex"; + break; + case NSM_READONLY_INDEX: + str = "ReadOnlyIndex"; + break; + case NSM_INVALID_OPERATION: + str = "InvalidOperation"; + break; + case NSM_BUFFER_TOO_SMALL: + str = "BufferTooSmall"; + break; + case NSM_INPUT_TOO_LARGE: + str = "InputTooLarge"; + break; + default: + str = "InternalError"; + break; + } + + return str; +} + +/* + * Error response structure: + * + * { + * Map(1) { + * key = String("Error"), + * value = String(error_name) + * } + * } + * + * where error_name can be one of the following: + * InvalidArgument + * InvalidIndex + * InvalidResponse + * ReadOnlyIndex + * InvalidOperation + * BufferTooSmall + * InputTooLarge + * InternalError + */ + +static bool error_response(struct iovec *response, enum NSMResponseTypes error, + Error **errp) +{ + cbor_item_t *root; + size_t len; + bool r = false; + + root = cbor_new_definite_map(1); + if (!root) { + goto err; + } + + if (!qemu_cbor_add_string_to_map(root, "Error", error_string(error))) { + goto err; + } + + len = cbor_serialize(root, response->iov_base, response->iov_len); + if (len == 0) { + error_setg(errp, "Response buffer is small for %s response", + error_string(error)); + goto out; + } + response->iov_len = len; + r = true; + + out: + if (root) { + cbor_decref(&root); + } + return r; + + err: + error_setg(errp, "Failed to initialize %s response", error_string(error)); + goto out; +} + +/* + * GetRandom response structure: + * + * { + * Map(1) { + * key = String("GetRandom"), + * value = Map(1) { + * key = String("random"), + * value = Byte_String() + * } + * } + * } + */ +static bool handle_get_random(VirtIONSM *vnsm, struct iovec *request, + struct iovec *response, Error **errp) +{ + cbor_item_t *root, *nested_map; + size_t len; + uint8_t rnd[NSM_RND_BUF_SIZE]; + bool r = false; + + root = cbor_new_definite_map(1); + if (!root) { + goto err; + } + + if (!qemu_cbor_add_map_to_map(root, "GetRandom", 1, &nested_map)) { + goto err; + } + + qemu_guest_getrandom_nofail(rnd, NSM_RND_BUF_SIZE); + + if (!qemu_cbor_add_bytestring_to_map(nested_map, "random", rnd, + NSM_RND_BUF_SIZE)) { + goto err; + } + + len = cbor_serialize(root, response->iov_base, response->iov_len); + if (len == 0) { + if (error_response(response, NSM_INPUT_TOO_LARGE, errp)) { + r = true; + } + goto out; + } + + response->iov_len = len; + r = true; + + out: + if (root) { + cbor_decref(&root); + } + return r; + + err: + error_setg(errp, "Failed to initialize GetRandom response"); + goto out; +} + +/* + * DescribeNSM response structure: + * + * { + * Map(1) { + * key = String("DescribeNSM"), + * value = Map(7) { + * key = String("digest"), + * value = String("SHA384"), + * key = String("max_pcrs"), + * value = Uint8(32), + * key = String("module_id"), + * value = String("i-1234-enc5678"), + * key = String("locked_pcrs"), + * value = Array(), + * key = String("version_major"), + * value = Uint8(1), + * key = String("version_minor"), + * value = Uint8(0), + * key = String("version_patch"), + * value = Uint8(0) + * } + * } + * } + */ +static bool handle_describe_nsm(VirtIONSM *vnsm, struct iovec *request, + struct iovec *response, Error **errp) +{ + cbor_item_t *root, *nested_map; + uint16_t locked_pcrs_cnt = 0; + uint8_t locked_pcrs_ind[NSM_MAX_PCRS]; + size_t len; + bool r = false; + + root = cbor_new_definite_map(1); + if (!root) { + goto err; + } + + if (!qemu_cbor_add_map_to_map(root, "DescribeNSM", 7, &nested_map)) { + goto err; + } + + if (!qemu_cbor_add_string_to_map(nested_map, "digest", vnsm->digest)) { + goto err; + } + + if (!qemu_cbor_add_uint8_to_map(nested_map, "max_pcrs", vnsm->max_pcrs)) { + goto err; + } + + if (!qemu_cbor_add_string_to_map(nested_map, "module_id", + vnsm->module_id)) { + goto err; + } + + for (uint8_t i = 0; i < NSM_MAX_PCRS; ++i) { + if (vnsm->pcrs[i].locked) { + locked_pcrs_ind[locked_pcrs_cnt++] = i; + } + } + if (!qemu_cbor_add_uint8_array_to_map(nested_map, "locked_pcrs", + locked_pcrs_ind, locked_pcrs_cnt)) { + goto err; + } + + if (!qemu_cbor_add_uint8_to_map(nested_map, "version_major", + vnsm->version_major)) { + goto err; + } + + if (!qemu_cbor_add_uint8_to_map(nested_map, "version_minor", + vnsm->version_minor)) { + goto err; + } + + if (!qemu_cbor_add_uint8_to_map(nested_map, "version_patch", + vnsm->version_patch)) { + goto err; + } + + len = cbor_serialize(root, response->iov_base, response->iov_len); + if (len == 0) { + if (error_response(response, NSM_INPUT_TOO_LARGE, errp)) { + r = true; + } + goto out; + } + + response->iov_len = len; + r = true; + + out: + if (root) { + cbor_decref(&root); + } + return r; + + err: + error_setg(errp, "Failed to initialize DescribeNSM response"); + goto out; +} + +/* + * DescribePCR request structure: + * + * { + * Map(1) { + * key = String("DescribePCR"), + * value = Map(1) { + * key = String("index"), + * value = Uint8(pcr) + * } + * } + * } + */ +typedef struct NSMDescribePCRReq { + uint8_t index; +} NSMDescribePCRReq; + +static enum NSMResponseTypes get_nsm_describe_pcr_req( + uint8_t *req, size_t len, + NSMDescribePCRReq *nsm_req) +{ + size_t size; + uint8_t *str; + struct cbor_pair *pair; + cbor_item_t *item = NULL; + struct cbor_load_result result; + enum NSMResponseTypes r = NSM_INVALID_OPERATION; + + item = cbor_load(req, len, &result); + if (!item || result.error.code != CBOR_ERR_NONE) { + goto cleanup; + } + + pair = cbor_map_handle(item); + if (!cbor_isa_map(pair->value)) { + goto cleanup; + } + size = cbor_map_size(pair->value); + if (size < 1) { + goto cleanup; + } + + pair = cbor_map_handle(pair->value); + for (int i = 0; i < size; ++i) { + if (!cbor_isa_string(pair[i].key)) { + continue; + } + + str = cbor_string_handle(pair[i].key); + if (str && cbor_string_length(pair[i].key) == 5 && + memcmp(str, "index", 5) == 0) { + if (!cbor_isa_uint(pair[i].value) || + cbor_int_get_width(pair[i].value) != CBOR_INT_8) { + break; + } + + nsm_req->index = cbor_get_uint8(pair[i].value); + r = NSM_SUCCESS; + break; + } + } + + cleanup: + if (item) { + cbor_decref(&item); + } + return r; +} + +/* + * DescribePCR response structure: + * + * { + * Map(1) { + * key = String("DescribePCR"), + * value = Map(2) { + * key = String("data"), + * value = Byte_String(), + * key = String("lock"), + * value = Bool() + * } + * } + * } + */ +static bool handle_describe_pcr(VirtIONSM *vnsm, struct iovec *request, + struct iovec *response, Error **errp) +{ + cbor_item_t *root = NULL; + cbor_item_t *nested_map; + size_t len; + NSMDescribePCRReq nsm_req; + enum NSMResponseTypes type; + struct PCRInfo *pcr; + bool r = false; + + type = get_nsm_describe_pcr_req(request->iov_base, request->iov_len, + &nsm_req); + if (type != NSM_SUCCESS) { + if (error_response(response, type, errp)) { + r = true; + } + goto out; + } + if (nsm_req.index >= vnsm->max_pcrs) { + if (error_response(response, NSM_INVALID_INDEX, errp)) { + r = true; + } + goto out; + } + pcr = &(vnsm->pcrs[nsm_req.index]); + + root = cbor_new_definite_map(1); + if (!root) { + goto err; + } + + if (!qemu_cbor_add_map_to_map(root, "DescribePCR", 2, &nested_map)) { + goto err; + } + + if (!qemu_cbor_add_bytestring_to_map(nested_map, "data", pcr->data, + QCRYPTO_HASH_DIGEST_LEN_SHA384)) { + goto err; + } + + if (!qemu_cbor_add_bool_to_map(nested_map, "lock", pcr->locked)) { + goto err; + } + + len = cbor_serialize(root, response->iov_base, response->iov_len); + if (len == 0) { + if (error_response(response, NSM_INPUT_TOO_LARGE, errp)) { + r = true; + } + goto out; + } + + response->iov_len = len; + r = true; + + out: + if (root) { + cbor_decref(&root); + } + return r; + + err: + error_setg(errp, "Failed to initialize DescribePCR response"); + goto out; +} + +/* + * ExtendPCR request structure: + * + * { + * Map(1) { + * key = String("ExtendPCR"), + * value = Map(2) { + * key = String("index"), + * value = Uint8(pcr), + * key = String("data"), + * value = Byte_String(data), + * } + * } + * } + */ +typedef struct NSMExtendPCRReq { + uint8_t index; + uint16_t data_len; + uint8_t data[NSM_REQUEST_MAX_SIZE]; +} NSMExtendPCRReq; + +static enum NSMResponseTypes get_nsm_extend_pcr_req(uint8_t *req, size_t len, + NSMExtendPCRReq *nsm_req) +{ + cbor_item_t *item = NULL; + size_t size ; + uint8_t *str; + bool index_found = false; + bool data_found = false; + struct cbor_pair *pair; + struct cbor_load_result result; + enum NSMResponseTypes r = NSM_INVALID_OPERATION; + + item = cbor_load(req, len, &result); + if (!item || result.error.code != CBOR_ERR_NONE) { + goto cleanup; + } + + pair = cbor_map_handle(item); + if (!cbor_isa_map(pair->value)) { + goto cleanup; + } + size = cbor_map_size(pair->value); + if (size < 2) { + goto cleanup; + } + + pair = cbor_map_handle(pair->value); + for (int i = 0; i < size; ++i) { + if (!cbor_isa_string(pair[i].key)) { + continue; + } + str = cbor_string_handle(pair[i].key); + if (!str) { + continue; + } + + if (cbor_string_length(pair[i].key) == 5 && + memcmp(str, "index", 5) == 0) { + if (!cbor_isa_uint(pair[i].value) || + cbor_int_get_width(pair[i].value) != CBOR_INT_8) { + goto cleanup; + } + nsm_req->index = cbor_get_uint8(pair[i].value); + index_found = true; + continue; + } + + if (cbor_string_length(pair[i].key) == 4 && + memcmp(str, "data", 4) == 0) { + if (!cbor_isa_bytestring(pair[i].value)) { + goto cleanup; + } + str = cbor_bytestring_handle(pair[i].value); + if (!str) { + goto cleanup; + } + nsm_req->data_len = cbor_bytestring_length(pair[i].value); + /* + * nsm_req->data_len will be smaller than NSM_REQUEST_MAX_SIZE as + * we already check for the max request size before processing + * any request. So it's safe to copy. + */ + memcpy(nsm_req->data, str, nsm_req->data_len); + data_found = true; + continue; + } + } + + if (index_found && data_found) { + r = NSM_SUCCESS; + } + + cleanup: + if (item) { + cbor_decref(&item); + } + return r; +} + +/* + * ExtendPCR response structure: + * + * { + * Map(1) { + * key = String("ExtendPCR"), + * value = Map(1) { + * key = String("data"), + * value = Byte_String() + * } + * } + * } + */ +static bool handle_extend_pcr(VirtIONSM *vnsm, struct iovec *request, + struct iovec *response, Error **errp) +{ + cbor_item_t *root = NULL; + cbor_item_t *nested_map; + size_t len; + struct PCRInfo *pcr; + enum NSMResponseTypes type; + bool r = false; + g_autofree NSMExtendPCRReq *nsm_req = g_malloc(sizeof(NSMExtendPCRReq)); + + type = get_nsm_extend_pcr_req(request->iov_base, request->iov_len, + nsm_req); + if (type != NSM_SUCCESS) { + if (error_response(response, type, errp)) { + r = true; + } + goto out; + } + if (nsm_req->index >= vnsm->max_pcrs) { + if (error_response(response, NSM_INVALID_INDEX, errp)) { + r = true; + } + goto out; + } + + pcr = &(vnsm->pcrs[nsm_req->index]); + + if (pcr->locked) { + if (error_response(response, NSM_READONLY_INDEX, errp)) { + r = true; + } + goto out; + } + + if (!vnsm->extend_pcr(vnsm, nsm_req->index, nsm_req->data, + nsm_req->data_len)) { + if (error_response(response, NSM_INTERNAL_ERROR, errp)) { + r = true; + } + goto out; + } + + root = cbor_new_definite_map(1); + if (!root) { + goto err; + } + + if (!qemu_cbor_add_map_to_map(root, "ExtendPCR", 1, &nested_map)) { + goto err; + } + + if (!qemu_cbor_add_bytestring_to_map(nested_map, "data", pcr->data, + QCRYPTO_HASH_DIGEST_LEN_SHA384)) { + goto err; + } + + len = cbor_serialize(root, response->iov_base, response->iov_len); + if (len == 0) { + if (error_response(response, NSM_BUFFER_TOO_SMALL, errp)) { + r = true; + } + goto out; + } + + response->iov_len = len; + r = true; + + out: + if (root) { + cbor_decref(&root); + } + return r; + + err: + error_setg(errp, "Failed to initialize DescribePCR response"); + goto out; +} + +/* + * LockPCR request structure: + * + * { + * Map(1) { + * key = String("LockPCR"), + * value = Map(1) { + * key = String("index"), + * value = Uint8(pcr) + * } + * } + * } + */ +typedef struct NSMLockPCRReq { + uint8_t index; +} NSMLockPCRReq; + +static enum NSMResponseTypes get_nsm_lock_pcr_req(uint8_t *req, size_t len, + NSMLockPCRReq *nsm_req) +{ + cbor_item_t *item = NULL; + size_t size; + uint8_t *str; + struct cbor_pair *pair; + struct cbor_load_result result; + enum NSMResponseTypes r = NSM_INVALID_OPERATION; + + item = cbor_load(req, len, &result); + if (!item || result.error.code != CBOR_ERR_NONE) { + goto cleanup; + } + + pair = cbor_map_handle(item); + if (!cbor_isa_map(pair->value)) { + goto cleanup; + } + size = cbor_map_size(pair->value); + if (size < 1) { + goto cleanup; + } + + pair = cbor_map_handle(pair->value); + for (int i = 0; i < size; ++i) { + if (!cbor_isa_string(pair[i].key)) { + continue; + } + str = cbor_string_handle(pair[i].key); + if (str && cbor_string_length(pair[i].key) == 5 && + memcmp(str, "index", 5) == 0) { + if (!cbor_isa_uint(pair[i].value) || + cbor_int_get_width(pair[i].value) != CBOR_INT_8) { + break; + } + + nsm_req->index = cbor_get_uint8(pair[i].value); + r = NSM_SUCCESS; + break; + } + } + + cleanup: + if (item) { + cbor_decref(&item); + } + return r; +} + +/* + * LockPCR success response structure: + * { + * String("LockPCR") + * } + */ +static bool handle_lock_pcr(VirtIONSM *vnsm, struct iovec *request, + struct iovec *response, Error **errp) +{ + cbor_item_t *root = NULL; + size_t len; + NSMLockPCRReq nsm_req; + enum NSMResponseTypes type; + struct PCRInfo *pcr; + bool r = false; + + type = get_nsm_lock_pcr_req(request->iov_base, request->iov_len, &nsm_req); + if (type != NSM_SUCCESS) { + if (error_response(response, type, errp)) { + r = true; + } + goto cleanup; + } + if (nsm_req.index >= vnsm->max_pcrs) { + if (error_response(response, NSM_INVALID_INDEX, errp)) { + r = true; + } + goto cleanup; + } + + pcr = &(vnsm->pcrs[nsm_req.index]); + + if (pcr->locked) { + if (error_response(response, NSM_READONLY_INDEX, errp)) { + r = true; + } + goto cleanup; + } + + pcr->locked = true; + + root = cbor_build_string("LockPCR"); + if (!root) { + goto err; + } + + len = cbor_serialize(root, response->iov_base, response->iov_len); + if (len == 0) { + if (error_response(response, NSM_BUFFER_TOO_SMALL, errp)) { + r = true; + } + goto cleanup; + } + + response->iov_len = len; + r = true; + goto cleanup; + + err: + error_setg(errp, "Failed to initialize LockPCR response"); + + cleanup: + if (root) { + cbor_decref(&root); + } + return r; +} + +/* + * LockPCRs request structure: + * + * { + * Map(1) { + * key = String("LockPCRs"), + * value = Map(1) { + * key = String("range"), + * value = Uint8(pcr) + * } + * } + * } + */ +typedef struct NSMLockPCRsReq { + uint16_t range; +} NSMLockPCRsReq; + +static enum NSMResponseTypes get_nsm_lock_pcrs_req(uint8_t *req, size_t len, + NSMLockPCRsReq *nsm_req) +{ + cbor_item_t *item = NULL; + size_t size; + uint8_t *str; + struct cbor_pair *pair; + struct cbor_load_result result; + enum NSMResponseTypes r = NSM_INVALID_OPERATION; + + item = cbor_load(req, len, &result); + if (!item || result.error.code != CBOR_ERR_NONE) { + goto cleanup; + } + + pair = cbor_map_handle(item); + if (!cbor_isa_map(pair->value)) { + goto cleanup; + } + size = cbor_map_size(pair->value); + if (size < 1) { + goto cleanup; + } + + pair = cbor_map_handle(pair->value); + for (int i = 0; i < size; ++i) { + if (!cbor_isa_string(pair[i].key)) { + continue; + } + str = cbor_string_handle(pair[i].key); + if (str && cbor_string_length(pair[i].key) == 5 && + memcmp(str, "range", 5) == 0) { + if (!cbor_isa_uint(pair[i].value) || + cbor_int_get_width(pair[i].value) != CBOR_INT_8) { + break; + } + + nsm_req->range = cbor_get_uint8(pair[i].value); + r = NSM_SUCCESS; + break; + } + } + + cleanup: + if (item) { + cbor_decref(&item); + } + return r; +} + +/* + * LockPCRs success response structure: + * { + * String("LockPCRs") + * } + */ +static bool handle_lock_pcrs(VirtIONSM *vnsm, struct iovec *request, + struct iovec *response, Error **errp) +{ + cbor_item_t *root = NULL; + size_t len; + NSMLockPCRsReq nsm_req; + enum NSMResponseTypes type; + bool r = false; + + type = get_nsm_lock_pcrs_req(request->iov_base, request->iov_len, &nsm_req); + if (type != NSM_SUCCESS) { + if (error_response(response, type, errp)) { + r = true; + } + goto cleanup; + } + if (nsm_req.range > vnsm->max_pcrs) { + if (error_response(response, NSM_INVALID_INDEX, errp)) { + r = true; + } + goto cleanup; + } + + for (int i = 0; i < nsm_req.range; ++i) { + vnsm->pcrs[i].locked = true; + } + + root = cbor_build_string("LockPCRs"); + if (!root) { + goto err; + } + + len = cbor_serialize(root, response->iov_base, response->iov_len); + if (len == 0) { + if (error_response(response, NSM_BUFFER_TOO_SMALL, errp)) { + r = true; + } + goto cleanup; + } + + response->iov_len = len; + r = true; + goto cleanup; + + err: + error_setg(errp, "Failed to initialize response"); + + cleanup: + if (root) { + cbor_decref(&root); + } + return r; +} + +/* + * Attestation request structure: + * + * Map(1) { + * key = String("Attestation"), + * value = Map(3) { + * key = String("user_data"), + * value = Byte_String() || null, // Optional + * key = String("nonce"), + * value = Byte_String() || null, // Optional + * key = String("public_key"), + * value = Byte_String() || null, // Optional + * } + * } + * } + */ + +struct AttestationProperty { + bool is_null; /* True if property is not present in map or is null */ + uint16_t len; + uint8_t buf[NSM_REQUEST_MAX_SIZE]; +}; + +typedef struct NSMAttestationReq { + struct AttestationProperty public_key; + struct AttestationProperty user_data; + struct AttestationProperty nonce; +} NSMAttestationReq; + +static bool fill_attestation_property(struct AttestationProperty *prop, + cbor_item_t *value) +{ + uint8_t *str; + bool ret = false; + + if (cbor_is_null(value)) { + prop->is_null = true; + ret = true; + goto out; + } else if (cbor_isa_bytestring(value)) { + str = cbor_bytestring_handle(value); + if (!str) { + goto out; + } + prop->len = cbor_bytestring_length(value); + } else if (cbor_isa_string(value)) { + str = cbor_string_handle(value); + if (!str) { + goto out; + } + prop->len = cbor_string_length(value); + } else { + goto out; + } + + /* + * prop->len will be smaller than NSM_REQUEST_MAX_SIZE as we + * already check for the max request size before processing + * any request. So it's safe to copy. + */ + memcpy(prop->buf, str, prop->len); + prop->is_null = false; + ret = true; + + out: + return ret; +} + +static enum NSMResponseTypes get_nsm_attestation_req(uint8_t *req, size_t len, + NSMAttestationReq *nsm_req) +{ + cbor_item_t *item = NULL; + size_t size; + uint8_t *str; + struct cbor_pair *pair; + struct cbor_load_result result; + enum NSMResponseTypes r = NSM_INVALID_OPERATION; + + nsm_req->public_key.is_null = true; + nsm_req->user_data.is_null = true; + nsm_req->nonce.is_null = true; + + item = cbor_load(req, len, &result); + if (!item || result.error.code != CBOR_ERR_NONE) { + goto cleanup; + } + + pair = cbor_map_handle(item); + if (!cbor_isa_map(pair->value)) { + goto cleanup; + } + size = cbor_map_size(pair->value); + if (size == 0) { + r = NSM_SUCCESS; + goto cleanup; + } + + pair = cbor_map_handle(pair->value); + for (int i = 0; i < size; ++i) { + if (!cbor_isa_string(pair[i].key)) { + continue; + } + + str = cbor_string_handle(pair[i].key); + if (!str) { + continue; + } + + if (cbor_string_length(pair[i].key) == 10 && + memcmp(str, "public_key", 10) == 0) { + if (!fill_attestation_property(&(nsm_req->public_key), + pair[i].value)) { + goto cleanup; + } + continue; + } + + if (cbor_string_length(pair[i].key) == 9 && + memcmp(str, "user_data", 9) == 0) { + if (!fill_attestation_property(&(nsm_req->user_data), + pair[i].value)) { + goto cleanup; + } + continue; + } + + if (cbor_string_length(pair[i].key) == 5 && + memcmp(str, "nonce", 5) == 0) { + if (!fill_attestation_property(&(nsm_req->nonce), pair[i].value)) { + goto cleanup; + } + continue; + } + } + + r = NSM_SUCCESS; + + cleanup: + if (item) { + cbor_decref(&item); + } + return r; +} + +static bool add_protected_header_to_cose(cbor_item_t *cose) +{ + cbor_item_t *map = NULL; + cbor_item_t *key = NULL; + cbor_item_t *value = NULL; + cbor_item_t *bs = NULL; + size_t len; + bool r = false; + size_t buf_len = 4096; + g_autofree uint8_t *buf = g_malloc(buf_len); + + map = cbor_new_definite_map(1); + if (!map) { + goto cleanup; + } + key = cbor_build_uint8(1); + if (!key) { + goto cleanup; + } + value = cbor_new_int8(); + if (!value) { + goto cleanup; + } + cbor_mark_negint(value); + /* we don't actually sign the data, so we use -1 as the 'alg' value */ + cbor_set_uint8(value, 0); + + if (!qemu_cbor_map_add(map, key, value)) { + goto cleanup; + } + + len = cbor_serialize(map, buf, buf_len); + if (len == 0) { + goto cleanup_map; + } + + bs = cbor_build_bytestring(buf, len); + if (!bs) { + goto cleanup_map; + } + if (!qemu_cbor_array_push(cose, bs)) { + cbor_decref(&bs); + goto cleanup_map; + } + r = true; + goto cleanup_map; + + cleanup: + if (key) { + cbor_decref(&key); + } + if (value) { + cbor_decref(&value); + } + + cleanup_map: + if (map) { + cbor_decref(&map); + } + return r; +} + +static bool add_unprotected_header_to_cose(cbor_item_t *cose) +{ + cbor_item_t *map = cbor_new_definite_map(0); + if (!map) { + goto cleanup; + } + if (!qemu_cbor_array_push(cose, map)) { + goto cleanup; + } + + return true; + + cleanup: + if (map) { + cbor_decref(&map); + } + return false; +} + +static bool add_ca_bundle_to_payload(cbor_item_t *map) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + cbor_item_t *bs = NULL; + uint8_t zero[64] = {0}; + + key_cbor = cbor_build_string("cabundle"); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_new_definite_array(1); + if (!value_cbor) { + goto cleanup; + } + bs = cbor_build_bytestring(zero, 64); + if (!bs) { + goto cleanup; + } + if (!qemu_cbor_array_push(value_cbor, bs)) { + cbor_decref(&bs); + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +static bool add_payload_to_cose(cbor_item_t *cose, VirtIONSM *vnsm, + NSMAttestationReq *req) +{ + cbor_item_t *root = NULL; + cbor_item_t *nested_map; + cbor_item_t *bs = NULL; + size_t locked_cnt; + uint8_t ind[NSM_MAX_PCRS]; + size_t payload_map_size = 9; + size_t len; + struct PCRInfo *pcr; + uint8_t zero[64] = {0}; + bool r = false; + size_t buf_len = 16384; + g_autofree uint8_t *buf = g_malloc(buf_len); + + root = cbor_new_definite_map(payload_map_size); + if (!root) { + goto cleanup; + } + if (!qemu_cbor_add_string_to_map(root, "module_id", vnsm->module_id)) { + goto cleanup; + } + if (!qemu_cbor_add_string_to_map(root, "digest", vnsm->digest)) { + goto cleanup; + } + if (!qemu_cbor_add_uint64_to_map(root, "timestamp", + (uint64_t) time(NULL) * 1000)) { + goto cleanup; + } + + locked_cnt = 0; + for (uint8_t i = 0; i < NSM_MAX_PCRS; ++i) { + if (vnsm->pcrs[i].locked) { + ind[locked_cnt++] = i; + } + } + if (!qemu_cbor_add_map_to_map(root, "pcrs", locked_cnt, &nested_map)) { + goto cleanup; + } + for (uint8_t i = 0; i < locked_cnt; ++i) { + pcr = &(vnsm->pcrs[ind[i]]); + if (!qemu_cbor_add_uint8_key_bytestring_to_map( + nested_map, ind[i], + pcr->data, + QCRYPTO_HASH_DIGEST_LEN_SHA384)) { + goto cleanup; + } + } + if (!qemu_cbor_add_bytestring_to_map(root, "certificate", zero, 64)) { + goto cleanup; + } + if (!add_ca_bundle_to_payload(root)) { + goto cleanup; + } + + if (req->public_key.is_null) { + if (!qemu_cbor_add_null_to_map(root, "public_key")) { + goto cleanup; + } + } else if (!qemu_cbor_add_bytestring_to_map(root, "public_key", + req->public_key.buf, + req->public_key.len)) { + goto cleanup; + } + + if (req->user_data.is_null) { + if (!qemu_cbor_add_null_to_map(root, "user_data")) { + goto cleanup; + } + } else if (!qemu_cbor_add_bytestring_to_map(root, "user_data", + req->user_data.buf, + req->user_data.len)) { + goto cleanup; + } + + if (req->nonce.is_null) { + if (!qemu_cbor_add_null_to_map(root, "nonce")) { + goto cleanup; + } + } else if (!qemu_cbor_add_bytestring_to_map(root, "nonce", + req->nonce.buf, + req->nonce.len)) { + goto cleanup; + } + + len = cbor_serialize(root, buf, buf_len); + if (len == 0) { + goto cleanup; + } + + bs = cbor_build_bytestring(buf, len); + if (!bs) { + goto cleanup; + } + if (!qemu_cbor_array_push(cose, bs)) { + cbor_decref(&bs); + goto cleanup; + } + + r = true; + + cleanup: + if (root) { + cbor_decref(&root); + } + return r; +} + +static bool add_signature_to_cose(cbor_item_t *cose) +{ + cbor_item_t *bs = NULL; + uint8_t zero[64] = {0}; + + /* we don't actually sign the data, so we just put 64 zero bytes */ + bs = cbor_build_bytestring(zero, 64); + if (!bs) { + goto cleanup; + } + + if (!qemu_cbor_array_push(cose, bs)) { + goto cleanup; + } + + return true; + + cleanup: + if (bs) { + cbor_decref(&bs); + } + return false; +} + +/* + * Attestation response structure: + * + * { + * Map(1) { + * key = String("Attestation"), + * value = Map(1) { + * key = String("document"), + * value = Byte_String() + * } + * } + * } + * + * The document is a serialized COSE sign1 blob of the structure: + * { + * Array(4) { + * [0] { ByteString() }, // serialized protected header + * [1] { Map(0) }, // 0 length map + * [2] { ByteString() }, // serialized payload + * [3] { ByteString() }, // signature + * } + * } + * + * where [0] protected header is a serialized CBOR blob of the structure: + * { + * Map(1) { + * key = Uint8(1) // alg + * value = NegativeInt8() // Signing algorithm + * } + * } + * + * [2] payload is serialized CBOR blob of the structure: + * { + * Map(9) { + * [0] { key = String("module_id"), value = String(module_id) }, + * [1] { key = String("digest"), value = String("SHA384") }, + * [2] { + * key = String("timestamp"), + * value = Uint64(unix epoch of when document was created) + * }, + * [3] { + * key = String("pcrs"), + * value = Map(locked_pcr_cnt) { + * key = Uint8(pcr_index), + * value = ByteString(pcr_data) + * }, + * }, + * [4] { + * key = String("certificate"), + * value = ByteString(Signing certificate) + * }, + * [5] { key = String("cabundle"), value = Array(N) { ByteString()... } }, + * [6] { key = String("public_key"), value = ByteString() || null }, + * [7] { key = String("user_data"), value = ByteString() || null}, + * [8] { key = String("nonce"), value = ByteString() || null}, + * } + * } + */ +static bool handle_attestation(VirtIONSM *vnsm, struct iovec *request, + struct iovec *response, Error **errp) +{ + cbor_item_t *root = NULL; + cbor_item_t *cose = NULL; + cbor_item_t *nested_map; + size_t len; + enum NSMResponseTypes type; + bool r = false; + size_t buf_len = 16384; + g_autofree uint8_t *buf = g_malloc(buf_len); + g_autofree NSMAttestationReq *nsm_req = g_malloc(sizeof(NSMAttestationReq)); + + nsm_req->public_key.is_null = true; + nsm_req->user_data.is_null = true; + nsm_req->nonce.is_null = true; + + type = get_nsm_attestation_req(request->iov_base, request->iov_len, + nsm_req); + if (type != NSM_SUCCESS) { + if (error_response(response, type, errp)) { + r = true; + } + goto out; + } + + cose = cbor_new_definite_array(4); + if (!cose) { + goto err; + } + if (!add_protected_header_to_cose(cose)) { + goto err; + } + if (!add_unprotected_header_to_cose(cose)) { + goto err; + } + if (!add_payload_to_cose(cose, vnsm, nsm_req)) { + goto err; + } + if (!add_signature_to_cose(cose)) { + goto err; + } + + len = cbor_serialize(cose, buf, buf_len); + if (len == 0) { + goto err; + } + + root = cbor_new_definite_map(1); + if (!root) { + goto err; + } + if (!qemu_cbor_add_map_to_map(root, "Attestation", 1, &nested_map)) { + goto err; + } + if (!qemu_cbor_add_bytestring_to_map(nested_map, "document", buf, len)) { + goto err; + } + + len = cbor_serialize(root, response->iov_base, response->iov_len); + if (len == 0) { + if (error_response(response, NSM_INPUT_TOO_LARGE, errp)) { + r = true; + } + goto out; + } + + response->iov_len = len; + r = true; + + out: + if (root) { + cbor_decref(&root); + } + if (cose) { + cbor_decref(&cose); + } + return r; + + err: + error_setg(errp, "Failed to initialize Attestation response"); + goto out; +} + +enum CBOR_ROOT_TYPE { + CBOR_ROOT_TYPE_STRING = 0, + CBOR_ROOT_TYPE_MAP = 1, +}; + +struct nsm_cmd { + char name[16]; + /* + * There are 2 types of request + * 1) String(); "GetRandom", "DescribeNSM" + * 2) Map(1) { key: String(), value: ... } + */ + enum CBOR_ROOT_TYPE root_type; + bool (*response_fn)(VirtIONSM *vnsm, struct iovec *request, + struct iovec *response, Error **errp); +}; + +const struct nsm_cmd nsm_cmds[] = { + { "GetRandom", CBOR_ROOT_TYPE_STRING, handle_get_random }, + { "DescribeNSM", CBOR_ROOT_TYPE_STRING, handle_describe_nsm }, + { "DescribePCR", CBOR_ROOT_TYPE_MAP, handle_describe_pcr }, + { "ExtendPCR", CBOR_ROOT_TYPE_MAP, handle_extend_pcr }, + { "LockPCR", CBOR_ROOT_TYPE_MAP, handle_lock_pcr }, + { "LockPCRs", CBOR_ROOT_TYPE_MAP, handle_lock_pcrs }, + { "Attestation", CBOR_ROOT_TYPE_MAP, handle_attestation }, +}; + +static const struct nsm_cmd *get_nsm_request_cmd(uint8_t *buf, size_t len) +{ + size_t size; + uint8_t *req; + enum CBOR_ROOT_TYPE root_type; + struct cbor_load_result result; + cbor_item_t *item = cbor_load(buf, len, &result); + if (!item || result.error.code != CBOR_ERR_NONE) { + goto cleanup; + } + + if (cbor_isa_string(item)) { + size = cbor_string_length(item); + req = cbor_string_handle(item); + root_type = CBOR_ROOT_TYPE_STRING; + } else if (cbor_isa_map(item) && cbor_map_size(item) == 1) { + struct cbor_pair *handle = cbor_map_handle(item); + if (cbor_isa_string(handle->key)) { + size = cbor_string_length(handle->key); + req = cbor_string_handle(handle->key); + root_type = CBOR_ROOT_TYPE_MAP; + } else { + goto cleanup; + } + } else { + goto cleanup; + } + + if (size == 0 || req == NULL) { + goto cleanup; + } + + for (int i = 0; i < ARRAY_SIZE(nsm_cmds); ++i) { + if (nsm_cmds[i].root_type == root_type && + strlen(nsm_cmds[i].name) == size && + memcmp(nsm_cmds[i].name, req, size) == 0) { + cbor_decref(&item); + return &nsm_cmds[i]; + } + } + + cleanup: + if (item) { + cbor_decref(&item); + } + return NULL; +} + +static bool get_nsm_request_response(VirtIONSM *vnsm, struct iovec *req, + struct iovec *resp, Error **errp) +{ + const struct nsm_cmd *cmd; + + if (req->iov_len > NSM_REQUEST_MAX_SIZE) { + if (error_response(resp, NSM_INPUT_TOO_LARGE, errp)) { + return true; + } + error_setg(errp, "Failed to initialize InputTooLarge response"); + return false; + } + + cmd = get_nsm_request_cmd(req->iov_base, req->iov_len); + + if (cmd == NULL) { + if (error_response(resp, NSM_INVALID_OPERATION, errp)) { + return true; + } + error_setg(errp, "Failed to initialize InvalidOperation response"); + return false; + } + + return cmd->response_fn(vnsm, req, resp, errp); +} + +static void handle_input(VirtIODevice *vdev, VirtQueue *vq) +{ + g_autofree VirtQueueElement *out_elem = NULL; + g_autofree VirtQueueElement *in_elem = NULL; + VirtIONSM *vnsm = VIRTIO_NSM(vdev); + Error *err = NULL; + size_t sz; + struct iovec req = {.iov_base = NULL, .iov_len = 0}; + struct iovec res = {.iov_base = NULL, .iov_len = 0}; + + out_elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!out_elem) { + /* nothing in virtqueue */ + return; + } + + sz = iov_size(out_elem->out_sg, out_elem->out_num); + if (sz == 0) { + virtio_error(vdev, "Expected non-zero sized request buffer in " + "virtqueue"); + goto cleanup; + } + + in_elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!in_elem) { + virtio_error(vdev, "Expected response buffer after request buffer " + "in virtqueue"); + goto cleanup; + } + if (iov_size(in_elem->in_sg, in_elem->in_num) != NSM_RESPONSE_BUF_SIZE) { + virtio_error(vdev, "Expected response buffer of length 0x3000"); + goto cleanup; + } + + req.iov_base = g_malloc(sz); + req.iov_len = iov_to_buf(out_elem->out_sg, out_elem->out_num, 0, + req.iov_base, sz); + if (req.iov_len != sz) { + virtio_error(vdev, "Failed to copy request buffer"); + goto cleanup; + } + + res.iov_base = g_malloc(NSM_RESPONSE_BUF_SIZE); + res.iov_len = NSM_RESPONSE_BUF_SIZE; + + if (!get_nsm_request_response(vnsm, &req, &res, &err)) { + error_report_err(err); + virtio_error(vdev, "Failed to get NSM request response"); + goto cleanup; + } + + sz = iov_from_buf(in_elem->in_sg, in_elem->in_num, 0, res.iov_base, + res.iov_len); + if (sz != res.iov_len) { + virtio_error(vdev, "Failed to copy response buffer"); + goto cleanup; + } + + g_free(req.iov_base); + g_free(res.iov_base); + virtqueue_push(vq, out_elem, 0); + virtqueue_push(vq, in_elem, in_elem->in_sg->iov_len); + virtio_notify(vdev, vq); + return; + + cleanup: + g_free(req.iov_base); + g_free(res.iov_base); + if (out_elem) { + virtqueue_detach_element(vq, out_elem, 0); + } + if (in_elem) { + virtqueue_detach_element(vq, in_elem, 0); + } + return; +} + +static uint64_t get_features(VirtIODevice *vdev, uint64_t f, Error **errp) +{ + return f; +} + +static bool extend_pcr(VirtIONSM *vnsm, int ind, uint8_t *data, uint16_t len) +{ + Error *err = NULL; + struct PCRInfo *pcr = &(vnsm->pcrs[ind]); + size_t digest_len = QCRYPTO_HASH_DIGEST_LEN_SHA384; + uint8_t result[QCRYPTO_HASH_DIGEST_LEN_SHA384]; + uint8_t *ptr = result; + struct iovec iov[2] = { + { .iov_base = pcr->data, .iov_len = QCRYPTO_HASH_DIGEST_LEN_SHA384 }, + { .iov_base = data, .iov_len = len }, + }; + + if (qcrypto_hash_bytesv(QCRYPTO_HASH_ALGO_SHA384, iov, 2, &ptr, &digest_len, + &err) < 0) { + return false; + } + + memcpy(pcr->data, result, QCRYPTO_HASH_DIGEST_LEN_SHA384); + return true; +} + +static void lock_pcr(VirtIONSM *vnsm, int ind) +{ + vnsm->pcrs[ind].locked = true; +} + +static void virtio_nsm_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIONSM *vnsm = VIRTIO_NSM(dev); + + vnsm->max_pcrs = NSM_MAX_PCRS; + vnsm->digest = (char *) "SHA384"; + if (vnsm->module_id == NULL) { + vnsm->module_id = (char *) "i-234-enc5678"; + } + vnsm->version_major = 1; + vnsm->version_minor = 0; + vnsm->version_patch = 0; + vnsm->extend_pcr = extend_pcr; + vnsm->lock_pcr = lock_pcr; + + virtio_init(vdev, VIRTIO_ID_NITRO_SEC_MOD, 0); + + vnsm->vq = virtio_add_queue(vdev, 2, handle_input); +} + +static void virtio_nsm_device_unrealize(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + + virtio_del_queue(vdev, 0); + virtio_cleanup(vdev); +} + +static const VMStateDescription vmstate_pcr_info_entry = { + .name = "pcr_info_entry", + .minimum_version_id = 1, + .version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_BOOL(locked, struct PCRInfo), + VMSTATE_UINT8_ARRAY(data, struct PCRInfo, + QCRYPTO_HASH_DIGEST_LEN_SHA384), + VMSTATE_END_OF_LIST() + }, +}; + +static const VMStateDescription vmstate_virtio_nsm_device = { + .name = "virtio-nsm-device", + .minimum_version_id = 1, + .version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_STRUCT_ARRAY(pcrs, VirtIONSM, NSM_MAX_PCRS, 1, + vmstate_pcr_info_entry, struct PCRInfo), + VMSTATE_END_OF_LIST() + }, +}; + +static const VMStateDescription vmstate_virtio_nsm = { + .name = "virtio-nsm", + .minimum_version_id = 1, + .version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, +}; + +static Property virtio_nsm_properties[] = { + DEFINE_PROP_STRING("module-id", VirtIONSM, module_id), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_nsm_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + device_class_set_props(dc, virtio_nsm_properties); + dc->vmsd = &vmstate_virtio_nsm; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + vdc->realize = virtio_nsm_device_realize; + vdc->unrealize = virtio_nsm_device_unrealize; + vdc->get_features = get_features; + vdc->vmsd = &vmstate_virtio_nsm_device; +} + +static const TypeInfo virtio_nsm_info = { + .name = TYPE_VIRTIO_NSM, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VirtIONSM), + .class_init = virtio_nsm_class_init, +}; + +static void virtio_register_types(void) +{ + type_register_static(&virtio_nsm_info); +} + +type_init(virtio_register_types) diff --git a/include/hw/virtio/cbor-helpers.h b/include/hw/virtio/cbor-helpers.h new file mode 100644 index 0000000000..f25fd481ad --- /dev/null +++ b/include/hw/virtio/cbor-helpers.h @@ -0,0 +1,45 @@ +/* + * QEMU CBOR helpers + * + * Copyright (c) 2024 Dorjoy Chowdhury + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#ifndef QEMU_VIRTIO_CBOR_HELPERS_H +#define QEMU_VIRTIO_CBOR_HELPERS_H + +#include + +bool qemu_cbor_map_add(cbor_item_t *map, cbor_item_t *key, cbor_item_t *value); + +bool qemu_cbor_array_push(cbor_item_t *array, cbor_item_t *value); + +bool qemu_cbor_add_bool_to_map(cbor_item_t *map, const char *key, bool value); + +bool qemu_cbor_add_uint8_to_map(cbor_item_t *map, const char *key, + uint8_t value); + +bool qemu_cbor_add_map_to_map(cbor_item_t *map, const char *key, + size_t nested_map_size, + cbor_item_t **nested_map); + +bool qemu_cbor_add_bytestring_to_map(cbor_item_t *map, const char *key, + uint8_t *arr, size_t len); + +bool qemu_cbor_add_null_to_map(cbor_item_t *map, const char *key); + +bool qemu_cbor_add_string_to_map(cbor_item_t *map, const char *key, + const char *value); + +bool qemu_cbor_add_uint8_array_to_map(cbor_item_t *map, const char *key, + uint8_t *arr, size_t len); + +bool qemu_cbor_add_uint8_key_bytestring_to_map(cbor_item_t *map, uint8_t key, + uint8_t *buf, size_t len); + +bool qemu_cbor_add_uint64_to_map(cbor_item_t *map, const char *key, + uint64_t value); +#endif diff --git a/include/hw/virtio/virtio-nsm.h b/include/hw/virtio/virtio-nsm.h new file mode 100644 index 0000000000..57ddbbbf3f --- /dev/null +++ b/include/hw/virtio/virtio-nsm.h @@ -0,0 +1,49 @@ +/* + * AWS Nitro Secure Module (NSM) device + * + * Copyright (c) 2024 Dorjoy Chowdhury + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#ifndef QEMU_VIRTIO_NSM_H +#define QEMU_VIRTIO_NSM_H + +#include "crypto/hash.h" +#include "hw/virtio/virtio.h" +#include "qom/object.h" + +#define NSM_MAX_PCRS 32 + +#define TYPE_VIRTIO_NSM "virtio-nsm-device" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIONSM, VIRTIO_NSM) +#define VIRTIO_NSM_GET_PARENT_CLASS(obj) \ + OBJECT_GET_PARENT_CLASS(obj, TYPE_VIRTIO_NSM) + +struct PCRInfo { + bool locked; + uint8_t data[QCRYPTO_HASH_DIGEST_LEN_SHA384]; +}; + +struct VirtIONSM { + VirtIODevice parent_obj; + + /* Only one vq - guest puts request and response buffers on it */ + VirtQueue *vq; + + /* NSM State */ + uint16_t max_pcrs; + struct PCRInfo pcrs[NSM_MAX_PCRS]; + char *digest; + char *module_id; + uint8_t version_major; + uint8_t version_minor; + uint8_t version_patch; + + bool (*extend_pcr)(VirtIONSM *vnsm, int ind, uint8_t *data, uint16_t len); + void (*lock_pcr)(VirtIONSM *vnsm, int ind); +}; + +#endif diff --git a/meson.build b/meson.build index bae7a4370d..2739cc4964 100644 --- a/meson.build +++ b/meson.build @@ -1716,6 +1716,12 @@ if (have_system or have_tools) and (virgl.found() or opengl.found()) endif have_vhost_user_gpu = have_vhost_user_gpu and virgl.found() and opengl.found() and gbm.found() +libcbor = not_found +if not get_option('libcbor').auto() or have_system + libcbor = dependency('libcbor', version: '>=0.7.0', + required: get_option('libcbor')) +endif + gnutls = not_found gnutls_crypto = not_found if get_option('gnutls').enabled() or (get_option('gnutls').auto() and have_system) @@ -3130,6 +3136,7 @@ host_kconfig = \ (spice.found() ? ['CONFIG_SPICE=y'] : []) + \ (have_ivshmem ? ['CONFIG_IVSHMEM=y'] : []) + \ (opengl.found() ? ['CONFIG_OPENGL=y'] : []) + \ + (libcbor.found() ? ['CONFIG_LIBCBOR=y'] : []) + \ (x11.found() ? ['CONFIG_X11=y'] : []) + \ (fdt.found() ? ['CONFIG_FDT=y'] : []) + \ (have_vhost_user ? ['CONFIG_VHOST_USER=y'] : []) + \ @@ -4696,6 +4703,7 @@ summary_info += {'NUMA host support': numa} summary_info += {'capstone': capstone} summary_info += {'libpmem support': libpmem} summary_info += {'libdaxctl support': libdaxctl} +summary_info += {'libcbor support': libcbor} summary_info += {'libudev': libudev} # Dummy dependency, keep .found() summary_info += {'FUSE lseek': fuse_lseek.found()} diff --git a/meson_options.txt b/meson_options.txt index 0ee4d7bb86..24bf009056 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -168,6 +168,8 @@ option('iconv', type : 'feature', value : 'auto', description: 'Font glyph conversion support') option('curses', type : 'feature', value : 'auto', description: 'curses UI') +option('libcbor', type : 'feature', value : 'auto', + description: 'libcbor support') option('gnutls', type : 'feature', value : 'auto', description: 'GNUTLS cryptography support') option('nettle', type : 'feature', value : 'auto', diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 6d08605b77..6f2bb08ecd 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -133,6 +133,7 @@ meson_options_help() { printf "%s\n" ' keyring Linux keyring support' printf "%s\n" ' kvm KVM acceleration support' printf "%s\n" ' l2tpv3 l2tpv3 network backend support' + printf "%s\n" ' libcbor libcbor support' printf "%s\n" ' libdaxctl libdaxctl support' printf "%s\n" ' libdw debuginfo support' printf "%s\n" ' libiscsi libiscsi userspace initiator' @@ -358,6 +359,8 @@ _meson_option_parse() { --disable-kvm) printf "%s" -Dkvm=disabled ;; --enable-l2tpv3) printf "%s" -Dl2tpv3=enabled ;; --disable-l2tpv3) printf "%s" -Dl2tpv3=disabled ;; + --enable-libcbor) printf "%s" -Dlibcbor=enabled ;; + --disable-libcbor) printf "%s" -Dlibcbor=disabled ;; --enable-libdaxctl) printf "%s" -Dlibdaxctl=enabled ;; --disable-libdaxctl) printf "%s" -Dlibdaxctl=disabled ;; --libdir=*) quote_sh "-Dlibdir=$2" ;; From 63d2a5c78791e5df1d3173e553a09838fb4f7c9c Mon Sep 17 00:00:00 2001 From: Dorjoy Chowdhury Date: Wed, 9 Oct 2024 03:17:24 +0600 Subject: [PATCH 23/49] hw/core: Add Enclave Image Format (EIF) related helpers An EIF (Enclave Image Format)[1] file is used to boot an AWS nitro enclave[2] virtual machine. The EIF file contains the necessary kernel, cmdline, ramdisk(s) sections to boot. Some helper functions have been introduced for extracting the necessary sections from an EIF file and then writing them to temporary files as well as computing SHA384 hashes from the section data. These will be used in the following commit to add support for nitro-enclave machine type in QEMU. The files added in this commit are not compiled yet but will be added to the hw/core/meson.build file in the following commit where CONFIG_NITRO_ENCLAVE will be introduced. [1] https://github.com/aws/aws-nitro-enclaves-image-format [2] https://docs.aws.amazon.com/enclaves/latest/user/nitro-enclave.html Signed-off-by: Dorjoy Chowdhury Reviewed-by: Alexander Graf Link: https://lore.kernel.org/r/20241008211727.49088-4-dorjoychy111@gmail.com Signed-off-by: Paolo Bonzini --- Kconfig.host | 3 + MAINTAINERS | 7 + hw/core/Kconfig | 4 + hw/core/eif.c | 719 ++++++++++++++++++++++++++++++++++++++++++++ hw/core/eif.h | 22 ++ hw/core/meson.build | 1 + meson.build | 1 + 7 files changed, 757 insertions(+) create mode 100644 hw/core/eif.c create mode 100644 hw/core/eif.h diff --git a/Kconfig.host b/Kconfig.host index ccd84be60f..842cbe0d6c 100644 --- a/Kconfig.host +++ b/Kconfig.host @@ -8,6 +8,9 @@ config LINUX config LIBCBOR bool +config GNUTLS + bool + config OPENGL bool diff --git a/MAINTAINERS b/MAINTAINERS index 649bd5ab1a..19b7d54a19 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1834,6 +1834,13 @@ F: hw/i386/microvm.c F: include/hw/i386/microvm.h F: pc-bios/bios-microvm.bin +nitro-enclave +M: Alexander Graf +M: Dorjoy Chowdhury +S: Maintained +F: hw/core/eif.c +F: hw/core/eif.h + Machine core M: Eduardo Habkost M: Marcel Apfelbaum diff --git a/hw/core/Kconfig b/hw/core/Kconfig index 24411f5930..d1bdf765ee 100644 --- a/hw/core/Kconfig +++ b/hw/core/Kconfig @@ -34,3 +34,7 @@ config REGISTER config SPLIT_IRQ bool + +config EIF + bool + depends on LIBCBOR && GNUTLS diff --git a/hw/core/eif.c b/hw/core/eif.c new file mode 100644 index 0000000000..7f3b2edc9a --- /dev/null +++ b/hw/core/eif.c @@ -0,0 +1,719 @@ +/* + * EIF (Enclave Image Format) related helpers + * + * Copyright (c) 2024 Dorjoy Chowdhury + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/bswap.h" +#include "qapi/error.h" +#include "crypto/hash.h" +#include "crypto/x509-utils.h" +#include /* for crc32 */ +#include + +#include "hw/core/eif.h" + +#define MAX_SECTIONS 32 + +/* members are ordered according to field order in .eif file */ +typedef struct EifHeader { + uint8_t magic[4]; /* must be .eif in ascii i.e., [46, 101, 105, 102] */ + uint16_t version; + uint16_t flags; + uint64_t default_memory; + uint64_t default_cpus; + uint16_t reserved; + uint16_t section_cnt; + uint64_t section_offsets[MAX_SECTIONS]; + uint64_t section_sizes[MAX_SECTIONS]; + uint32_t unused; + uint32_t eif_crc32; +} QEMU_PACKED EifHeader; + +/* members are ordered according to field order in .eif file */ +typedef struct EifSectionHeader { + /* + * 0 = invalid, 1 = kernel, 2 = cmdline, 3 = ramdisk, 4 = signature, + * 5 = metadata + */ + uint16_t section_type; + uint16_t flags; + uint64_t section_size; +} QEMU_PACKED EifSectionHeader; + +enum EifSectionTypes { + EIF_SECTION_INVALID = 0, + EIF_SECTION_KERNEL = 1, + EIF_SECTION_CMDLINE = 2, + EIF_SECTION_RAMDISK = 3, + EIF_SECTION_SIGNATURE = 4, + EIF_SECTION_METADATA = 5, + EIF_SECTION_MAX = 6, +}; + +static const char *section_type_to_string(uint16_t type) +{ + const char *str; + switch (type) { + case EIF_SECTION_INVALID: + str = "invalid"; + break; + case EIF_SECTION_KERNEL: + str = "kernel"; + break; + case EIF_SECTION_CMDLINE: + str = "cmdline"; + break; + case EIF_SECTION_RAMDISK: + str = "ramdisk"; + break; + case EIF_SECTION_SIGNATURE: + str = "signature"; + break; + case EIF_SECTION_METADATA: + str = "metadata"; + break; + default: + str = "unknown"; + break; + } + + return str; +} + +static bool read_eif_header(FILE *f, EifHeader *header, uint32_t *crc, + Error **errp) +{ + size_t got; + size_t header_size = sizeof(*header); + + got = fread(header, 1, header_size, f); + if (got != header_size) { + error_setg(errp, "Failed to read EIF header"); + return false; + } + + if (memcmp(header->magic, ".eif", 4) != 0) { + error_setg(errp, "Invalid EIF image. Magic mismatch."); + return false; + } + + /* Exclude header->eif_crc32 field from CRC calculation */ + *crc = crc32(*crc, (uint8_t *)header, header_size - 4); + + header->version = be16_to_cpu(header->version); + header->flags = be16_to_cpu(header->flags); + header->default_memory = be64_to_cpu(header->default_memory); + header->default_cpus = be64_to_cpu(header->default_cpus); + header->reserved = be16_to_cpu(header->reserved); + header->section_cnt = be16_to_cpu(header->section_cnt); + + for (int i = 0; i < MAX_SECTIONS; ++i) { + header->section_offsets[i] = be64_to_cpu(header->section_offsets[i]); + } + + for (int i = 0; i < MAX_SECTIONS; ++i) { + header->section_sizes[i] = be64_to_cpu(header->section_sizes[i]); + } + + header->unused = be32_to_cpu(header->unused); + header->eif_crc32 = be32_to_cpu(header->eif_crc32); + return true; +} + +static bool read_eif_section_header(FILE *f, EifSectionHeader *section_header, + uint32_t *crc, Error **errp) +{ + size_t got; + size_t section_header_size = sizeof(*section_header); + + got = fread(section_header, 1, section_header_size, f); + if (got != section_header_size) { + error_setg(errp, "Failed to read EIF section header"); + return false; + } + + *crc = crc32(*crc, (uint8_t *)section_header, section_header_size); + + section_header->section_type = be16_to_cpu(section_header->section_type); + section_header->flags = be16_to_cpu(section_header->flags); + section_header->section_size = be64_to_cpu(section_header->section_size); + return true; +} + +/* + * Upon success, the caller is responsible for unlinking and freeing *tmp_path. + */ +static bool get_tmp_file(const char *template, char **tmp_path, Error **errp) +{ + int tmp_fd; + + *tmp_path = NULL; + tmp_fd = g_file_open_tmp(template, tmp_path, NULL); + if (tmp_fd < 0 || *tmp_path == NULL) { + error_setg(errp, "Failed to create temporary file for template %s", + template); + return false; + } + + close(tmp_fd); + return true; +} + +static void safe_fclose(FILE *f) +{ + if (f) { + fclose(f); + } +} + +static void safe_unlink(char *f) +{ + if (f) { + unlink(f); + } +} + +/* + * Upon success, the caller is reponsible for unlinking and freeing *kernel_path + */ +static bool read_eif_kernel(FILE *f, uint64_t size, char **kernel_path, + uint8_t *kernel, uint32_t *crc, Error **errp) +{ + size_t got; + FILE *tmp_file = NULL; + + *kernel_path = NULL; + if (!get_tmp_file("eif-kernel-XXXXXX", kernel_path, errp)) { + goto cleanup; + } + + tmp_file = fopen(*kernel_path, "wb"); + if (tmp_file == NULL) { + error_setg_errno(errp, errno, "Failed to open temporary file %s", + *kernel_path); + goto cleanup; + } + + got = fread(kernel, 1, size, f); + if ((uint64_t) got != size) { + error_setg(errp, "Failed to read EIF kernel section data"); + goto cleanup; + } + + got = fwrite(kernel, 1, size, tmp_file); + if ((uint64_t) got != size) { + error_setg(errp, "Failed to write EIF kernel section data to temporary" + " file"); + goto cleanup; + } + + *crc = crc32(*crc, kernel, size); + fclose(tmp_file); + + return true; + + cleanup: + safe_fclose(tmp_file); + + safe_unlink(*kernel_path); + g_free(*kernel_path); + *kernel_path = NULL; + + return false; +} + +static bool read_eif_cmdline(FILE *f, uint64_t size, char *cmdline, + uint32_t *crc, Error **errp) +{ + size_t got = fread(cmdline, 1, size, f); + if ((uint64_t) got != size) { + error_setg(errp, "Failed to read EIF cmdline section data"); + return false; + } + + *crc = crc32(*crc, (uint8_t *)cmdline, size); + return true; +} + +static bool read_eif_ramdisk(FILE *eif, FILE *initrd, uint64_t size, + uint8_t *ramdisk, uint32_t *crc, Error **errp) +{ + size_t got; + + got = fread(ramdisk, 1, size, eif); + if ((uint64_t) got != size) { + error_setg(errp, "Failed to read EIF ramdisk section data"); + return false; + } + + got = fwrite(ramdisk, 1, size, initrd); + if ((uint64_t) got != size) { + error_setg(errp, "Failed to write EIF ramdisk data to temporary file"); + return false; + } + + *crc = crc32(*crc, ramdisk, size); + return true; +} + +static bool get_signature_fingerprint_sha384(FILE *eif, uint64_t size, + uint8_t *sha384, + uint32_t *crc, + Error **errp) +{ + size_t got; + g_autofree uint8_t *sig = NULL; + g_autofree uint8_t *cert = NULL; + cbor_item_t *item = NULL; + cbor_item_t *pcr0 = NULL; + size_t len; + size_t hash_len = QCRYPTO_HASH_DIGEST_LEN_SHA384; + struct cbor_pair *pair; + struct cbor_load_result result; + bool ret = false; + + sig = g_malloc(size); + got = fread(sig, 1, size, eif); + if ((uint64_t) got != size) { + error_setg(errp, "Failed to read EIF signature section data"); + goto cleanup; + } + + *crc = crc32(*crc, sig, size); + + item = cbor_load(sig, size, &result); + if (!item || result.error.code != CBOR_ERR_NONE) { + error_setg(errp, "Failed to load signature section data as CBOR"); + goto cleanup; + } + if (!cbor_isa_array(item) || cbor_array_size(item) < 1) { + error_setg(errp, "Invalid signature CBOR"); + goto cleanup; + } + pcr0 = cbor_array_get(item, 0); + if (!pcr0) { + error_setg(errp, "Failed to get PCR0 signature"); + goto cleanup; + } + if (!cbor_isa_map(pcr0) || cbor_map_size(pcr0) != 2) { + error_setg(errp, "Invalid signature CBOR"); + goto cleanup; + } + pair = cbor_map_handle(pcr0); + if (!cbor_isa_string(pair->key) || cbor_string_length(pair->key) != 19 || + memcmp(cbor_string_handle(pair->key), "signing_certificate", 19) != 0) { + error_setg(errp, "Invalid signautre CBOR"); + goto cleanup; + } + if (!cbor_isa_array(pair->value)) { + error_setg(errp, "Invalid signature CBOR"); + goto cleanup; + } + len = cbor_array_size(pair->value); + if (len == 0) { + error_setg(errp, "Invalid signature CBOR"); + goto cleanup; + } + cert = g_malloc(len); + for (int i = 0; i < len; ++i) { + cbor_item_t *tmp = cbor_array_get(pair->value, i); + if (!tmp) { + error_setg(errp, "Invalid signature CBOR"); + goto cleanup; + } + if (!cbor_isa_uint(tmp) || cbor_int_get_width(tmp) != CBOR_INT_8) { + cbor_decref(&tmp); + error_setg(errp, "Invalid signature CBOR"); + goto cleanup; + } + cert[i] = cbor_get_uint8(tmp); + cbor_decref(&tmp); + } + + if (qcrypto_get_x509_cert_fingerprint(cert, len, QCRYPTO_HASH_ALGO_SHA384, + sha384, &hash_len, errp)) { + goto cleanup; + } + + ret = true; + + cleanup: + if (pcr0) { + cbor_decref(&pcr0); + } + if (item) { + cbor_decref(&item); + } + return ret; +} + +/* Expects file to have offset 0 before this function is called */ +static long get_file_size(FILE *f, Error **errp) +{ + long size; + + if (fseek(f, 0, SEEK_END) != 0) { + error_setg_errno(errp, errno, "Failed to seek to the end of file"); + return -1; + } + + size = ftell(f); + if (size == -1) { + error_setg_errno(errp, errno, "Failed to get offset"); + return -1; + } + + if (fseek(f, 0, SEEK_SET) != 0) { + error_setg_errno(errp, errno, "Failed to seek back to the start"); + return -1; + } + + return size; +} + +static bool get_SHA384_digest(GList *list, uint8_t *digest, Error **errp) +{ + size_t digest_len = QCRYPTO_HASH_DIGEST_LEN_SHA384; + size_t list_len = g_list_length(list); + struct iovec *iovec_list = g_new0(struct iovec, list_len); + bool ret = true; + GList *l; + int i; + + for (i = 0, l = list; l != NULL; l = l->next, i++) { + iovec_list[i] = *(struct iovec *) l->data; + } + + if (qcrypto_hash_bytesv(QCRYPTO_HASH_ALGO_SHA384, iovec_list, list_len, + &digest, &digest_len, errp) < 0) { + ret = false; + } + + g_free(iovec_list); + return ret; +} + +static void free_iovec(struct iovec *iov) +{ + if (iov) { + g_free(iov->iov_base); + g_free(iov); + } +} + +/* + * Upon success, the caller is reponsible for unlinking and freeing + * *kernel_path, *initrd_path and freeing *cmdline. + */ +bool read_eif_file(const char *eif_path, const char *machine_initrd, + char **kernel_path, char **initrd_path, char **cmdline, + uint8_t *image_sha384, uint8_t *bootstrap_sha384, + uint8_t *app_sha384, uint8_t *fingerprint_sha384, + bool *signature_found, Error **errp) +{ + FILE *f = NULL; + FILE *machine_initrd_f = NULL; + FILE *initrd_path_f = NULL; + long machine_initrd_size; + uint32_t crc = 0; + EifHeader eif_header; + bool seen_sections[EIF_SECTION_MAX] = {false}; + /* kernel + ramdisks + cmdline sha384 hash */ + GList *iov_PCR0 = NULL; + /* kernel + boot ramdisk + cmdline sha384 hash */ + GList *iov_PCR1 = NULL; + /* application ramdisk(s) hash */ + GList *iov_PCR2 = NULL; + uint8_t *ptr = NULL; + struct iovec *iov_ptr = NULL; + + *signature_found = false; + *kernel_path = *initrd_path = *cmdline = NULL; + + f = fopen(eif_path, "rb"); + if (f == NULL) { + error_setg_errno(errp, errno, "Failed to open %s", eif_path); + goto cleanup; + } + + if (!read_eif_header(f, &eif_header, &crc, errp)) { + goto cleanup; + } + + if (eif_header.version < 4) { + error_setg(errp, "Expected EIF version 4 or greater"); + goto cleanup; + } + + if (eif_header.flags != 0) { + error_setg(errp, "Expected EIF flags to be 0"); + goto cleanup; + } + + if (eif_header.section_cnt > MAX_SECTIONS) { + error_setg(errp, "EIF header section count must not be greater than " + "%d but found %d", MAX_SECTIONS, eif_header.section_cnt); + goto cleanup; + } + + for (int i = 0; i < eif_header.section_cnt; ++i) { + EifSectionHeader hdr; + uint16_t section_type; + + if (fseek(f, eif_header.section_offsets[i], SEEK_SET) != 0) { + error_setg_errno(errp, errno, "Failed to offset to %" PRIu64 " in EIF file", + eif_header.section_offsets[i]); + goto cleanup; + } + + if (!read_eif_section_header(f, &hdr, &crc, errp)) { + goto cleanup; + } + + if (hdr.flags != 0) { + error_setg(errp, "Expected EIF section header flags to be 0"); + goto cleanup; + } + + if (eif_header.section_sizes[i] != hdr.section_size) { + error_setg(errp, "EIF section size mismatch between header and " + "section header: header %" PRIu64 ", section header %" PRIu64, + eif_header.section_sizes[i], + hdr.section_size); + goto cleanup; + } + + section_type = hdr.section_type; + + switch (section_type) { + case EIF_SECTION_KERNEL: + if (seen_sections[EIF_SECTION_KERNEL]) { + error_setg(errp, "Invalid EIF image. More than 1 kernel " + "section"); + goto cleanup; + } + + ptr = g_malloc(hdr.section_size); + + iov_ptr = g_malloc(sizeof(struct iovec)); + iov_ptr->iov_base = ptr; + iov_ptr->iov_len = hdr.section_size; + + iov_PCR0 = g_list_append(iov_PCR0, iov_ptr); + iov_PCR1 = g_list_append(iov_PCR1, iov_ptr); + + if (!read_eif_kernel(f, hdr.section_size, kernel_path, ptr, &crc, + errp)) { + goto cleanup; + } + + break; + case EIF_SECTION_CMDLINE: + { + uint64_t size; + uint8_t *cmdline_copy; + if (seen_sections[EIF_SECTION_CMDLINE]) { + error_setg(errp, "Invalid EIF image. More than 1 cmdline " + "section"); + goto cleanup; + } + size = hdr.section_size; + *cmdline = g_malloc(size + 1); + if (!read_eif_cmdline(f, size, *cmdline, &crc, errp)) { + goto cleanup; + } + (*cmdline)[size] = '\0'; + + /* + * We make a copy of '*cmdline' for putting it in iovecs so that + * we can easily free all the iovec entries later as we cannot + * free '*cmdline' which is used by the caller. + */ + cmdline_copy = g_memdup2(*cmdline, size); + + iov_ptr = g_malloc(sizeof(struct iovec)); + iov_ptr->iov_base = cmdline_copy; + iov_ptr->iov_len = size; + + iov_PCR0 = g_list_append(iov_PCR0, iov_ptr); + iov_PCR1 = g_list_append(iov_PCR1, iov_ptr); + break; + } + case EIF_SECTION_RAMDISK: + { + if (!seen_sections[EIF_SECTION_RAMDISK]) { + /* + * If this is the first time we are seeing a ramdisk section, + * we need to create the initrd temporary file. + */ + if (!get_tmp_file("eif-initrd-XXXXXX", initrd_path, errp)) { + goto cleanup; + } + initrd_path_f = fopen(*initrd_path, "wb"); + if (initrd_path_f == NULL) { + error_setg_errno(errp, errno, "Failed to open file %s", + *initrd_path); + goto cleanup; + } + } + + ptr = g_malloc(hdr.section_size); + + iov_ptr = g_malloc(sizeof(struct iovec)); + iov_ptr->iov_base = ptr; + iov_ptr->iov_len = hdr.section_size; + + iov_PCR0 = g_list_append(iov_PCR0, iov_ptr); + /* + * If it's the first ramdisk, we need to hash it into bootstrap + * i.e., iov_PCR1, otherwise we need to hash it into app i.e., + * iov_PCR2. + */ + if (!seen_sections[EIF_SECTION_RAMDISK]) { + iov_PCR1 = g_list_append(iov_PCR1, iov_ptr); + } else { + iov_PCR2 = g_list_append(iov_PCR2, iov_ptr); + } + + if (!read_eif_ramdisk(f, initrd_path_f, hdr.section_size, ptr, + &crc, errp)) { + goto cleanup; + } + + break; + } + case EIF_SECTION_SIGNATURE: + *signature_found = true; + if (!get_signature_fingerprint_sha384(f, hdr.section_size, + fingerprint_sha384, &crc, + errp)) { + goto cleanup; + } + break; + default: + /* other sections including invalid or unknown sections */ + { + uint8_t *buf; + size_t got; + uint64_t size = hdr.section_size; + buf = g_malloc(size); + got = fread(buf, 1, size, f); + if ((uint64_t) got != size) { + g_free(buf); + error_setg(errp, "Failed to read EIF %s section data", + section_type_to_string(section_type)); + goto cleanup; + } + crc = crc32(crc, buf, size); + g_free(buf); + break; + } + } + + if (section_type < EIF_SECTION_MAX) { + seen_sections[section_type] = true; + } + } + + if (!seen_sections[EIF_SECTION_KERNEL]) { + error_setg(errp, "Invalid EIF image. No kernel section."); + goto cleanup; + } + if (!seen_sections[EIF_SECTION_CMDLINE]) { + error_setg(errp, "Invalid EIF image. No cmdline section."); + goto cleanup; + } + if (!seen_sections[EIF_SECTION_RAMDISK]) { + error_setg(errp, "Invalid EIF image. No ramdisk section."); + goto cleanup; + } + + if (eif_header.eif_crc32 != crc) { + error_setg(errp, "CRC mismatch. Expected %u but header has %u.", + crc, eif_header.eif_crc32); + goto cleanup; + } + + /* + * Let's append the initrd file from "-initrd" option if any. Although + * we pass the crc pointer to read_eif_ramdisk, it is not useful anymore. + * We have already done the crc mismatch check above this code. + */ + if (machine_initrd) { + machine_initrd_f = fopen(machine_initrd, "rb"); + if (machine_initrd_f == NULL) { + error_setg_errno(errp, errno, "Failed to open initrd file %s", + machine_initrd); + goto cleanup; + } + + machine_initrd_size = get_file_size(machine_initrd_f, errp); + if (machine_initrd_size == -1) { + goto cleanup; + } + + ptr = g_malloc(machine_initrd_size); + + iov_ptr = g_malloc(sizeof(struct iovec)); + iov_ptr->iov_base = ptr; + iov_ptr->iov_len = machine_initrd_size; + + iov_PCR0 = g_list_append(iov_PCR0, iov_ptr); + iov_PCR2 = g_list_append(iov_PCR2, iov_ptr); + + if (!read_eif_ramdisk(machine_initrd_f, initrd_path_f, + machine_initrd_size, ptr, &crc, errp)) { + goto cleanup; + } + } + + if (!get_SHA384_digest(iov_PCR0, image_sha384, errp)) { + goto cleanup; + } + if (!get_SHA384_digest(iov_PCR1, bootstrap_sha384, errp)) { + goto cleanup; + } + if (!get_SHA384_digest(iov_PCR2, app_sha384, errp)) { + goto cleanup; + } + + /* + * We only need to free iov_PCR0 entries because iov_PCR1 and + * iov_PCR2 iovec entries are subsets of iov_PCR0 iovec entries. + */ + g_list_free_full(iov_PCR0, (GDestroyNotify) free_iovec); + g_list_free(iov_PCR1); + g_list_free(iov_PCR2); + fclose(f); + fclose(initrd_path_f); + safe_fclose(machine_initrd_f); + return true; + + cleanup: + g_list_free_full(iov_PCR0, (GDestroyNotify) free_iovec); + g_list_free(iov_PCR1); + g_list_free(iov_PCR2); + + safe_fclose(f); + safe_fclose(initrd_path_f); + safe_fclose(machine_initrd_f); + + safe_unlink(*kernel_path); + g_free(*kernel_path); + *kernel_path = NULL; + + safe_unlink(*initrd_path); + g_free(*initrd_path); + *initrd_path = NULL; + + g_free(*cmdline); + *cmdline = NULL; + + return false; +} diff --git a/hw/core/eif.h b/hw/core/eif.h new file mode 100644 index 0000000000..fed3cb5514 --- /dev/null +++ b/hw/core/eif.h @@ -0,0 +1,22 @@ +/* + * EIF (Enclave Image Format) related helpers + * + * Copyright (c) 2024 Dorjoy Chowdhury + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#ifndef HW_CORE_EIF_H +#define HW_CORE_EIF_H + +bool read_eif_file(const char *eif_path, const char *machine_initrd, + char **kernel_path, char **initrd_path, + char **kernel_cmdline, uint8_t *image_sha384, + uint8_t *bootstrap_sha384, uint8_t *app_sha384, + uint8_t *fingerprint_sha384, bool *signature_found, + Error **errp); + +#endif + diff --git a/hw/core/meson.build b/hw/core/meson.build index a3d9bab9f4..9fd0b5aaa5 100644 --- a/hw/core/meson.build +++ b/hw/core/meson.build @@ -24,6 +24,7 @@ system_ss.add(when: 'CONFIG_REGISTER', if_true: files('register.c')) system_ss.add(when: 'CONFIG_SPLIT_IRQ', if_true: files('split-irq.c')) system_ss.add(when: 'CONFIG_XILINX_AXI', if_true: files('stream.c')) system_ss.add(when: 'CONFIG_PLATFORM_BUS', if_true: files('sysbus-fdt.c')) +system_ss.add(when: 'CONFIG_EIF', if_true: [files('eif.c'), zlib, libcbor, gnutls]) system_ss.add(files( 'cpu-sysemu.c', diff --git a/meson.build b/meson.build index 2739cc4964..5bb519ea15 100644 --- a/meson.build +++ b/meson.build @@ -3137,6 +3137,7 @@ host_kconfig = \ (have_ivshmem ? ['CONFIG_IVSHMEM=y'] : []) + \ (opengl.found() ? ['CONFIG_OPENGL=y'] : []) + \ (libcbor.found() ? ['CONFIG_LIBCBOR=y'] : []) + \ + (gnutls.found() ? ['CONFIG_GNUTLS=y'] : []) + \ (x11.found() ? ['CONFIG_X11=y'] : []) + \ (fdt.found() ? ['CONFIG_FDT=y'] : []) + \ (have_vhost_user ? ['CONFIG_VHOST_USER=y'] : []) + \ From 1a9867498dddcdfc5021f6ef453d75c347455e32 Mon Sep 17 00:00:00 2001 From: Dorjoy Chowdhury Date: Wed, 9 Oct 2024 03:17:25 +0600 Subject: [PATCH 24/49] core/machine: Make create_default_memdev machine a virtual method This is in preparation for the next commit where the nitro-enclave machine type will need to instead use a memfd backend, for the built-in vhost-user-vsock device to work. Signed-off-by: Dorjoy Chowdhury Reviewed-by: Alexander Graf Link: https://lore.kernel.org/r/20241008211727.49088-5-dorjoychy111@gmail.com Signed-off-by: Paolo Bonzini --- backends/hostmem-memfd.c | 2 -- hw/core/machine.c | 71 +++++++++++++++++++++------------------- include/hw/boards.h | 2 ++ include/sysemu/hostmem.h | 2 ++ 4 files changed, 42 insertions(+), 35 deletions(-) diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c index 6a3c89a12b..9f890a813e 100644 --- a/backends/hostmem-memfd.c +++ b/backends/hostmem-memfd.c @@ -18,8 +18,6 @@ #include "qapi/error.h" #include "qom/object.h" -#define TYPE_MEMORY_BACKEND_MEMFD "memory-backend-memfd" - OBJECT_DECLARE_SIMPLE_TYPE(HostMemoryBackendMemfd, MEMORY_BACKEND_MEMFD) diff --git a/hw/core/machine.c b/hw/core/machine.c index adaba17eba..222799bc46 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -1001,6 +1001,39 @@ void machine_add_audiodev_property(MachineClass *mc) "Audiodev to use for default machine devices"); } +static bool create_default_memdev(MachineState *ms, const char *path, + Error **errp) +{ + Object *obj; + MachineClass *mc = MACHINE_GET_CLASS(ms); + bool r = false; + + obj = object_new(path ? TYPE_MEMORY_BACKEND_FILE : TYPE_MEMORY_BACKEND_RAM); + if (path) { + if (!object_property_set_str(obj, "mem-path", path, errp)) { + goto out; + } + } + if (!object_property_set_int(obj, "size", ms->ram_size, errp)) { + goto out; + } + object_property_add_child(object_get_objects_root(), mc->default_ram_id, + obj); + /* Ensure backend's memory region name is equal to mc->default_ram_id */ + if (!object_property_set_bool(obj, "x-use-canonical-path-for-ramblock-id", + false, errp)) { + goto out; + } + if (!user_creatable_complete(USER_CREATABLE(obj), errp)) { + goto out; + } + r = object_property_set_link(OBJECT(ms), "memory-backend", obj, errp); + +out: + object_unref(obj); + return r; +} + static void machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -1020,6 +1053,8 @@ static void machine_class_init(ObjectClass *oc, void *data) */ mc->numa_mem_align_shift = 23; + mc->create_default_memdev = create_default_memdev; + object_class_property_add_str(oc, "kernel", machine_get_kernel, machine_set_kernel); object_class_property_set_description(oc, "kernel", @@ -1413,38 +1448,6 @@ MemoryRegion *machine_consume_memdev(MachineState *machine, return ret; } -static bool create_default_memdev(MachineState *ms, const char *path, Error **errp) -{ - Object *obj; - MachineClass *mc = MACHINE_GET_CLASS(ms); - bool r = false; - - obj = object_new(path ? TYPE_MEMORY_BACKEND_FILE : TYPE_MEMORY_BACKEND_RAM); - if (path) { - if (!object_property_set_str(obj, "mem-path", path, errp)) { - goto out; - } - } - if (!object_property_set_int(obj, "size", ms->ram_size, errp)) { - goto out; - } - object_property_add_child(object_get_objects_root(), mc->default_ram_id, - obj); - /* Ensure backend's memory region name is equal to mc->default_ram_id */ - if (!object_property_set_bool(obj, "x-use-canonical-path-for-ramblock-id", - false, errp)) { - goto out; - } - if (!user_creatable_complete(USER_CREATABLE(obj), errp)) { - goto out; - } - r = object_property_set_link(OBJECT(ms), "memory-backend", obj, errp); - -out: - object_unref(obj); - return r; -} - const char *machine_class_default_cpu_type(MachineClass *mc) { if (mc->valid_cpu_types && !mc->valid_cpu_types[1]) { @@ -1548,7 +1551,9 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error * machine_class->default_ram_id); return; } - if (!create_default_memdev(current_machine, mem_path, errp)) { + + if (!machine_class->create_default_memdev(current_machine, mem_path, + errp)) { return; } } diff --git a/include/hw/boards.h b/include/hw/boards.h index 5966069baa..91f2edd392 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -314,6 +314,8 @@ struct MachineClass { int64_t (*get_default_cpu_node_id)(const MachineState *ms, int idx); ram_addr_t (*fixup_ram_size)(ram_addr_t size); uint64_t smbios_memory_device_size; + bool (*create_default_memdev)(MachineState *ms, const char *path, + Error **errp); }; /** diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h index de47ae59e4..67f45abe39 100644 --- a/include/sysemu/hostmem.h +++ b/include/sysemu/hostmem.h @@ -39,6 +39,8 @@ OBJECT_DECLARE_TYPE(HostMemoryBackend, HostMemoryBackendClass, */ #define TYPE_MEMORY_BACKEND_FILE "memory-backend-file" +#define TYPE_MEMORY_BACKEND_MEMFD "memory-backend-memfd" + /** * HostMemoryBackendClass: From f1826463d2e82115ebf2b2aa8e041c85cceaec5e Mon Sep 17 00:00:00 2001 From: Dorjoy Chowdhury Date: Wed, 9 Oct 2024 03:17:26 +0600 Subject: [PATCH 25/49] machine/nitro-enclave: New machine type for AWS Nitro Enclaves AWS nitro enclaves[1] is an Amazon EC2[2] feature that allows creating isolated execution environments, called enclaves, from Amazon EC2 instances which are used for processing highly sensitive data. Enclaves have no persistent storage and no external networking. The enclave VMs are based on the Firecracker microvm with a vhost-vsock device for communication with the parent EC2 instance that spawned it and a Nitro Secure Module (NSM) device for cryptographic attestation. The parent instance VM always has CID 3 while the enclave VM gets a dynamic CID. An EIF (Enclave Image Format)[3] file is used to boot an AWS nitro enclave virtual machine. This commit adds support for AWS nitro enclave emulation using a new machine type option '-M nitro-enclave'. This new machine type is based on the 'microvm' machine type, similar to how real nitro enclave VMs are based on Firecracker microvm. For nitro-enclave to boot from an EIF file, the kernel and ramdisk(s) are extracted into a temporary kernel and a temporary initrd file which are then hooked into the regular x86 boot mechanism along with the extracted cmdline. The EIF file path should be provided using the '-kernel' QEMU option. In QEMU, the vsock emulation for nitro enclave is added using vhost-user- vsock as opposed to vhost-vsock. vhost-vsock doesn't support sibling VM communication which is needed for nitro enclaves. So for the vsock communication to CID 3 to work, another process that does the vsock emulation in userspace must be run, for example, vhost-device-vsock[4] from rust-vmm, with necessary vsock communication support in another guest VM with CID 3. Using vhost-user-vsock also enables the possibility to implement some proxying support in the vhost-user-vsock daemon that will forward all the packets to the host machine instead of CID 3 so that users of nitro-enclave can run the necessary applications in their host machine instead of running another whole VM with CID 3. The following mandatory nitro-enclave machine option has been added related to the vhost-user-vsock device. - 'vsock': The chardev id from the '-chardev' option for the vhost-user-vsock device. AWS Nitro Enclaves have built-in Nitro Secure Module (NSM) device which has been added using the virtio-nsm device added in a previous commit. In Nitro Enclaves, all the PCRs start in a known zero state and the first 16 PCRs are locked from boot and reserved. The PCR0, PCR1, PCR2 and PCR8 contain the SHA384 hashes related to the EIF file used to boot the VM for validation. The following optional nitro-enclave machine options have been added related to the NSM device. - 'id': Enclave identifier, reflected in the module-id of the NSM device. If not provided, a default id will be set. - 'parent-role': Parent instance IAM role ARN, reflected in PCR3 of the NSM device. - 'parent-id': Parent instance identifier, reflected in PCR4 of the NSM device. [1] https://docs.aws.amazon.com/enclaves/latest/user/nitro-enclave.html [2] https://aws.amazon.com/ec2/ [3] https://github.com/aws/aws-nitro-enclaves-image-format [4] https://github.com/rust-vmm/vhost-device/tree/main/vhost-device-vsock Signed-off-by: Dorjoy Chowdhury Reviewed-by: Alexander Graf Link: https://lore.kernel.org/r/20241008211727.49088-6-dorjoychy111@gmail.com Signed-off-by: Paolo Bonzini --- MAINTAINERS | 2 + configs/devices/i386-softmmu/default.mak | 1 + hw/i386/Kconfig | 10 + hw/i386/meson.build | 1 + hw/i386/microvm.c | 6 +- hw/i386/nitro_enclave.c | 354 +++++++++++++++++++++++ include/hw/i386/microvm.h | 2 + include/hw/i386/nitro_enclave.h | 62 ++++ 8 files changed, 437 insertions(+), 1 deletion(-) create mode 100644 hw/i386/nitro_enclave.c create mode 100644 include/hw/i386/nitro_enclave.h diff --git a/MAINTAINERS b/MAINTAINERS index 19b7d54a19..5d3501ac28 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1840,6 +1840,8 @@ M: Dorjoy Chowdhury S: Maintained F: hw/core/eif.c F: hw/core/eif.h +F: hw/i386/nitro_enclave.c +F: include/hw/i386/nitro_enclave.h Machine core M: Eduardo Habkost diff --git a/configs/devices/i386-softmmu/default.mak b/configs/devices/i386-softmmu/default.mak index 448e3e3b1b..4faf2f0315 100644 --- a/configs/devices/i386-softmmu/default.mak +++ b/configs/devices/i386-softmmu/default.mak @@ -29,3 +29,4 @@ # CONFIG_I440FX=n # CONFIG_Q35=n # CONFIG_MICROVM=n +# CONFIG_NITRO_ENCLAVE=n diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index f4a33b6c08..32818480d2 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -129,6 +129,16 @@ config MICROVM select USB_XHCI_SYSBUS select I8254 +config NITRO_ENCLAVE + default y + depends on I386 && FDT # for MICROVM + depends on LIBCBOR && GNUTLS # for EIF and VIRTIO_NSM + depends on VHOST_USER # for VHOST_USER_VSOCK + select EIF + select MICROVM + select VHOST_USER_VSOCK + select VIRTIO_NSM + config X86_IOMMU bool depends on PC diff --git a/hw/i386/meson.build b/hw/i386/meson.build index 03aad10df7..10bdfde27c 100644 --- a/hw/i386/meson.build +++ b/hw/i386/meson.build @@ -15,6 +15,7 @@ i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'), if_false: files('amd_iommu-stub.c')) i386_ss.add(when: 'CONFIG_I440FX', if_true: files('pc_piix.c')) i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('x86-common.c', 'microvm.c', 'acpi-microvm.c', 'microvm-dt.c')) +i386_ss.add(when: 'CONFIG_NITRO_ENCLAVE', if_true: files('nitro_enclave.c')) i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c')) i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c')) i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c')) diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index 693099f225..86637afa0f 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -283,6 +283,7 @@ static void microvm_devices_init(MicrovmMachineState *mms) static void microvm_memory_init(MicrovmMachineState *mms) { + MicrovmMachineClass *mmc = MICROVM_MACHINE_GET_CLASS(mms); MachineState *machine = MACHINE(mms); X86MachineState *x86ms = X86_MACHINE(mms); MemoryRegion *ram_below_4g, *ram_above_4g; @@ -328,7 +329,7 @@ static void microvm_memory_init(MicrovmMachineState *mms) rom_set_fw(fw_cfg); if (machine->kernel_filename != NULL) { - x86_load_linux(x86ms, fw_cfg, 0, true); + mmc->x86_load_linux(x86ms, fw_cfg, 0, true); } if (mms->option_roms) { @@ -637,9 +638,12 @@ GlobalProperty microvm_properties[] = { static void microvm_class_init(ObjectClass *oc, void *data) { X86MachineClass *x86mc = X86_MACHINE_CLASS(oc); + MicrovmMachineClass *mmc = MICROVM_MACHINE_CLASS(oc); MachineClass *mc = MACHINE_CLASS(oc); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); + mmc->x86_load_linux = x86_load_linux; + mc->init = microvm_machine_state_init; mc->family = "microvm_i386"; diff --git a/hw/i386/nitro_enclave.c b/hw/i386/nitro_enclave.c new file mode 100644 index 0000000000..b6263ae127 --- /dev/null +++ b/hw/i386/nitro_enclave.c @@ -0,0 +1,354 @@ +/* + * AWS nitro-enclave machine + * + * Copyright (c) 2024 Dorjoy Chowdhury + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qom/object_interfaces.h" + +#include "chardev/char.h" +#include "hw/sysbus.h" +#include "hw/core/eif.h" +#include "hw/i386/x86.h" +#include "hw/i386/microvm.h" +#include "hw/i386/nitro_enclave.h" +#include "hw/virtio/virtio-mmio.h" +#include "hw/virtio/virtio-nsm.h" +#include "hw/virtio/vhost-user-vsock.h" +#include "sysemu/hostmem.h" + +static BusState *find_free_virtio_mmio_bus(void) +{ + BusChild *kid; + BusState *bus = sysbus_get_default(); + + QTAILQ_FOREACH(kid, &bus->children, sibling) { + DeviceState *dev = kid->child; + if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MMIO)) { + VirtIOMMIOProxy *mmio = VIRTIO_MMIO(OBJECT(dev)); + VirtioBusState *mmio_virtio_bus = &mmio->bus; + BusState *mmio_bus = &mmio_virtio_bus->parent_obj; + if (QTAILQ_EMPTY(&mmio_bus->children)) { + return mmio_bus; + } + } + } + + return NULL; +} + +static void vhost_user_vsock_init(NitroEnclaveMachineState *nems) +{ + DeviceState *dev = qdev_new(TYPE_VHOST_USER_VSOCK); + VHostUserVSock *vsock = VHOST_USER_VSOCK(dev); + BusState *bus; + + if (!nems->vsock) { + error_report("A valid chardev id for vhost-user-vsock device must be " + "provided using the 'vsock' machine option"); + exit(1); + } + + bus = find_free_virtio_mmio_bus(); + if (!bus) { + error_report("Failed to find bus for vhost-user-vsock device"); + exit(1); + } + + Chardev *chardev = qemu_chr_find(nems->vsock); + if (!chardev) { + error_report("Failed to find chardev with id %s", nems->vsock); + exit(1); + } + + vsock->conf.chardev.chr = chardev; + + qdev_realize_and_unref(dev, bus, &error_fatal); +} + +static void virtio_nsm_init(NitroEnclaveMachineState *nems) +{ + DeviceState *dev = qdev_new(TYPE_VIRTIO_NSM); + VirtIONSM *vnsm = VIRTIO_NSM(dev); + BusState *bus = find_free_virtio_mmio_bus(); + + if (!bus) { + error_report("Failed to find bus for virtio-nsm device."); + exit(1); + } + + qdev_prop_set_string(dev, "module-id", nems->id); + + qdev_realize_and_unref(dev, bus, &error_fatal); + nems->vnsm = vnsm; +} + +static void nitro_enclave_devices_init(NitroEnclaveMachineState *nems) +{ + vhost_user_vsock_init(nems); + virtio_nsm_init(nems); +} + +static void nitro_enclave_machine_state_init(MachineState *machine) +{ + NitroEnclaveMachineClass *ne_class = + NITRO_ENCLAVE_MACHINE_GET_CLASS(machine); + NitroEnclaveMachineState *ne_state = NITRO_ENCLAVE_MACHINE(machine); + + ne_class->parent_init(machine); + nitro_enclave_devices_init(ne_state); +} + +static void nitro_enclave_machine_reset(MachineState *machine, ResetType type) +{ + NitroEnclaveMachineClass *ne_class = + NITRO_ENCLAVE_MACHINE_GET_CLASS(machine); + NitroEnclaveMachineState *ne_state = NITRO_ENCLAVE_MACHINE(machine); + + ne_class->parent_reset(machine, type); + + memset(ne_state->vnsm->pcrs, 0, sizeof(ne_state->vnsm->pcrs)); + + /* PCR0 */ + ne_state->vnsm->extend_pcr(ne_state->vnsm, 0, ne_state->image_sha384, + QCRYPTO_HASH_DIGEST_LEN_SHA384); + /* PCR1 */ + ne_state->vnsm->extend_pcr(ne_state->vnsm, 1, ne_state->bootstrap_sha384, + QCRYPTO_HASH_DIGEST_LEN_SHA384); + /* PCR2 */ + ne_state->vnsm->extend_pcr(ne_state->vnsm, 2, ne_state->app_sha384, + QCRYPTO_HASH_DIGEST_LEN_SHA384); + /* PCR3 */ + if (ne_state->parent_role) { + ne_state->vnsm->extend_pcr(ne_state->vnsm, 3, + (uint8_t *) ne_state->parent_role, + strlen(ne_state->parent_role)); + } + /* PCR4 */ + if (ne_state->parent_id) { + ne_state->vnsm->extend_pcr(ne_state->vnsm, 4, + (uint8_t *) ne_state->parent_id, + strlen(ne_state->parent_id)); + } + /* PCR8 */ + if (ne_state->signature_found) { + ne_state->vnsm->extend_pcr(ne_state->vnsm, 8, + ne_state->fingerprint_sha384, + QCRYPTO_HASH_DIGEST_LEN_SHA384); + } + + /* First 16 PCRs are locked from boot and reserved for nitro enclave */ + for (int i = 0; i < 16; ++i) { + ne_state->vnsm->lock_pcr(ne_state->vnsm, i); + } +} + +static void nitro_enclave_machine_initfn(Object *obj) +{ + MicrovmMachineState *mms = MICROVM_MACHINE(obj); + X86MachineState *x86ms = X86_MACHINE(obj); + NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj); + + nems->id = g_strdup("i-234-enc5678"); + + /* AWS nitro enclaves have PCIE and ACPI disabled */ + mms->pcie = ON_OFF_AUTO_OFF; + x86ms->acpi = ON_OFF_AUTO_OFF; +} + +static void x86_load_eif(X86MachineState *x86ms, FWCfgState *fw_cfg, + int acpi_data_size, bool pvh_enabled) +{ + Error *err = NULL; + char *eif_kernel, *eif_initrd, *eif_cmdline; + MachineState *machine = MACHINE(x86ms); + NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(x86ms); + + if (!read_eif_file(machine->kernel_filename, machine->initrd_filename, + &eif_kernel, &eif_initrd, &eif_cmdline, + nems->image_sha384, nems->bootstrap_sha384, + nems->app_sha384, nems->fingerprint_sha384, + &(nems->signature_found), &err)) { + error_report_err(err); + exit(1); + } + + g_free(machine->kernel_filename); + machine->kernel_filename = eif_kernel; + g_free(machine->initrd_filename); + machine->initrd_filename = eif_initrd; + + /* + * If kernel cmdline argument was provided, let's concatenate it to the + * extracted EIF kernel cmdline. + */ + if (machine->kernel_cmdline != NULL) { + char *cmd = g_strdup_printf("%s %s", eif_cmdline, + machine->kernel_cmdline); + g_free(eif_cmdline); + g_free(machine->kernel_cmdline); + machine->kernel_cmdline = cmd; + } else { + machine->kernel_cmdline = eif_cmdline; + } + + x86_load_linux(x86ms, fw_cfg, 0, true); + + unlink(machine->kernel_filename); + unlink(machine->initrd_filename); + return; +} + +static bool create_memfd_backend(MachineState *ms, const char *path, + Error **errp) +{ + Object *obj; + MachineClass *mc = MACHINE_GET_CLASS(ms); + bool r = false; + + obj = object_new(TYPE_MEMORY_BACKEND_MEMFD); + if (!object_property_set_int(obj, "size", ms->ram_size, errp)) { + goto out; + } + object_property_add_child(object_get_objects_root(), mc->default_ram_id, + obj); + + if (!user_creatable_complete(USER_CREATABLE(obj), errp)) { + goto out; + } + r = object_property_set_link(OBJECT(ms), "memory-backend", obj, errp); + +out: + object_unref(obj); + return r; +} + +static char *nitro_enclave_get_vsock_chardev_id(Object *obj, Error **errp) +{ + NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj); + + return g_strdup(nems->vsock); +} + +static void nitro_enclave_set_vsock_chardev_id(Object *obj, const char *value, + Error **errp) +{ + NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj); + + g_free(nems->vsock); + nems->vsock = g_strdup(value); +} + +static char *nitro_enclave_get_id(Object *obj, Error **errp) +{ + NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj); + + return g_strdup(nems->id); +} + +static void nitro_enclave_set_id(Object *obj, const char *value, + Error **errp) +{ + NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj); + + g_free(nems->id); + nems->id = g_strdup(value); +} + +static char *nitro_enclave_get_parent_role(Object *obj, Error **errp) +{ + NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj); + + return g_strdup(nems->parent_role); +} + +static void nitro_enclave_set_parent_role(Object *obj, const char *value, + Error **errp) +{ + NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj); + + g_free(nems->parent_role); + nems->parent_role = g_strdup(value); +} + +static char *nitro_enclave_get_parent_id(Object *obj, Error **errp) +{ + NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj); + + return g_strdup(nems->parent_id); +} + +static void nitro_enclave_set_parent_id(Object *obj, const char *value, + Error **errp) +{ + NitroEnclaveMachineState *nems = NITRO_ENCLAVE_MACHINE(obj); + + g_free(nems->parent_id); + nems->parent_id = g_strdup(value); +} + +static void nitro_enclave_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + MicrovmMachineClass *mmc = MICROVM_MACHINE_CLASS(oc); + NitroEnclaveMachineClass *nemc = NITRO_ENCLAVE_MACHINE_CLASS(oc); + + mmc->x86_load_linux = x86_load_eif; + + mc->family = "nitro_enclave_i386"; + mc->desc = "AWS Nitro Enclave"; + + nemc->parent_init = mc->init; + mc->init = nitro_enclave_machine_state_init; + + nemc->parent_reset = mc->reset; + mc->reset = nitro_enclave_machine_reset; + + mc->create_default_memdev = create_memfd_backend; + + object_class_property_add_str(oc, NITRO_ENCLAVE_VSOCK_CHARDEV_ID, + nitro_enclave_get_vsock_chardev_id, + nitro_enclave_set_vsock_chardev_id); + object_class_property_set_description(oc, NITRO_ENCLAVE_VSOCK_CHARDEV_ID, + "Set chardev id for vhost-user-vsock " + "device"); + + object_class_property_add_str(oc, NITRO_ENCLAVE_ID, nitro_enclave_get_id, + nitro_enclave_set_id); + object_class_property_set_description(oc, NITRO_ENCLAVE_ID, + "Set enclave identifier"); + + object_class_property_add_str(oc, NITRO_ENCLAVE_PARENT_ROLE, + nitro_enclave_get_parent_role, + nitro_enclave_set_parent_role); + object_class_property_set_description(oc, NITRO_ENCLAVE_PARENT_ROLE, + "Set parent instance IAM role ARN"); + + object_class_property_add_str(oc, NITRO_ENCLAVE_PARENT_ID, + nitro_enclave_get_parent_id, + nitro_enclave_set_parent_id); + object_class_property_set_description(oc, NITRO_ENCLAVE_PARENT_ID, + "Set parent instance identifier"); +} + +static const TypeInfo nitro_enclave_machine_info = { + .name = TYPE_NITRO_ENCLAVE_MACHINE, + .parent = TYPE_MICROVM_MACHINE, + .instance_size = sizeof(NitroEnclaveMachineState), + .instance_init = nitro_enclave_machine_initfn, + .class_size = sizeof(NitroEnclaveMachineClass), + .class_init = nitro_enclave_class_init, +}; + +static void nitro_enclave_machine_init(void) +{ + type_register_static(&nitro_enclave_machine_info); +} +type_init(nitro_enclave_machine_init); diff --git a/include/hw/i386/microvm.h b/include/hw/i386/microvm.h index fad97a891d..b9ac34a3ef 100644 --- a/include/hw/i386/microvm.h +++ b/include/hw/i386/microvm.h @@ -78,6 +78,8 @@ struct MicrovmMachineClass { X86MachineClass parent; HotplugHandler *(*orig_hotplug_handler)(MachineState *machine, DeviceState *dev); + void (*x86_load_linux)(X86MachineState *x86ms, FWCfgState *fw_cfg, + int acpi_data_size, bool pvh_enabled); }; struct MicrovmMachineState { diff --git a/include/hw/i386/nitro_enclave.h b/include/hw/i386/nitro_enclave.h new file mode 100644 index 0000000000..b65875033c --- /dev/null +++ b/include/hw/i386/nitro_enclave.h @@ -0,0 +1,62 @@ +/* + * AWS nitro-enclave machine + * + * Copyright (c) 2024 Dorjoy Chowdhury + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#ifndef HW_I386_NITRO_ENCLAVE_H +#define HW_I386_NITRO_ENCLAVE_H + +#include "crypto/hash.h" +#include "hw/i386/microvm.h" +#include "qom/object.h" +#include "hw/virtio/virtio-nsm.h" + +/* Machine type options */ +#define NITRO_ENCLAVE_VSOCK_CHARDEV_ID "vsock" +#define NITRO_ENCLAVE_ID "id" +#define NITRO_ENCLAVE_PARENT_ROLE "parent-role" +#define NITRO_ENCLAVE_PARENT_ID "parent-id" + +struct NitroEnclaveMachineClass { + MicrovmMachineClass parent; + + void (*parent_init)(MachineState *state); + void (*parent_reset)(MachineState *machine, ResetType type); +}; + +struct NitroEnclaveMachineState { + MicrovmMachineState parent; + + /* Machine type options */ + char *vsock; + /* Enclave identifier */ + char *id; + /* Parent instance IAM role ARN */ + char *parent_role; + /* Parent instance identifier */ + char *parent_id; + + /* Machine state */ + VirtIONSM *vnsm; + + /* kernel + ramdisks + cmdline sha384 hash */ + uint8_t image_sha384[QCRYPTO_HASH_DIGEST_LEN_SHA384]; + /* kernel + boot ramdisk + cmdline sha384 hash */ + uint8_t bootstrap_sha384[QCRYPTO_HASH_DIGEST_LEN_SHA384]; + /* application ramdisk(s) hash */ + uint8_t app_sha384[QCRYPTO_HASH_DIGEST_LEN_SHA384]; + /* certificate fingerprint hash */ + uint8_t fingerprint_sha384[QCRYPTO_HASH_DIGEST_LEN_SHA384]; + bool signature_found; +}; + +#define TYPE_NITRO_ENCLAVE_MACHINE MACHINE_TYPE_NAME("nitro-enclave") +OBJECT_DECLARE_TYPE(NitroEnclaveMachineState, NitroEnclaveMachineClass, + NITRO_ENCLAVE_MACHINE) + +#endif From 05bad41ba96bb1de2403e845038e4195693d5272 Mon Sep 17 00:00:00 2001 From: Dorjoy Chowdhury Date: Wed, 9 Oct 2024 03:17:27 +0600 Subject: [PATCH 26/49] docs/nitro-enclave: Documentation for nitro-enclave machine type Signed-off-by: Dorjoy Chowdhury Reviewed-by: Alexander Graf Link: https://lore.kernel.org/r/20241008211727.49088-7-dorjoychy111@gmail.com Signed-off-by: Paolo Bonzini --- MAINTAINERS | 1 + docs/system/i386/nitro-enclave.rst | 78 ++++++++++++++++++++++++++++++ docs/system/target-i386.rst | 3 +- tests/qtest/libqtest.c | 3 +- 4 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 docs/system/i386/nitro-enclave.rst diff --git a/MAINTAINERS b/MAINTAINERS index 5d3501ac28..4e5018a5e1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1842,6 +1842,7 @@ F: hw/core/eif.c F: hw/core/eif.h F: hw/i386/nitro_enclave.c F: include/hw/i386/nitro_enclave.h +F: docs/system/i386/nitro-enclave.rst Machine core M: Eduardo Habkost diff --git a/docs/system/i386/nitro-enclave.rst b/docs/system/i386/nitro-enclave.rst new file mode 100644 index 0000000000..73e3edefe5 --- /dev/null +++ b/docs/system/i386/nitro-enclave.rst @@ -0,0 +1,78 @@ +'nitro-enclave' virtual machine (``nitro-enclave``) +=================================================== + +``nitro-enclave`` is a machine type which emulates an *AWS nitro enclave* +virtual machine. `AWS nitro enclaves`_ is an Amazon EC2 feature that allows +creating isolated execution environments, called enclaves, from Amazon EC2 +instances which are used for processing highly sensitive data. Enclaves have +no persistent storage and no external networking. The enclave VMs are based +on Firecracker microvm with a vhost-vsock device for communication with the +parent EC2 instance that spawned it and a Nitro Secure Module (NSM) device +for cryptographic attestation. The parent instance VM always has CID 3 while +the enclave VM gets a dynamic CID. Enclaves use an EIF (`Enclave Image Format`_) +file which contains the necessary kernel, cmdline and ramdisk(s) to boot. + +In QEMU, ``nitro-enclave`` is a machine type based on ``microvm`` similar to how +AWS nitro enclaves are based on `Firecracker`_ microvm. This is useful for +local testing of EIF files using QEMU instead of running real AWS Nitro Enclaves +which can be difficult for debugging due to its roots in security. The vsock +device emulation is done using vhost-user-vsock which means another process that +can do the userspace emulation, like `vhost-device-vsock`_ from rust-vmm crate, +must be run alongside nitro-enclave for the vsock communication to work. + +``libcbor`` and ``gnutls`` are required dependencies for nitro-enclave machine +support to be added when building QEMU from source. + +.. _AWS nitro enclaves: https://docs.aws.amazon.com/enclaves/latest/user/nitro-enclave.html +.. _Enclave Image Format: https://github.com/aws/aws-nitro-enclaves-image-format +.. _vhost-device-vsock: https://github.com/rust-vmm/vhost-device/tree/main/vhost-device-vsock +.. _Firecracker: https://firecracker-microvm.github.io + +Using the nitro-enclave machine type +------------------------------------ + +Machine-specific options +~~~~~~~~~~~~~~~~~~~~~~~~ + +It supports the following machine-specific options: + +- nitro-enclave.vsock=string (required) (Id of the chardev from '-chardev' option that vhost-user-vsock device will use) +- nitro-enclave.id=string (optional) (Set enclave identifier) +- nitro-enclave.parent-role=string (optional) (Set parent instance IAM role ARN) +- nitro-enclave.parent-id=string (optional) (Set parent instance identifier) + + +Running a nitro-enclave VM +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +First, run `vhost-device-vsock`__ (or a similar tool that supports vhost-user-vsock). +The forward-cid option below with value 1 forwards all connections from the enclave +VM to the host machine and the forward-listen (port numbers separated by '+') is used +for forwarding connections from the host machine to the enclave VM. + +__ https://github.com/rust-vmm/vhost-device/tree/main/vhost-device-vsock#using-the-vsock-backend + + $ vhost-device-vsock \ + --vm guest-cid=4,forward-cid=1,forward-listen=9001+9002,socket=/tmp/vhost4.socket + +Now run the necessary applications on the host machine so that the nitro-enclave VM +applications' vsock communication works. For example, the nitro-enclave VM's init +process connects to CID 3 and sends a single byte hello heartbeat (0xB7) to let the +parent VM know that it booted expecting a heartbeat (0xB7) response. So you must run +a AF_VSOCK server on the host machine that listens on port 9000 and sends the heartbeat +after it receives the heartbeat for enclave VM to boot successfully. You should run all +the applications on the host machine that would typically be running in the parent EC2 +VM for successful communication with the enclave VM. + +Then run the nitro-enclave VM using the following command where ``hello.eif`` is +an EIF file you would use to spawn a real AWS nitro enclave virtual machine: + + $ qemu-system-x86_64 -M nitro-enclave,vsock=c,id=hello-world \ + -kernel hello-world.eif -nographic -m 4G --enable-kvm -cpu host \ + -chardev socket,id=c,path=/tmp/vhost4.socket + +In this example, the nitro-enclave VM has CID 4. If there are applications that +connect to the enclave VM, run them on the host machine after enclave VM starts. +You need to modify the applications to connect to CID 1 (instead of the enclave +VM's CID) and use the forward-listen (e.g., 9001+9002) option of vhost-device-vsock +to forward the ports they connect to. diff --git a/docs/system/target-i386.rst b/docs/system/target-i386.rst index 23e84e3ba7..ab7af1a75d 100644 --- a/docs/system/target-i386.rst +++ b/docs/system/target-i386.rst @@ -14,8 +14,9 @@ Board-specific documentation .. toctree:: :maxdepth: 1 - i386/microvm i386/pc + i386/microvm + i386/nitro-enclave Architectural features ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c index 9d07de1fbd..817fd7aac5 100644 --- a/tests/qtest/libqtest.c +++ b/tests/qtest/libqtest.c @@ -1648,7 +1648,8 @@ void qtest_cb_for_every_machine(void (*cb)(const char *machine), /* Ignore machines that cannot be used for qtests */ if (!strncmp("xenfv", machines[i].name, 5) || g_str_equal("xenpv", machines[i].name) || - g_str_equal("xenpvh", machines[i].name)) { + g_str_equal("xenpvh", machines[i].name) || + g_str_equal("nitro-enclave", machines[i].name)) { continue; } if (!skip_old_versioned || From cf4344639bbedb3f941488af75f44da9aa3d8650 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 31 Oct 2024 15:09:52 +0100 Subject: [PATCH 27/49] ci: always invoke meson through pyvenv Do not assume that the distro-installed meson is compatible with the one in the virtual environment. Signed-off-by: Paolo Bonzini --- .gitlab-ci.d/buildtest-template.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.d/buildtest-template.yml b/.gitlab-ci.d/buildtest-template.yml index 8c69c60d21..39da7698b0 100644 --- a/.gitlab-ci.d/buildtest-template.yml +++ b/.gitlab-ci.d/buildtest-template.yml @@ -66,7 +66,7 @@ - source scripts/ci/gitlab-ci-section - section_start buildenv "Setting up to run tests" - scripts/git-submodule.sh update roms/SLOF - - meson subprojects download $(cd build/subprojects && echo *) + - build/pyvenv/bin/meson subprojects download $(cd build/subprojects && echo *) - cd build - find . -type f -exec touch {} + # Avoid recompiling by hiding ninja with NINJA=":" From a635390f05f37e6e1d3ce2850a1d8564be0081ad Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 3 Sep 2024 09:50:00 +0200 Subject: [PATCH 28/49] target/i386: use tcg_gen_ext_tl when applicable Prefer it to gen_ext_tl in the common case where the destination is known. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index ef190416b4..dc308f3104 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -883,16 +883,16 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) case CC_OP_SUBB ... CC_OP_SUBQ: /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */ size = s->cc_op - CC_OP_SUBB; - gen_ext_tl(s->cc_srcT, s->cc_srcT, size, false); - gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false); + tcg_gen_ext_tl(s->cc_srcT, s->cc_srcT, size); + tcg_gen_ext_tl(cpu_cc_src, cpu_cc_src, size); return (CCPrepare) { .cond = TCG_COND_LTU, .reg = s->cc_srcT, .reg2 = cpu_cc_src, .use_reg2 = true }; case CC_OP_ADDB ... CC_OP_ADDQ: /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */ size = s->cc_op - CC_OP_ADDB; - gen_ext_tl(cpu_cc_dst, cpu_cc_dst, size, false); - gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false); + tcg_gen_ext_tl(cpu_cc_dst, cpu_cc_dst, size); + tcg_gen_ext_tl(cpu_cc_src, cpu_cc_src, size); return (CCPrepare) { .cond = TCG_COND_LTU, .reg = cpu_cc_dst, .reg2 = cpu_cc_src, .use_reg2 = true }; @@ -1041,8 +1041,8 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) size = s->cc_op - CC_OP_SUBB; switch (jcc_op) { case JCC_BE: - gen_ext_tl(s->cc_srcT, s->cc_srcT, size, false); - gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false); + tcg_gen_ext_tl(s->cc_srcT, s->cc_srcT, size); + tcg_gen_ext_tl(cpu_cc_src, cpu_cc_src, size); cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->cc_srcT, .reg2 = cpu_cc_src, .use_reg2 = true }; break; @@ -1052,8 +1052,8 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) case JCC_LE: cond = TCG_COND_LE; fast_jcc_l: - gen_ext_tl(s->cc_srcT, s->cc_srcT, size, true); - gen_ext_tl(cpu_cc_src, cpu_cc_src, size, true); + tcg_gen_ext_tl(s->cc_srcT, s->cc_srcT, size | MO_SIGN); + tcg_gen_ext_tl(cpu_cc_src, cpu_cc_src, size | MO_SIGN); cc = (CCPrepare) { .cond = cond, .reg = s->cc_srcT, .reg2 = cpu_cc_src, .use_reg2 = true }; break; From c2954745f2c1ed0c8b8fe7c47c2b09479e3c6d81 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 30 Jun 2024 19:51:11 -0700 Subject: [PATCH 29/49] target/i386: Tidy cc_op_str usage Make const. Use the read-only strings directly; do not copy them into an on-stack buffer with snprintf. Allow for holes in the cc_op_str array, now present with CC_OP_POPCNT. Fixes: 460231ad369 ("target/i386: give CC_OP_POPCNT low bits corresponding to MO_TL") Signed-off-by: Richard Henderson Link: https://lore.kernel.org/r/20240701025115.1265117-2-richard.henderson@linaro.org Signed-off-by: Paolo Bonzini --- target/i386/cpu-dump.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/target/i386/cpu-dump.c b/target/i386/cpu-dump.c index 3bb8e44091..dc6723aede 100644 --- a/target/i386/cpu-dump.c +++ b/target/i386/cpu-dump.c @@ -27,7 +27,7 @@ /***********************************************************/ /* x86 debug */ -static const char *cc_op_str[CC_OP_NB] = { +static const char * const cc_op_str[] = { [CC_OP_DYNAMIC] = "DYNAMIC", [CC_OP_EFLAGS] = "EFLAGS", @@ -347,7 +347,6 @@ void x86_cpu_dump_state(CPUState *cs, FILE *f, int flags) X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; int eflags, i, nb; - char cc_op_name[32]; static const char *seg_name[6] = { "ES", "CS", "SS", "DS", "FS", "GS" }; eflags = cpu_compute_eflags(env); @@ -456,10 +455,16 @@ void x86_cpu_dump_state(CPUState *cs, FILE *f, int flags) env->dr[6], env->dr[7]); } if (flags & CPU_DUMP_CCOP) { - if ((unsigned)env->cc_op < CC_OP_NB) - snprintf(cc_op_name, sizeof(cc_op_name), "%s", cc_op_str[env->cc_op]); - else - snprintf(cc_op_name, sizeof(cc_op_name), "[%d]", env->cc_op); + const char *cc_op_name = NULL; + char cc_op_buf[32]; + + if ((unsigned)env->cc_op < ARRAY_SIZE(cc_op_str)) { + cc_op_name = cc_op_str[env->cc_op]; + } + if (cc_op_name == NULL) { + snprintf(cc_op_buf, sizeof(cc_op_buf), "[%d]", env->cc_op); + cc_op_name = cc_op_buf; + } #ifdef TARGET_X86_64 if (env->hflags & HF_CS64_MASK) { qemu_fprintf(f, "CCS=%016" PRIx64 " CCD=%016" PRIx64 " CCO=%s\n", From e09447c39f3afad9920f2a1764e284976767d96e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 20 Jun 2024 10:16:47 +0200 Subject: [PATCH 30/49] target/i386: remove CC_OP_CLR Just use CC_OP_EFLAGS; it is not that likely that the flags computed by CC_OP_CLR survive the end of the basic block, in which case there is no need to spill cc_op_src. cc_op_src now does need spilling if the XOR is followed by a memory operation, but this only costs 0.2% extra TCG ops. They will be recouped by simplifications in how QEMU evaluates ZF at runtime, which are even greater with this change. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/cpu-dump.c | 1 - target/i386/cpu.h | 1 - target/i386/tcg/cc_helper.c | 3 --- target/i386/tcg/emit.c.inc | 15 ++++----------- target/i386/tcg/translate.c | 10 ---------- 5 files changed, 4 insertions(+), 26 deletions(-) diff --git a/target/i386/cpu-dump.c b/target/i386/cpu-dump.c index dc6723aede..a72ed93bd2 100644 --- a/target/i386/cpu-dump.c +++ b/target/i386/cpu-dump.c @@ -91,7 +91,6 @@ static const char * const cc_op_str[] = { [CC_OP_BMILGQ] = "BMILGQ", [CC_OP_POPCNT] = "POPCNT", - [CC_OP_CLR] = "CLR", }; static void diff --git a/target/i386/cpu.h b/target/i386/cpu.h index e2e10f55b2..50172d7a6f 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1304,7 +1304,6 @@ typedef enum { CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest. */ CC_OP_ADOX, /* CC_SRC2 = O, CC_SRC = rest. */ CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */ - CC_OP_CLR, /* Z and P set, all other flags clear. */ CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */ CC_OP_MULW, diff --git a/target/i386/tcg/cc_helper.c b/target/i386/tcg/cc_helper.c index dbddaa2fcb..40583c04cf 100644 --- a/target/i386/tcg/cc_helper.c +++ b/target/i386/tcg/cc_helper.c @@ -104,8 +104,6 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, case CC_OP_EFLAGS: return src1; - case CC_OP_CLR: - return CC_Z | CC_P; case CC_OP_POPCNT: return dst ? 0 : CC_Z; @@ -243,7 +241,6 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, case CC_OP_LOGICW: case CC_OP_LOGICL: case CC_OP_LOGICQ: - case CC_OP_CLR: case CC_OP_POPCNT: return 0; diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index fd17a9b1ec..790307dbba 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -1452,19 +1452,12 @@ static void gen_bt_flags(DisasContext *s, X86DecodedInsn *decode, TCGv src, TCGv * C is the result of the test, Z is unchanged, and the others * are all undefined. */ - switch (s->cc_op) { - case CC_OP_DYNAMIC: - case CC_OP_CLR: - case CC_OP_EFLAGS: - case CC_OP_ADCX: - case CC_OP_ADOX: - case CC_OP_ADCOX: + if (s->cc_op == CC_OP_DYNAMIC || CC_OP_HAS_EFLAGS(s->cc_op)) { /* Generate EFLAGS and replace the C bit. */ cf = tcg_temp_new(); tcg_gen_setcond_tl(TCG_COND_TSTNE, cf, src, mask); prepare_update_cf(decode, s, cf); - break; - default: + } else { /* * Z was going to be computed from the non-zero status of CC_DST. * We can get that same Z value (and the new C value) by leaving @@ -1475,7 +1468,6 @@ static void gen_bt_flags(DisasContext *s, X86DecodedInsn *decode, TCGv src, TCGv decode->cc_dst = cpu_cc_dst; decode->cc_op = ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB; tcg_gen_shr_tl(decode->cc_src, src, s->T1); - break; } } @@ -4724,7 +4716,8 @@ static void gen_XOR(DisasContext *s, X86DecodedInsn *decode) decode->op[2].unit == X86_OP_INT && decode->op[1].n == decode->op[2].n) { tcg_gen_movi_tl(s->T0, 0); - decode->cc_op = CC_OP_CLR; + decode->cc_op = CC_OP_EFLAGS; + decode->cc_src = tcg_constant_tl(CC_Z | CC_P); } else { MemOp ot = decode->op[1].ot; diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index dc308f3104..a20fbb019c 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -309,7 +309,6 @@ static const uint8_t cc_op_live[CC_OP_NB] = { [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC, [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2, [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2, - [CC_OP_CLR] = 0, [CC_OP_POPCNT] = USES_CC_DST, }; @@ -803,10 +802,6 @@ static void gen_mov_eflags(DisasContext *s, TCGv reg) tcg_gen_mov_tl(reg, cpu_cc_src); return; } - if (s->cc_op == CC_OP_CLR) { - tcg_gen_movi_tl(reg, CC_Z | CC_P); - return; - } dst = cpu_cc_dst; src1 = cpu_cc_src; @@ -897,7 +892,6 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) .reg2 = cpu_cc_src, .use_reg2 = true }; case CC_OP_LOGICB ... CC_OP_LOGICQ: - case CC_OP_CLR: case CC_OP_POPCNT: return (CCPrepare) { .cond = TCG_COND_NEVER }; @@ -969,7 +963,6 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg) case CC_OP_ADCOX: return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src, .imm = CC_S }; - case CC_OP_CLR: case CC_OP_POPCNT: return (CCPrepare) { .cond = TCG_COND_NEVER }; default: @@ -988,7 +981,6 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg) case CC_OP_ADCOX: return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2, .no_setcond = true }; - case CC_OP_CLR: case CC_OP_POPCNT: return (CCPrepare) { .cond = TCG_COND_NEVER }; case CC_OP_MULB ... CC_OP_MULQ: @@ -1013,8 +1005,6 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg) case CC_OP_ADCOX: return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src, .imm = CC_Z }; - case CC_OP_CLR: - return (CCPrepare) { .cond = TCG_COND_ALWAYS }; default: { MemOp size = (s->cc_op - CC_OP_ADDB) & 3; From ee806f9f67ba908187a7d273d96ded398ae9192d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 15 Jul 2024 14:31:56 +0200 Subject: [PATCH 31/49] target/i386: Rearrange CCOp Give the first few enumerators explicit integer constants, align the BWLQ enumerators. This will be used to simplify ((op - CC_OP_*B) & 3). Signed-off-by: Richard Henderson Link: https://lore.kernel.org/r/20240701025115.1265117-4-richard.henderson@linaro.org Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 50172d7a6f..6ec6338047 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1299,11 +1299,10 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); * are only needed for conditional branches. */ typedef enum { - CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ - CC_OP_EFLAGS, /* all cc are explicitly computed, CC_SRC = flags */ - CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest. */ - CC_OP_ADOX, /* CC_SRC2 = O, CC_SRC = rest. */ - CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */ + CC_OP_EFLAGS = 0, /* all cc are explicitly computed, CC_SRC = flags */ + CC_OP_ADCX = 1, /* CC_DST = C, CC_SRC = rest. */ + CC_OP_ADOX = 2, /* CC_SRC2 = O, CC_SRC = rest. */ + CC_OP_ADCOX = 3, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */ CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */ CC_OP_MULW, @@ -1376,9 +1375,12 @@ typedef enum { CC_OP_POPCNTQ__, CC_OP_POPCNT = sizeof(target_ulong) == 8 ? CC_OP_POPCNTQ__ : CC_OP_POPCNTL__, + CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ CC_OP_NB, } CCOp; -QEMU_BUILD_BUG_ON(CC_OP_NB >= 128); + +/* See X86DecodedInsn.cc_op, using int8_t. */ +QEMU_BUILD_BUG_ON(CC_OP_DYNAMIC > INT8_MAX); typedef struct SegmentCache { uint32_t selector; From f359b2fb71c379db28a5184b565f43af6b5ec268 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 15 Jul 2024 14:34:29 +0200 Subject: [PATCH 32/49] target/i386: Introduce cc_op_size Replace arithmetic on cc_op with a helper function. Assert that the op has a size and that it is valid for the configuration. Signed-off-by: Richard Henderson Link: https://lore.kernel.org/r/20240701025115.1265117-6-richard.henderson@linaro.org Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 17 ++++++++++++++++- target/i386/tcg/emit.c.inc | 5 +++-- target/i386/tcg/translate.c | 17 +++++++---------- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 6ec6338047..1bf4dfdc5b 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -24,6 +24,7 @@ #include "cpu-qom.h" #include "kvm/hyperv-proto.h" #include "exec/cpu-defs.h" +#include "exec/memop.h" #include "hw/i386/topology.h" #include "qapi/qapi-types-common.h" #include "qemu/cpu-float.h" @@ -1304,7 +1305,9 @@ typedef enum { CC_OP_ADOX = 2, /* CC_SRC2 = O, CC_SRC = rest. */ CC_OP_ADCOX = 3, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */ - CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */ + /* Low 2 bits = MemOp constant for the size */ +#define CC_OP_FIRST_BWLQ CC_OP_MULB + CC_OP_MULB = 4, /* modify all flags, C, O = (CC_SRC != 0) */ CC_OP_MULW, CC_OP_MULL, CC_OP_MULQ, @@ -1374,6 +1377,7 @@ typedef enum { CC_OP_POPCNTL__, CC_OP_POPCNTQ__, CC_OP_POPCNT = sizeof(target_ulong) == 8 ? CC_OP_POPCNTQ__ : CC_OP_POPCNTL__, +#define CC_OP_LAST_BWLQ CC_OP_POPCNTQ__ CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ CC_OP_NB, @@ -1382,6 +1386,17 @@ typedef enum { /* See X86DecodedInsn.cc_op, using int8_t. */ QEMU_BUILD_BUG_ON(CC_OP_DYNAMIC > INT8_MAX); +static inline MemOp cc_op_size(CCOp op) +{ + MemOp size = op & 3; + + QEMU_BUILD_BUG_ON(CC_OP_FIRST_BWLQ & 3); + assert(op >= CC_OP_FIRST_BWLQ && op <= CC_OP_LAST_BWLQ); + assert(size <= MO_TL); + + return size; +} + typedef struct SegmentCache { uint32_t selector; target_ulong base; diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 790307dbba..45ac5edb1a 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -1466,7 +1466,7 @@ static void gen_bt_flags(DisasContext *s, X86DecodedInsn *decode, TCGv src, TCGv */ decode->cc_src = tcg_temp_new(); decode->cc_dst = cpu_cc_dst; - decode->cc_op = ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB; + decode->cc_op = CC_OP_SARB + cc_op_size(s->cc_op); tcg_gen_shr_tl(decode->cc_src, src, s->T1); } } @@ -3346,7 +3346,8 @@ static bool gen_eflags_adcox(DisasContext *s, X86DecodedInsn *decode, bool want_ * bit, we might as well fish CF out of EFLAGS and save a shift. */ if (want_carry && (!need_flags || s->cc_op == CC_OP_SHLB + MO_TL)) { - tcg_gen_shri_tl(decode->cc_dst, cpu_cc_src, (8 << (s->cc_op - CC_OP_SHLB)) - 1); + MemOp size = cc_op_size(s->cc_op); + tcg_gen_shri_tl(decode->cc_dst, cpu_cc_src, (8 << size) - 1); got_cf = true; } gen_mov_eflags(s, decode->cc_src); diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index a20fbb019c..46062002c0 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -885,7 +885,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) case CC_OP_ADDB ... CC_OP_ADDQ: /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */ - size = s->cc_op - CC_OP_ADDB; + size = cc_op_size(s->cc_op); tcg_gen_ext_tl(cpu_cc_dst, cpu_cc_dst, size); tcg_gen_ext_tl(cpu_cc_src, cpu_cc_src, size); return (CCPrepare) { .cond = TCG_COND_LTU, .reg = cpu_cc_dst, @@ -902,7 +902,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) case CC_OP_SHLB ... CC_OP_SHLQ: /* (CC_SRC >> (DATA_BITS - 1)) & 1 */ - size = s->cc_op - CC_OP_SHLB; + size = cc_op_size(s->cc_op); return gen_prepare_sign_nz(cpu_cc_src, size); case CC_OP_MULB ... CC_OP_MULQ: @@ -910,11 +910,11 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) .reg = cpu_cc_src }; case CC_OP_BMILGB ... CC_OP_BMILGQ: - size = s->cc_op - CC_OP_BMILGB; + size = cc_op_size(s->cc_op); return gen_prepare_val_nz(cpu_cc_src, size, true); case CC_OP_BLSIB ... CC_OP_BLSIQ: - size = s->cc_op - CC_OP_BLSIB; + size = cc_op_size(s->cc_op); return gen_prepare_val_nz(cpu_cc_src, size, false); case CC_OP_ADCX: @@ -966,10 +966,7 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg) case CC_OP_POPCNT: return (CCPrepare) { .cond = TCG_COND_NEVER }; default: - { - MemOp size = (s->cc_op - CC_OP_ADDB) & 3; - return gen_prepare_sign_nz(cpu_cc_dst, size); - } + return gen_prepare_sign_nz(cpu_cc_dst, cc_op_size(s->cc_op)); } } @@ -1007,7 +1004,7 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg) .imm = CC_Z }; default: { - MemOp size = (s->cc_op - CC_OP_ADDB) & 3; + MemOp size = cc_op_size(s->cc_op); return gen_prepare_val_nz(cpu_cc_dst, size, true); } } @@ -1028,7 +1025,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) switch (s->cc_op) { case CC_OP_SUBB ... CC_OP_SUBQ: /* We optimize relational operators for the cmp/jcc case. */ - size = s->cc_op - CC_OP_SUBB; + size = cc_op_size(s->cc_op); switch (jcc_op) { case JCC_BE: tcg_gen_ext_tl(s->cc_srcT, s->cc_srcT, size); From 1f7f72bdc4faece6af875503a3abe992e87e776b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 1 Jul 2024 11:08:50 +0200 Subject: [PATCH 33/49] target/i386: Wrap cc_op_live with a validity check Assert that op is known and that cc_op_live_ is populated. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 1 - target/i386/tcg/decode-new.c.inc | 2 +- target/i386/tcg/emit.c.inc | 4 ++-- target/i386/tcg/translate.c | 21 ++++++++++++++++++--- 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 1bf4dfdc5b..a0a122cb5b 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1380,7 +1380,6 @@ typedef enum { #define CC_OP_LAST_BWLQ CC_OP_POPCNTQ__ CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ - CC_OP_NB, } CCOp; /* See X86DecodedInsn.cc_op, using int8_t. */ diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index 48bf730cd3..cda32ee678 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -2865,7 +2865,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_mov_i32(cpu_cc_op, decode.cc_op_dynamic); } set_cc_op(s, decode.cc_op); - cc_live = cc_op_live[decode.cc_op]; + cc_live = cc_op_live(decode.cc_op); } else { cc_live = 0; } diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 45ac5edb1a..785ff63f2a 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -3777,13 +3777,13 @@ static void gen_shift_dynamic_flags(DisasContext *s, X86DecodedInsn *decode, TCG decode->cc_op_dynamic = tcg_temp_new_i32(); assert(decode->cc_dst == s->T0); - if (cc_op_live[s->cc_op] & USES_CC_DST) { + if (cc_op_live(s->cc_op) & USES_CC_DST) { decode->cc_dst = tcg_temp_new(); tcg_gen_movcond_tl(TCG_COND_EQ, decode->cc_dst, count, tcg_constant_tl(0), cpu_cc_dst, s->T0); } - if (cc_op_live[s->cc_op] & USES_CC_SRC) { + if (cc_op_live(s->cc_op) & USES_CC_SRC) { tcg_gen_movcond_tl(TCG_COND_EQ, decode->cc_src, count, tcg_constant_tl(0), cpu_cc_src, decode->cc_src); } diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 46062002c0..1a9a2fe709 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -291,7 +291,7 @@ enum { }; /* Bit set if the global variable is live after setting CC_OP to X. */ -static const uint8_t cc_op_live[CC_OP_NB] = { +static const uint8_t cc_op_live_[] = { [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2, [CC_OP_EFLAGS] = USES_CC_SRC, [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC, @@ -312,6 +312,21 @@ static const uint8_t cc_op_live[CC_OP_NB] = { [CC_OP_POPCNT] = USES_CC_DST, }; +static uint8_t cc_op_live(CCOp op) +{ + uint8_t result; + assert(op >= 0 && op < ARRAY_SIZE(cc_op_live_)); + + /* + * Check that the array is fully populated. A zero entry would correspond + * to a fixed value of EFLAGS, which can be obtained with CC_OP_EFLAGS + * as well. + */ + result = cc_op_live_[op]; + assert(result); + return result; +} + static void set_cc_op_1(DisasContext *s, CCOp op, bool dirty) { int dead; @@ -321,7 +336,7 @@ static void set_cc_op_1(DisasContext *s, CCOp op, bool dirty) } /* Discard CC computation that will no longer be used. */ - dead = cc_op_live[s->cc_op] & ~cc_op_live[op]; + dead = cc_op_live(s->cc_op) & ~cc_op_live(op); if (dead & USES_CC_DST) { tcg_gen_discard_tl(cpu_cc_dst); } @@ -808,7 +823,7 @@ static void gen_mov_eflags(DisasContext *s, TCGv reg) src2 = cpu_cc_src2; /* Take care to not read values that are not live. */ - live = cc_op_live[s->cc_op] & ~USES_CC_SRCT; + live = cc_op_live(s->cc_op) & ~USES_CC_SRCT; dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2); if (dead) { TCGv zero = tcg_constant_tl(0); From ae14b33de8d329d5497db5446bdc0b0cb6ba756b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 20 Jun 2024 10:34:28 +0200 Subject: [PATCH 34/49] target/i386: optimize computation of ZF from CC_OP_DYNAMIC Most uses of CC_OP_DYNAMIC are for CMP/JB/JE or similar sequences. We can optimize many of them to avoid computation of the flags. This eliminates both TCG ops to set up the new cc_op, and helper instructions because evaluating just ZF is much cheaper. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/helper.h | 1 + target/i386/tcg/cc_helper.c | 13 +++++++++++++ target/i386/tcg/translate.c | 10 +++++++--- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/target/i386/helper.h b/target/i386/helper.h index eeb8df56ea..3f67098f11 100644 --- a/target/i386/helper.h +++ b/target/i386/helper.h @@ -1,5 +1,6 @@ DEF_HELPER_FLAGS_4(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int) DEF_HELPER_FLAGS_4(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int) +DEF_HELPER_FLAGS_3(cc_compute_nz, TCG_CALL_NO_RWG_SE, tl, tl, tl, int) DEF_HELPER_3(write_eflags, void, env, tl, i32) DEF_HELPER_1(read_eflags, tl, env) diff --git a/target/i386/tcg/cc_helper.c b/target/i386/tcg/cc_helper.c index 40583c04cf..1b83775a91 100644 --- a/target/i386/tcg/cc_helper.c +++ b/target/i386/tcg/cc_helper.c @@ -95,6 +95,19 @@ static target_ulong compute_all_adcox(target_ulong dst, target_ulong src1, return (src1 & ~(CC_C | CC_O)) | (dst * CC_C) | (src2 * CC_O); } +target_ulong helper_cc_compute_nz(target_ulong dst, target_ulong src1, + int op) +{ + if (CC_OP_HAS_EFLAGS(op)) { + return ~src1 & CC_Z; + } else { + MemOp size = cc_op_size(op); + target_ulong mask = MAKE_64BIT_MASK(0, 8 << size); + + return dst & mask; + } +} + target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, target_ulong src2, int op) { diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 1a9a2fe709..5e326ab1af 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -1008,15 +1008,19 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg) static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg) { switch (s->cc_op) { - case CC_OP_DYNAMIC: - gen_compute_eflags(s); - /* FALLTHRU */ case CC_OP_EFLAGS: case CC_OP_ADCX: case CC_OP_ADOX: case CC_OP_ADCOX: return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src, .imm = CC_Z }; + case CC_OP_DYNAMIC: + gen_update_cc_op(s); + if (!reg) { + reg = tcg_temp_new(); + } + gen_helper_cc_compute_nz(reg, cpu_cc_dst, cpu_cc_src, cpu_cc_op); + return (CCPrepare) { .cond = TCG_COND_EQ, .reg = reg, .imm = 0 }; default: { MemOp size = cc_op_size(s->cc_op); From 37df7c4d577124e01f087f598842b253aa2c9eca Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 20 Jun 2024 11:31:33 +0200 Subject: [PATCH 35/49] target/i386: optimize TEST+Jxx sequences Mostly used for TEST+JG and TEST+JLE, but it is easy to cover also JBE/JA and JL/JGE; shaves about 0.5% TCG ops. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 5e326ab1af..d3bbcf7317 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -1069,6 +1069,28 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) } break; + case CC_OP_LOGICB ... CC_OP_LOGICQ: + /* Mostly used for test+jump */ + size = s->cc_op - CC_OP_LOGICB; + switch (jcc_op) { + case JCC_BE: + /* CF = 0, becomes jz/je */ + jcc_op = JCC_Z; + goto slow_jcc; + case JCC_L: + /* OF = 0, becomes js/jns */ + jcc_op = JCC_S; + goto slow_jcc; + case JCC_LE: + /* SF or ZF, becomes signed <= 0 */ + tcg_gen_ext_tl(cpu_cc_dst, cpu_cc_dst, size | MO_SIGN); + cc = (CCPrepare) { .cond = TCG_COND_LE, .reg = cpu_cc_dst }; + break; + default: + goto slow_jcc; + } + break; + default: slow_jcc: /* This actually generates good code for JC, JZ and JS. */ From cea677e821c5d4efad5e25e504e935751fa61c95 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 1 Jul 2024 21:11:07 +0200 Subject: [PATCH 36/49] target/i386: add a few more trivial CCPrepare cases Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index d3bbcf7317..6e89d4faef 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -993,6 +993,7 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg) case CC_OP_ADCOX: return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2, .no_setcond = true }; + case CC_OP_LOGICB ... CC_OP_LOGICQ: case CC_OP_POPCNT: return (CCPrepare) { .cond = TCG_COND_NEVER }; case CC_OP_MULB ... CC_OP_MULQ: @@ -1021,6 +1022,8 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg) } gen_helper_cc_compute_nz(reg, cpu_cc_dst, cpu_cc_src, cpu_cc_op); return (CCPrepare) { .cond = TCG_COND_EQ, .reg = reg, .imm = 0 }; + case CC_OP_POPCNT: + return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_dst }; default: { MemOp size = cc_op_size(s->cc_op); From 44d58e938b649a3d73af9b12aba491ebc39e5f7c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 Aug 2024 13:44:47 +0200 Subject: [PATCH 37/49] target/i386: add a note about gen_jcc1 Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 6e89d4faef..5d729e68c9 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -1193,6 +1193,10 @@ static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1) { CCPrepare cc = gen_prepare_cc(s, b, NULL); + /* + * Note that this must be _after_ gen_prepare_cc, because it + * can change the cc_op from CC_OP_DYNAMIC to CC_OP_EFLAGS! + */ gen_update_cc_op(s); if (cc.use_reg2) { tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1); From 46c04e4bcfc0e3afc24c40f89a16475e564f0b10 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 31 May 2024 11:41:50 +0200 Subject: [PATCH 38/49] target/i386: make flag variables unsigned This makes it easier for the compiler to understand which bits are set, and it also removes "cltq" instructions to canonicalize the output value as 32-bit signed. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/cc_helper_template.h.inc | 46 ++++++++++++------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/target/i386/tcg/cc_helper_template.h.inc b/target/i386/tcg/cc_helper_template.h.inc index c5425e57cf..4cbbc73c3c 100644 --- a/target/i386/tcg/cc_helper_template.h.inc +++ b/target/i386/tcg/cc_helper_template.h.inc @@ -39,9 +39,9 @@ /* dynamic flags computation */ -static int glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) +static uint32_t glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { - int cf, pf, af, zf, sf, of; + uint32_t cf, pf, af, zf, sf, of; DATA_TYPE src2 = dst - src1; cf = dst < src1; @@ -58,10 +58,10 @@ static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) return dst < src1; } -static int glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1, +static uint32_t glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1, DATA_TYPE src3) { - int cf, pf, af, zf, sf, of; + uint32_t cf, pf, af, zf, sf, of; DATA_TYPE src2 = dst - src1 - src3; cf = (src3 ? dst <= src1 : dst < src1); @@ -79,9 +79,9 @@ static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1, return src3 ? dst <= src1 : dst < src1; } -static int glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) +static uint32_t glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) { - int cf, pf, af, zf, sf, of; + uint32_t cf, pf, af, zf, sf, of; DATA_TYPE src1 = dst + src2; cf = src1 < src2; @@ -100,10 +100,10 @@ static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) return src1 < src2; } -static int glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2, +static uint32_t glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2, DATA_TYPE src3) { - int cf, pf, af, zf, sf, of; + uint32_t cf, pf, af, zf, sf, of; DATA_TYPE src1 = dst + src2 + src3; cf = (src3 ? src1 <= src2 : src1 < src2); @@ -123,9 +123,9 @@ static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2, return (src3 ? src1 <= src2 : src1 < src2); } -static int glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) +static uint32_t glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { - int cf, pf, af, zf, sf, of; + uint32_t cf, pf, af, zf, sf, of; cf = 0; pf = parity_table[(uint8_t)dst]; @@ -136,9 +136,9 @@ static int glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) return cf | pf | af | zf | sf | of; } -static int glue(compute_all_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) +static uint32_t glue(compute_all_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { - int cf, pf, af, zf, sf, of; + uint32_t cf, pf, af, zf, sf, of; DATA_TYPE src2; cf = src1; @@ -152,9 +152,9 @@ static int glue(compute_all_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) return cf | pf | af | zf | sf | of; } -static int glue(compute_all_dec, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) +static uint32_t glue(compute_all_dec, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { - int cf, pf, af, zf, sf, of; + uint32_t cf, pf, af, zf, sf, of; DATA_TYPE src2; cf = src1; @@ -168,9 +168,9 @@ static int glue(compute_all_dec, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) return cf | pf | af | zf | sf | of; } -static int glue(compute_all_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) +static uint32_t glue(compute_all_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { - int cf, pf, af, zf, sf, of; + uint32_t cf, pf, af, zf, sf, of; cf = (src1 >> (DATA_BITS - 1)) & CC_C; pf = parity_table[(uint8_t)dst]; @@ -187,9 +187,9 @@ static int glue(compute_c_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) return (src1 >> (DATA_BITS - 1)) & CC_C; } -static int glue(compute_all_sar, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) +static uint32_t glue(compute_all_sar, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { - int cf, pf, af, zf, sf, of; + uint32_t cf, pf, af, zf, sf, of; cf = src1 & 1; pf = parity_table[(uint8_t)dst]; @@ -204,9 +204,9 @@ static int glue(compute_all_sar, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) /* NOTE: we compute the flags like the P4. On olders CPUs, only OF and CF are modified and it is slower to do that. Note as well that we don't truncate SRC1 for computing carry to DATA_TYPE. */ -static int glue(compute_all_mul, SUFFIX)(DATA_TYPE dst, target_long src1) +static uint32_t glue(compute_all_mul, SUFFIX)(DATA_TYPE dst, target_long src1) { - int cf, pf, af, zf, sf, of; + uint32_t cf, pf, af, zf, sf, of; cf = (src1 != 0); pf = parity_table[(uint8_t)dst]; @@ -217,9 +217,9 @@ static int glue(compute_all_mul, SUFFIX)(DATA_TYPE dst, target_long src1) return cf | pf | af | zf | sf | of; } -static int glue(compute_all_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) +static uint32_t glue(compute_all_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { - int cf, pf, af, zf, sf, of; + uint32_t cf, pf, af, zf, sf, of; cf = (src1 == 0); pf = 0; /* undefined */ @@ -237,7 +237,7 @@ static int glue(compute_c_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) static int glue(compute_all_blsi, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { - int cf, pf, af, zf, sf, of; + uint32_t cf, pf, af, zf, sf, of; cf = (src1 != 0); pf = 0; /* undefined */ From 24899cdcd238f885e9b7c0c708f624aa29090c21 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 31 May 2024 10:52:42 +0200 Subject: [PATCH 39/49] target/i386: use compiler builtin to compute PF This removes the 256 byte parity table from the executable. Suggested-by: Richard Henderson Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- include/qemu/host-utils.h | 9 ++++++ target/i386/tcg/cc_helper.c | 35 ------------------------ target/i386/tcg/cc_helper_template.h.inc | 20 +++++++------- target/i386/tcg/helper-tcg.h | 6 +++- target/i386/tcg/int_helper.c | 4 +-- 5 files changed, 26 insertions(+), 48 deletions(-) diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index ead97d354d..4d28fa22cf 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -313,6 +313,15 @@ static inline int ctpop8(uint8_t val) return __builtin_popcount(val); } +/* + * parity8 - return the parity (1 = odd) of an 8-bit value. + * @val: The value to search + */ +static inline int parity8(uint8_t val) +{ + return __builtin_parity(val); +} + /** * ctpop16 - count the population of one bits in a 16-bit value. * @val: The value to search diff --git a/target/i386/tcg/cc_helper.c b/target/i386/tcg/cc_helper.c index 1b83775a91..f1940b4092 100644 --- a/target/i386/tcg/cc_helper.c +++ b/target/i386/tcg/cc_helper.c @@ -22,41 +22,6 @@ #include "exec/helper-proto.h" #include "helper-tcg.h" -const uint8_t parity_table[256] = { - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, -}; - #define SHIFT 0 #include "cc_helper_template.h.inc" #undef SHIFT diff --git a/target/i386/tcg/cc_helper_template.h.inc b/target/i386/tcg/cc_helper_template.h.inc index 4cbbc73c3c..8af8b8539f 100644 --- a/target/i386/tcg/cc_helper_template.h.inc +++ b/target/i386/tcg/cc_helper_template.h.inc @@ -45,7 +45,7 @@ static uint32_t glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) DATA_TYPE src2 = dst - src1; cf = dst < src1; - pf = parity_table[(uint8_t)dst]; + pf = compute_pf(dst); af = (dst ^ src1 ^ src2) & CC_A; zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; @@ -65,7 +65,7 @@ static uint32_t glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1, DATA_TYPE src2 = dst - src1 - src3; cf = (src3 ? dst <= src1 : dst < src1); - pf = parity_table[(uint8_t)dst]; + pf = compute_pf(dst); af = (dst ^ src1 ^ src2) & 0x10; zf = (dst == 0) << 6; sf = lshift(dst, 8 - DATA_BITS) & 0x80; @@ -85,7 +85,7 @@ static uint32_t glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) DATA_TYPE src1 = dst + src2; cf = src1 < src2; - pf = parity_table[(uint8_t)dst]; + pf = compute_pf(dst); af = (dst ^ src1 ^ src2) & CC_A; zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; @@ -107,7 +107,7 @@ static uint32_t glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2, DATA_TYPE src1 = dst + src2 + src3; cf = (src3 ? src1 <= src2 : src1 < src2); - pf = parity_table[(uint8_t)dst]; + pf = compute_pf(dst); af = (dst ^ src1 ^ src2) & 0x10; zf = (dst == 0) << 6; sf = lshift(dst, 8 - DATA_BITS) & 0x80; @@ -128,7 +128,7 @@ static uint32_t glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) uint32_t cf, pf, af, zf, sf, of; cf = 0; - pf = parity_table[(uint8_t)dst]; + pf = compute_pf(dst); af = 0; zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; @@ -144,7 +144,7 @@ static uint32_t glue(compute_all_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) cf = src1; src1 = dst - 1; src2 = 1; - pf = parity_table[(uint8_t)dst]; + pf = compute_pf(dst); af = (dst ^ src1 ^ src2) & CC_A; zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; @@ -160,7 +160,7 @@ static uint32_t glue(compute_all_dec, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) cf = src1; src1 = dst + 1; src2 = 1; - pf = parity_table[(uint8_t)dst]; + pf = compute_pf(dst); af = (dst ^ src1 ^ src2) & CC_A; zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; @@ -173,7 +173,7 @@ static uint32_t glue(compute_all_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) uint32_t cf, pf, af, zf, sf, of; cf = (src1 >> (DATA_BITS - 1)) & CC_C; - pf = parity_table[(uint8_t)dst]; + pf = compute_pf(dst); af = 0; /* undefined */ zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; @@ -192,7 +192,7 @@ static uint32_t glue(compute_all_sar, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) uint32_t cf, pf, af, zf, sf, of; cf = src1 & 1; - pf = parity_table[(uint8_t)dst]; + pf = compute_pf(dst); af = 0; /* undefined */ zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; @@ -209,7 +209,7 @@ static uint32_t glue(compute_all_mul, SUFFIX)(DATA_TYPE dst, target_long src1) uint32_t cf, pf, af, zf, sf, of; cf = (src1 != 0); - pf = parity_table[(uint8_t)dst]; + pf = compute_pf(dst); af = 0; /* undefined */ zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h index 15d6c6f8b4..696d6ef016 100644 --- a/target/i386/tcg/helper-tcg.h +++ b/target/i386/tcg/helper-tcg.h @@ -21,6 +21,7 @@ #define I386_HELPER_TCG_H #include "exec/exec-all.h" +#include "qemu/host-utils.h" /* Maximum instruction code size */ #define TARGET_MAX_INSN_SIZE 16 @@ -87,7 +88,10 @@ G_NORETURN void x86_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, #endif /* cc_helper.c */ -extern const uint8_t parity_table[256]; +static inline unsigned int compute_pf(uint8_t x) +{ + return !parity8(x) * CC_P; +} /* misc_helper.c */ void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask); diff --git a/target/i386/tcg/int_helper.c b/target/i386/tcg/int_helper.c index e1f9240528..1a02e9d843 100644 --- a/target/i386/tcg/int_helper.c +++ b/target/i386/tcg/int_helper.c @@ -237,7 +237,7 @@ void helper_daa(CPUX86State *env) env->regs[R_EAX] = (env->regs[R_EAX] & ~0xff) | al; /* well, speed is not an issue here, so we compute the flags by hand */ eflags |= (al == 0) << 6; /* zf */ - eflags |= parity_table[al]; /* pf */ + eflags |= compute_pf(al); eflags |= (al & 0x80); /* sf */ CC_SRC = eflags; CC_OP = CC_OP_EFLAGS; @@ -269,7 +269,7 @@ void helper_das(CPUX86State *env) env->regs[R_EAX] = (env->regs[R_EAX] & ~0xff) | al; /* well, speed is not an issue here, so we compute the flags by hand */ eflags |= (al == 0) << 6; /* zf */ - eflags |= parity_table[al]; /* pf */ + eflags |= compute_pf(al); eflags |= (al & 0x80); /* sf */ CC_SRC = eflags; CC_OP = CC_OP_EFLAGS; From 134ffcb276be569fcb7531b2266b0df0d2a1db81 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 31 May 2024 11:00:33 +0200 Subject: [PATCH 40/49] target/i386: use higher-precision arithmetic to compute CF If the operands of the arithmetic instruction fit within a half-register, it's easiest to use a comparison instruction to compute the carry. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/cc_helper_template.h.inc | 37 ++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/target/i386/tcg/cc_helper_template.h.inc b/target/i386/tcg/cc_helper_template.h.inc index 8af8b8539f..628d942a16 100644 --- a/target/i386/tcg/cc_helper_template.h.inc +++ b/target/i386/tcg/cc_helper_template.h.inc @@ -22,12 +22,17 @@ #if DATA_BITS == 8 #define SUFFIX b #define DATA_TYPE uint8_t +#define WIDER_TYPE uint32_t #elif DATA_BITS == 16 #define SUFFIX w #define DATA_TYPE uint16_t +#define WIDER_TYPE uint32_t #elif DATA_BITS == 32 #define SUFFIX l #define DATA_TYPE uint32_t +#if HOST_LONG_BITS >= 64 +#define WIDER_TYPE uint64_t +#endif #elif DATA_BITS == 64 #define SUFFIX q #define DATA_TYPE uint64_t @@ -62,9 +67,18 @@ static uint32_t glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1, DATA_TYPE src3) { uint32_t cf, pf, af, zf, sf, of; + +#ifdef WIDER_TYPE + WIDER_TYPE src13 = (WIDER_TYPE) src1 + (WIDER_TYPE) src3; + DATA_TYPE src2 = dst - src13; + + cf = dst < src13; +#else DATA_TYPE src2 = dst - src1 - src3; cf = (src3 ? dst <= src1 : dst < src1); +#endif + pf = compute_pf(dst); af = (dst ^ src1 ^ src2) & 0x10; zf = (dst == 0) << 6; @@ -76,7 +90,13 @@ static uint32_t glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1, static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1, DATA_TYPE src3) { +#ifdef WIDER_TYPE + WIDER_TYPE src13 = (WIDER_TYPE) src1 + (WIDER_TYPE) src3; + + return dst < src13; +#else return src3 ? dst <= src1 : dst < src1; +#endif } static uint32_t glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) @@ -104,9 +124,18 @@ static uint32_t glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2, DATA_TYPE src3) { uint32_t cf, pf, af, zf, sf, of; + +#ifdef WIDER_TYPE + WIDER_TYPE src23 = (WIDER_TYPE) src2 + (WIDER_TYPE) src3; + DATA_TYPE src1 = dst + src23; + + cf = src1 < src23; +#else DATA_TYPE src1 = dst + src2 + src3; cf = (src3 ? src1 <= src2 : src1 < src2); +#endif + pf = compute_pf(dst); af = (dst ^ src1 ^ src2) & 0x10; zf = (dst == 0) << 6; @@ -118,9 +147,16 @@ static uint32_t glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2, static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2, DATA_TYPE src3) { +#ifdef WIDER_TYPE + WIDER_TYPE src23 = (WIDER_TYPE) src2 + (WIDER_TYPE) src3; + DATA_TYPE src1 = dst + src23; + + return src1 < src23; +#else DATA_TYPE src1 = dst + src2 + src3; return (src3 ? src1 <= src2 : src1 < src2); +#endif } static uint32_t glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) @@ -258,3 +294,4 @@ static int glue(compute_c_blsi, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) #undef DATA_TYPE #undef DATA_MASK #undef SUFFIX +#undef WIDER_TYPE From 6d8623b5c0854a422c2aa57fff90d39275886a47 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 31 May 2024 11:32:19 +0200 Subject: [PATCH 41/49] target/i386: use + to put flags together This gives greater opportunity for reassociation on x86 targets, since addition can use the LEA instruction. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/tcg/cc_helper_template.h.inc | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/target/i386/tcg/cc_helper_template.h.inc b/target/i386/tcg/cc_helper_template.h.inc index 628d942a16..9aff16b880 100644 --- a/target/i386/tcg/cc_helper_template.h.inc +++ b/target/i386/tcg/cc_helper_template.h.inc @@ -55,7 +55,7 @@ static uint32_t glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O; - return cf | pf | af | zf | sf | of; + return cf + pf + af + zf + sf + of; } static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) @@ -84,7 +84,7 @@ static uint32_t glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1, zf = (dst == 0) << 6; sf = lshift(dst, 8 - DATA_BITS) & 0x80; of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O; - return cf | pf | af | zf | sf | of; + return cf + pf + af + zf + sf + of; } static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1, @@ -110,7 +110,7 @@ static uint32_t glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O; - return cf | pf | af | zf | sf | of; + return cf + pf + af + zf + sf + of; } static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) @@ -141,7 +141,7 @@ static uint32_t glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2, zf = (dst == 0) << 6; sf = lshift(dst, 8 - DATA_BITS) & 0x80; of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O; - return cf | pf | af | zf | sf | of; + return cf + pf + af + zf + sf + of; } static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2, @@ -169,7 +169,7 @@ static uint32_t glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; of = 0; - return cf | pf | af | zf | sf | of; + return cf + pf + af + zf + sf + of; } static uint32_t glue(compute_all_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) @@ -185,7 +185,7 @@ static uint32_t glue(compute_all_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; of = (dst == SIGN_MASK) * CC_O; - return cf | pf | af | zf | sf | of; + return cf + pf + af + zf + sf + of; } static uint32_t glue(compute_all_dec, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) @@ -201,7 +201,7 @@ static uint32_t glue(compute_all_dec, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; of = (dst == SIGN_MASK - 1) * CC_O; - return cf | pf | af | zf | sf | of; + return cf + pf + af + zf + sf + of; } static uint32_t glue(compute_all_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) @@ -215,7 +215,7 @@ static uint32_t glue(compute_all_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) sf = lshift(dst, 8 - DATA_BITS) & CC_S; /* of is defined iff shift count == 1 */ of = lshift(src1 ^ dst, 12 - DATA_BITS) & CC_O; - return cf | pf | af | zf | sf | of; + return cf + pf + af + zf + sf + of; } static int glue(compute_c_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) @@ -234,7 +234,7 @@ static uint32_t glue(compute_all_sar, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) sf = lshift(dst, 8 - DATA_BITS) & CC_S; /* of is defined iff shift count == 1 */ of = lshift(src1 ^ dst, 12 - DATA_BITS) & CC_O; - return cf | pf | af | zf | sf | of; + return cf + pf + af + zf + sf + of; } /* NOTE: we compute the flags like the P4. On olders CPUs, only OF and @@ -250,7 +250,7 @@ static uint32_t glue(compute_all_mul, SUFFIX)(DATA_TYPE dst, target_long src1) zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; of = cf * CC_O; - return cf | pf | af | zf | sf | of; + return cf + pf + af + zf + sf + of; } static uint32_t glue(compute_all_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) @@ -263,7 +263,7 @@ static uint32_t glue(compute_all_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; of = 0; - return cf | pf | af | zf | sf | of; + return cf + pf + af + zf + sf + of; } static int glue(compute_c_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) @@ -281,7 +281,7 @@ static int glue(compute_all_blsi, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) zf = (dst == 0) * CC_Z; sf = lshift(dst, 8 - DATA_BITS) & CC_S; of = 0; - return cf | pf | af | zf | sf | of; + return cf + pf + af + zf + sf + of; } static int glue(compute_c_blsi, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) From 33098002a838a0450f243f5e17463aca700e923d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 31 Oct 2024 16:52:26 +0800 Subject: [PATCH 42/49] target/i386: cpu: set correct supported XCR0 features for TCG Signed-off-by: Paolo Bonzini Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20241031085233.425388-2-tao1.su@linux.intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 5886b44fcf..f08e9b8f1b 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1318,7 +1318,9 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .needs_ecx = true, .ecx = 0, .reg = R_EAX, }, - .tcg_features = ~0U, + .tcg_features = XSTATE_FP_MASK | XSTATE_SSE_MASK | + XSTATE_YMM_MASK | XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK | + XSTATE_PKRU_MASK, .migratable_flags = XSTATE_FP_MASK | XSTATE_SSE_MASK | XSTATE_YMM_MASK | XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK | XSTATE_OPMASK_MASK | XSTATE_ZMM_Hi256_MASK | XSTATE_Hi16_ZMM_MASK | @@ -1331,7 +1333,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .needs_ecx = true, .ecx = 0, .reg = R_EDX, }, - .tcg_features = ~0U, + .tcg_features = 0U, }, /*Below are MSR exposed features*/ [FEAT_ARCH_CAPABILITIES] = { From b888c7807049cc044d10d70139cb945202fb7cd2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 31 Oct 2024 16:52:27 +0800 Subject: [PATCH 43/49] target/i386: do not rely on ExtSaveArea for accelerator-supported XCR0 bits Right now, QEMU is using the "feature" and "bits" fields of ExtSaveArea to query the accelerator for the support status of extended save areas. This is a problem for AVX10, which attaches two feature bits (AVX512F and AVX10) to the same extended save states. To keep the AVX10 hacks to the minimum, limit usage of esa->features and esa->bits. Instead, just query the accelerator for the 0xD leaf. Do it in common code and clear esa->size if an extended save state is unsupported. Signed-off-by: Paolo Bonzini Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20241031085233.425388-3-tao1.su@linux.intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 33 +++++++++++++++++++++++++++++++-- target/i386/kvm/kvm-cpu.c | 4 ---- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index f08e9b8f1b..1ee4d988ca 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -7102,6 +7102,15 @@ static void x86_cpu_set_sgxlepubkeyhash(CPUX86State *env) #endif } +static bool cpuid_has_xsave_feature(CPUX86State *env, const ExtSaveArea *esa) +{ + if (!esa->size) { + return false; + } + + return (env->features[esa->feature] & esa->bits); +} + static void x86_cpu_reset_hold(Object *obj, ResetType type) { CPUState *cs = CPU(obj); @@ -7210,7 +7219,7 @@ static void x86_cpu_reset_hold(Object *obj, ResetType type) if (!((1 << i) & CPUID_XSTATE_XCR0_MASK)) { continue; } - if (env->features[esa->feature] & esa->bits) { + if (cpuid_has_xsave_feature(env, esa)) { xcr0 |= 1ull << i; } } @@ -7348,7 +7357,7 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) mask = 0; for (i = 0; i < ARRAY_SIZE(x86_ext_save_areas); i++) { const ExtSaveArea *esa = &x86_ext_save_areas[i]; - if (env->features[esa->feature] & esa->bits) { + if (cpuid_has_xsave_feature(env, esa)) { mask |= (1ULL << i); } } @@ -8020,6 +8029,26 @@ static void x86_cpu_register_feature_bit_props(X86CPUClass *xcc, static void x86_cpu_post_initfn(Object *obj) { + static bool first = true; + uint64_t supported_xcr0; + int i; + + if (first) { + first = false; + + supported_xcr0 = + ((uint64_t) x86_cpu_get_supported_feature_word(NULL, FEAT_XSAVE_XCR0_HI) << 32) | + x86_cpu_get_supported_feature_word(NULL, FEAT_XSAVE_XCR0_LO); + + for (i = XSTATE_SSE_BIT + 1; i < XSAVE_STATE_AREA_COUNT; i++) { + ExtSaveArea *esa = &x86_ext_save_areas[i]; + + if (!(supported_xcr0 & (1 << i))) { + esa->size = 0; + } + } + } + accel_cpu_instance_init(CPU(obj)); } diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c index 6bf8dcfc60..99d1941cf5 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c @@ -143,10 +143,6 @@ static void kvm_cpu_xsave_init(void) if (!esa->size) { continue; } - if ((x86_cpu_get_supported_feature_word(NULL, esa->feature) & esa->bits) - != esa->bits) { - continue; - } host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx); if (eax != 0) { assert(esa->size == eax); From 3507c6f04606593711408a6d26141bdbceff9377 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 31 Oct 2024 16:52:28 +0800 Subject: [PATCH 44/49] target/i386: return bool from x86_cpu_filter_features Prepare for filtering non-boolean features such as AVX10 version. Signed-off-by: Paolo Bonzini Reviewed-by: Zhao Liu Signed-off-by: Tao Su Link: https://lore.kernel.org/r/20241031085233.425388-4-tao1.su@linux.intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 1ee4d988ca..92fefdaa42 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5840,7 +5840,7 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, } } -static void x86_cpu_filter_features(X86CPU *cpu, bool verbose); +static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose); /* Build a list with the name of all features on a feature word array */ static void x86_cpu_list_feature_names(FeatureWordArray features, @@ -7556,9 +7556,9 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) * Finishes initialization of CPUID data, filters CPU feature * words based on host availability of each feature. * - * Returns: 0 if all flags are supported by the host, non-zero otherwise. + * Returns: true if any flag is not supported by the host, false otherwise. */ -static void x86_cpu_filter_features(X86CPU *cpu, bool verbose) +static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose) { CPUX86State *env = &cpu->env; FeatureWord w; @@ -7610,6 +7610,8 @@ static void x86_cpu_filter_features(X86CPU *cpu, bool verbose) mark_unavailable_features(cpu, FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT, prefix); } } + + return x86_cpu_have_filtered_features(cpu); } static void x86_cpu_hyperv_realize(X86CPU *cpu) @@ -7707,14 +7709,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) } } - x86_cpu_filter_features(cpu, cpu->check_cpuid || cpu->enforce_cpuid); - - if (cpu->enforce_cpuid && x86_cpu_have_filtered_features(cpu)) { - error_setg(&local_err, - accel_uses_host_cpuid() ? + if (x86_cpu_filter_features(cpu, cpu->check_cpuid || cpu->enforce_cpuid)) { + if (cpu->enforce_cpuid) { + error_setg(&local_err, + accel_uses_host_cpuid() ? "Host doesn't support requested features" : "TCG doesn't support requested features"); - goto out; + goto out; + } } /* On AMD CPUs, some CPUID[8000_0001].EDX bits must match the bits on From bccfb846fd52d6f20704ecfa4d01b60b43c6f640 Mon Sep 17 00:00:00 2001 From: Tao Su Date: Thu, 31 Oct 2024 16:52:29 +0800 Subject: [PATCH 45/49] target/i386: add AVX10 feature and AVX10 version property When AVX10 enable bit is set, the 0x24 leaf will be present as "AVX10 Converged Vector ISA leaf" containing fields for the version number and the supported vector bit lengths. Introduce avx10-version property so that avx10 version can be controlled by user and cpu model. Per spec, avx10 version can never be 0, the default value of avx10-version is set to 0 to determine whether it is specified by user. The default can come from the device model or, for the max model, from KVM's reported value. Signed-off-by: Tao Su Link: https://lore.kernel.org/r/20241028024512.156724-3-tao1.su@linux.intel.com Link: https://lore.kernel.org/r/20241028024512.156724-4-tao1.su@linux.intel.com Signed-off-by: Paolo Bonzini Tested-by: Xuelian Guo Link: https://lore.kernel.org/r/20241031085233.425388-5-tao1.su@linux.intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 64 ++++++++++++++++++++++++++++++++++++++----- target/i386/cpu.h | 4 +++ target/i386/kvm/kvm.c | 3 +- 3 files changed, 63 insertions(+), 8 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 92fefdaa42..681c04e056 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -46,6 +46,9 @@ #include "cpu-internal.h" static void x86_cpu_realizefn(DeviceState *dev, Error **errp); +static void x86_cpu_get_supported_cpuid(uint32_t func, uint32_t index, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx); /* Helpers for building CPUID[2] descriptors: */ @@ -1132,7 +1135,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "avx-vnni-int8", "avx-ne-convert", NULL, NULL, "amx-complex", NULL, "avx-vnni-int16", NULL, NULL, NULL, "prefetchiti", NULL, - NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "avx10", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1989,6 +1992,7 @@ typedef struct X86CPUDefinition { int family; int model; int stepping; + uint8_t avx10_version; FeatureWordArray features; const char *model_id; const CPUCaches *const cache_info; @@ -6331,6 +6335,9 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model) */ object_property_set_str(OBJECT(cpu), "vendor", def->vendor, &error_abort); + object_property_set_uint(OBJECT(cpu), "avx10-version", def->avx10_version, + &error_abort); + x86_cpu_apply_version_props(cpu, model); /* @@ -6859,6 +6866,16 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } break; } + case 0x24: { + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + if ((env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) && count == 0) { + *ebx = env->features[FEAT_24_0_EBX] | env->avx10_version; + } + break; + } case 0x40000000: /* * CPUID code in kvm_arch_init_vcpu() ignores stuff @@ -7450,6 +7467,12 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) ~env->user_features[w] & ~feature_word_info[w].no_autoenable_flags; } + + if ((env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) && !env->avx10_version) { + uint32_t eax, ebx, ecx, edx; + x86_cpu_get_supported_cpuid(0x24, 0, &eax, &ebx, &ecx, &edx); + env->avx10_version = ebx & 0xff; + } } for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) { @@ -7513,6 +7536,11 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F); } + /* Advanced Vector Extensions 10 (AVX10) requires CPUID[0x24] */ + if (env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) { + x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x24); + } + /* SVM requires CPUID[0x8000000A] */ if (env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_SVM) { x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000000A); @@ -7563,6 +7591,10 @@ static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose) CPUX86State *env = &cpu->env; FeatureWord w; const char *prefix = NULL; + bool have_filtered_features; + + uint32_t eax_0, ebx_0, ecx_0, edx_0; + uint32_t eax_1, ebx_1, ecx_1, edx_1; if (verbose) { prefix = accel_uses_host_cpuid() @@ -7584,13 +7616,10 @@ static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose) */ if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) && kvm_enabled()) { - uint32_t eax_0, ebx_0, ecx_0, edx_0_unused; - uint32_t eax_1, ebx_1, ecx_1_unused, edx_1_unused; - x86_cpu_get_supported_cpuid(0x14, 0, - &eax_0, &ebx_0, &ecx_0, &edx_0_unused); + &eax_0, &ebx_0, &ecx_0, &edx_0); x86_cpu_get_supported_cpuid(0x14, 1, - &eax_1, &ebx_1, &ecx_1_unused, &edx_1_unused); + &eax_1, &ebx_1, &ecx_1, &edx_1); if (!eax_0 || ((ebx_0 & INTEL_PT_MINIMAL_EBX) != INTEL_PT_MINIMAL_EBX) || @@ -7611,7 +7640,27 @@ static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose) } } - return x86_cpu_have_filtered_features(cpu); + have_filtered_features = x86_cpu_have_filtered_features(cpu); + + if (env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) { + x86_cpu_get_supported_cpuid(0x24, 0, + &eax_0, &ebx_0, &ecx_0, &edx_0); + uint8_t version = ebx_0 & 0xff; + + if (version < env->avx10_version) { + if (prefix) { + warn_report("%s: avx10.%d. Adjust to avx10.%d", + prefix, env->avx10_version, version); + } + env->avx10_version = version; + have_filtered_features = true; + } + } else if (env->avx10_version && prefix) { + warn_report("%s: avx10.%d.", prefix, env->avx10_version); + have_filtered_features = true; + } + + return have_filtered_features; } static void x86_cpu_hyperv_realize(X86CPU *cpu) @@ -8395,6 +8444,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_UINT32("min-level", X86CPU, env.cpuid_min_level, 0), DEFINE_PROP_UINT32("min-xlevel", X86CPU, env.cpuid_min_xlevel, 0), DEFINE_PROP_UINT32("min-xlevel2", X86CPU, env.cpuid_min_xlevel2, 0), + DEFINE_PROP_UINT8("avx10-version", X86CPU, env.avx10_version, 0), DEFINE_PROP_UINT64("ucode-rev", X86CPU, ucode_rev, 0), DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true), DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index a0a122cb5b..72e98b2511 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -975,6 +975,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_1_EDX_AMX_COMPLEX (1U << 8) /* PREFETCHIT0/1 Instructions */ #define CPUID_7_1_EDX_PREFETCHITI (1U << 14) +/* Support for Advanced Vector Extensions 10 */ +#define CPUID_7_1_EDX_AVX10 (1U << 19) /* Flexible return and event delivery (FRED) */ #define CPUID_7_1_EAX_FRED (1U << 17) /* Load into IA32_KERNEL_GS_BASE (LKGS) */ @@ -1954,6 +1956,8 @@ typedef struct CPUArchState { uint32_t cpuid_vendor3; uint32_t cpuid_version; FeatureWordArray features; + /* AVX10 version */ + uint8_t avx10_version; /* Features that were explicitly enabled/disabled */ FeatureWordArray user_features; uint32_t cpuid_model[12]; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index fd9f198892..8e17942c3b 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -1923,7 +1923,8 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, case 0x7: case 0x14: case 0x1d: - case 0x1e: { + case 0x1e: + case 0x24: { uint32_t times; c->function = i; From 2d055b8fe11ee567c2ae8047311fd83697e494b6 Mon Sep 17 00:00:00 2001 From: Tao Su Date: Thu, 31 Oct 2024 16:52:30 +0800 Subject: [PATCH 46/49] target/i386: add CPUID.24 features for AVX10 Introduce features for the supported vector bit lengths. Signed-off-by: Tao Su Link: https://lore.kernel.org/r/20241028024512.156724-3-tao1.su@linux.intel.com Link: https://lore.kernel.org/r/20241028024512.156724-4-tao1.su@linux.intel.com Signed-off-by: Paolo Bonzini Reviewed-by: Zhao Liu Tested-by: Xuelian Guo Link: https://lore.kernel.org/r/20241031085233.425388-6-tao1.su@linux.intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 15 +++++++++++++++ target/i386/cpu.h | 8 ++++++++ 2 files changed, 23 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 681c04e056..3731155c2d 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -901,6 +901,7 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_SGX_12_0_EAX_FEATURES 0 #define TCG_SGX_12_0_EBX_FEATURES 0 #define TCG_SGX_12_1_EAX_FEATURES 0 +#define TCG_24_0_EBX_FEATURES 0 #if defined CONFIG_USER_ONLY #define CPUID_8000_0008_EBX_KERNEL_FEATURES (CPUID_8000_0008_EBX_IBPB | \ @@ -1166,6 +1167,20 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .tcg_features = TCG_7_2_EDX_FEATURES, }, + [FEAT_24_0_EBX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + [16] = "avx10-128", + [17] = "avx10-256", + [18] = "avx10-512", + }, + .cpuid = { + .eax = 0x24, + .needs_ecx = true, .ecx = 0, + .reg = R_EBX, + }, + .tcg_features = TCG_24_0_EBX_FEATURES, + }, [FEAT_8000_0007_EDX] = { .type = CPUID_FEATURE_WORD, .feat_names = { diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 72e98b2511..f8f97fe933 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -665,6 +665,7 @@ typedef enum FeatureWord { FEAT_XSAVE_XSS_HI, /* CPUID[EAX=0xd,ECX=1].EDX */ FEAT_7_1_EDX, /* CPUID[EAX=7,ECX=1].EDX */ FEAT_7_2_EDX, /* CPUID[EAX=7,ECX=2].EDX */ + FEAT_24_0_EBX, /* CPUID[EAX=0x24,ECX=0].EBX */ FEATURE_WORDS, } FeatureWord; @@ -993,6 +994,13 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* Packets which contain IP payload have LIP values */ #define CPUID_14_0_ECX_LIP (1U << 31) +/* AVX10 128-bit vector support is present */ +#define CPUID_24_0_EBX_AVX10_128 (1U << 16) +/* AVX10 256-bit vector support is present */ +#define CPUID_24_0_EBX_AVX10_256 (1U << 17) +/* AVX10 512-bit vector support is present */ +#define CPUID_24_0_EBX_AVX10_512 (1U << 18) + /* RAS Features */ #define CPUID_8000_0007_EBX_OVERFLOW_RECOV (1U << 0) #define CPUID_8000_0007_EBX_SUCCOR (1U << 1) From 150ab84b2d0083e6af344cca70290614d4fe568d Mon Sep 17 00:00:00 2001 From: Tao Su Date: Thu, 31 Oct 2024 16:52:31 +0800 Subject: [PATCH 47/49] target/i386: Add feature dependencies for AVX10 Since the highest supported vector length for a processor implies that all lesser vector lengths are also supported, add the dependencies of the supported vector lengths. If all vector lengths aren't supported, clear AVX10 enable bit as well. Note that the order of AVX10 related dependencies should be kept as: CPUID_24_0_EBX_AVX10_128 -> CPUID_24_0_EBX_AVX10_256, CPUID_24_0_EBX_AVX10_256 -> CPUID_24_0_EBX_AVX10_512, CPUID_24_0_EBX_AVX10_VL_MASK -> CPUID_7_1_EDX_AVX10, CPUID_7_1_EDX_AVX10 -> CPUID_24_0_EBX, so that prevent user from setting weird CPUID combinations, e.g. 256-bits and 512-bits are supported but 128-bits is not, no vector lengths are supported but AVX10 enable bit is still set. Since AVX10_128 will be reserved as 1, adding these dependencies has the bonus that when user sets -cpu host,-avx10-128, CPUID_7_1_EDX_AVX10 and CPUID_24_0_EBX will be disabled automatically. Tested-by: Xuelian Guo Signed-off-by: Tao Su Link: https://lore.kernel.org/r/20241028024512.156724-5-tao1.su@linux.intel.com Reviewed-by: Zhao Liu Signed-off-by: Paolo Bonzini Link: https://lore.kernel.org/r/20241031085233.425388-7-tao1.su@linux.intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 16 ++++++++++++++++ target/i386/cpu.h | 4 ++++ 2 files changed, 20 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 3731155c2d..d056285a03 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1787,6 +1787,22 @@ static FeatureDep feature_dependencies[] = { .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_SGX }, .to = { FEAT_SGX_12_1_EAX, ~0ull }, }, + { + .from = { FEAT_24_0_EBX, CPUID_24_0_EBX_AVX10_128 }, + .to = { FEAT_24_0_EBX, CPUID_24_0_EBX_AVX10_256 }, + }, + { + .from = { FEAT_24_0_EBX, CPUID_24_0_EBX_AVX10_256 }, + .to = { FEAT_24_0_EBX, CPUID_24_0_EBX_AVX10_512 }, + }, + { + .from = { FEAT_24_0_EBX, CPUID_24_0_EBX_AVX10_VL_MASK }, + .to = { FEAT_7_1_EDX, CPUID_7_1_EDX_AVX10 }, + }, + { + .from = { FEAT_7_1_EDX, CPUID_7_1_EDX_AVX10 }, + .to = { FEAT_24_0_EBX, ~0ull }, + }, }; typedef struct X86RegisterInfo32 { diff --git a/target/i386/cpu.h b/target/i386/cpu.h index f8f97fe933..59959b8b7a 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1000,6 +1000,10 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_24_0_EBX_AVX10_256 (1U << 17) /* AVX10 512-bit vector support is present */ #define CPUID_24_0_EBX_AVX10_512 (1U << 18) +/* AVX10 vector length support mask */ +#define CPUID_24_0_EBX_AVX10_VL_MASK (CPUID_24_0_EBX_AVX10_128 | \ + CPUID_24_0_EBX_AVX10_256 | \ + CPUID_24_0_EBX_AVX10_512) /* RAS Features */ #define CPUID_8000_0007_EBX_OVERFLOW_RECOV (1U << 0) From 0d7475be3b402c25d74c5a4573cbeb733c8f3559 Mon Sep 17 00:00:00 2001 From: Tao Su Date: Thu, 31 Oct 2024 16:52:32 +0800 Subject: [PATCH 48/49] target/i386: Add AVX512 state when AVX10 is supported AVX10 state enumeration in CPUID leaf D and enabling in XCR0 register are identical to AVX512 state regardless of the supported vector lengths. Given that some E-cores will support AVX10 but not support AVX512, add AVX512 state components to guest when AVX10 is enabled. Based on a patch by Tao Su Signed-off-by: Paolo Bonzini Reviewed-by: Zhao Liu Tested-by: Xuelian Guo Signed-off-by: Tao Su Link: https://lore.kernel.org/r/20241031085233.425388-8-tao1.su@linux.intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index d056285a03..7666a50bf0 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -7156,7 +7156,15 @@ static bool cpuid_has_xsave_feature(CPUX86State *env, const ExtSaveArea *esa) return false; } - return (env->features[esa->feature] & esa->bits); + if (env->features[esa->feature] & esa->bits) { + return true; + } + if (esa->feature == FEAT_7_0_EBX && esa->bits == CPUID_7_0_EBX_AVX512F + && (env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10)) { + return true; + } + + return false; } static void x86_cpu_reset_hold(Object *obj, ResetType type) From 1a519388a882fbb352e49cbebb0ed8f62d05842d Mon Sep 17 00:00:00 2001 From: Tao Su Date: Thu, 31 Oct 2024 16:52:33 +0800 Subject: [PATCH 49/49] target/i386: Introduce GraniteRapids-v2 model Update GraniteRapids CPU model to add AVX10 and the missing features(ss, tsc-adjust, cldemote, movdiri, movdir64b). Tested-by: Xuelian Guo Signed-off-by: Tao Su Link: https://lore.kernel.org/r/20241028024512.156724-7-tao1.su@linux.intel.com Reviewed-by: Zhao Liu Signed-off-by: Paolo Bonzini Link: https://lore.kernel.org/r/20241031085233.425388-9-tao1.su@linux.intel.com Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 7666a50bf0..3baa95481f 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -4403,6 +4403,23 @@ static const X86CPUDefinition builtin_x86_defs[] = { .model_id = "Intel Xeon Processor (GraniteRapids)", .versions = (X86CPUVersionDefinition[]) { { .version = 1 }, + { + .version = 2, + .props = (PropValue[]) { + { "ss", "on" }, + { "tsc-adjust", "on" }, + { "cldemote", "on" }, + { "movdiri", "on" }, + { "movdir64b", "on" }, + { "avx10", "on" }, + { "avx10-128", "on" }, + { "avx10-256", "on" }, + { "avx10-512", "on" }, + { "avx10-version", "1" }, + { "stepping", "1" }, + { /* end of list */ } + } + }, { /* end of list */ }, }, },