From 1fd286385c31e42a60db0a298c01e1c8ec290e3e Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Thu, 23 May 2019 15:09:49 +0200
Subject: [PATCH 01/33] s390x/tcg: Implement VECTOR FIND ANY ELEMENT EQUAL

Complicated stuff. Provide two different helpers for CC an !CC handling.
We might want to add more helpers later.

zero_search() and match_index() are courtesy of Richard H.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/Makefile.objs       |   2 +-
 target/s390x/helper.h            |   8 ++
 target/s390x/insn-data.def       |   5 +
 target/s390x/translate_vx.inc.c  |  30 ++++++
 target/s390x/vec_string_helper.c | 154 +++++++++++++++++++++++++++++++
 5 files changed, 198 insertions(+), 1 deletion(-)
 create mode 100644 target/s390x/vec_string_helper.c

diff --git a/target/s390x/Makefile.objs b/target/s390x/Makefile.objs
index 0316457880..ffdd484ef0 100644
--- a/target/s390x/Makefile.objs
+++ b/target/s390x/Makefile.objs
@@ -1,7 +1,7 @@
 obj-y += cpu.o cpu_models.o cpu_features.o gdbstub.o interrupt.o helper.o
 obj-$(CONFIG_TCG) += translate.o cc_helper.o excp_helper.o fpu_helper.o
 obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o crypto_helper.o
-obj-$(CONFIG_TCG) += vec_helper.o vec_int_helper.o
+obj-$(CONFIG_TCG) += vec_helper.o vec_int_helper.o vec_string_helper.o
 obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o arch_dump.o mmu_helper.o diag.o
 obj-$(CONFIG_SOFTMMU) += sigp.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 7755a96c33..c45328cf73 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -211,6 +211,14 @@ DEF_HELPER_FLAGS_4(gvec_vscbi8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vscbi16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_4(gvec_vtm, void, ptr, cptr, env, i32)
 
+/* === Vector String Instructions === */
+DEF_HELPER_FLAGS_4(gvec_vfae8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfae16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfae32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_5(gvec_vfae_cc8, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfae_cc16, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfae_cc32, void, ptr, cptr, cptr, env, i32)
+
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
 DEF_HELPER_4(diag, void, env, i32, i32, i32)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index e61475bdc4..070ce2a471 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1191,6 +1191,11 @@
 /* VECTOR TEST UNDER MASK */
     F(0xe7d8, VTM,     VRR_a, V,   0, 0, 0, 0, vtm, 0, IF_VEC)
 
+/* === Vector String Instructions === */
+
+/* VECTOR FIND ANY ELEMENT EQUAL */
+    F(0xe782, VFAE,    VRR_b, V,   0, 0, 0, 0, vfae, 0, IF_VEC)
+
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
     E(0xb250, CSP,     RRE,   Z,   r1_32u, ra2, r1_P, 0, csp, 0, MO_TEUL, IF_PRIV)
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 7e0bfcb190..ebd7a877f1 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2353,3 +2353,33 @@ static DisasJumpType op_vtm(DisasContext *s, DisasOps *o)
     set_cc_static(s);
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vfae(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    static gen_helper_gvec_3 * const g[3] = {
+        gen_helper_gvec_vfae8,
+        gen_helper_gvec_vfae16,
+        gen_helper_gvec_vfae32,
+    };
+    static gen_helper_gvec_3_ptr * const g_cc[3] = {
+        gen_helper_gvec_vfae_cc8,
+        gen_helper_gvec_vfae_cc16,
+        gen_helper_gvec_vfae_cc32,
+    };
+    if (es > ES_32) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m5, 0, 1)) {
+        gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                       get_field(s->fields, v3), cpu_env, m5, g_cc[es]);
+        set_cc_static(s);
+    } else {
+        gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+                       get_field(s->fields, v3), m5, g[es]);
+    }
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c
new file mode 100644
index 0000000000..56dc89c824
--- /dev/null
+++ b/target/s390x/vec_string_helper.c
@@ -0,0 +1,154 @@
+/*
+ * QEMU TCG support -- s390x vector string instruction support
+ *
+ * Copyright (C) 2019 Red Hat Inc
+ *
+ * Authors:
+ *   David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "cpu.h"
+#include "internal.h"
+#include "vec.h"
+#include "tcg/tcg.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "exec/helper-proto.h"
+
+/*
+ * Returns a bit set in the MSB of each element that is zero,
+ * as defined by the mask.
+ */
+static inline uint64_t zero_search(uint64_t a, uint64_t mask)
+{
+    return ~(((a & mask) + mask) | a | mask);
+}
+
+/*
+ * Returns the byte offset for the first match, or 16 for no match.
+ */
+static inline int match_index(uint64_t c0, uint64_t c1)
+{
+    return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3;
+}
+
+/*
+ * Returns the number of bits composing one element.
+ */
+static uint8_t get_element_bits(uint8_t es)
+{
+    return (1 << es) * BITS_PER_BYTE;
+}
+
+/*
+ * Returns the bitmask for a single element.
+ */
+static uint64_t get_single_element_mask(uint8_t es)
+{
+    return -1ull >> (64 - get_element_bits(es));
+}
+
+/*
+ * Returns the bitmask for a single element (excluding the MSB).
+ */
+static uint64_t get_single_element_lsbs_mask(uint8_t es)
+{
+    return -1ull >> (65 - get_element_bits(es));
+}
+
+/*
+ * Returns the bitmasks for multiple elements (excluding the MSBs).
+ */
+static uint64_t get_element_lsbs_mask(uint8_t es)
+{
+    return dup_const(es, get_single_element_lsbs_mask(es));
+}
+
+static int vfae(void *v1, const void *v2, const void *v3, bool in,
+                bool rt, bool zs, uint8_t es)
+{
+    const uint64_t mask = get_element_lsbs_mask(es);
+    const int bits = get_element_bits(es);
+    uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1;
+    uint64_t first_zero = 16;
+    uint64_t first_equal;
+    int i;
+
+    a0 = s390_vec_read_element64(v2, 0);
+    a1 = s390_vec_read_element64(v2, 1);
+    b0 = s390_vec_read_element64(v3, 0);
+    b1 = s390_vec_read_element64(v3, 1);
+    e0 = 0;
+    e1 = 0;
+    /* compare against equality with every other element */
+    for (i = 0; i < 64; i += bits) {
+        t0 = rol64(b0, i);
+        t1 = rol64(b1, i);
+        e0 |= zero_search(a0 ^ t0, mask);
+        e0 |= zero_search(a0 ^ t1, mask);
+        e1 |= zero_search(a1 ^ t0, mask);
+        e1 |= zero_search(a1 ^ t1, mask);
+    }
+    /* invert the result if requested - invert only the MSBs */
+    if (in) {
+        e0 = ~e0 & ~mask;
+        e1 = ~e1 & ~mask;
+    }
+    first_equal = match_index(e0, e1);
+
+    if (zs) {
+        z0 = zero_search(a0, mask);
+        z1 = zero_search(a1, mask);
+        first_zero = match_index(z0, z1);
+    }
+
+    if (rt) {
+        e0 = (e0 >> (bits - 1)) * get_single_element_mask(es);
+        e1 = (e1 >> (bits - 1)) * get_single_element_mask(es);
+        s390_vec_write_element64(v1, 0, e0);
+        s390_vec_write_element64(v1, 1, e1);
+    } else {
+        s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
+        s390_vec_write_element64(v1, 1, 0);
+    }
+
+    if (first_zero == 16 && first_equal == 16) {
+        return 3; /* no match */
+    } else if (first_zero == 16) {
+        return 1; /* matching elements, no match for zero */
+    } else if (first_equal < first_zero) {
+        return 2; /* matching elements before match for zero */
+    }
+    return 0; /* match for zero */
+}
+
+#define DEF_VFAE_HELPER(BITS)                                                  \
+void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3,         \
+                             uint32_t desc)                                    \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool rt = extract32(simd_data(desc), 2, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                                   \
+}
+DEF_VFAE_HELPER(8)
+DEF_VFAE_HELPER(16)
+DEF_VFAE_HELPER(32)
+
+#define DEF_VFAE_CC_HELPER(BITS)                                               \
+void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3,      \
+                                CPUS390XState *env, uint32_t desc)             \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool rt = extract32(simd_data(desc), 2, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                      \
+}
+DEF_VFAE_CC_HELPER(8)
+DEF_VFAE_CC_HELPER(16)
+DEF_VFAE_CC_HELPER(32)

From 8c0e1e58ce45ab1317bed817a9821b0286f926a2 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 24 May 2019 11:25:58 +0200
Subject: [PATCH 02/33] s390x/tcg: Implement VECTOR FIND ELEMENT EQUAL

Core logic courtesy of Richard H.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h            |  6 ++++
 target/s390x/insn-data.def       |  2 ++
 target/s390x/translate_vx.inc.c  | 31 +++++++++++++++++
 target/s390x/vec_string_helper.c | 57 ++++++++++++++++++++++++++++++++
 4 files changed, 96 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index c45328cf73..a1b169b666 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -218,6 +218,12 @@ DEF_HELPER_FLAGS_4(gvec_vfae32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_5(gvec_vfae_cc8, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_5(gvec_vfae_cc16, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_5(gvec_vfae_cc32, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfee8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfee16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfee32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_5(gvec_vfee_cc8, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfee_cc16, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfee_cc32, void, ptr, cptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 070ce2a471..d8907ef6a5 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1195,6 +1195,8 @@
 
 /* VECTOR FIND ANY ELEMENT EQUAL */
     F(0xe782, VFAE,    VRR_b, V,   0, 0, 0, 0, vfae, 0, IF_VEC)
+/* VECTOR FIND ELEMENT EQUAL */
+    F(0xe780, VFEE,    VRR_b, V,   0, 0, 0, 0, vfee, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index ebd7a877f1..b25afbc011 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2383,3 +2383,34 @@ static DisasJumpType op_vfae(DisasContext *s, DisasOps *o)
     }
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vfee(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    static gen_helper_gvec_3 * const g[3] = {
+        gen_helper_gvec_vfee8,
+        gen_helper_gvec_vfee16,
+        gen_helper_gvec_vfee32,
+    };
+    static gen_helper_gvec_3_ptr * const g_cc[3] = {
+        gen_helper_gvec_vfee_cc8,
+        gen_helper_gvec_vfee_cc16,
+        gen_helper_gvec_vfee_cc32,
+    };
+
+    if (es > ES_32 || m5 & ~0x3) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m5, 0, 1)) {
+        gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                       get_field(s->fields, v3), cpu_env, m5, g_cc[es]);
+        set_cc_static(s);
+    } else {
+        gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+                       get_field(s->fields, v3), m5, g[es]);
+    }
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c
index 56dc89c824..05ad99e173 100644
--- a/target/s390x/vec_string_helper.c
+++ b/target/s390x/vec_string_helper.c
@@ -152,3 +152,60 @@ void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3,      \
 DEF_VFAE_CC_HELPER(8)
 DEF_VFAE_CC_HELPER(16)
 DEF_VFAE_CC_HELPER(32)
+
+static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
+{
+    const uint64_t mask = get_element_lsbs_mask(es);
+    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
+    uint64_t first_zero = 16;
+    uint64_t first_equal;
+
+    a0 = s390_vec_read_element64(v2, 0);
+    a1 = s390_vec_read_element64(v2, 1);
+    b0 = s390_vec_read_element64(v3, 0);
+    b1 = s390_vec_read_element64(v3, 1);
+    e0 = zero_search(a0 ^ b0, mask);
+    e1 = zero_search(a1 ^ b1, mask);
+    first_equal = match_index(e0, e1);
+
+    if (zs) {
+        z0 = zero_search(a0, mask);
+        z1 = zero_search(a1, mask);
+        first_zero = match_index(z0, z1);
+    }
+
+    s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
+    s390_vec_write_element64(v1, 1, 0);
+    if (first_zero == 16 && first_equal == 16) {
+        return 3; /* no match */
+    } else if (first_zero == 16) {
+        return 1; /* matching elements, no match for zero */
+    } else if (first_equal < first_zero) {
+        return 2; /* matching elements before match for zero */
+    }
+    return 0; /* match for zero */
+}
+
+#define DEF_VFEE_HELPER(BITS)                                                  \
+void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3,         \
+                             uint32_t desc)                                    \
+{                                                                              \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    vfee(v1, v2, v3, zs, MO_##BITS);                                           \
+}
+DEF_VFEE_HELPER(8)
+DEF_VFEE_HELPER(16)
+DEF_VFEE_HELPER(32)
+
+#define DEF_VFEE_CC_HELPER(BITS)                                               \
+void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3,      \
+                                CPUS390XState *env, uint32_t desc)             \
+{                                                                              \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS);                              \
+}
+DEF_VFEE_CC_HELPER(8)
+DEF_VFEE_CC_HELPER(16)
+DEF_VFEE_CC_HELPER(32)

From 074e99b3b5552b297f76c820ea55c724209bb6d1 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 24 May 2019 11:26:45 +0200
Subject: [PATCH 03/33] s390x/tcg: Implement VECTOR FIND ELEMENT NOT EQUAL

Similar to VECTOR FIND ELEMENT EQUAL. Core logic courtesy of Richard H.

Add s390_vec_read_element() that can deal with element sizes.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h            |  6 +++
 target/s390x/insn-data.def       |  2 +
 target/s390x/translate_vx.inc.c  | 31 +++++++++++++
 target/s390x/vec.h               | 19 ++++++++
 target/s390x/vec_string_helper.c | 74 ++++++++++++++++++++++++++++++++
 5 files changed, 132 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index a1b169b666..fb50b404db 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -224,6 +224,12 @@ DEF_HELPER_FLAGS_4(gvec_vfee32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_5(gvec_vfee_cc8, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_5(gvec_vfee_cc16, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_5(gvec_vfee_cc32, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfene8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfene16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vfene32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_5(gvec_vfene_cc8, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfene_cc16, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfene_cc32, void, ptr, cptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index d8907ef6a5..d03c1ee0b3 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1197,6 +1197,8 @@
     F(0xe782, VFAE,    VRR_b, V,   0, 0, 0, 0, vfae, 0, IF_VEC)
 /* VECTOR FIND ELEMENT EQUAL */
     F(0xe780, VFEE,    VRR_b, V,   0, 0, 0, 0, vfee, 0, IF_VEC)
+/* VECTOR FIND ELEMENT NOT EQUAL */
+    F(0xe781, VFENE,   VRR_b, V,   0, 0, 0, 0, vfene, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index b25afbc011..1ad0b62517 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2414,3 +2414,34 @@ static DisasJumpType op_vfee(DisasContext *s, DisasOps *o)
     }
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vfene(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    static gen_helper_gvec_3 * const g[3] = {
+        gen_helper_gvec_vfene8,
+        gen_helper_gvec_vfene16,
+        gen_helper_gvec_vfene32,
+    };
+    static gen_helper_gvec_3_ptr * const g_cc[3] = {
+        gen_helper_gvec_vfene_cc8,
+        gen_helper_gvec_vfene_cc16,
+        gen_helper_gvec_vfene_cc32,
+    };
+
+    if (es > ES_32 || m5 & ~0x3) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m5, 0, 1)) {
+        gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                       get_field(s->fields, v3), cpu_env, m5, g_cc[es]);
+        set_cc_static(s);
+    } else {
+        gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+                       get_field(s->fields, v3), m5, g[es]);
+    }
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec.h b/target/s390x/vec.h
index 3313fb43ee..affc62874c 100644
--- a/target/s390x/vec.h
+++ b/target/s390x/vec.h
@@ -12,6 +12,8 @@
 #ifndef S390X_VEC_H
 #define S390X_VEC_H
 
+#include "tcg/tcg.h"
+
 typedef union S390Vector {
     uint64_t doubleword[2];
     uint32_t word[4];
@@ -70,6 +72,23 @@ static inline uint64_t s390_vec_read_element64(const S390Vector *v, uint8_t enr)
     return v->doubleword[enr];
 }
 
+static inline uint64_t s390_vec_read_element(const S390Vector *v, uint8_t enr,
+                                             uint8_t es)
+{
+    switch (es) {
+    case MO_8:
+        return s390_vec_read_element8(v, enr);
+    case MO_16:
+        return s390_vec_read_element16(v, enr);
+    case MO_32:
+        return s390_vec_read_element32(v, enr);
+    case MO_64:
+        return s390_vec_read_element64(v, enr);
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static inline void s390_vec_write_element8(S390Vector *v, uint8_t enr,
                                            uint8_t data)
 {
diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c
index 05ad99e173..0ee3470112 100644
--- a/target/s390x/vec_string_helper.c
+++ b/target/s390x/vec_string_helper.c
@@ -27,6 +27,15 @@ static inline uint64_t zero_search(uint64_t a, uint64_t mask)
     return ~(((a & mask) + mask) | a | mask);
 }
 
+/*
+ * Returns a bit set in the MSB of each element that is not zero,
+ * as defined by the mask.
+ */
+static inline uint64_t nonzero_search(uint64_t a, uint64_t mask)
+{
+    return (((a & mask) + mask) | a) & ~mask;
+}
+
 /*
  * Returns the byte offset for the first match, or 16 for no match.
  */
@@ -209,3 +218,68 @@ void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3,      \
 DEF_VFEE_CC_HELPER(8)
 DEF_VFEE_CC_HELPER(16)
 DEF_VFEE_CC_HELPER(32)
+
+static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
+{
+    const uint64_t mask = get_element_lsbs_mask(es);
+    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
+    uint64_t first_zero = 16;
+    uint64_t first_inequal;
+    bool smaller = false;
+
+    a0 = s390_vec_read_element64(v2, 0);
+    a1 = s390_vec_read_element64(v2, 1);
+    b0 = s390_vec_read_element64(v3, 0);
+    b1 = s390_vec_read_element64(v3, 1);
+    e0 = nonzero_search(a0 ^ b0, mask);
+    e1 = nonzero_search(a1 ^ b1, mask);
+    first_inequal = match_index(e0, e1);
+
+    /* identify the smaller element */
+    if (first_inequal < 16) {
+        uint8_t enr = first_inequal / (1 << es);
+        uint32_t a = s390_vec_read_element(v2, enr, es);
+        uint32_t b = s390_vec_read_element(v3, enr, es);
+
+        smaller = a < b;
+    }
+
+    if (zs) {
+        z0 = zero_search(a0, mask);
+        z1 = zero_search(a1, mask);
+        first_zero = match_index(z0, z1);
+    }
+
+    s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
+    s390_vec_write_element64(v1, 1, 0);
+    if (first_zero == 16 && first_inequal == 16) {
+        return 3;
+    } else if (first_zero < first_inequal) {
+        return 0;
+    }
+    return smaller ? 1 : 2;
+}
+
+#define DEF_VFENE_HELPER(BITS)                                                 \
+void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3,        \
+                              uint32_t desc)                                   \
+{                                                                              \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    vfene(v1, v2, v3, zs, MO_##BITS);                                          \
+}
+DEF_VFENE_HELPER(8)
+DEF_VFENE_HELPER(16)
+DEF_VFENE_HELPER(32)
+
+#define DEF_VFENE_CC_HELPER(BITS)                                              \
+void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3,     \
+                                 CPUS390XState *env, uint32_t desc)            \
+{                                                                              \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS);                             \
+}
+DEF_VFENE_CC_HELPER(8)
+DEF_VFENE_CC_HELPER(16)
+DEF_VFENE_CC_HELPER(32)

From be6324c6b73478f181bba4920de2ef6af317482b Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 24 May 2019 11:27:56 +0200
Subject: [PATCH 04/33] s390x/tcg: Implement VECTOR ISOLATE STRING

Logic mostly courtesy of Richard H.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h            |  6 +++++
 target/s390x/insn-data.def       |  2 ++
 target/s390x/translate_vx.inc.c  | 34 ++++++++++++++++++++++++
 target/s390x/vec_string_helper.c | 45 ++++++++++++++++++++++++++++++++
 4 files changed, 87 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index fb50b404db..1f9f0b463b 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -230,6 +230,12 @@ DEF_HELPER_FLAGS_4(gvec_vfene32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_5(gvec_vfene_cc8, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_5(gvec_vfene_cc16, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_5(gvec_vfene_cc32, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_3(gvec_vistr8, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
+DEF_HELPER_FLAGS_3(gvec_vistr16, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
+DEF_HELPER_FLAGS_3(gvec_vistr32, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
+DEF_HELPER_4(gvec_vistr_cc8, void, ptr, cptr, env, i32)
+DEF_HELPER_4(gvec_vistr_cc16, void, ptr, cptr, env, i32)
+DEF_HELPER_4(gvec_vistr_cc32, void, ptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index d03c1ee0b3..b4a6b59608 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1199,6 +1199,8 @@
     F(0xe780, VFEE,    VRR_b, V,   0, 0, 0, 0, vfee, 0, IF_VEC)
 /* VECTOR FIND ELEMENT NOT EQUAL */
     F(0xe781, VFENE,   VRR_b, V,   0, 0, 0, 0, vfene, 0, IF_VEC)
+/* VECTOR ISOLATE STRING */
+    F(0xe75c, VISTR,   VRR_a, V,   0, 0, 0, 0, vistr, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 1ad0b62517..08a62eab52 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -188,6 +188,9 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
 #define gen_gvec_2s(v1, v2, c, gen) \
     tcg_gen_gvec_2s(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                     16, 16, c, gen)
+#define gen_gvec_2_ool(v1, v2, data, fn) \
+    tcg_gen_gvec_2_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
+                       16, 16, data, fn)
 #define gen_gvec_2i_ool(v1, v2, c, data, fn) \
     tcg_gen_gvec_2i_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                         c, 16, 16, data, fn)
@@ -2445,3 +2448,34 @@ static DisasJumpType op_vfene(DisasContext *s, DisasOps *o)
     }
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vistr(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    static gen_helper_gvec_2 * const g[3] = {
+        gen_helper_gvec_vistr8,
+        gen_helper_gvec_vistr16,
+        gen_helper_gvec_vistr32,
+    };
+    static gen_helper_gvec_2_ptr * const g_cc[3] = {
+        gen_helper_gvec_vistr_cc8,
+        gen_helper_gvec_vistr_cc16,
+        gen_helper_gvec_vistr_cc32,
+    };
+
+    if (es > ES_32 || m5 & ~0x1) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m5, 0, 1)) {
+        gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                       cpu_env, 0, g_cc[es]);
+        set_cc_static(s);
+    } else {
+        gen_gvec_2_ool(get_field(s->fields, v1), get_field(s->fields, v2), 0,
+                       g[es]);
+    }
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c
index 0ee3470112..6bafa23bd7 100644
--- a/target/s390x/vec_string_helper.c
+++ b/target/s390x/vec_string_helper.c
@@ -283,3 +283,48 @@ void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3,     \
 DEF_VFENE_CC_HELPER(8)
 DEF_VFENE_CC_HELPER(16)
 DEF_VFENE_CC_HELPER(32)
+
+static int vistr(void *v1, const void *v2, uint8_t es)
+{
+    const uint64_t mask = get_element_lsbs_mask(es);
+    uint64_t a0 = s390_vec_read_element64(v2, 0);
+    uint64_t a1 = s390_vec_read_element64(v2, 1);
+    uint64_t z;
+    int cc = 3;
+
+    z = zero_search(a0, mask);
+    if (z) {
+        a0 &= ~(-1ull >> clz64(z));
+        a1 = 0;
+        cc = 0;
+    } else {
+        z = zero_search(a1, mask);
+        if (z) {
+            a1 &= ~(-1ull >> clz64(z));
+            cc = 0;
+        }
+    }
+
+    s390_vec_write_element64(v1, 0, a0);
+    s390_vec_write_element64(v1, 1, a1);
+    return cc;
+}
+
+#define DEF_VISTR_HELPER(BITS)                                                 \
+void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc)         \
+{                                                                              \
+    vistr(v1, v2, MO_##BITS);                                                  \
+}
+DEF_VISTR_HELPER(8)
+DEF_VISTR_HELPER(16)
+DEF_VISTR_HELPER(32)
+
+#define DEF_VISTR_CC_HELPER(BITS)                                              \
+void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \
+                                uint32_t desc)                                 \
+{                                                                              \
+    env->cc_op = vistr(v1, v2, MO_##BITS);                                     \
+}
+DEF_VISTR_CC_HELPER(8)
+DEF_VISTR_CC_HELPER(16)
+DEF_VISTR_CC_HELPER(32)

From 13b0228f77ffa14a0f82bd8a9d0fd5859b0d6a7d Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 24 May 2019 11:23:49 +0200
Subject: [PATCH 05/33] s390x/tcg: Implement VECTOR STRING RANGE COMPARE

Unfortunately, there is no easy way to avoid looping over all elements
in v2. Provide specialized variants for !cc,!rt/!cc,rt/cc,!rt/cc,rt and
all element types. Especially for different values of rt, the compiler
might be able to optimize the code a lot.

Add s390_vec_write_element().

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h            |  12 +++
 target/s390x/insn-data.def       |   2 +
 target/s390x/translate_vx.inc.c  |  59 +++++++++++++
 target/s390x/vec.h               |  21 +++++
 target/s390x/vec_string_helper.c | 143 +++++++++++++++++++++++++++++++
 5 files changed, 237 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 1f9f0b463b..5db67779d3 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -236,6 +236,18 @@ DEF_HELPER_FLAGS_3(gvec_vistr32, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
 DEF_HELPER_4(gvec_vistr_cc8, void, ptr, cptr, env, i32)
 DEF_HELPER_4(gvec_vistr_cc16, void, ptr, cptr, env, i32)
 DEF_HELPER_4(gvec_vistr_cc32, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc_rt8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc_rt16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vstrc_rt32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_6(gvec_vstrc_cc8, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc16, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index b4a6b59608..a2969fab58 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1201,6 +1201,8 @@
     F(0xe781, VFENE,   VRR_b, V,   0, 0, 0, 0, vfene, 0, IF_VEC)
 /* VECTOR ISOLATE STRING */
     F(0xe75c, VISTR,   VRR_a, V,   0, 0, 0, 0, vistr, 0, IF_VEC)
+/* VECTOR STRING RANGE COMPARE */
+    F(0xe78a, VSTRC,   VRR_d, V,   0, 0, 0, 0, vstrc, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 08a62eab52..f26ffa2895 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -217,6 +217,10 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
     tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                        vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
                        16, 16, data, fn)
+#define gen_gvec_4_ptr(v1, v2, v3, v4, ptr, data, fn) \
+    tcg_gen_gvec_4_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
+                       vec_full_reg_offset(v3), vec_full_reg_offset(v4), \
+                       ptr, 16, 16, data, fn)
 #define gen_gvec_dup_i64(es, v1, c) \
     tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
 #define gen_gvec_mov(v1, v2) \
@@ -2479,3 +2483,58 @@ static DisasJumpType op_vistr(DisasContext *s, DisasOps *o)
     }
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s->fields, m5);
+    const uint8_t m6 = get_field(s->fields, m6);
+    static gen_helper_gvec_4 * const g[3] = {
+        gen_helper_gvec_vstrc8,
+        gen_helper_gvec_vstrc16,
+        gen_helper_gvec_vstrc32,
+    };
+    static gen_helper_gvec_4 * const g_rt[3] = {
+        gen_helper_gvec_vstrc_rt8,
+        gen_helper_gvec_vstrc_rt16,
+        gen_helper_gvec_vstrc_rt32,
+    };
+    static gen_helper_gvec_4_ptr * const g_cc[3] = {
+        gen_helper_gvec_vstrc_cc8,
+        gen_helper_gvec_vstrc_cc16,
+        gen_helper_gvec_vstrc_cc32,
+    };
+    static gen_helper_gvec_4_ptr * const g_cc_rt[3] = {
+        gen_helper_gvec_vstrc_cc_rt8,
+        gen_helper_gvec_vstrc_cc_rt16,
+        gen_helper_gvec_vstrc_cc_rt32,
+    };
+
+    if (es > ES_32) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m6, 0, 1)) {
+        if (extract32(m6, 2, 1)) {
+            gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                           get_field(s->fields, v3), get_field(s->fields, v4),
+                           cpu_env, m6, g_cc_rt[es]);
+        } else {
+            gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                           get_field(s->fields, v3), get_field(s->fields, v4),
+                           cpu_env, m6, g_cc[es]);
+        }
+        set_cc_static(s);
+    } else {
+        if (extract32(m6, 2, 1)) {
+            gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+                           get_field(s->fields, v3), get_field(s->fields, v4),
+                           m6, g_rt[es]);
+        } else {
+            gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2),
+                           get_field(s->fields, v3), get_field(s->fields, v4),
+                           m6, g[es]);
+        }
+    }
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec.h b/target/s390x/vec.h
index affc62874c..a6e361869b 100644
--- a/target/s390x/vec.h
+++ b/target/s390x/vec.h
@@ -117,4 +117,25 @@ static inline void s390_vec_write_element64(S390Vector *v, uint8_t enr,
     v->doubleword[enr] = data;
 }
 
+static inline void s390_vec_write_element(S390Vector *v, uint8_t enr,
+                                          uint8_t es, uint64_t data)
+{
+    switch (es) {
+    case MO_8:
+        s390_vec_write_element8(v, enr, data);
+        break;
+    case MO_16:
+        s390_vec_write_element16(v, enr, data);
+        break;
+    case MO_32:
+        s390_vec_write_element32(v, enr, data);
+        break;
+    case MO_64:
+        s390_vec_write_element64(v, enr, data);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 #endif /* S390X_VEC_H */
diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c
index 6bafa23bd7..c516c0ceeb 100644
--- a/target/s390x/vec_string_helper.c
+++ b/target/s390x/vec_string_helper.c
@@ -328,3 +328,146 @@ void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \
 DEF_VISTR_CC_HELPER(8)
 DEF_VISTR_CC_HELPER(16)
 DEF_VISTR_CC_HELPER(32)
+
+static bool element_compare(uint32_t data, uint32_t l, uint8_t c)
+{
+    const bool equal = extract32(c, 7, 1);
+    const bool lower = extract32(c, 6, 1);
+    const bool higher = extract32(c, 5, 1);
+
+    if (data < l) {
+        return lower;
+    } else if (data > l) {
+        return higher;
+    }
+    return equal;
+}
+
+static int vstrc(void *v1, const void *v2, const void *v3, const void *v4,
+                 bool in, bool rt, bool zs, uint8_t es)
+{
+    const uint64_t mask = get_element_lsbs_mask(es);
+    uint64_t a0 = s390_vec_read_element64(v2, 0);
+    uint64_t a1 = s390_vec_read_element64(v2, 1);
+    int first_zero = 16, first_match = 16;
+    S390Vector rt_result = {};
+    uint64_t z0, z1;
+    int i, j;
+
+    if (zs) {
+        z0 = zero_search(a0, mask);
+        z1 = zero_search(a1, mask);
+        first_zero = match_index(z0, z1);
+    }
+
+    for (i = 0; i < 16 / (1 << es); i++) {
+        const uint32_t data = s390_vec_read_element(v2, i, es);
+        const int cur_byte = i * (1 << es);
+        bool any_match = false;
+
+        /* if we don't need a bit vector, we can stop early */
+        if (cur_byte == first_zero && !rt) {
+            break;
+        }
+
+        for (j = 0; j < 16 / (1 << es); j += 2) {
+            const uint32_t l1 = s390_vec_read_element(v3, j, es);
+            const uint32_t l2 = s390_vec_read_element(v3, j + 1, es);
+            /* we are only interested in the highest byte of each element */
+            const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es));
+            const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es));
+
+            if (element_compare(data, l1, c1) &&
+                element_compare(data, l2, c2)) {
+                any_match = true;
+                break;
+            }
+        }
+        /* invert the result if requested */
+        any_match = in ^ any_match;
+
+        if (any_match) {
+            /* indicate bit vector if requested */
+            if (rt) {
+                const uint64_t val = -1ull;
+
+                first_match = MIN(cur_byte, first_match);
+                s390_vec_write_element(&rt_result, i, es, val);
+            } else {
+                /* stop on the first match */
+                first_match = cur_byte;
+                break;
+            }
+        }
+    }
+
+    if (rt) {
+        *(S390Vector *)v1 = rt_result;
+    } else {
+        s390_vec_write_element64(v1, 0, MIN(first_match, first_zero));
+        s390_vec_write_element64(v1, 1, 0);
+    }
+
+    if (first_zero == 16 && first_match == 16) {
+        return 3; /* no match */
+    } else if (first_zero == 16) {
+        return 1; /* matching elements, no match for zero */
+    } else if (first_match < first_zero) {
+        return 2; /* matching elements before match for zero */
+    }
+    return 0; /* match for zero */
+}
+
+#define DEF_VSTRC_HELPER(BITS)                                                 \
+void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3,        \
+                              const void *v4, uint32_t desc)                   \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                               \
+}
+DEF_VSTRC_HELPER(8)
+DEF_VSTRC_HELPER(16)
+DEF_VSTRC_HELPER(32)
+
+#define DEF_VSTRC_RT_HELPER(BITS)                                              \
+void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3,     \
+                                 const void *v4, uint32_t desc)                \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                               \
+}
+DEF_VSTRC_RT_HELPER(8)
+DEF_VSTRC_RT_HELPER(16)
+DEF_VSTRC_RT_HELPER(32)
+
+#define DEF_VSTRC_CC_HELPER(BITS)                                              \
+void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3,     \
+                                 const void *v4, CPUS390XState *env,           \
+                                 uint32_t desc)                                \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                  \
+}
+DEF_VSTRC_CC_HELPER(8)
+DEF_VSTRC_CC_HELPER(16)
+DEF_VSTRC_CC_HELPER(32)
+
+#define DEF_VSTRC_CC_RT_HELPER(BITS)                                           \
+void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3,  \
+                                    const void *v4, CPUS390XState *env,        \
+                                    uint32_t desc)                             \
+{                                                                              \
+    const bool in = extract32(simd_data(desc), 3, 1);                          \
+    const bool zs = extract32(simd_data(desc), 1, 1);                          \
+                                                                               \
+    env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                  \
+}
+DEF_VSTRC_CC_RT_HELPER(8)
+DEF_VSTRC_CC_RT_HELPER(16)
+DEF_VSTRC_CC_RT_HELPER(32)

From ec8e23e37f80072a67ad409e6cdf67b65bb19eef Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 28 May 2019 20:46:57 +0200
Subject: [PATCH 06/33] s390x: Align vector registers to 16 bytes

11e2bfef7990 ("tcg/i386: Use MOVDQA for TCG_TYPE_V128 load/store")
revealed that the vregs are not aligned to 16 bytes. Align them to
16 bytes, to avoid segfault'ing on x86.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/cpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 7305cacc7b..1bed12b6c3 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -66,7 +66,7 @@ struct CPUS390XState {
      * The floating point registers are part of the vector registers.
      * vregs[0][0] -> vregs[15][0] are 16 floating point registers
      */
-    CPU_DoubleU vregs[32][2];  /* vector registers */
+    CPU_DoubleU vregs[32][2] QEMU_ALIGNED(16);  /* vector registers */
     uint32_t aregs[16];    /* access registers */
     uint8_t riccb[64];     /* runtime instrumentation control */
     uint64_t gscb[4];      /* guarded storage control */

From 4f83d7d2121a8b4cce59c06f7d74c47cdedd79eb Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 09:15:38 +0200
Subject: [PATCH 07/33] s390x: Use uint64_t for vector registers

CPU_DoubleU is primarily used to reinterpret between integer and floats.
We don't really need this functionality. So let's just keep it simple
and use an uint64_t.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 linux-user/s390x/signal.c  |   4 +-
 target/s390x/arch_dump.c   |   8 +--
 target/s390x/cpu.h         |   4 +-
 target/s390x/excp_helper.c |   6 +-
 target/s390x/gdbstub.c     |  16 ++---
 target/s390x/helper.c      |  10 +--
 target/s390x/kvm.c         |  16 ++---
 target/s390x/machine.c     | 128 ++++++++++++++++++-------------------
 target/s390x/translate.c   |   2 +-
 9 files changed, 97 insertions(+), 97 deletions(-)

diff --git a/linux-user/s390x/signal.c b/linux-user/s390x/signal.c
index 3d3cb67bbe..ecfa2a14a9 100644
--- a/linux-user/s390x/signal.c
+++ b/linux-user/s390x/signal.c
@@ -123,7 +123,7 @@ static void save_sigregs(CPUS390XState *env, target_sigregs *sregs)
      */
     //save_fp_regs(&current->thread.fp_regs); FIXME
     for (i = 0; i < 16; i++) {
-        __put_user(get_freg(env, i)->ll, &sregs->fpregs.fprs[i]);
+        __put_user(*get_freg(env, i), &sregs->fpregs.fprs[i]);
     }
 }
 
@@ -254,7 +254,7 @@ restore_sigregs(CPUS390XState *env, target_sigregs *sc)
         __get_user(env->aregs[i], &sc->regs.acrs[i]);
     }
     for (i = 0; i < 16; i++) {
-        __get_user(get_freg(env, i)->ll, &sc->fpregs.fprs[i]);
+        __get_user(*get_freg(env, i), &sc->fpregs.fprs[i]);
     }
 
     return err;
diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c
index c9ef0a6e60..50fa0ae4b6 100644
--- a/target/s390x/arch_dump.c
+++ b/target/s390x/arch_dump.c
@@ -104,7 +104,7 @@ static void s390x_write_elf64_fpregset(Note *note, S390CPU *cpu, int id)
     note->hdr.n_type = cpu_to_be32(NT_FPREGSET);
     note->contents.fpregset.fpc = cpu_to_be32(cpu->env.fpc);
     for (i = 0; i <= 15; i++) {
-        note->contents.fpregset.fprs[i] = cpu_to_be64(get_freg(cs, i)->ll);
+        note->contents.fpregset.fprs[i] = cpu_to_be64(*get_freg(cs, i));
     }
 }
 
@@ -114,7 +114,7 @@ static void s390x_write_elf64_vregslo(Note *note, S390CPU *cpu,  int id)
 
     note->hdr.n_type = cpu_to_be32(NT_S390_VXRS_LOW);
     for (i = 0; i <= 15; i++) {
-        note->contents.vregslo.vregs[i] = cpu_to_be64(cpu->env.vregs[i][1].ll);
+        note->contents.vregslo.vregs[i] = cpu_to_be64(cpu->env.vregs[i][1]);
     }
 }
 
@@ -127,8 +127,8 @@ static void s390x_write_elf64_vregshi(Note *note, S390CPU *cpu, int id)
 
     note->hdr.n_type = cpu_to_be32(NT_S390_VXRS_HIGH);
     for (i = 0; i <= 15; i++) {
-        temp_vregshi->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i + 16][0].ll);
-        temp_vregshi->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i + 16][1].ll);
+        temp_vregshi->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i + 16][0]);
+        temp_vregshi->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i + 16][1]);
     }
 }
 
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 1bed12b6c3..317a1377e6 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -66,7 +66,7 @@ struct CPUS390XState {
      * The floating point registers are part of the vector registers.
      * vregs[0][0] -> vregs[15][0] are 16 floating point registers
      */
-    CPU_DoubleU vregs[32][2] QEMU_ALIGNED(16);  /* vector registers */
+    uint64_t vregs[32][2] QEMU_ALIGNED(16);  /* vector registers */
     uint32_t aregs[16];    /* access registers */
     uint8_t riccb[64];     /* runtime instrumentation control */
     uint64_t gscb[4];      /* guarded storage control */
@@ -153,7 +153,7 @@ struct CPUS390XState {
 
 };
 
-static inline CPU_DoubleU *get_freg(CPUS390XState *cs, int nr)
+static inline uint64_t *get_freg(CPUS390XState *cs, int nr)
 {
     return &cs->vregs[nr][0];
 }
diff --git a/target/s390x/excp_helper.c b/target/s390x/excp_helper.c
index 3a467b72c5..85223d00c0 100644
--- a/target/s390x/excp_helper.c
+++ b/target/s390x/excp_helper.c
@@ -390,8 +390,8 @@ static int mchk_store_vregs(CPUS390XState *env, uint64_t mcesao)
     }
 
     for (i = 0; i < 32; i++) {
-        sa->vregs[i][0] = cpu_to_be64(env->vregs[i][0].ll);
-        sa->vregs[i][1] = cpu_to_be64(env->vregs[i][1].ll);
+        sa->vregs[i][0] = cpu_to_be64(env->vregs[i][0]);
+        sa->vregs[i][1] = cpu_to_be64(env->vregs[i][1]);
     }
 
     cpu_physical_memory_unmap(sa, len, 1, len);
@@ -429,7 +429,7 @@ static void do_mchk_interrupt(CPUS390XState *env)
     lowcore->ar_access_id = 1;
 
     for (i = 0; i < 16; i++) {
-        lowcore->floating_pt_save_area[i] = cpu_to_be64(get_freg(env, i)->ll);
+        lowcore->floating_pt_save_area[i] = cpu_to_be64(*get_freg(env, i));
         lowcore->gpregs_save_area[i] = cpu_to_be64(env->regs[i]);
         lowcore->access_regs_save_area[i] = cpu_to_be32(env->aregs[i]);
         lowcore->cregs_save_area[i] = cpu_to_be64(env->cregs[i]);
diff --git a/target/s390x/gdbstub.c b/target/s390x/gdbstub.c
index df147596ce..9cfd8fe3e0 100644
--- a/target/s390x/gdbstub.c
+++ b/target/s390x/gdbstub.c
@@ -116,7 +116,7 @@ static int cpu_read_fp_reg(CPUS390XState *env, uint8_t *mem_buf, int n)
     case S390_FPC_REGNUM:
         return gdb_get_reg32(mem_buf, env->fpc);
     case S390_F0_REGNUM ... S390_F15_REGNUM:
-        return gdb_get_reg64(mem_buf, get_freg(env, n - S390_F0_REGNUM)->ll);
+        return gdb_get_reg64(mem_buf, *get_freg(env, n - S390_F0_REGNUM));
     default:
         return 0;
     }
@@ -129,7 +129,7 @@ static int cpu_write_fp_reg(CPUS390XState *env, uint8_t *mem_buf, int n)
         env->fpc = ldl_p(mem_buf);
         return 4;
     case S390_F0_REGNUM ... S390_F15_REGNUM:
-        get_freg(env, n - S390_F0_REGNUM)->ll = ldtul_p(mem_buf);
+        *get_freg(env, n - S390_F0_REGNUM) = ldtul_p(mem_buf);
         return 8;
     default:
         return 0;
@@ -150,11 +150,11 @@ static int cpu_read_vreg(CPUS390XState *env, uint8_t *mem_buf, int n)
 
     switch (n) {
     case S390_V0L_REGNUM ... S390_V15L_REGNUM:
-        ret = gdb_get_reg64(mem_buf, env->vregs[n][1].ll);
+        ret = gdb_get_reg64(mem_buf, env->vregs[n][1]);
         break;
     case S390_V16_REGNUM ... S390_V31_REGNUM:
-        ret = gdb_get_reg64(mem_buf, env->vregs[n][0].ll);
-        ret += gdb_get_reg64(mem_buf + 8, env->vregs[n][1].ll);
+        ret = gdb_get_reg64(mem_buf, env->vregs[n][0]);
+        ret += gdb_get_reg64(mem_buf + 8, env->vregs[n][1]);
         break;
     default:
         ret = 0;
@@ -167,11 +167,11 @@ static int cpu_write_vreg(CPUS390XState *env, uint8_t *mem_buf, int n)
 {
     switch (n) {
     case S390_V0L_REGNUM ... S390_V15L_REGNUM:
-        env->vregs[n][1].ll = ldtul_p(mem_buf + 8);
+        env->vregs[n][1] = ldtul_p(mem_buf + 8);
         return 8;
     case S390_V16_REGNUM ... S390_V31_REGNUM:
-        env->vregs[n][0].ll = ldtul_p(mem_buf);
-        env->vregs[n][1].ll = ldtul_p(mem_buf + 8);
+        env->vregs[n][0] = ldtul_p(mem_buf);
+        env->vregs[n][1] = ldtul_p(mem_buf + 8);
         return 16;
     default:
         return 0;
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index 3c8f0a7615..a69e5abf5f 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c
@@ -249,7 +249,7 @@ int s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch)
         cpu_physical_memory_write(offsetof(LowCore, ar_access_id), &ar_id, 1);
     }
     for (i = 0; i < 16; ++i) {
-        sa->fprs[i] = cpu_to_be64(get_freg(&cpu->env, i)->ll);
+        sa->fprs[i] = cpu_to_be64(*get_freg(&cpu->env, i));
     }
     for (i = 0; i < 16; ++i) {
         sa->grs[i] = cpu_to_be64(cpu->env.regs[i]);
@@ -299,8 +299,8 @@ int s390_store_adtl_status(S390CPU *cpu, hwaddr addr, hwaddr len)
 
     if (s390_has_feat(S390_FEAT_VECTOR)) {
         for (i = 0; i < 32; i++) {
-            sa->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i][0].ll);
-            sa->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i][1].ll);
+            sa->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i][0]);
+            sa->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i][1]);
         }
     }
     if (s390_has_feat(S390_FEAT_GUARDED_STORAGE) && len >= ADTL_GS_MIN_SIZE) {
@@ -341,13 +341,13 @@ void s390_cpu_dump_state(CPUState *cs, FILE *f, int flags)
         if (s390_has_feat(S390_FEAT_VECTOR)) {
             for (i = 0; i < 32; i++) {
                 qemu_fprintf(f, "V%02d=%016" PRIx64 "%016" PRIx64 "%c",
-                             i, env->vregs[i][0].ll, env->vregs[i][1].ll,
+                             i, env->vregs[i][0], env->vregs[i][1],
                              i % 2 ? '\n' : ' ');
             }
         } else {
             for (i = 0; i < 16; i++) {
                 qemu_fprintf(f, "F%02d=%016" PRIx64 "%c",
-                             i, get_freg(env, i)->ll,
+                             i, *get_freg(env, i),
                              (i % 4) == 3 ? '\n' : ' ');
             }
         }
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index e5e2b691f2..bcec9795ec 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -418,21 +418,21 @@ int kvm_arch_put_registers(CPUState *cs, int level)
 
     if (can_sync_regs(cs, KVM_SYNC_VRS)) {
         for (i = 0; i < 32; i++) {
-            cs->kvm_run->s.regs.vrs[i][0] = env->vregs[i][0].ll;
-            cs->kvm_run->s.regs.vrs[i][1] = env->vregs[i][1].ll;
+            cs->kvm_run->s.regs.vrs[i][0] = env->vregs[i][0];
+            cs->kvm_run->s.regs.vrs[i][1] = env->vregs[i][1];
         }
         cs->kvm_run->s.regs.fpc = env->fpc;
         cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_VRS;
     } else if (can_sync_regs(cs, KVM_SYNC_FPRS)) {
         for (i = 0; i < 16; i++) {
-            cs->kvm_run->s.regs.fprs[i] = get_freg(env, i)->ll;
+            cs->kvm_run->s.regs.fprs[i] = *get_freg(env, i);
         }
         cs->kvm_run->s.regs.fpc = env->fpc;
         cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_FPRS;
     } else {
         /* Floating point */
         for (i = 0; i < 16; i++) {
-            fpu.fprs[i] = get_freg(env, i)->ll;
+            fpu.fprs[i] = *get_freg(env, i);
         }
         fpu.fpc = env->fpc;
 
@@ -586,13 +586,13 @@ int kvm_arch_get_registers(CPUState *cs)
     /* Floating point and vector registers */
     if (can_sync_regs(cs, KVM_SYNC_VRS)) {
         for (i = 0; i < 32; i++) {
-            env->vregs[i][0].ll = cs->kvm_run->s.regs.vrs[i][0];
-            env->vregs[i][1].ll = cs->kvm_run->s.regs.vrs[i][1];
+            env->vregs[i][0] = cs->kvm_run->s.regs.vrs[i][0];
+            env->vregs[i][1] = cs->kvm_run->s.regs.vrs[i][1];
         }
         env->fpc = cs->kvm_run->s.regs.fpc;
     } else if (can_sync_regs(cs, KVM_SYNC_FPRS)) {
         for (i = 0; i < 16; i++) {
-            get_freg(env, i)->ll = cs->kvm_run->s.regs.fprs[i];
+            *get_freg(env, i) = cs->kvm_run->s.regs.fprs[i];
         }
         env->fpc = cs->kvm_run->s.regs.fpc;
     } else {
@@ -601,7 +601,7 @@ int kvm_arch_get_registers(CPUState *cs)
             return r;
         }
         for (i = 0; i < 16; i++) {
-            get_freg(env, i)->ll = fpu.fprs[i];
+            *get_freg(env, i) = fpu.fprs[i];
         }
         env->fpc = fpu.fpc;
     }
diff --git a/target/s390x/machine.c b/target/s390x/machine.c
index cb792aa103..e6851a57bc 100644
--- a/target/s390x/machine.c
+++ b/target/s390x/machine.c
@@ -66,22 +66,22 @@ static const VMStateDescription vmstate_fpu = {
     .minimum_version_id = 1,
     .needed = fpu_needed,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT64(env.vregs[0][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[1][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[2][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[3][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[4][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[5][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[6][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[7][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[8][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[9][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[10][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[11][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[12][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[13][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[14][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[15][0].ll, S390CPU),
+        VMSTATE_UINT64(env.vregs[0][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[1][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[2][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[3][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[4][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[5][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[6][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[7][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[8][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[9][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[10][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[11][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[12][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[13][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[14][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[15][0], S390CPU),
         VMSTATE_UINT32(env.fpc, S390CPU),
         VMSTATE_END_OF_LIST()
     }
@@ -99,54 +99,54 @@ static const VMStateDescription vmstate_vregs = {
     .needed = vregs_needed,
     .fields = (VMStateField[]) {
         /* vregs[0][0] -> vregs[15][0] and fregs are overlays */
-        VMSTATE_UINT64(env.vregs[16][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[17][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[18][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[19][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[20][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[21][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[22][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[23][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[24][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[25][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[26][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[27][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[28][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[29][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[30][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[31][0].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[0][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[1][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[2][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[3][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[4][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[5][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[6][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[7][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[8][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[9][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[10][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[11][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[12][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[13][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[14][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[15][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[16][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[17][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[18][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[19][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[20][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[21][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[22][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[23][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[24][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[25][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[26][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[27][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[28][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[29][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[30][1].ll, S390CPU),
-        VMSTATE_UINT64(env.vregs[31][1].ll, S390CPU),
+        VMSTATE_UINT64(env.vregs[16][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[17][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[18][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[19][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[20][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[21][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[22][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[23][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[24][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[25][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[26][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[27][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[28][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[29][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[30][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[31][0], S390CPU),
+        VMSTATE_UINT64(env.vregs[0][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[1][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[2][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[3][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[4][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[5][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[6][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[7][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[8][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[9][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[10][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[11][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[12][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[13][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[14][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[15][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[16][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[17][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[18][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[19][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[20][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[21][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[22][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[23][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[24][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[25][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[26][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[27][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[28][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[29][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[30][1], S390CPU),
+        VMSTATE_UINT64(env.vregs[31][1], S390CPU),
         VMSTATE_END_OF_LIST()
     }
 };
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index fa57b7550e..ac0d8b6410 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -149,7 +149,7 @@ void s390x_translate_init(void)
 static inline int vec_full_reg_offset(uint8_t reg)
 {
     g_assert(reg < 32);
-    return offsetof(CPUS390XState, vregs[reg][0].d);
+    return offsetof(CPUS390XState, vregs[reg][0]);
 }
 
 static inline int vec_reg_offset(uint8_t reg, uint8_t enr, TCGMemOp es)

From 88a29e867528c57151103e64a966a9cbaeec852f Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 31 May 2019 16:31:44 +0200
Subject: [PATCH 08/33] s390x/tcg: Fix max_byte detection for stfle

used_stfl_bytes is 0, before initialized via prepare_stfl() on the
first invocation. We have to move the calculation of max_bytes after
prepare_stfl().

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/misc_helper.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
index ee67c1fa0c..34476134a4 100644
--- a/target/s390x/misc_helper.c
+++ b/target/s390x/misc_helper.c
@@ -669,7 +669,7 @@ uint32_t HELPER(stfle)(CPUS390XState *env, uint64_t addr)
 {
     const uintptr_t ra = GETPC();
     const int count_bytes = ((env->regs[0] & 0xff) + 1) * 8;
-    const int max_bytes = ROUND_UP(used_stfl_bytes, 8);
+    int max_bytes;
     int i;
 
     if (addr & 0x7) {
@@ -677,6 +677,7 @@ uint32_t HELPER(stfle)(CPUS390XState *env, uint64_t addr)
     }
 
     prepare_stfl();
+    max_bytes = ROUND_UP(used_stfl_bytes, 8);
     for (i = 0; i < count_bytes; ++i) {
         cpu_stb_data_ra(env, addr + i, stfl_bytes[i], ra);
     }

From e19a61eb514dbf7c9a725c7539ce3b6166cd6ac4 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 31 May 2019 16:33:38 +0200
Subject: [PATCH 09/33] s390x/tcg: Store only the necessary amount of
 doublewords for STFLE

The PoP (z14, 7-382) says:
    Doublewords to the right of the doubleword in which the
    highest-numbered facility bit is assigned for a model
    may or may not be stored.

However, stack protection in certain binaries can't deal with that.
"gzip" example code:

f1b4:       a7 08 00 03             lhi     %r0,3
f1b8:       b2 b0 f0 a0             stfle   160(%r15)
f1bc:       e3 20 f0 b2 00 90       llgc    %r2,178(%r15)
f1c2:       c0 2b 00 00 00 01       nilf    %r2,1
f1c8:       b2 4f 00 10             ear     %r1,%a0
f1cc:       b9 14 00 22             lgfr    %r2,%r2
f1d0:       eb 11 00 20 00 0d       sllg    %r1,%r1,32
f1d6:       b2 4f 00 11             ear     %r1,%a1
f1da:       d5 07 f0 b8 10 28       clc     184(8,%r15),40(%r1)
f1e0:       a7 74 00 06             jne     f1ec <file_read@@Base+0x1bc>
f1e4:       eb ef f1 30 00 04       lmg     %r14,%r15,304(%r15)
f1ea:       07 fe                   br      %r14
f1ec:       c0 e5 ff ff 9d 6e       brasl   %r14,2cc8 <__stack_chk_fail@plt>

In QEMU, we currently have:
    max_bytes = 24
the code asks for (3 + 1) doublewords == 32 bytes.

If we write 32 bytes instead of only 24, and return "2 + 1" doublewords
("one less than the number of doulewords needed to contain all of the
 facility bits"), the example code detects a stack corruption.

In my opinion, the code is wrong. However, it seems to work fine on
real machines. So let's limit storing to the minimum of the requested
and the maximum doublewords.

Cc: Stefan Liebler <stli@linux.ibm.com>
Cc: Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/misc_helper.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
index 34476134a4..10aa617cf9 100644
--- a/target/s390x/misc_helper.c
+++ b/target/s390x/misc_helper.c
@@ -678,7 +678,13 @@ uint32_t HELPER(stfle)(CPUS390XState *env, uint64_t addr)
 
     prepare_stfl();
     max_bytes = ROUND_UP(used_stfl_bytes, 8);
-    for (i = 0; i < count_bytes; ++i) {
+
+    /*
+     * The PoP says that doublewords beyond the highest-numbered facility
+     * bit may or may not be stored.  However, existing hardware appears to
+     * not store the words, and existing software depend on that.
+     */
+    for (i = 0; i < MIN(count_bytes, max_bytes); ++i) {
         cpu_stb_data_ra(env, addr + i, stfl_bytes[i], ra);
     }
 

From 9be6fa99d6b1371ced6a9b57c32daec86733cd0a Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 22 Feb 2019 11:19:48 +0100
Subject: [PATCH 10/33] s390x/tcg: Introduce tcg_s390_vector_exception()

Handling is similar to data exceptions, however we can always store the
VXC into the lowore and the FPC:

z14 PoP, 6-20, "Vector-Exception Code"
    When a vector-processing exception causes a pro-
    gram interruption, a vector-exception code (VXC) is
    stored at location 147, and zeros are stored at loca-
    tions 144-146. The VXC is also placed in the DXC
    field of the floating-point-control (FPC) register if bit
    45 of control register 0 is one. When bit 45 of control
    register 0 is zero and bit 46 of control register 0 is
    one, the DXC field of the FPC register and the con-
    tents of storage at location 147 are unpredictable.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/cpu.h         |  1 +
 target/s390x/excp_helper.c | 15 +++++++++++++++
 target/s390x/tcg_s390x.h   |  2 ++
 3 files changed, 18 insertions(+)

diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 317a1377e6..4fc08a2c88 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -215,6 +215,7 @@ extern const struct VMStateDescription vmstate_s390_cpu;
 #define PGM_SPECIAL_OP                  0x0013
 #define PGM_OPERAND                     0x0015
 #define PGM_TRACE_TABLE                 0x0016
+#define PGM_VECTOR_PROCESSING           0x001b
 #define PGM_SPACE_SWITCH                0x001c
 #define PGM_HFP_SQRT                    0x001d
 #define PGM_PC_TRANS_SPEC               0x001f
diff --git a/target/s390x/excp_helper.c b/target/s390x/excp_helper.c
index 85223d00c0..f21bcf79ae 100644
--- a/target/s390x/excp_helper.c
+++ b/target/s390x/excp_helper.c
@@ -62,6 +62,21 @@ void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc,
     tcg_s390_program_interrupt(env, PGM_DATA, ILEN_AUTO, ra);
 }
 
+void QEMU_NORETURN tcg_s390_vector_exception(CPUS390XState *env, uint32_t vxc,
+                                             uintptr_t ra)
+{
+    g_assert(vxc <= 0xff);
+#if !defined(CONFIG_USER_ONLY)
+    /* Always store the VXC into the lowcore, without AFP it is undefined */
+    stl_phys(CPU(s390_env_get_cpu(env))->as,
+             env->psa + offsetof(LowCore, data_exc_code), vxc);
+#endif
+
+    /* Always store the VXC into the FPC, without AFP it is undefined */
+    env->fpc = deposit32(env->fpc, 8, 8, vxc);
+    tcg_s390_program_interrupt(env, PGM_VECTOR_PROCESSING, ILEN_AUTO, ra);
+}
+
 void HELPER(data_exception)(CPUS390XState *env, uint32_t dxc)
 {
     tcg_s390_data_exception(env, dxc, GETPC());
diff --git a/target/s390x/tcg_s390x.h b/target/s390x/tcg_s390x.h
index ab2c4ba703..2813f9d48e 100644
--- a/target/s390x/tcg_s390x.h
+++ b/target/s390x/tcg_s390x.h
@@ -18,5 +18,7 @@ void QEMU_NORETURN tcg_s390_program_interrupt(CPUS390XState *env, uint32_t code,
                                               int ilen, uintptr_t ra);
 void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc,
                                            uintptr_t ra);
+void QEMU_NORETURN tcg_s390_vector_exception(CPUS390XState *env, uint32_t vxc,
+                                             uintptr_t ra);
 
 #endif /* TCG_S390X_H */

From aae65009726858390d8bfca73d795613698f317a Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 25 Feb 2019 10:46:36 +0100
Subject: [PATCH 11/33] s390x/tcg: Export float_comp_to_cc() and
 float(32|64|128)_dcmask()

Vector floating-point instructions will require these functions, so
allow to use them from other files.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/fpu_helper.c | 4 ++--
 target/s390x/internal.h   | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/target/s390x/fpu_helper.c b/target/s390x/fpu_helper.c
index 1be68bafea..d2c17ed942 100644
--- a/target/s390x/fpu_helper.c
+++ b/target/s390x/fpu_helper.c
@@ -112,7 +112,7 @@ static void handle_exceptions(CPUS390XState *env, bool XxC, uintptr_t retaddr)
     }
 }
 
-static inline int float_comp_to_cc(CPUS390XState *env, int float_compare)
+int float_comp_to_cc(CPUS390XState *env, int float_compare)
 {
     S390CPU *cpu = s390_env_get_cpu(env);
 
@@ -746,7 +746,7 @@ static inline uint16_t dcmask(int bit, bool neg)
 }
 
 #define DEF_FLOAT_DCMASK(_TYPE) \
-static uint16_t _TYPE##_dcmask(CPUS390XState *env, _TYPE f1)       \
+uint16_t _TYPE##_dcmask(CPUS390XState *env, _TYPE f1)              \
 {                                                                  \
     const bool neg = _TYPE##_is_neg(f1);                           \
                                                                    \
diff --git a/target/s390x/internal.h b/target/s390x/internal.h
index 9893fc094b..c243fa725b 100644
--- a/target/s390x/internal.h
+++ b/target/s390x/internal.h
@@ -285,6 +285,10 @@ uint32_t set_cc_nz_f128(float128 v);
 uint8_t s390_softfloat_exc_to_ieee(unsigned int exc);
 int s390_swap_bfp_rounding_mode(CPUS390XState *env, int m3);
 void s390_restore_bfp_rounding_mode(CPUS390XState *env, int old_mode);
+int float_comp_to_cc(CPUS390XState *env, int float_compare);
+uint16_t float32_dcmask(CPUS390XState *env, float32 f1);
+uint16_t float64_dcmask(CPUS390XState *env, float64 f1);
+uint16_t float128_dcmask(CPUS390XState *env, float128 f1);
 
 
 /* gdbstub.c */

From 3a0eae8546b4cda2526f1d913d50c4eb63f5c05e Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 21:21:21 +0200
Subject: [PATCH 12/33] s390x/tcg: Implement VECTOR FP ADD

1. We'll reuse op_vfa() for similar instructions later, prepare for
   that.
2. We'll reuse vop64_3() for other instructions later.
3. Take care of modifying the vector register only if no trap happened.
 - on traps, flags are not updated and no elements are modified
 - traps don't modify the fpc flags
 - without traps, all exceptions of all elements are merged
4. We'll reuse check_ieee_exc() later when we need the XxC flag.

We have to check for exceptions after processing each element.
Provide separate handlers for single/all element processing. We'll do
the same for all applicable FP instructions.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/Makefile.objs      |   1 +
 target/s390x/helper.h           |   4 ++
 target/s390x/insn-data.def      |   5 ++
 target/s390x/translate_vx.inc.c |  29 ++++++++
 target/s390x/vec_fpu_helper.c   | 119 ++++++++++++++++++++++++++++++++
 5 files changed, 158 insertions(+)
 create mode 100644 target/s390x/vec_fpu_helper.c

diff --git a/target/s390x/Makefile.objs b/target/s390x/Makefile.objs
index ffdd484ef0..3e2745594a 100644
--- a/target/s390x/Makefile.objs
+++ b/target/s390x/Makefile.objs
@@ -2,6 +2,7 @@ obj-y += cpu.o cpu_models.o cpu_features.o gdbstub.o interrupt.o helper.o
 obj-$(CONFIG_TCG) += translate.o cc_helper.o excp_helper.o fpu_helper.o
 obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o crypto_helper.o
 obj-$(CONFIG_TCG) += vec_helper.o vec_int_helper.o vec_string_helper.o
+obj-$(CONFIG_TCG) += vec_fpu_helper.o
 obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o arch_dump.o mmu_helper.o diag.o
 obj-$(CONFIG_SOFTMMU) += sigp.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 5db67779d3..21658a2771 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -249,6 +249,10 @@ DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32)
 
+/* === Vector Floating-Point Instructions */
+DEF_HELPER_FLAGS_5(gvec_vfa64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfa64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
 DEF_HELPER_4(diag, void, env, i32, i32, i32)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index a2969fab58..79892f6042 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1204,6 +1204,11 @@
 /* VECTOR STRING RANGE COMPARE */
     F(0xe78a, VSTRC,   VRR_d, V,   0, 0, 0, 0, vstrc, 0, IF_VEC)
 
+/* === Vector Floating-Point Instructions */
+
+/* VECTOR FP ADD */
+    F(0xe7e3, VFA,     VRR_c, V,   0, 0, 0, 0, vfa, 0, IF_VEC)
+
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
     E(0xb250, CSP,     RRE,   Z,   r1_32u, ra2, r1_P, 0, csp, 0, MO_TEUL, IF_PRIV)
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index f26ffa2895..44da9f2645 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -52,6 +52,11 @@
 #define ES_64   MO_64
 #define ES_128  4
 
+/* Floating-Point Format */
+#define FPF_SHORT       2
+#define FPF_LONG        3
+#define FPF_EXT         4
+
 static inline bool valid_vec_element(uint8_t enr, TCGMemOp es)
 {
     return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1));
@@ -2538,3 +2543,27 @@ static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o)
     }
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vfa(DisasContext *s, DisasOps *o)
+{
+    const uint8_t fpf = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    const bool se = extract32(m5, 3, 1);
+    gen_helper_gvec_3_ptr *fn;
+
+    if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    switch (s->fields->op2) {
+    case 0xe3:
+        fn = se ? gen_helper_gvec_vfa64s : gen_helper_gvec_vfa64;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                   get_field(s->fields, v3), cpu_env, 0, fn);
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
new file mode 100644
index 0000000000..c7db0791d7
--- /dev/null
+++ b/target/s390x/vec_fpu_helper.c
@@ -0,0 +1,119 @@
+/*
+ * QEMU TCG support -- s390x vector floating point instruction support
+ *
+ * Copyright (C) 2019 Red Hat Inc
+ *
+ * Authors:
+ *   David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "cpu.h"
+#include "internal.h"
+#include "vec.h"
+#include "tcg_s390x.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+#include "fpu/softfloat.h"
+
+#define VIC_INVALID         0x1
+#define VIC_DIVBYZERO       0x2
+#define VIC_OVERFLOW        0x3
+#define VIC_UNDERFLOW       0x4
+#define VIC_INEXACT         0x5
+
+/* returns the VEX. If the VEX is 0, there is no trap */
+static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC,
+                              uint8_t *vec_exc)
+{
+    uint8_t vece_exc = 0, trap_exc;
+    unsigned qemu_exc;
+
+    /* Retrieve and clear the softfloat exceptions */
+    qemu_exc = env->fpu_status.float_exception_flags;
+    if (qemu_exc == 0) {
+        return 0;
+    }
+    env->fpu_status.float_exception_flags = 0;
+
+    vece_exc = s390_softfloat_exc_to_ieee(qemu_exc);
+
+    /* Add them to the vector-wide s390x exception bits */
+    *vec_exc |= vece_exc;
+
+    /* Check for traps and construct the VXC */
+    trap_exc = vece_exc & env->fpc >> 24;
+    if (trap_exc) {
+        if (trap_exc & S390_IEEE_MASK_INVALID) {
+            return enr << 4 | VIC_INVALID;
+        } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) {
+            return enr << 4 | VIC_DIVBYZERO;
+        } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) {
+            return enr << 4 | VIC_OVERFLOW;
+        } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) {
+            return enr << 4 | VIC_UNDERFLOW;
+        } else if (!XxC) {
+            g_assert(trap_exc & S390_IEEE_MASK_INEXACT);
+            /* inexact has lowest priority on traps */
+            return enr << 4 | VIC_INEXACT;
+        }
+    }
+    return 0;
+}
+
+static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc,
+                            uintptr_t retaddr)
+{
+    if (vxc) {
+        /* on traps, the fpc flags are not updated, instruction is suppressed */
+        tcg_s390_vector_exception(env, vxc, retaddr);
+    }
+    if (vec_exc) {
+        /* indicate exceptions for all elements combined */
+        env->fpc |= vec_exc << 16;
+    }
+}
+
+typedef uint64_t (*vop64_3_fn)(uint64_t a, uint64_t b, float_status *s);
+static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
+                    CPUS390XState *env, bool s, vop64_3_fn fn,
+                    uintptr_t retaddr)
+{
+    uint8_t vxc, vec_exc = 0;
+    S390Vector tmp = {};
+    int i;
+
+    for (i = 0; i < 2; i++) {
+        const uint64_t a = s390_vec_read_element64(v2, i);
+        const uint64_t b = s390_vec_read_element64(v3, i);
+
+        s390_vec_write_element64(&tmp, i, fn(a, b, &env->fpu_status));
+        vxc = check_ieee_exc(env, i, false, &vec_exc);
+        if (s || vxc) {
+            break;
+        }
+    }
+    handle_ieee_exc(env, vxc, vec_exc, retaddr);
+    *v1 = tmp;
+}
+
+static uint64_t vfa64(uint64_t a, uint64_t b, float_status *s)
+{
+    return float64_add(a, b, s);
+}
+
+void HELPER(gvec_vfa64)(void *v1, const void *v2, const void *v3,
+                        CPUS390XState *env, uint32_t desc)
+{
+    vop64_3(v1, v2, v3, env, false, vfa64, GETPC());
+}
+
+void HELPER(gvec_vfa64s)(void *v1, const void *v2, const void *v3,
+                         CPUS390XState *env, uint32_t desc)
+{
+    vop64_3(v1, v2, v3, env, true, vfa64, GETPC());
+}

From 5b89f0fba2b6301e124668443f8bfed124a0317c Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 21:30:56 +0200
Subject: [PATCH 13/33] s390x/tcg: Implement VECTOR FP COMPARE (AND SIGNAL)
 SCALAR

As far as I can see, there is only a tiny difference.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  2 ++
 target/s390x/insn-data.def      |  4 ++++
 target/s390x/translate_vx.inc.c | 21 +++++++++++++++++++++
 target/s390x/vec_fpu_helper.c   | 32 ++++++++++++++++++++++++++++++++
 4 files changed, 59 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 21658a2771..d34d6802a6 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -252,6 +252,8 @@ DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32)
 /* === Vector Floating-Point Instructions */
 DEF_HELPER_FLAGS_5(gvec_vfa64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfa64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_4(gvec_wfc64, void, cptr, cptr, env, i32)
+DEF_HELPER_4(gvec_wfk64, void, cptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 79892f6042..c45e101b10 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1208,6 +1208,10 @@
 
 /* VECTOR FP ADD */
     F(0xe7e3, VFA,     VRR_c, V,   0, 0, 0, 0, vfa, 0, IF_VEC)
+/* VECTOR FP COMPARE SCALAR */
+    F(0xe7cb, WFC,     VRR_a, V,   0, 0, 0, 0, wfc, 0, IF_VEC)
+/* VECTOR FP COMPARE AND SIGNAL SCALAR */
+    F(0xe7ca, WFK,     VRR_a, V,   0, 0, 0, 0, wfc, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 44da9f2645..283e8aa07a 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2567,3 +2567,24 @@ static DisasJumpType op_vfa(DisasContext *s, DisasOps *o)
                    get_field(s->fields, v3), cpu_env, 0, fn);
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_wfc(DisasContext *s, DisasOps *o)
+{
+    const uint8_t fpf = get_field(s->fields, m3);
+    const uint8_t m4 = get_field(s->fields, m4);
+
+    if (fpf != FPF_LONG || m4) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (s->fields->op2 == 0xcb) {
+        gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                       cpu_env, 0, gen_helper_gvec_wfc64);
+    } else {
+        gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                       cpu_env, 0, gen_helper_gvec_wfk64);
+    }
+    set_cc_static(s);
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index c7db0791d7..f9357d9221 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -117,3 +117,35 @@ void HELPER(gvec_vfa64s)(void *v1, const void *v2, const void *v3,
 {
     vop64_3(v1, v2, v3, env, true, vfa64, GETPC());
 }
+
+static int wfc64(const S390Vector *v1, const S390Vector *v2,
+                 CPUS390XState *env, bool signal, uintptr_t retaddr)
+{
+    /* only the zero-indexed elements are compared */
+    const float64 a = s390_vec_read_element64(v1, 0);
+    const float64 b = s390_vec_read_element64(v2, 0);
+    uint8_t vxc, vec_exc = 0;
+    int cmp;
+
+    if (signal) {
+        cmp = float64_compare(a, b, &env->fpu_status);
+    } else {
+        cmp = float64_compare_quiet(a, b, &env->fpu_status);
+    }
+    vxc = check_ieee_exc(env, 0, false, &vec_exc);
+    handle_ieee_exc(env, vxc, vec_exc, retaddr);
+
+    return float_comp_to_cc(env, cmp);
+}
+
+void HELPER(gvec_wfc64)(const void *v1, const void *v2, CPUS390XState *env,
+                        uint32_t desc)
+{
+    env->cc_op = wfc64(v1, v2, env, false, GETPC());
+}
+
+void HELPER(gvec_wfk64)(const void *v1, const void *v2, CPUS390XState *env,
+                        uint32_t desc)
+{
+    env->cc_op = wfc64(v1, v2, env, true, GETPC());
+}

From 2c806ab4437abde451b99dcf21b45169c04d08e9 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 21:35:08 +0200
Subject: [PATCH 14/33] s390x/tcg: Implement VECTOR FP COMPARE (EQUAL|HIGH|HIGH
 OR EQUAL)

Provide for all three instructions all four combinations of cc bit and
s bit.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  12 ++++
 target/s390x/insn-data.def      |   6 ++
 target/s390x/translate_vx.inc.c |  51 ++++++++++++++++
 target/s390x/vec_fpu_helper.c   | 104 ++++++++++++++++++++++++++++++++
 4 files changed, 173 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index d34d6802a6..33d3bacf74 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -254,6 +254,18 @@ DEF_HELPER_FLAGS_5(gvec_vfa64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfa64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_4(gvec_wfc64, void, cptr, cptr, env, i32)
 DEF_HELPER_4(gvec_wfk64, void, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfce64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfce64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfce64_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfce64s_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfch64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfch64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfch64_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfch64s_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfche64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfche64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfche64_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vfche64s_cc, void, ptr, cptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index c45e101b10..446552f251 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1212,6 +1212,12 @@
     F(0xe7cb, WFC,     VRR_a, V,   0, 0, 0, 0, wfc, 0, IF_VEC)
 /* VECTOR FP COMPARE AND SIGNAL SCALAR */
     F(0xe7ca, WFK,     VRR_a, V,   0, 0, 0, 0, wfc, 0, IF_VEC)
+/* VECTOR FP COMPARE EQUAL */
+    F(0xe7e8, VFCE,    VRR_c, V,   0, 0, 0, 0, vfc, 0, IF_VEC)
+/* VECTOR FP COMPARE HIGH */
+    F(0xe7eb, VFCH,    VRR_c, V,   0, 0, 0, 0, vfc, 0, IF_VEC)
+/* VECTOR FP COMPARE HIGH OR EQUAL */
+    F(0xe7ea, VFCHE,   VRR_c, V,   0, 0, 0, 0, vfc, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 283e8aa07a..5571a71e1a 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2588,3 +2588,54 @@ static DisasJumpType op_wfc(DisasContext *s, DisasOps *o)
     set_cc_static(s);
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vfc(DisasContext *s, DisasOps *o)
+{
+    const uint8_t fpf = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    const uint8_t m6 = get_field(s->fields, m6);
+    const bool se = extract32(m5, 3, 1);
+    const bool cs = extract32(m6, 0, 1);
+    gen_helper_gvec_3_ptr *fn;
+
+    if (fpf != FPF_LONG || extract32(m5, 0, 3) || extract32(m6, 1, 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (cs) {
+        switch (s->fields->op2) {
+        case 0xe8:
+            fn = se ? gen_helper_gvec_vfce64s_cc : gen_helper_gvec_vfce64_cc;
+            break;
+        case 0xeb:
+            fn = se ? gen_helper_gvec_vfch64s_cc : gen_helper_gvec_vfch64_cc;
+            break;
+        case 0xea:
+            fn = se ? gen_helper_gvec_vfche64s_cc : gen_helper_gvec_vfche64_cc;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    } else {
+        switch (s->fields->op2) {
+        case 0xe8:
+            fn = se ? gen_helper_gvec_vfce64s : gen_helper_gvec_vfce64;
+            break;
+        case 0xeb:
+            fn = se ? gen_helper_gvec_vfch64s : gen_helper_gvec_vfch64;
+            break;
+        case 0xea:
+            fn = se ? gen_helper_gvec_vfche64s : gen_helper_gvec_vfche64;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    }
+    gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                   get_field(s->fields, v3), cpu_env, 0, fn);
+    if (cs) {
+        set_cc_static(s);
+    }
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index f9357d9221..e72500d4d5 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -149,3 +149,107 @@ void HELPER(gvec_wfk64)(const void *v1, const void *v2, CPUS390XState *env,
 {
     env->cc_op = wfc64(v1, v2, env, true, GETPC());
 }
+
+typedef int (*vfc64_fn)(float64 a, float64 b, float_status *status);
+static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
+                 CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr)
+{
+    uint8_t vxc, vec_exc = 0;
+    S390Vector tmp = {};
+    int match = 0;
+    int i;
+
+    for (i = 0; i < 2; i++) {
+        const float64 a = s390_vec_read_element64(v2, i);
+        const float64 b = s390_vec_read_element64(v3, i);
+
+        /* swap the order of the parameters, so we can use existing functions */
+        if (fn(b, a, &env->fpu_status)) {
+            match++;
+            s390_vec_write_element64(&tmp, i, -1ull);
+        }
+        vxc = check_ieee_exc(env, i, false, &vec_exc);
+        if (s || vxc) {
+            break;
+        }
+    }
+
+    handle_ieee_exc(env, vxc, vec_exc, retaddr);
+    *v1 = tmp;
+    if (match) {
+        return s || match == 2 ? 0 : 1;
+    }
+    return 3;
+}
+
+void HELPER(gvec_vfce64)(void *v1, const void *v2, const void *v3,
+                         CPUS390XState *env, uint32_t desc)
+{
+    vfc64(v1, v2, v3, env, false, float64_eq_quiet, GETPC());
+}
+
+void HELPER(gvec_vfce64s)(void *v1, const void *v2, const void *v3,
+                          CPUS390XState *env, uint32_t desc)
+{
+    vfc64(v1, v2, v3, env, true, float64_eq_quiet, GETPC());
+}
+
+void HELPER(gvec_vfce64_cc)(void *v1, const void *v2, const void *v3,
+                            CPUS390XState *env, uint32_t desc)
+{
+    env->cc_op = vfc64(v1, v2, v3, env, false, float64_eq_quiet, GETPC());
+}
+
+void HELPER(gvec_vfce64s_cc)(void *v1, const void *v2, const void *v3,
+                            CPUS390XState *env, uint32_t desc)
+{
+    env->cc_op = vfc64(v1, v2, v3, env, true, float64_eq_quiet, GETPC());
+}
+
+void HELPER(gvec_vfch64)(void *v1, const void *v2, const void *v3,
+                         CPUS390XState *env, uint32_t desc)
+{
+    vfc64(v1, v2, v3, env, false, float64_lt_quiet, GETPC());
+}
+
+void HELPER(gvec_vfch64s)(void *v1, const void *v2, const void *v3,
+                          CPUS390XState *env, uint32_t desc)
+{
+    vfc64(v1, v2, v3, env, true, float64_lt_quiet, GETPC());
+}
+
+void HELPER(gvec_vfch64_cc)(void *v1, const void *v2, const void *v3,
+                            CPUS390XState *env, uint32_t desc)
+{
+    env->cc_op = vfc64(v1, v2, v3, env, false, float64_lt_quiet, GETPC());
+}
+
+void HELPER(gvec_vfch64s_cc)(void *v1, const void *v2, const void *v3,
+                             CPUS390XState *env, uint32_t desc)
+{
+    env->cc_op = vfc64(v1, v2, v3, env, true, float64_lt_quiet, GETPC());
+}
+
+void HELPER(gvec_vfche64)(void *v1, const void *v2, const void *v3,
+                          CPUS390XState *env, uint32_t desc)
+{
+    vfc64(v1, v2, v3, env, false, float64_le_quiet, GETPC());
+}
+
+void HELPER(gvec_vfche64s)(void *v1, const void *v2, const void *v3,
+                           CPUS390XState *env, uint32_t desc)
+{
+    vfc64(v1, v2, v3, env, true, float64_le_quiet, GETPC());
+}
+
+void HELPER(gvec_vfche64_cc)(void *v1, const void *v2, const void *v3,
+                             CPUS390XState *env, uint32_t desc)
+{
+    env->cc_op = vfc64(v1, v2, v3, env, false, float64_le_quiet, GETPC());
+}
+
+void HELPER(gvec_vfche64s_cc)(void *v1, const void *v2, const void *v3,
+                              CPUS390XState *env, uint32_t desc)
+{
+    env->cc_op = vfc64(v1, v2, v3, env, true, float64_le_quiet, GETPC());
+}

From bb03fd841cb33f3a85cedc1f89169fe2649fc258 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 21:42:05 +0200
Subject: [PATCH 15/33] s390x/tcg: Implement VECTOR FP CONVERT FROM FIXED
 64-BIT

1. We'll reuse op_vcdg() for similar instructions later, prepare for
   that.
2. We'll reuse vop64_2() later for other instructions.

We have to mangle the erm (effective rounding mode) and the m4 into
the simd_data(), and properly unmangle them again.

Make sure to restore the erm before triggering an exception.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  2 ++
 target/s390x/insn-data.def      |  2 ++
 target/s390x/translate_vx.inc.c | 25 ++++++++++++++++++
 target/s390x/vec_fpu_helper.c   | 47 +++++++++++++++++++++++++++++++++
 4 files changed, 76 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 33d3bacf74..a60f4c49fc 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -266,6 +266,8 @@ DEF_HELPER_FLAGS_5(gvec_vfche64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32
 DEF_HELPER_FLAGS_5(gvec_vfche64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_5(gvec_vfche64_cc, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_5(gvec_vfche64s_cc, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdg64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 446552f251..d3386024c8 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1218,6 +1218,8 @@
     F(0xe7eb, VFCH,    VRR_c, V,   0, 0, 0, 0, vfc, 0, IF_VEC)
 /* VECTOR FP COMPARE HIGH OR EQUAL */
     F(0xe7ea, VFCHE,   VRR_c, V,   0, 0, 0, 0, vfc, 0, IF_VEC)
+/* VECTOR FP CONVERT FROM FIXED 64-BIT */
+    F(0xe7c3, VCDG,    VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 5571a71e1a..6741b707cc 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2639,3 +2639,28 @@ static DisasJumpType op_vfc(DisasContext *s, DisasOps *o)
     }
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o)
+{
+    const uint8_t fpf = get_field(s->fields, m3);
+    const uint8_t m4 = get_field(s->fields, m4);
+    const uint8_t erm = get_field(s->fields, m5);
+    const bool se = extract32(m4, 3, 1);
+    gen_helper_gvec_2_ptr *fn;
+
+    if (fpf != FPF_LONG || extract32(m4, 0, 2) || erm > 7 || erm == 2) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    switch (s->fields->op2) {
+    case 0xc3:
+        fn = se ? gen_helper_gvec_vcdg64s : gen_helper_gvec_vcdg64;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), cpu_env,
+                   deposit32(m4, 4, 4, erm), fn);
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index e72500d4d5..53430c7843 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -78,6 +78,30 @@ static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc,
     }
 }
 
+typedef uint64_t (*vop64_2_fn)(uint64_t a, float_status *s);
+static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
+                    bool s, bool XxC, uint8_t erm, vop64_2_fn fn,
+                    uintptr_t retaddr)
+{
+    uint8_t vxc, vec_exc = 0;
+    S390Vector tmp = {};
+    int i, old_mode;
+
+    old_mode = s390_swap_bfp_rounding_mode(env, erm);
+    for (i = 0; i < 2; i++) {
+        const uint64_t a = s390_vec_read_element64(v2, i);
+
+        s390_vec_write_element64(&tmp, i, fn(a, &env->fpu_status));
+        vxc = check_ieee_exc(env, i, XxC, &vec_exc);
+        if (s || vxc) {
+            break;
+        }
+    }
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_ieee_exc(env, vxc, vec_exc, retaddr);
+    *v1 = tmp;
+}
+
 typedef uint64_t (*vop64_3_fn)(uint64_t a, uint64_t b, float_status *s);
 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
                     CPUS390XState *env, bool s, vop64_3_fn fn,
@@ -253,3 +277,26 @@ void HELPER(gvec_vfche64s_cc)(void *v1, const void *v2, const void *v3,
 {
     env->cc_op = vfc64(v1, v2, v3, env, true, float64_le_quiet, GETPC());
 }
+
+static uint64_t vcdg64(uint64_t a, float_status *s)
+{
+    return int64_to_float64(a, s);
+}
+
+void HELPER(gvec_vcdg64)(void *v1, const void *v2, CPUS390XState *env,
+                         uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, false, XxC, erm, vcdg64, GETPC());
+}
+
+void HELPER(gvec_vcdg64s)(void *v1, const void *v2, CPUS390XState *env,
+                          uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, true, XxC, erm, vcdg64, GETPC());
+}

From 9b8d1a387daf1a71c44fdf64a513672b3b327de6 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 21:47:20 +0200
Subject: [PATCH 16/33] s390x/tcg: Implement VECTOR FP CONVERT FROM LOGICAL
 64-BIT

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  2 ++
 target/s390x/insn-data.def      |  2 ++
 target/s390x/translate_vx.inc.c |  3 +++
 target/s390x/vec_fpu_helper.c   | 23 +++++++++++++++++++++++
 4 files changed, 30 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index a60f4c49fc..6fd996e924 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -268,6 +268,8 @@ DEF_HELPER_5(gvec_vfche64_cc, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_5(gvec_vfche64s_cc, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdg64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdlg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcdlg64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index d3386024c8..465b36dd70 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1220,6 +1220,8 @@
     F(0xe7ea, VFCHE,   VRR_c, V,   0, 0, 0, 0, vfc, 0, IF_VEC)
 /* VECTOR FP CONVERT FROM FIXED 64-BIT */
     F(0xe7c3, VCDG,    VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR FP CONVERT FROM LOGICAL 64-BIT */
+    F(0xe7c1, VCDLG,   VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 6741b707cc..fa755cd1d6 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2657,6 +2657,9 @@ static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o)
     case 0xc3:
         fn = se ? gen_helper_gvec_vcdg64s : gen_helper_gvec_vcdg64;
         break;
+    case 0xc1:
+        fn = se ? gen_helper_gvec_vcdlg64s : gen_helper_gvec_vcdlg64;
+        break;
     default:
         g_assert_not_reached();
     }
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 53430c7843..181378e167 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -300,3 +300,26 @@ void HELPER(gvec_vcdg64s)(void *v1, const void *v2, CPUS390XState *env,
 
     vop64_2(v1, v2, env, true, XxC, erm, vcdg64, GETPC());
 }
+
+static uint64_t vcdlg64(uint64_t a, float_status *s)
+{
+    return uint64_to_float64(a, s);
+}
+
+void HELPER(gvec_vcdlg64)(void *v1, const void *v2, CPUS390XState *env,
+                          uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, false, XxC, erm, vcdlg64, GETPC());
+}
+
+void HELPER(gvec_vcdlg64s)(void *v1, const void *v2, CPUS390XState *env,
+                           uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, true, XxC, erm, vcdlg64, GETPC());
+}

From 35b3bb1c55953e2924303cb5d9ab1b242918a1e4 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 21:49:52 +0200
Subject: [PATCH 17/33] s390x/tcg: Implement VECTOR FP CONVERT TO FIXED 64-BIT

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  2 ++
 target/s390x/insn-data.def      |  2 ++
 target/s390x/translate_vx.inc.c |  3 +++
 target/s390x/vec_fpu_helper.c   | 23 +++++++++++++++++++++++
 4 files changed, 30 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 6fd996e924..9893c677da 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -270,6 +270,8 @@ DEF_HELPER_FLAGS_4(gvec_vcdg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdg64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdlg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdlg64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vcgd64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 465b36dd70..97c62a8af5 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1222,6 +1222,8 @@
     F(0xe7c3, VCDG,    VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
 /* VECTOR FP CONVERT FROM LOGICAL 64-BIT */
     F(0xe7c1, VCDLG,   VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR FP CONVERT TO FIXED 64-BIT */
+    F(0xe7c2, VCGD,    VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index fa755cd1d6..a42de2ff01 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2660,6 +2660,9 @@ static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o)
     case 0xc1:
         fn = se ? gen_helper_gvec_vcdlg64s : gen_helper_gvec_vcdlg64;
         break;
+    case 0xc2:
+        fn = se ? gen_helper_gvec_vcgd64s : gen_helper_gvec_vcgd64;
+        break;
     default:
         g_assert_not_reached();
     }
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 181378e167..e7251aca04 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -323,3 +323,26 @@ void HELPER(gvec_vcdlg64s)(void *v1, const void *v2, CPUS390XState *env,
 
     vop64_2(v1, v2, env, true, XxC, erm, vcdlg64, GETPC());
 }
+
+static uint64_t vcgd64(uint64_t a, float_status *s)
+{
+    return float64_to_int64(a, s);
+}
+
+void HELPER(gvec_vcgd64)(void *v1, const void *v2, CPUS390XState *env,
+                         uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, false, XxC, erm, vcgd64, GETPC());
+}
+
+void HELPER(gvec_vcgd64s)(void *v1, const void *v2, CPUS390XState *env,
+                          uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, true, XxC, erm, vcgd64, GETPC());
+}

From 09c04e4b884566ffe5b931d3e809e2c003916382 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 21:51:23 +0200
Subject: [PATCH 18/33] s390x/tcg: Implement VECTOR FP CONVERT TO LOGICAL
 64-BIT

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  2 ++
 target/s390x/insn-data.def      |  2 ++
 target/s390x/translate_vx.inc.c |  3 +++
 target/s390x/vec_fpu_helper.c   | 23 +++++++++++++++++++++++
 4 files changed, 30 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 9893c677da..9b9062970a 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -272,6 +272,8 @@ DEF_HELPER_FLAGS_4(gvec_vcdlg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdlg64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcgd64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vclgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vclgd64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 97c62a8af5..ed8b888d59 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1224,6 +1224,8 @@
     F(0xe7c1, VCDLG,   VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
 /* VECTOR FP CONVERT TO FIXED 64-BIT */
     F(0xe7c2, VCGD,    VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR FP CONVERT TO LOGICAL 64-BIT */
+    F(0xe7c0, VCLGD,   VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index a42de2ff01..0395d69968 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2663,6 +2663,9 @@ static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o)
     case 0xc2:
         fn = se ? gen_helper_gvec_vcgd64s : gen_helper_gvec_vcgd64;
         break;
+    case 0xc0:
+        fn = se ? gen_helper_gvec_vclgd64s : gen_helper_gvec_vclgd64;
+        break;
     default:
         g_assert_not_reached();
     }
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index e7251aca04..09ef0abfb0 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -346,3 +346,26 @@ void HELPER(gvec_vcgd64s)(void *v1, const void *v2, CPUS390XState *env,
 
     vop64_2(v1, v2, env, true, XxC, erm, vcgd64, GETPC());
 }
+
+static uint64_t vclgd64(uint64_t a, float_status *s)
+{
+    return float64_to_uint64(a, s);
+}
+
+void HELPER(gvec_vclgd64)(void *v1, const void *v2, CPUS390XState *env,
+                          uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, false, XxC, erm, vclgd64, GETPC());
+}
+
+void HELPER(gvec_vclgd64s)(void *v1, const void *v2, CPUS390XState *env,
+                           uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, true, XxC, erm, vclgd64, GETPC());
+}

From 817a1cec89ed1bb94c112cc8fa013efb74a4df83 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 21:54:33 +0200
Subject: [PATCH 19/33] s390x/tcg: Implement VECTOR FP DIVIDE

We can reuse most of the infrastructure added for VECTOR FP ADD.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  2 ++
 target/s390x/insn-data.def      |  2 ++
 target/s390x/translate_vx.inc.c |  3 +++
 target/s390x/vec_fpu_helper.c   | 17 +++++++++++++++++
 4 files changed, 24 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 9b9062970a..238bfa2509 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -274,6 +274,8 @@ DEF_HELPER_FLAGS_4(gvec_vcgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcgd64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vclgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vclgd64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfd64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfd64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index ed8b888d59..f9830deace 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1226,6 +1226,8 @@
     F(0xe7c2, VCGD,    VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
 /* VECTOR FP CONVERT TO LOGICAL 64-BIT */
     F(0xe7c0, VCLGD,   VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR FP DIVIDE */
+    F(0xe7e5, VFD,     VRR_c, V,   0, 0, 0, 0, vfa, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 0395d69968..9e55d4488b 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2560,6 +2560,9 @@ static DisasJumpType op_vfa(DisasContext *s, DisasOps *o)
     case 0xe3:
         fn = se ? gen_helper_gvec_vfa64s : gen_helper_gvec_vfa64;
         break;
+    case 0xe5:
+        fn = se ? gen_helper_gvec_vfd64s : gen_helper_gvec_vfd64;
+        break;
     default:
         g_assert_not_reached();
     }
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 09ef0abfb0..3c62eb5787 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -369,3 +369,20 @@ void HELPER(gvec_vclgd64s)(void *v1, const void *v2, CPUS390XState *env,
 
     vop64_2(v1, v2, env, true, XxC, erm, vclgd64, GETPC());
 }
+
+static uint64_t vfd64(uint64_t a, uint64_t b, float_status *s)
+{
+    return float64_div(a, b, s);
+}
+
+void HELPER(gvec_vfd64)(void *v1, const void *v2, const void *v3,
+                        CPUS390XState *env, uint32_t desc)
+{
+    vop64_3(v1, v2, v3, env, false, vfd64, GETPC());
+}
+
+void HELPER(gvec_vfd64s)(void *v1, const void *v2, const void *v3,
+                         CPUS390XState *env, uint32_t desc)
+{
+    vop64_3(v1, v2, v3, env, true, vfd64, GETPC());
+}

From 60d0ab29a134bd75ba9aafe6ed5a91c0d43ad67a Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 21:57:10 +0200
Subject: [PATCH 20/33] s390x/tcg: Implement VECTOR LOAD FP INTEGER

We can reuse most of the infrastructure introduced for
VECTOR FP CONVERT FROM FIXED 64-BIT and friends.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  2 ++
 target/s390x/insn-data.def      |  2 ++
 target/s390x/translate_vx.inc.c |  3 +++
 target/s390x/vec_fpu_helper.c   | 23 +++++++++++++++++++++++
 4 files changed, 30 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 238bfa2509..10a9cb39b6 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -276,6 +276,8 @@ DEF_HELPER_FLAGS_4(gvec_vclgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vclgd64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfd64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfd64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfi64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfi64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index f9830deace..f77aa41253 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1228,6 +1228,8 @@
     F(0xe7c0, VCLGD,   VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
 /* VECTOR FP DIVIDE */
     F(0xe7e5, VFD,     VRR_c, V,   0, 0, 0, 0, vfa, 0, IF_VEC)
+/* VECTOR LOAD FP INTEGER */
+    F(0xe7c7, VFI,     VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 9e55d4488b..59d8b971c0 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2669,6 +2669,9 @@ static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o)
     case 0xc0:
         fn = se ? gen_helper_gvec_vclgd64s : gen_helper_gvec_vclgd64;
         break;
+    case 0xc7:
+        fn = se ? gen_helper_gvec_vfi64s : gen_helper_gvec_vfi64;
+        break;
     default:
         g_assert_not_reached();
     }
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 3c62eb5787..bdcafc6738 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -386,3 +386,26 @@ void HELPER(gvec_vfd64s)(void *v1, const void *v2, const void *v3,
 {
     vop64_3(v1, v2, v3, env, true, vfd64, GETPC());
 }
+
+static uint64_t vfi64(uint64_t a, float_status *s)
+{
+    return float64_round_to_int(a, s);
+}
+
+void HELPER(gvec_vfi64)(void *v1, const void *v2, CPUS390XState *env,
+                        uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, false, XxC, erm, vfi64, GETPC());
+}
+
+void HELPER(gvec_vfi64s)(void *v1, const void *v2, CPUS390XState *env,
+                         uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vop64_2(v1, v2, env, true, XxC, erm, vfi64, GETPC());
+}

From 1a76e59da3162625561f0417133f26be39bbceb0 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 22:02:09 +0200
Subject: [PATCH 21/33] s390x/tcg: Implement VECTOR LOAD LENGTHENED

Take care of reading/indicating the 32-bit elements.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  2 ++
 target/s390x/insn-data.def      |  2 ++
 target/s390x/translate_vx.inc.c | 19 ++++++++++++++++++
 target/s390x/vec_fpu_helper.c   | 35 +++++++++++++++++++++++++++++++++
 4 files changed, 58 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 10a9cb39b6..cb25141ffe 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -278,6 +278,8 @@ DEF_HELPER_FLAGS_5(gvec_vfd64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfd64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfi64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfi64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfll32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfll32s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index f77aa41253..5afdb36aec 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1230,6 +1230,8 @@
     F(0xe7e5, VFD,     VRR_c, V,   0, 0, 0, 0, vfa, 0, IF_VEC)
 /* VECTOR LOAD FP INTEGER */
     F(0xe7c7, VFI,     VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR LOAD LENGTHENED */
+    F(0xe7c4, VFLL,    VRR_a, V,   0, 0, 0, 0, vfll, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 59d8b971c0..a25985e5c9 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2679,3 +2679,22 @@ static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o)
                    deposit32(m4, 4, 4, erm), fn);
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vfll(DisasContext *s, DisasOps *o)
+{
+    const uint8_t fpf = get_field(s->fields, m3);
+    const uint8_t m4 = get_field(s->fields, m4);
+    gen_helper_gvec_2_ptr *fn = gen_helper_gvec_vfll32;
+
+    if (fpf != FPF_SHORT || extract32(m4, 0, 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m4, 3, 1)) {
+        fn = gen_helper_gvec_vfll32s;
+    }
+    gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), cpu_env,
+                   0, fn);
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index bdcafc6738..f5b7fdf871 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -409,3 +409,38 @@ void HELPER(gvec_vfi64s)(void *v1, const void *v2, CPUS390XState *env,
 
     vop64_2(v1, v2, env, true, XxC, erm, vfi64, GETPC());
 }
+
+static void vfll32(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
+                   bool s, uintptr_t retaddr)
+{
+    uint8_t vxc, vec_exc = 0;
+    S390Vector tmp = {};
+    int i;
+
+    for (i = 0; i < 2; i++) {
+        /* load from even element */
+        const float32 a = s390_vec_read_element32(v2, i * 2);
+        const uint64_t ret = float32_to_float64(a, &env->fpu_status);
+
+        s390_vec_write_element64(&tmp, i, ret);
+        /* indicate the source element */
+        vxc = check_ieee_exc(env, i * 2, false, &vec_exc);
+        if (s || vxc) {
+            break;
+        }
+    }
+    handle_ieee_exc(env, vxc, vec_exc, retaddr);
+    *v1 = tmp;
+}
+
+void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env,
+                         uint32_t desc)
+{
+    vfll32(v1, v2, env, false, GETPC());
+}
+
+void HELPER(gvec_vfll32s)(void *v1, const void *v2, CPUS390XState *env,
+                          uint32_t desc)
+{
+    vfll32(v1, v2, env, true, GETPC());
+}

From 4500ede452e6e1fc079e05ce67a94bb6804757f8 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 22:06:42 +0200
Subject: [PATCH 22/33] s390x/tcg: Implement VECTOR LOAD ROUNDED

We can reuse some of the infrastructure introduced for
VECTOR FP CONVERT FROM FIXED 64-BIT and friends.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  2 ++
 target/s390x/insn-data.def      |  2 ++
 target/s390x/translate_vx.inc.c |  3 +++
 target/s390x/vec_fpu_helper.c   | 43 +++++++++++++++++++++++++++++++++
 4 files changed, 50 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index cb25141ffe..7526f8e8c6 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -280,6 +280,8 @@ DEF_HELPER_FLAGS_4(gvec_vfi64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfi64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfll32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfll32s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vflr64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vflr64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 5afdb36aec..f03914d528 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1232,6 +1232,8 @@
     F(0xe7c7, VFI,     VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
 /* VECTOR LOAD LENGTHENED */
     F(0xe7c4, VFLL,    VRR_a, V,   0, 0, 0, 0, vfll, 0, IF_VEC)
+/* VECTOR LOAD ROUNDED */
+    F(0xe7c5, VFLR,    VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index a25985e5c9..73e1b1062a 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2672,6 +2672,9 @@ static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o)
     case 0xc7:
         fn = se ? gen_helper_gvec_vfi64s : gen_helper_gvec_vfi64;
         break;
+    case 0xc5:
+        fn = se ? gen_helper_gvec_vflr64s : gen_helper_gvec_vflr64;
+        break;
     default:
         g_assert_not_reached();
     }
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index f5b7fdf871..764f7379da 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -444,3 +444,46 @@ void HELPER(gvec_vfll32s)(void *v1, const void *v2, CPUS390XState *env,
 {
     vfll32(v1, v2, env, true, GETPC());
 }
+
+static void vflr64(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
+                   bool s, bool XxC, uint8_t erm, uintptr_t retaddr)
+{
+    uint8_t vxc, vec_exc = 0;
+    S390Vector tmp = {};
+    int i, old_mode;
+
+    old_mode = s390_swap_bfp_rounding_mode(env, erm);
+    for (i = 0; i < 2; i++) {
+        float64 a = s390_vec_read_element64(v2, i);
+        uint32_t ret = float64_to_float32(a, &env->fpu_status);
+
+        /* place at even element */
+        s390_vec_write_element32(&tmp, i * 2, ret);
+        /* indicate the source element */
+        vxc = check_ieee_exc(env, i, XxC, &vec_exc);
+        if (s || vxc) {
+            break;
+        }
+    }
+    s390_restore_bfp_rounding_mode(env, old_mode);
+    handle_ieee_exc(env, vxc, vec_exc, retaddr);
+    *v1 = tmp;
+}
+
+void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
+                         uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vflr64(v1, v2, env, false, XxC, erm, GETPC());
+}
+
+void HELPER(gvec_vflr64s)(void *v1, const void *v2, CPUS390XState *env,
+                          uint32_t desc)
+{
+    const uint8_t erm = extract32(simd_data(desc), 4, 4);
+    const bool XxC = extract32(simd_data(desc), 2, 1);
+
+    vflr64(v1, v2, env, true, XxC, erm, GETPC());
+}

From 8d47d4d2124ea61b993ec90feac302d540cf6e24 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 22:09:33 +0200
Subject: [PATCH 23/33] s390x/tcg: Implement VECTOR FP MULTIPLY

Very similar to VECTOR FP DIVIDE.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  2 ++
 target/s390x/insn-data.def      |  2 ++
 target/s390x/translate_vx.inc.c |  3 +++
 target/s390x/vec_fpu_helper.c   | 17 +++++++++++++++++
 4 files changed, 24 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 7526f8e8c6..22e02a0178 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -282,6 +282,8 @@ DEF_HELPER_FLAGS_4(gvec_vfll32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfll32s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vflr64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vflr64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfm64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index f03914d528..e56059ac34 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1234,6 +1234,8 @@
     F(0xe7c4, VFLL,    VRR_a, V,   0, 0, 0, 0, vfll, 0, IF_VEC)
 /* VECTOR LOAD ROUNDED */
     F(0xe7c5, VFLR,    VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
+/* VECTOR FP MULTIPLY */
+    F(0xe7e7, VFM,     VRR_c, V,   0, 0, 0, 0, vfa, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 73e1b1062a..ae31a327cf 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2563,6 +2563,9 @@ static DisasJumpType op_vfa(DisasContext *s, DisasOps *o)
     case 0xe5:
         fn = se ? gen_helper_gvec_vfd64s : gen_helper_gvec_vfd64;
         break;
+    case 0xe7:
+        fn = se ? gen_helper_gvec_vfm64s : gen_helper_gvec_vfm64;
+        break;
     default:
         g_assert_not_reached();
     }
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 764f7379da..57b7fe4aff 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -487,3 +487,20 @@ void HELPER(gvec_vflr64s)(void *v1, const void *v2, CPUS390XState *env,
 
     vflr64(v1, v2, env, true, XxC, erm, GETPC());
 }
+
+static uint64_t vfm64(uint64_t a, uint64_t b, float_status *s)
+{
+    return float64_mul(a, b, s);
+}
+
+void HELPER(gvec_vfm64)(void *v1, const void *v2, const void *v3,
+                        CPUS390XState *env, uint32_t desc)
+{
+    vop64_3(v1, v2, v3, env, false, vfm64, GETPC());
+}
+
+void HELPER(gvec_vfm64s)(void *v1, const void *v2, const void *v3,
+                         CPUS390XState *env, uint32_t desc)
+{
+    vop64_3(v1, v2, v3, env, true, vfm64, GETPC());
+}

From c64c598402c0e20cf60aaad43700eb80eed889e4 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 22:17:09 +0200
Subject: [PATCH 24/33] s390x/tcg: Implement VECTOR FP MULTIPLY AND
 (ADD|SUBTRACT)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  4 +++
 target/s390x/insn-data.def      |  4 +++
 target/s390x/translate_vx.inc.c | 23 ++++++++++++++++
 target/s390x/vec_fpu_helper.c   | 48 +++++++++++++++++++++++++++++++++
 4 files changed, 79 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 22e02a0178..bcaabb91a5 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -284,6 +284,10 @@ DEF_HELPER_FLAGS_4(gvec_vflr64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vflr64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfma64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfma64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfms64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index e56059ac34..e86ade9e44 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1236,6 +1236,10 @@
     F(0xe7c5, VFLR,    VRR_a, V,   0, 0, 0, 0, vcdg, 0, IF_VEC)
 /* VECTOR FP MULTIPLY */
     F(0xe7e7, VFM,     VRR_c, V,   0, 0, 0, 0, vfa, 0, IF_VEC)
+/* VECTOR FP MULTIPLY AND ADD */
+    F(0xe78f, VFMA,    VRR_e, V,   0, 0, 0, 0, vfma, 0, IF_VEC)
+/* VECTOR FP MULTIPLY AND SUBTRACT */
+    F(0xe78e, VFMS,    VRR_e, V,   0, 0, 0, 0, vfma, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index ae31a327cf..b624c7a8aa 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2704,3 +2704,26 @@ static DisasJumpType op_vfll(DisasContext *s, DisasOps *o)
                    0, fn);
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vfma(DisasContext *s, DisasOps *o)
+{
+    const uint8_t m5 = get_field(s->fields, m5);
+    const uint8_t fpf = get_field(s->fields, m6);
+    const bool se = extract32(m5, 3, 1);
+    gen_helper_gvec_4_ptr *fn;
+
+    if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (s->fields->op2 == 0x8f) {
+        fn = se ? gen_helper_gvec_vfma64s : gen_helper_gvec_vfma64;
+    } else {
+        fn = se ? gen_helper_gvec_vfms64s : gen_helper_gvec_vfms64;
+    }
+    gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
+                   get_field(s->fields, v3), get_field(s->fields, v4), cpu_env,
+                   0, fn);
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 57b7fe4aff..7993a6fafa 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -504,3 +504,51 @@ void HELPER(gvec_vfm64s)(void *v1, const void *v2, const void *v3,
 {
     vop64_3(v1, v2, v3, env, true, vfm64, GETPC());
 }
+
+static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
+                   const S390Vector *v4, CPUS390XState *env, bool s, int flags,
+                   uintptr_t retaddr)
+{
+    uint8_t vxc, vec_exc = 0;
+    S390Vector tmp = {};
+    int i;
+
+    for (i = 0; i < 2; i++) {
+        const uint64_t a = s390_vec_read_element64(v2, i);
+        const uint64_t b = s390_vec_read_element64(v3, i);
+        const uint64_t c = s390_vec_read_element64(v4, i);
+        uint64_t ret = float64_muladd(a, b, c, flags, &env->fpu_status);
+
+        s390_vec_write_element64(&tmp, i, ret);
+        vxc = check_ieee_exc(env, i, false, &vec_exc);
+        if (s || vxc) {
+            break;
+        }
+    }
+    handle_ieee_exc(env, vxc, vec_exc, retaddr);
+    *v1 = tmp;
+}
+
+void HELPER(gvec_vfma64)(void *v1, const void *v2, const void *v3,
+                         const void *v4, CPUS390XState *env, uint32_t desc)
+{
+    vfma64(v1, v2, v3, v4, env, false, 0, GETPC());
+}
+
+void HELPER(gvec_vfma64s)(void *v1, const void *v2, const void *v3,
+                         const void *v4, CPUS390XState *env, uint32_t desc)
+{
+    vfma64(v1, v2, v3, v4, env, true, 0, GETPC());
+}
+
+void HELPER(gvec_vfms64)(void *v1, const void *v2, const void *v3,
+                         const void *v4, CPUS390XState *env, uint32_t desc)
+{
+    vfma64(v1, v2, v3, v4, env, false, float_muladd_negate_c, GETPC());
+}
+
+void HELPER(gvec_vfms64s)(void *v1, const void *v2, const void *v3,
+                         const void *v4, CPUS390XState *env, uint32_t desc)
+{
+    vfma64(v1, v2, v3, v4, env, true, float_muladd_negate_c, GETPC());
+}

From 76e35cc7a52390d2743a9746a1bec29754f60306 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 22:18:57 +0200
Subject: [PATCH 25/33] s390x/tcg: Implement VECTOR FP PERFORM SIGN OPERATION

The only FP instruction we can implement without an helper.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/insn-data.def      |  2 ++
 target/s390x/translate_vx.inc.c | 52 +++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index e86ade9e44..fa2e801747 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1240,6 +1240,8 @@
     F(0xe78f, VFMA,    VRR_e, V,   0, 0, 0, 0, vfma, 0, IF_VEC)
 /* VECTOR FP MULTIPLY AND SUBTRACT */
     F(0xe78e, VFMS,    VRR_e, V,   0, 0, 0, 0, vfma, 0, IF_VEC)
+/* VECTOR FP PERFORM SIGN OPERATION */
+    F(0xe7cc, VFPSO,   VRR_a, V,   0, 0, 0, 0, vfpso, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index b624c7a8aa..9b8606ba25 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2727,3 +2727,55 @@ static DisasJumpType op_vfma(DisasContext *s, DisasOps *o)
                    0, fn);
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vfpso(DisasContext *s, DisasOps *o)
+{
+    const uint8_t v1 = get_field(s->fields, v1);
+    const uint8_t v2 = get_field(s->fields, v2);
+    const uint8_t fpf = get_field(s->fields, m3);
+    const uint8_t m4 = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    TCGv_i64 tmp;
+
+    if (fpf != FPF_LONG || extract32(m4, 0, 3) || m5 > 2) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m4, 3, 1)) {
+        tmp = tcg_temp_new_i64();
+        read_vec_element_i64(tmp, v2, 0, ES_64);
+        switch (m5) {
+        case 0:
+            /* sign bit is inverted (complement) */
+            tcg_gen_xori_i64(tmp, tmp, 1ull << 63);
+            break;
+        case 1:
+            /* sign bit is set to one (negative) */
+            tcg_gen_ori_i64(tmp, tmp, 1ull << 63);
+            break;
+        case 2:
+            /* sign bit is set to zero (positive) */
+            tcg_gen_andi_i64(tmp, tmp, (1ull << 63) - 1);
+            break;
+        }
+        write_vec_element_i64(tmp, v1, 0, ES_64);
+        tcg_temp_free_i64(tmp);
+    } else {
+        switch (m5) {
+        case 0:
+            /* sign bit is inverted (complement) */
+            gen_gvec_fn_2i(xori, ES_64, v1, v2, 1ull << 63);
+            break;
+        case 1:
+            /* sign bit is set to one (negative) */
+            gen_gvec_fn_2i(ori, ES_64, v1, v2, 1ull << 63);
+            break;
+        case 2:
+            /* sign bit is set to zero (positive) */
+            gen_gvec_fn_2i(andi, ES_64, v1, v2, (1ull << 63) - 1);
+            break;
+        }
+    }
+    return DISAS_NEXT;
+}

From 5938f20cb807b584799feef411a1906a9d8f7a1b Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 22:22:35 +0200
Subject: [PATCH 26/33] s390x/tcg: Implement VECTOR FP SQUARE ROOT

Simulate XxC=0 and ERM=0 (current mode), so we can use the existing
helper function.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  2 ++
 target/s390x/insn-data.def      |  2 ++
 target/s390x/translate_vx.inc.c | 19 +++++++++++++++++++
 target/s390x/vec_fpu_helper.c   | 17 +++++++++++++++++
 4 files changed, 40 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index bcaabb91a5..23b37af1e4 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -288,6 +288,8 @@ DEF_HELPER_FLAGS_6(gvec_vfma64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env
 DEF_HELPER_FLAGS_6(gvec_vfma64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfms64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfsq64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vfsq64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index fa2e801747..354252d57c 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1242,6 +1242,8 @@
     F(0xe78e, VFMS,    VRR_e, V,   0, 0, 0, 0, vfma, 0, IF_VEC)
 /* VECTOR FP PERFORM SIGN OPERATION */
     F(0xe7cc, VFPSO,   VRR_a, V,   0, 0, 0, 0, vfpso, 0, IF_VEC)
+/* VECTOR FP SQUARE ROOT */
+    F(0xe7ce, VFSQ,    VRR_a, V,   0, 0, 0, 0, vfsq, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 9b8606ba25..75f3596c4b 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2779,3 +2779,22 @@ static DisasJumpType op_vfpso(DisasContext *s, DisasOps *o)
     }
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vfsq(DisasContext *s, DisasOps *o)
+{
+    const uint8_t fpf = get_field(s->fields, m3);
+    const uint8_t m4 = get_field(s->fields, m4);
+    gen_helper_gvec_2_ptr *fn = gen_helper_gvec_vfsq64;
+
+    if (fpf != FPF_LONG || extract32(m4, 0, 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m4, 3, 1)) {
+        fn = gen_helper_gvec_vfsq64s;
+    }
+    gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), cpu_env,
+                   0, fn);
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 7993a6fafa..18773eb559 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -552,3 +552,20 @@ void HELPER(gvec_vfms64s)(void *v1, const void *v2, const void *v3,
 {
     vfma64(v1, v2, v3, v4, env, true, float_muladd_negate_c, GETPC());
 }
+
+static uint64_t vfsq64(uint64_t a, float_status *s)
+{
+    return float64_sqrt(a, s);
+}
+
+void HELPER(gvec_vfsq64)(void *v1, const void *v2, CPUS390XState *env,
+                         uint32_t desc)
+{
+    vop64_2(v1, v2, env, false, false, 0, vfsq64, GETPC());
+}
+
+void HELPER(gvec_vfsq64s)(void *v1, const void *v2, CPUS390XState *env,
+                          uint32_t desc)
+{
+    vop64_2(v1, v2, env, true, false, 0, vfsq64, GETPC());
+}

From 658a395f6c41d72525f575de56fc9d4902161f2b Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 22:25:34 +0200
Subject: [PATCH 27/33] s390x/tcg: Implement VECTOR FP SUBTRACT

Similar to VECTOR FP ADD.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  2 ++
 target/s390x/insn-data.def      |  2 ++
 target/s390x/translate_vx.inc.c |  3 +++
 target/s390x/vec_fpu_helper.c   | 17 +++++++++++++++++
 4 files changed, 24 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 23b37af1e4..c788fc1b7f 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -290,6 +290,8 @@ DEF_HELPER_FLAGS_6(gvec_vfms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env
 DEF_HELPER_FLAGS_6(gvec_vfms64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfs64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfs64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 354252d57c..4426f40250 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1244,6 +1244,8 @@
     F(0xe7cc, VFPSO,   VRR_a, V,   0, 0, 0, 0, vfpso, 0, IF_VEC)
 /* VECTOR FP SQUARE ROOT */
     F(0xe7ce, VFSQ,    VRR_a, V,   0, 0, 0, 0, vfsq, 0, IF_VEC)
+/* VECTOR FP SUBTRACT */
+    F(0xe7e2, VFS,     VRR_c, V,   0, 0, 0, 0, vfa, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 75f3596c4b..c0a19aa9f3 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2566,6 +2566,9 @@ static DisasJumpType op_vfa(DisasContext *s, DisasOps *o)
     case 0xe7:
         fn = se ? gen_helper_gvec_vfm64s : gen_helper_gvec_vfm64;
         break;
+    case 0xe2:
+        fn = se ? gen_helper_gvec_vfs64s : gen_helper_gvec_vfs64;
+        break;
     default:
         g_assert_not_reached();
     }
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 18773eb559..ccdc975628 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -569,3 +569,20 @@ void HELPER(gvec_vfsq64s)(void *v1, const void *v2, CPUS390XState *env,
 {
     vop64_2(v1, v2, env, true, false, 0, vfsq64, GETPC());
 }
+
+static uint64_t vfs64(uint64_t a, uint64_t b, float_status *s)
+{
+    return float64_sub(a, b, s);
+}
+
+void HELPER(gvec_vfs64)(void *v1, const void *v2, const void *v3,
+                        CPUS390XState *env, uint32_t desc)
+{
+    vop64_3(v1, v2, v3, env, false, vfs64, GETPC());
+}
+
+void HELPER(gvec_vfs64s)(void *v1, const void *v2, const void *v3,
+                         CPUS390XState *env, uint32_t desc)
+{
+    vop64_3(v1, v2, v3, env, true, vfs64, GETPC());
+}

From 83b955f9a8d149c31eea015020e7cbb25918839d Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 22:27:21 +0200
Subject: [PATCH 28/33] s390x/tcg: Implement VECTOR FP TEST DATA CLASS
 IMMEDIATE

We can reuse float64_dcmask().

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  2 ++
 target/s390x/insn-data.def      |  2 ++
 target/s390x/translate_vx.inc.c | 21 +++++++++++++++++++
 target/s390x/vec_fpu_helper.c   | 37 +++++++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index c788fc1b7f..e9aff83b05 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -292,6 +292,8 @@ DEF_HELPER_FLAGS_4(gvec_vfsq64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfs64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfs64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_4(gvec_vftci64, void, ptr, cptr, env, i32)
+DEF_HELPER_4(gvec_vftci64s, void, ptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 4426f40250..f421184fcd 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1246,6 +1246,8 @@
     F(0xe7ce, VFSQ,    VRR_a, V,   0, 0, 0, 0, vfsq, 0, IF_VEC)
 /* VECTOR FP SUBTRACT */
     F(0xe7e2, VFS,     VRR_c, V,   0, 0, 0, 0, vfa, 0, IF_VEC)
+/* VECTOR FP TEST DATA CLASS IMMEDIATE */
+    F(0xe74a, VFTCI,   VRI_e, V,   0, 0, 0, 0, vftci, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index c0a19aa9f3..69c675e411 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -2801,3 +2801,24 @@ static DisasJumpType op_vfsq(DisasContext *s, DisasOps *o)
                    0, fn);
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vftci(DisasContext *s, DisasOps *o)
+{
+    const uint16_t i3 = get_field(s->fields, i3);
+    const uint8_t fpf = get_field(s->fields, m4);
+    const uint8_t m5 = get_field(s->fields, m5);
+    gen_helper_gvec_2_ptr *fn = gen_helper_gvec_vftci64;
+
+    if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    if (extract32(m5, 3, 1)) {
+        fn = gen_helper_gvec_vftci64s;
+    }
+    gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), cpu_env,
+                   i3, fn);
+    set_cc_static(s);
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index ccdc975628..a48bd704bc 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -586,3 +586,40 @@ void HELPER(gvec_vfs64s)(void *v1, const void *v2, const void *v3,
 {
     vop64_3(v1, v2, v3, env, true, vfs64, GETPC());
 }
+
+static int vftci64(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
+                   bool s, uint16_t i3)
+{
+    int i, match = 0;
+
+    for (i = 0; i < 2; i++) {
+        float64 a = s390_vec_read_element64(v2, i);
+
+        if (float64_dcmask(env, a) & i3) {
+            match++;
+            s390_vec_write_element64(v1, i, -1ull);
+        } else {
+            s390_vec_write_element64(v1, i, 0);
+        }
+        if (s) {
+            break;
+        }
+    }
+
+    if (match) {
+        return s || match == 2 ? 0 : 1;
+    }
+    return 3;
+}
+
+void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
+                          uint32_t desc)
+{
+    env->cc_op = vftci64(v1, v2, env, false, simd_data(desc));
+}
+
+void HELPER(gvec_vftci64s)(void *v1, const void *v2, CPUS390XState *env,
+                           uint32_t desc)
+{
+    env->cc_op = vftci64(v1, v2, env, true, simd_data(desc));
+}

From fd4818518bd8b05023a99841f8d6de7b91340099 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 27 Feb 2019 09:07:34 +0100
Subject: [PATCH 29/33] s390x/tcg: Allow linux-user to use vector instructions

Once we unlock S390_FEAT_VECTOR for TCG, we want linux-user to be
able to make use of it.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/cpu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index b1df63d82c..6af1a1530f 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -145,6 +145,9 @@ static void s390_cpu_full_reset(CPUState *s)
 #if defined(CONFIG_USER_ONLY)
     /* user mode should always be allowed to use the full FPU */
     env->cregs[0] |= CR0_AFP;
+    if (s390_has_feat(S390_FEAT_VECTOR)) {
+        env->cregs[0] |= CR0_VECTOR;
+    }
 #endif
 
     /* architectured initial value for Breaking-Event-Address register */

From c7f22b31ce534be758dc01e2ed4e5ce984d5bd3d Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2019 22:29:46 +0200
Subject: [PATCH 30/33] s390x/tcg: We support the Vector Facility

Let's add it to the max model, so we can enable it.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/gen-features.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index c346b76bdf..a818c80332 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -702,6 +702,7 @@ static uint16_t qemu_LATEST[] = {
 static uint16_t qemu_MAX[] = {
     /* z13+ features */
     S390_FEAT_STFLE_53,
+    S390_FEAT_VECTOR,
     /* generates a dependency warning, leave it out for now */
     S390_FEAT_MSA_EXT_5,
 };

From 08ef92d556c584c7faf594ff3af46df456276e1b Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 27 Feb 2019 09:12:09 +0100
Subject: [PATCH 31/33] s390x: Bump the "qemu" CPU model up to a stripped-down
 z13

We don't care about the other two missing base features:
- S390_FEAT_DFP_PACKED_CONVERSION
- S390_FEAT_GROUP_GEN13_PTFF

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 hw/s390x/s390-virtio-ccw.c  |  2 ++
 target/s390x/cpu_models.c   |  4 ++--
 target/s390x/gen-features.c | 11 +++++++----
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index bbc6e8fa0b..4d643686cb 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -669,7 +669,9 @@ DEFINE_CCW_MACHINE(4_1, "4.1", true);
 
 static void ccw_machine_4_0_instance_options(MachineState *machine)
 {
+    static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V4_0 };
     ccw_machine_4_1_instance_options(machine);
+    s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat);
 }
 
 static void ccw_machine_4_0_class_options(MachineClass *mc)
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 21ea819483..b5d16e4c89 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -86,8 +86,8 @@ static S390CPUDef s390_cpu_defs[] = {
     CPUDEF_INIT(0x8562, 15, 1, 47, 0x08000000U, "gen15b", "IBM 8562 GA1"),
 };
 
-#define QEMU_MAX_CPU_TYPE 0x2827
-#define QEMU_MAX_CPU_GEN 12
+#define QEMU_MAX_CPU_TYPE 0x2964
+#define QEMU_MAX_CPU_GEN 13
 #define QEMU_MAX_CPU_EC_GA 2
 static const S390FeatInit qemu_max_cpu_feat_init = { S390_FEAT_LIST_QEMU_MAX };
 static S390FeatBitmap qemu_max_cpu_feat;
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index a818c80332..dc320a06c2 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -689,7 +689,7 @@ static uint16_t qemu_V3_1[] = {
     S390_FEAT_MSA_EXT_4,
 };
 
-static uint16_t qemu_LATEST[] = {
+static uint16_t qemu_V4_0[] = {
     /*
      * Only BFP bits are implemented (HFP, DFP, PFPO and DIVIDE TO INTEGER not
      * implemented yet).
@@ -698,11 +698,13 @@ static uint16_t qemu_LATEST[] = {
     S390_FEAT_ZPCI,
 };
 
-/* add all new definitions before this point */
-static uint16_t qemu_MAX[] = {
-    /* z13+ features */
+static uint16_t qemu_LATEST[] = {
     S390_FEAT_STFLE_53,
     S390_FEAT_VECTOR,
+};
+
+/* add all new definitions before this point */
+static uint16_t qemu_MAX[] = {
     /* generates a dependency warning, leave it out for now */
     S390_FEAT_MSA_EXT_5,
 };
@@ -821,6 +823,7 @@ static FeatGroupDefSpec FeatGroupDef[] = {
 static FeatGroupDefSpec QemuFeatDef[] = {
     QEMU_FEAT_INITIALIZER(V2_11),
     QEMU_FEAT_INITIALIZER(V3_1),
+    QEMU_FEAT_INITIALIZER(V4_0),
     QEMU_FEAT_INITIALIZER(LATEST),
     QEMU_FEAT_INITIALIZER(MAX),
 };

From 37c70c43dc29b764969cd8b704fbf946fcc2ef5a Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Mon, 3 Jun 2019 11:57:35 -0500
Subject: [PATCH 32/33] s390x/tcg: Use tcg_gen_gvec_bitsel for VECTOR SELECT

This replaces the target-specific implementations for VSEL.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/translate_vx.inc.c | 38 ++++++---------------------------
 1 file changed, 6 insertions(+), 32 deletions(-)

diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 69c675e411..7b1d31cba5 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -245,6 +245,9 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
 #define gen_gvec_fn_3(fn, es, v1, v2, v3) \
     tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       vec_full_reg_offset(v3), 16, 16)
+#define gen_gvec_fn_4(fn, es, v1, v2, v3, v4) \
+    tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
+                      vec_full_reg_offset(v3), vec_full_reg_offset(v4), 16, 16)
 
 /*
  * Helper to carry out a 128 bit vector computation using 2 i64 values per
@@ -915,40 +918,11 @@ static DisasJumpType op_vsce(DisasContext *s, DisasOps *o)
     return DISAS_NEXT;
 }
 
-static void gen_sel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
-{
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    /* bit in c not set -> copy bit from b */
-    tcg_gen_andc_i64(t, b, c);
-    /* bit in c set -> copy bit from a */
-    tcg_gen_and_i64(d, a, c);
-    /* merge the results */
-    tcg_gen_or_i64(d, d, t);
-    tcg_temp_free_i64(t);
-}
-
-static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b,
-                        TCGv_vec c)
-{
-    TCGv_vec t = tcg_temp_new_vec_matching(d);
-
-    tcg_gen_andc_vec(vece, t, b, c);
-    tcg_gen_and_vec(vece, d, a, c);
-    tcg_gen_or_vec(vece, d, d, t);
-    tcg_temp_free_vec(t);
-}
-
 static DisasJumpType op_vsel(DisasContext *s, DisasOps *o)
 {
-    static const GVecGen4 gvec_op = {
-        .fni8 = gen_sel_i64,
-        .fniv = gen_sel_vec,
-        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-    };
-
-    gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
-               get_field(s->fields, v3), get_field(s->fields, v4), &gvec_op);
+    gen_gvec_fn_4(bitsel, ES_8, get_field(s->fields, v1),
+                  get_field(s->fields, v4), get_field(s->fields, v2),
+                  get_field(s->fields, v3));
     return DISAS_NEXT;
 }
 

From 6d88baf18653ff8826db3dd840a6b372d3477280 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 4 Jun 2019 11:30:07 +0200
Subject: [PATCH 33/33] linux-user: elf: ELF_HWCAP for s390x

Let's add all HWCAPs that we can support under TCG right now, when the
respective CPU facilities are enabled.

Cc: Riku Voipio <riku.voipio@iki.fi>
Cc: Laurent Vivier <laurent@vivier.eu>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Laurent Vivier <laurent@vivier.eu>
Cc: Richard Henderson <richard.henderson@linaro.org>
Acked-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 include/elf.h        |  1 +
 linux-user/elfload.c | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/include/elf.h b/include/elf.h
index ea7708a4ea..3501e0c8d0 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -598,6 +598,7 @@ typedef struct {
 #define HWCAP_S390_ETF3EH       256
 #define HWCAP_S390_HIGH_GPRS    512
 #define HWCAP_S390_TE           1024
+#define HWCAP_S390_VXRS         2048
 
 /* M68K specific definitions. */
 /* We use the top 24 bits to encode information about the
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index a57b7049dd..5451d262ec 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -1308,6 +1308,34 @@ static inline void init_thread(struct target_pt_regs *regs,
 #define ELF_DATA	ELFDATA2MSB
 #define ELF_ARCH	EM_S390
 
+#include "elf.h"
+
+#define ELF_HWCAP get_elf_hwcap()
+
+#define GET_FEATURE(_feat, _hwcap) \
+    do { if (s390_has_feat(_feat)) { hwcap |= _hwcap; } } while (0)
+
+static uint32_t get_elf_hwcap(void)
+{
+    /*
+     * Let's assume we always have esan3 and zarch.
+     * 31-bit processes can use 64-bit registers (high gprs).
+     */
+    uint32_t hwcap = HWCAP_S390_ESAN3 | HWCAP_S390_ZARCH | HWCAP_S390_HIGH_GPRS;
+
+    GET_FEATURE(S390_FEAT_STFLE, HWCAP_S390_STFLE);
+    GET_FEATURE(S390_FEAT_MSA, HWCAP_S390_MSA);
+    GET_FEATURE(S390_FEAT_LONG_DISPLACEMENT, HWCAP_S390_LDISP);
+    GET_FEATURE(S390_FEAT_EXTENDED_IMMEDIATE, HWCAP_S390_EIMM);
+    if (s390_has_feat(S390_FEAT_EXTENDED_TRANSLATION_3) &&
+        s390_has_feat(S390_FEAT_ETF3_ENH)) {
+        hwcap |= HWCAP_S390_ETF3EH;
+    }
+    GET_FEATURE(S390_FEAT_VECTOR, HWCAP_S390_VXRS);
+
+    return hwcap;
+}
+
 static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop)
 {
     regs->psw.addr = infop->entry;