1From 4d5202aad706fd338646d19aafbf255c3864333c Mon Sep 17 00:00:00 2001
2From: Matheus Ferst <matheus.ferst@eldorado.org.br>
3Date: Fri, 17 Dec 2021 17:57:13 +0100
4Subject: [PATCH 19/21] target/ppc: Implement Vector Mask Move insns
5MIME-Version: 1.0
6Content-Type: text/plain; charset=UTF-8
7Content-Transfer-Encoding: 8bit
8
9Implement the following PowerISA v3.1 instructions:
10mtvsrbm: Move to VSR Byte Mask
11mtvsrhm: Move to VSR Halfword Mask
12mtvsrwm: Move to VSR Word Mask
13mtvsrdm: Move to VSR Doubleword Mask
14mtvsrqm: Move to VSR Quadword Mask
15mtvsrbmi: Move to VSR Byte Mask Immediate
16
17Upstream-Status: Backport
18[https://git.qemu.org/?p=qemu.git;a=commit;h=9193eaa901c54dbff4a91ea0b12a99e0135dbca1]
19
20Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
21Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
22Message-Id: <20211203194229.746275-4-matheus.ferst@eldorado.org.br>
23Signed-off-by: Cédric Le Goater <clg@kaod.org>
24Signed-off-by: Xiangyu Chen <xiangyu.chen@windriver.com>
25---
26 target/ppc/insn32.decode            |  11 +++
27 target/ppc/translate/vmx-impl.c.inc | 115 ++++++++++++++++++++++++++++
28 2 files changed, 126 insertions(+)
29
30diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
31index b0568b1356..8bdc059a4c 100644
32--- a/target/ppc/insn32.decode
33+++ b/target/ppc/insn32.decode
34@@ -40,6 +40,10 @@
35 %ds_rtp         22:4   !function=times_2
36 @DS_rtp         ...... ....0 ra:5 .............. ..             &D rt=%ds_rtp si=%ds_si
37
38+&DX_b           vrt b
39+%dx_b           6:10 16:5 0:1
40+@DX_b           ...... vrt:5  ..... .......... ..... .          &DX_b b=%dx_b
41+
42 &DX             rt d
43 %dx_d           6:s10 16:5 0:1
44 @DX             ...... rt:5  ..... .......... ..... .   &DX d=%dx_d
45@@ -417,6 +421,13 @@ VSRDBI          000100 ..... ..... ..... 01 ... 010110  @VN
46
47 ## Vector Mask Manipulation Instructions
48
49+MTVSRBM         000100 ..... 10000 ..... 11001000010    @VX_tb
50+MTVSRHM         000100 ..... 10001 ..... 11001000010    @VX_tb
51+MTVSRWM         000100 ..... 10010 ..... 11001000010    @VX_tb
52+MTVSRDM         000100 ..... 10011 ..... 11001000010    @VX_tb
53+MTVSRQM         000100 ..... 10100 ..... 11001000010    @VX_tb
54+MTVSRBMI        000100 ..... ..... .......... 01010 .   @DX_b
55+
56 VEXPANDBM       000100 ..... 00000 ..... 11001000010    @VX_tb
57 VEXPANDHM       000100 ..... 00001 ..... 11001000010    @VX_tb
58 VEXPANDWM       000100 ..... 00010 ..... 11001000010    @VX_tb
59diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
60index 96c97bf6e7..d5e02fd7f2 100644
61--- a/target/ppc/translate/vmx-impl.c.inc
62+++ b/target/ppc/translate/vmx-impl.c.inc
63@@ -1607,6 +1607,121 @@ static bool trans_VEXTRACTQM(DisasContext *ctx, arg_VX_tb *a)
64     return true;
65 }
66
67+static bool do_mtvsrm(DisasContext *ctx, arg_VX_tb *a, unsigned vece)
68+{
69+    const uint64_t elem_width = 8 << vece, elem_count_half = 8 >> vece;
70+    uint64_t c; /* immediate: first the AND mask, then the multiplier */
71+    int i, j;
72+    TCGv_i64 hi, lo, t0, t1;
73+
74+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
75+    REQUIRE_VECTOR(ctx);
76+
77+    hi = tcg_temp_new_i64();
78+    lo = tcg_temp_new_i64();
79+    t0 = tcg_temp_new_i64();
80+    t1 = tcg_temp_new_i64();
81+
82+    tcg_gen_extu_tl_i64(t0, cpu_gpr[a->vrb]); /* GPR[vrb] as an i64 */
83+    tcg_gen_extract_i64(hi, t0, elem_count_half, elem_count_half);
84+    tcg_gen_extract_i64(lo, t0, 0, elem_count_half);
85+
86+    /*
87+     * hi/lo now hold the mask bits of each 64-bit half; spread them so each
88+     * bit sits at the LSB of its element, then replicate it. E.g. for bytes:
89+     * 00000000000000000000000000000000000000000000000000000000abcdefgh
90+     *   << 32 - 4
91+     * 0000000000000000000000000000abcdefgh0000000000000000000000000000
92+     *   |
93+     * 0000000000000000000000000000abcdefgh00000000000000000000abcdefgh
94+     *   << 16 - 2
95+     * 00000000000000abcdefgh00000000000000000000abcdefgh00000000000000
96+     *   |
97+     * 00000000000000abcdefgh000000abcdefgh000000abcdefgh000000abcdefgh
98+     *   << 8 - 1
99+     * 0000000abcdefgh000000abcdefgh000000abcdefgh000000abcdefgh0000000
100+     *   |
101+     * 0000000abcdefgXbcdefgXbcdefgXbcdefgXbcdefgXbcdefgXbcdefgXbcdefgh
102+     *   & dup(1)
103+     * 0000000a0000000b0000000c0000000d0000000e0000000f0000000g0000000h
104+     *   * 0xff
105+     * aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh
106+     */
107+    for (i = elem_count_half / 2, j = 32; i > 0; i >>= 1, j >>= 1) {
108+        tcg_gen_shli_i64(t0, hi, j - i);
109+        tcg_gen_shli_i64(t1, lo, j - i);
110+        tcg_gen_or_i64(hi, hi, t0);
111+        tcg_gen_or_i64(lo, lo, t1);
112+    }
113+
114+    c = dup_const(vece, 1); /* 1 in the LSB of every element */
115+    tcg_gen_andi_i64(hi, hi, c);
116+    tcg_gen_andi_i64(lo, lo, c);
117+
118+    c = MAKE_64BIT_MASK(0, elem_width); /* all-ones element, 0xff for bytes */
119+    tcg_gen_muli_i64(hi, hi, c);
120+    tcg_gen_muli_i64(lo, lo, c);
121+
122+    set_avr64(a->vrt, lo, false); /* low doubleword, presumably */
123+    set_avr64(a->vrt, hi, true); /* high doubleword, presumably */
124+
125+    tcg_temp_free_i64(hi);
126+    tcg_temp_free_i64(lo);
127+    tcg_temp_free_i64(t0);
128+    tcg_temp_free_i64(t1);
129+
130+    return true;
131+}
132+
133+TRANS(MTVSRBM, do_mtvsrm, MO_8) /* mtvsrbm: byte elements */
134+TRANS(MTVSRHM, do_mtvsrm, MO_16) /* mtvsrhm: halfword elements */
135+TRANS(MTVSRWM, do_mtvsrm, MO_32) /* mtvsrwm: word elements */
136+TRANS(MTVSRDM, do_mtvsrm, MO_64) /* mtvsrdm: doubleword elements */
137+
138+static bool trans_MTVSRQM(DisasContext *ctx, arg_VX_tb *a)
139+{
140+    TCGv_i64 tmp; /* single mask value shared by both halves of vrt */
141+
142+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
143+    REQUIRE_VECTOR(ctx);
144+
145+    tmp = tcg_temp_new_i64();
146+
147+    tcg_gen_ext_tl_i64(tmp, cpu_gpr[a->vrb]); /* GPR[vrb] as an i64 */
148+    tcg_gen_sextract_i64(tmp, tmp, 0, 1); /* bit 0 -> 0 or all ones */
149+    set_avr64(a->vrt, tmp, false); /* quadword mask: both halves equal */
150+    set_avr64(a->vrt, tmp, true);
151+
152+    tcg_temp_free_i64(tmp);
153+
154+    return true;
155+}
156+
157+static bool trans_MTVSRBMI(DisasContext *ctx, arg_DX_b *a)
158+{
159+    const uint64_t mask = dup_const(MO_8, 1); /* LSB of each byte */
160+    uint64_t hi, lo; /* host values: the mask is built while translating */
161+
162+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
163+    REQUIRE_VECTOR(ctx);
164+
165+    hi = extract16(a->b, 8, 8); /* immediate bits 15:8 */
166+    lo = extract16(a->b, 0, 8); /* immediate bits 7:0 */
167+
168+    for (int i = 4, j = 32; i > 0; i >>= 1, j >>= 1) { /* shifts: 28, 14, 7 */
169+        hi |= hi << (j - i);
170+        lo |= lo << (j - i);
171+    }
172+
173+    hi = (hi & mask) * 0xFF; /* keep each byte's LSB, then fill the byte */
174+    lo = (lo & mask) * 0xFF;
175+
176+    set_avr64(a->vrt, tcg_constant_i64(hi), true); /* host-computed constant */
177+    set_avr64(a->vrt, tcg_constant_i64(lo), false);
178+
179+    return true;
180+}
181+
182 #define GEN_VAFORM_PAIRED(name0, name1, opc2)                           \
183 static void glue(gen_, name0##_##name1)(DisasContext *ctx)              \
184     {                                                                   \
185--
1862.17.1
187
188