// Patch overview (this text is a git unified diff whose line breaks have been collapsed).
//
// src/common/gsvector_formatter.h
//   Adds three fmt::formatter specializations whose format() overloads take GSVector2i,
//   GSVector2 and GSVector4. Each builds a TinyString via TinyString::from_format using
//   "{},{}" (x, y) or "{},{},{},{}" (x, y, z, w) and forwards str.view() to the base
//   formatter's format().
//
// src/common/gsvector_neon.h
//   Renames the GSVector2i-returning i8to16() to s8to16() and adds s8to32(), u8to32(),
//   s16to32() and u16to32(), each built from vmovl_* widening followed by vget_low_*.
//
// src/common/gsvector_nosimd.h
//   Renames the GSVector2i-returning i8to16() to s8to16() and adds s8to32(), u8to16(),
//   u8to32(), u16to32() and s16to32() using the ALL_LANES_16 / ALL_LANES_32 macros.
//   Changes the GSVector4i-returning u8to16() from ALL_LANES_64 to ALL_LANES_16.
//   Joins dot() and the static xyxy() onto single lines; their expressions are unchanged.
//
// src/common/gsvector_sse.h
//   Removes i8to16() (which used _mm_cvtepi8_epi16 and sat before the
//   `#ifdef CPU_ARCH_SSE41` line). Adds u8to32(), s16to32() and u16to32() inside that
//   #ifdef, and adds an #else branch providing u8to16(), u8to32(), s16to32() and u16to32()
//   built from upl8()/upl16(), _mm_unpacklo_epi8/_mm_unpacklo_epi16 and sll32<16>()/sra32<16>().
//
// NOTE(review): the visible SSE hunk adds no s8to16()/s8to32(), while the NEON and no-SIMD
//   hunks rename i8to16() to s8to16() and add s8to32(). The in-patch comment ("adding only
//   as needed") suggests this is deliberate -- confirm no SSE caller still needs the signed
//   8-bit widening.
// NOTE(review): in the NEON context lines, s8to16()/u8to16() apply vget_low_s8/vget_low_u8 to
//   the vmovl_s8/vmovl_u8 result, whereas the newly added s8to32()/u8to32() apply
//   vget_low_s16/vget_low_u16 to the same intermediate -- check which form is intended.
// NOTE(review): the formatter specializations and the `template` line before srl() show no
//   template argument/parameter lists in this text (e.g. `struct fmt::formatter : formatter`);
//   presumably the angle-bracket contents were lost in extraction -- confirm against the
//   original patch before applying.
diff --git a/src/common/gsvector_formatter.h b/src/common/gsvector_formatter.h index 9a09d6004..6c86e67fc 100644 --- a/src/common/gsvector_formatter.h +++ b/src/common/gsvector_formatter.h @@ -8,6 +8,26 @@ #include "fmt/format.h" +template<> +struct fmt::formatter : formatter +{ + auto format(const GSVector2i& rc, format_context& ctx) const + { + const TinyString str = TinyString::from_format("{},{}", rc.x, rc.y); + return fmt::formatter::format(str.view(), ctx); + } +}; + +template<> +struct fmt::formatter : formatter +{ + auto format(const GSVector2& rc, format_context& ctx) const + { + const TinyString str = TinyString::from_format("{},{}", rc.x, rc.y); + return fmt::formatter::format(str.view(), ctx); + } +}; + template<> struct fmt::formatter : formatter { @@ -19,3 +39,13 @@ struct fmt::formatter : formatter return fmt::formatter::format(str.view(), ctx); } }; + +template<> +struct fmt::formatter : formatter +{ + auto format(const GSVector4& rc, format_context& ctx) const + { + const TinyString str = TinyString::from_format("{},{},{},{}", rc.x, rc.y, rc.z, rc.w); + return fmt::formatter::format(str.view(), ctx); + } +}; diff --git a/src/common/gsvector_neon.h b/src/common/gsvector_neon.h index 8c6122210..bcde43d49 100644 --- a/src/common/gsvector_neon.h +++ b/src/common/gsvector_neon.h @@ -359,7 +359,7 @@ public: #endif - ALWAYS_INLINE GSVector2i i8to16() const + ALWAYS_INLINE GSVector2i s8to16() const { return GSVector2i(vreinterpret_s32_s16(vget_low_s8(vmovl_s8(vreinterpret_s8_s32(v2s))))); } @@ -369,6 +369,23 @@ public: return GSVector2i(vreinterpret_s32_u16(vget_low_u8(vmovl_u8(vreinterpret_u8_s32(v2s))))); } + ALWAYS_INLINE GSVector2i s8to32() const + { + return GSVector2i(vget_low_s32(vmovl_s16(vget_low_s16(vmovl_s8(vreinterpret_s8_s32(v2s)))))); + } + + ALWAYS_INLINE GSVector2i u8to32() const + { + return GSVector2i(vreinterpret_s32_u32(vget_low_u32(vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_s32(v2s))))))); + } + + ALWAYS_INLINE GSVector2i 
s16to32() const { return GSVector2i(vget_low_s32(vmovl_s16(vreinterpret_s16_s32(v2s)))); } + + ALWAYS_INLINE GSVector2i u16to32() const + { + return GSVector2i(vreinterpret_s32_u32(vget_low_u32(vmovl_u16(vreinterpret_u16_s32(v2s))))); + } + template ALWAYS_INLINE GSVector2i srl() const { diff --git a/src/common/gsvector_nosimd.h b/src/common/gsvector_nosimd.h index d40b80f03..6be4c1175 100644 --- a/src/common/gsvector_nosimd.h +++ b/src/common/gsvector_nosimd.h @@ -248,7 +248,12 @@ public: GSVector2i upl32() const { return GSVector2i(S32[0], 0); } - GSVector2i i8to16() const { ALL_LANES_16(ret.S16[i] = S8[i]); } + GSVector2i s8to16() const { ALL_LANES_16(ret.S16[i] = S8[i]); } + GSVector2i s8to32() const { ALL_LANES_32(ret.S32[i] = S8[i]); } + GSVector2i u8to16() const { ALL_LANES_16(ret.U16[i] = U8[i]); } + GSVector2i u8to32() const { ALL_LANES_32(ret.U32[i] = U8[i]); } + GSVector2i u16to32() const { ALL_LANES_32(ret.U32[i] = U16[i]); } + GSVector2i s16to32() const { ALL_LANES_32(ret.S32[i] = S16[i]); } template GSVector2i srl() const @@ -1244,7 +1249,7 @@ public: GSVector4i s16to32() const { ALL_LANES_32(ret.S32[i] = S16[i]); } GSVector4i s16to64() const { ALL_LANES_64(ret.S64[i] = S16[i]); } GSVector4i s32to64() const { ALL_LANES_64(ret.S64[i] = S32[i]); } - GSVector4i u8to16() const { ALL_LANES_64(ret.U16[i] = U8[i]); } + GSVector4i u8to16() const { ALL_LANES_16(ret.U16[i] = U8[i]); } GSVector4i u8to32() const { ALL_LANES_32(ret.U32[i] = U8[i]); } GSVector4i u8to64() const { ALL_LANES_64(ret.U64[i] = U8[i]); } GSVector4i u16to32() const { ALL_LANES_32(ret.U32[i] = U16[i]); } @@ -1846,10 +1851,7 @@ public: GSVector4 hsub(const GSVector4& v) const { return GSVector4(x - y, z - w, v.x - v.y, v.z - v.w); } - ALWAYS_INLINE float dot(const GSVector4& v) const - { - return (x * v.x) + (y * v.y) + (z * v.z) + (w * v.w); - } + ALWAYS_INLINE float dot(const GSVector4& v) const { return (x * v.x) + (y * v.y) + (z * v.z) + (w * v.w); } GSVector4 sat(const GSVector4& min, 
const GSVector4& max) const { @@ -2316,10 +2318,7 @@ public: ALWAYS_INLINE GSVector2 zw() const { return GSVector2(z, w); } - ALWAYS_INLINE static GSVector4 xyxy(const GSVector2& l, const GSVector2& h) - { - return GSVector4(l.x, l.y, h.x, h.y); - } + ALWAYS_INLINE static GSVector4 xyxy(const GSVector2& l, const GSVector2& h) { return GSVector4(l.x, l.y, h.x, h.y); } #define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \ ALWAYS_INLINE GSVector4 xs##ys##zs##ws() const { return GSVector4(F32[xn], F32[yn], F32[zn], F32[wn]); } diff --git a/src/common/gsvector_sse.h b/src/common/gsvector_sse.h index 0b875c6fc..77c944552 100644 --- a/src/common/gsvector_sse.h +++ b/src/common/gsvector_sse.h @@ -369,10 +369,21 @@ public: ALWAYS_INLINE GSVector2i upl32() const { return GSVector2i(_mm_unpacklo_epi32(m, _mm_setzero_si128())); } ALWAYS_INLINE GSVector2i uph32() const { return GSVector2i(_mm_unpackhi_epi32(m, _mm_setzero_si128())); } - ALWAYS_INLINE GSVector2i i8to16() const { return GSVector2i(_mm_cvtepi8_epi16(m)); } - #ifdef CPU_ARCH_SSE41 ALWAYS_INLINE GSVector2i u8to16() const { return GSVector2i(_mm_cvtepu8_epi16(m)); } + ALWAYS_INLINE GSVector2i u8to32() const { return GSVector2i(_mm_cvtepu8_epi32(m)); } + ALWAYS_INLINE GSVector2i s16to32() const { return GSVector2i(_mm_cvtepi16_epi32(m)); } + ALWAYS_INLINE GSVector2i u16to32() const { return GSVector2i(_mm_cvtepu16_epi32(m)); } +#else + // These are a pain, adding only as needed... + ALWAYS_INLINE GSVector2i u8to16() const { return upl8(); } + ALWAYS_INLINE GSVector2i u8to32() const + { + return GSVector2i(_mm_unpacklo_epi16(_mm_unpacklo_epi8(m, _mm_setzero_si128()), _mm_setzero_si128())); + } + + ALWAYS_INLINE GSVector2i s16to32() const { return upl16().sll32<16>().sra32<16>(); } + ALWAYS_INLINE GSVector2i u16to32() const { return upl16(); } #endif template