You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
127 lines
6.2 KiB
127 lines
6.2 KiB
From 2f8587a8b30cb2e4b45df36c6c8461a440a77336 Mon Sep 17 00:00:00 2001
|
|
From: Luca Barbato <lu_zero@gentoo.org>
|
|
Date: Sat, 23 Jun 2018 02:47:16 -0500
|
|
Subject: [PATCH] Update the VSX support
|
|
|
|
The CFL function signature takes a source and destination pointer.
|
|
Also make the typedef compatible with clang.
|
|
|
|
Change-Id: Ia633d10141ac0fd10af0a40b9878d18293463fb8
|
|
---
|
|
av1/common/ppc/cfl_ppc.c | 85 ++++++++++++++++++++--------------------
|
|
1 file changed, 42 insertions(+), 43 deletions(-)
|
|
|
|
diff --git a/av1/common/ppc/cfl_ppc.c b/av1/common/ppc/cfl_ppc.c
|
|
index 58933a7b3..026a07809 100644
|
|
--- a/av1/common/ppc/cfl_ppc.c
|
|
+++ b/av1/common/ppc/cfl_ppc.c
|
|
@@ -24,19 +24,21 @@
|
|
#define CFL_LINE_2 128
|
|
#define CFL_LINE_3 192
|
|
|
|
-typedef vector int8_t int8x16_t;
|
|
-typedef vector uint8_t uint8x16_t;
|
|
-typedef vector int16_t int16x8_t;
|
|
-typedef vector uint16_t uint16x8_t;
|
|
-typedef vector int32_t int32x4_t;
|
|
-typedef vector uint32_t uint32x4_t;
|
|
-typedef vector uint64_t uint64x2_t;
|
|
+typedef vector signed char int8x16_t; // NOLINT(runtime/int)
|
|
+typedef vector unsigned char uint8x16_t; // NOLINT(runtime/int)
|
|
+typedef vector signed short int16x8_t; // NOLINT(runtime/int)
|
|
+typedef vector unsigned short uint16x8_t; // NOLINT(runtime/int)
|
|
+typedef vector signed int int32x4_t; // NOLINT(runtime/int)
|
|
+typedef vector unsigned int uint32x4_t; // NOLINT(runtime/int)
|
|
+typedef vector unsigned long long uint64x2_t; // NOLINT(runtime/int)
|
|
|
|
-static INLINE void subtract_average_vsx(int16_t *pred_buf, int width,
|
|
- int height, int round_offset,
|
|
+static INLINE void subtract_average_vsx(const uint16_t *src_ptr, int16_t *dst,
|
|
+ int width, int height, int round_offset,
|
|
int num_pel_log2) {
|
|
- const int16_t *end = pred_buf + height * CFL_BUF_LINE;
|
|
- const int16_t *sum_buf = pred_buf;
|
|
+ // int16_t *dst = dst_ptr;
|
|
+ const int16_t *dst_end = dst + height * CFL_BUF_LINE;
|
|
+ const int16_t *sum_buf = (const int16_t *)src_ptr;
|
|
+ const int16_t *end = sum_buf + height * CFL_BUF_LINE;
|
|
const uint32x4_t div_shift = vec_splats((uint32_t)num_pel_log2);
|
|
const uint8x16_t mask_64 = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
|
|
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
|
|
@@ -71,43 +73,40 @@ static INLINE void subtract_average_vsx(int16_t *pred_buf, int width,
|
|
const int32x4_t avg = vec_sr(sum_32x4, div_shift);
|
|
const int16x8_t vec_avg = vec_pack(avg, avg);
|
|
do {
|
|
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0, pred_buf), vec_avg), OFF_0, pred_buf);
|
|
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_1, pred_buf), vec_avg),
|
|
- OFF_0 + CFL_BUF_LINE_BYTES, pred_buf);
|
|
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_2, pred_buf), vec_avg),
|
|
- OFF_0 + CFL_LINE_2, pred_buf);
|
|
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_3, pred_buf), vec_avg),
|
|
- OFF_0 + CFL_LINE_3, pred_buf);
|
|
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0, dst), vec_avg), OFF_0, dst);
|
|
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_1, dst), vec_avg),
|
|
+ OFF_0 + CFL_BUF_LINE_BYTES, dst);
|
|
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_2, dst), vec_avg),
|
|
+ OFF_0 + CFL_LINE_2, dst);
|
|
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_0 + CFL_LINE_3, dst), vec_avg),
|
|
+ OFF_0 + CFL_LINE_3, dst);
|
|
if (width >= 16) {
|
|
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1, pred_buf), vec_avg), OFF_1,
|
|
- pred_buf);
|
|
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_1, pred_buf), vec_avg),
|
|
- OFF_1 + CFL_LINE_1, pred_buf);
|
|
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_2, pred_buf), vec_avg),
|
|
- OFF_1 + CFL_LINE_2, pred_buf);
|
|
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_3, pred_buf), vec_avg),
|
|
- OFF_1 + CFL_LINE_3, pred_buf);
|
|
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1, dst), vec_avg), OFF_1, dst);
|
|
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_1, dst), vec_avg),
|
|
+ OFF_1 + CFL_LINE_1, dst);
|
|
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_2, dst), vec_avg),
|
|
+ OFF_1 + CFL_LINE_2, dst);
|
|
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_1 + CFL_LINE_3, dst), vec_avg),
|
|
+ OFF_1 + CFL_LINE_3, dst);
|
|
}
|
|
if (width == 32) {
|
|
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2, pred_buf), vec_avg), OFF_2,
|
|
- pred_buf);
|
|
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_1, pred_buf), vec_avg),
|
|
- OFF_2 + CFL_LINE_1, pred_buf);
|
|
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_2, pred_buf), vec_avg),
|
|
- OFF_2 + CFL_LINE_2, pred_buf);
|
|
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_3, pred_buf), vec_avg),
|
|
- OFF_2 + CFL_LINE_3, pred_buf);
|
|
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2, dst), vec_avg), OFF_2, dst);
|
|
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_1, dst), vec_avg),
|
|
+ OFF_2 + CFL_LINE_1, dst);
|
|
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_2, dst), vec_avg),
|
|
+ OFF_2 + CFL_LINE_2, dst);
|
|
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_2 + CFL_LINE_3, dst), vec_avg),
|
|
+ OFF_2 + CFL_LINE_3, dst);
|
|
|
|
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3, pred_buf), vec_avg), OFF_3,
|
|
- pred_buf);
|
|
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_1, pred_buf), vec_avg),
|
|
- OFF_3 + CFL_LINE_1, pred_buf);
|
|
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_2, pred_buf), vec_avg),
|
|
- OFF_3 + CFL_LINE_2, pred_buf);
|
|
- vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_3, pred_buf), vec_avg),
|
|
- OFF_3 + CFL_LINE_3, pred_buf);
|
|
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3, dst), vec_avg), OFF_3, dst);
|
|
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_1, dst), vec_avg),
|
|
+ OFF_3 + CFL_LINE_1, dst);
|
|
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_2, dst), vec_avg),
|
|
+ OFF_3 + CFL_LINE_2, dst);
|
|
+ vec_vsx_st(vec_sub(vec_vsx_ld(OFF_3 + CFL_LINE_3, dst), vec_avg),
|
|
+ OFF_3 + CFL_LINE_3, dst);
|
|
}
|
|
- } while ((pred_buf += CFL_BUF_LINE * 4) < end);
|
|
+ } while ((dst += CFL_BUF_LINE * 4) < dst_end);
|
|
}
|
|
|
|
// Declare wrappers for VSX sizes
|
|
--
|
|
2.22.0
|
|
|