You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
217 lines
6.6 KiB
217 lines
6.6 KiB
From 8ce920dddac9846254aaf6261bafd8b22976b04e Mon Sep 17 00:00:00 2001
|
|
From: Jeremy Newton <alexjnewt@hotmail.com>
|
|
Date: Sun, 18 Dec 2022 20:48:21 -0500
|
|
Subject: [PATCH] Revert "Update counters for gfx11"
|
|
|
|
This reverts commit 85f95b94960c6f7ff4ff0242a399deb4a204fb6a.
|
|
---
|
|
doc/OCKL.md | 4 ++--
|
|
ockl/inc/ockl.h | 3 ---
|
|
ockl/src/dm.cl | 15 +++++++++++----
|
|
ockl/src/mtime.cl | 35 ++---------------------------------
|
|
ockl/src/wait.cl | 18 +++++++++---------
|
|
5 files changed, 24 insertions(+), 51 deletions(-)
|
|
|
|
diff --git a/doc/OCKL.md b/doc/OCKL.md
|
|
index 07574f6..05c5c49 100644
|
|
--- a/doc/OCKL.md
|
|
+++ b/doc/OCKL.md
|
|
@@ -99,8 +99,8 @@ The following table lists the available functions along with a brief description
|
|
| `int __ockl_mul24_i32(int,int);` | Multiply assuming operands fit in 24 bits |
|
|
| `uint __ockl_mul24_u32(uint,uint);` | |
|
|
| - | |
|
|
-| `ulong __ockl_cyclectr_u64(void);` | Current value of free running 64-bit clock counter |
|
|
-| `ulong __ockl_steadyctr_u64(void);` | Current value of constant speed 64-bit clock counter |
|
|
+| `ulong __ockl_memtime_u64(void);` | Current value of free running 64-bit clock counter |
|
|
+| `ulong __ockl_memrealtime_u64(void);` | Current value of constant speed 64-bit clock counter |
|
|
| - | |
|
|
| `uint __ockl_activelane_u32(void);` | Index of currently lane counting only active lanes in wavefront |
|
|
| - | |
|
|
diff --git a/ockl/inc/ockl.h b/ockl/inc/ockl.h
|
|
index d0b98d4..6300279 100644
|
|
--- a/ockl/inc/ockl.h
|
|
+++ b/ockl/inc/ockl.h
|
|
@@ -143,9 +143,6 @@ DECL_OCKL_NULLARY_U32(activelane)
|
|
|
|
DECL_OCKL_NULLARY_U64(memtime)
|
|
DECL_OCKL_NULLARY_U64(memrealtime)
|
|
-DECL_OCKL_NULLARY_U64(cyclectr)
|
|
-DECL_OCKL_NULLARY_U64(steadyctr)
|
|
-
|
|
|
|
extern half OCKL_MANGLE_T(wfred_add,f16)(half x);
|
|
extern float OCKL_MANGLE_T(wfred_add,f32)(float x);
|
|
diff --git a/ockl/src/dm.cl b/ockl/src/dm.cl
|
|
index 245b4a1..26373dd 100644
|
|
--- a/ockl/src/dm.cl
|
|
+++ b/ockl/src/dm.cl
|
|
@@ -201,6 +201,13 @@ get_heap_ptr(void) {
|
|
}
|
|
}
|
|
|
|
+// realtime
|
|
+__attribute__((target("s-memrealtime"))) static ulong
|
|
+realtime(void)
|
|
+{
|
|
+ return __builtin_amdgcn_s_memrealtime();
|
|
+}
|
|
+
|
|
// The actual number of blocks in a slab with blocks of kind k
|
|
static uint
|
|
num_blocks(kind_t k)
|
|
@@ -466,7 +473,7 @@ new_slab_wait(__global heap_t *hp, kind_t k)
|
|
uint aid = __ockl_activelane_u32();
|
|
if (aid == 0) {
|
|
ulong expected = AL(&hp->salloc_time[k].value, memory_order_relaxed);
|
|
- ulong now = __ockl_steadyctr_u64();
|
|
+ ulong now = realtime();
|
|
ulong dt = now - expected;
|
|
if (dt < SLAB_TICKS)
|
|
__ockl_rtcwait_u32(SLAB_TICKS - (uint)dt);
|
|
@@ -480,7 +487,7 @@ grow_recordable_wait(__global heap_t *hp, kind_t k)
|
|
uint aid = __ockl_activelane_u32();
|
|
if (aid == 0) {
|
|
ulong expected = AL(&hp->grow_time[k].value, memory_order_relaxed);
|
|
- ulong now = __ockl_steadyctr_u64();
|
|
+ ulong now = realtime();
|
|
ulong dt = now - expected;
|
|
if (dt < GROW_TICKS)
|
|
__ockl_rtcwait_u32(GROW_TICKS - (uint)dt);
|
|
@@ -540,7 +547,7 @@ try_grow_num_recordable_slabs(__global heap_t *hp, kind_t k)
|
|
uint ret = GROW_BUSY;
|
|
if (aid == 0) {
|
|
ulong expected = AL(&hp->grow_time[k].value, memory_order_relaxed);
|
|
- ulong now = __ockl_steadyctr_u64();
|
|
+ ulong now = realtime();
|
|
if (now - expected >= GROW_TICKS &&
|
|
ACE(&hp->grow_time[k].value, &expected, now, memory_order_relaxed))
|
|
ret = GROW_FAILURE;
|
|
@@ -687,7 +694,7 @@ try_allocate_new_slab(__global heap_t *hp, kind_t k)
|
|
|
|
if (aid == 0) {
|
|
ulong expected = AL(&hp->salloc_time[k].value, memory_order_relaxed);
|
|
- ulong now = __ockl_steadyctr_u64();
|
|
+ ulong now = realtime();
|
|
if (now - expected >= SLAB_TICKS &&
|
|
ACE(&hp->salloc_time[k].value, &expected, now, memory_order_relaxed))
|
|
ret = (__global sdata_t *)0;
|
|
diff --git a/ockl/src/mtime.cl b/ockl/src/mtime.cl
|
|
index 43f4161..543aaa3 100644
|
|
--- a/ockl/src/mtime.cl
|
|
+++ b/ockl/src/mtime.cl
|
|
@@ -5,48 +5,17 @@
|
|
* License. See LICENSE.TXT for details.
|
|
*===------------------------------------------------------------------------*/
|
|
|
|
-#include "oclc.h"
|
|
#include "ockl.h"
|
|
|
|
-__attribute__((target("s-memrealtime"))) static ulong
|
|
-mem_realtime(void)
|
|
-{
|
|
- return __builtin_amdgcn_s_memrealtime();
|
|
-}
|
|
-
|
|
-__attribute__((target("gfx11-insts"))) static ulong
|
|
-msg_realtime(void)
|
|
-{
|
|
- return __builtin_amdgcn_s_sendmsg_rtnl(0x83);
|
|
-}
|
|
-
|
|
-// Deprecated
|
|
__attribute__((target("s-memtime-inst"))) ulong
|
|
OCKL_MANGLE_U64(memtime)(void)
|
|
{
|
|
return __builtin_amdgcn_s_memtime();
|
|
}
|
|
|
|
-// Deprecated
|
|
-ulong
|
|
+__attribute__((target("s-memrealtime"))) ulong
|
|
OCKL_MANGLE_U64(memrealtime)(void)
|
|
{
|
|
- return mem_realtime();
|
|
-}
|
|
-
|
|
-ulong
|
|
-OCKL_MANGLE_U64(cyclectr)(void)
|
|
-{
|
|
- return __builtin_readcyclecounter();
|
|
-}
|
|
-
|
|
-ulong
|
|
-OCKL_MANGLE_U64(steadyctr)(void)
|
|
-{
|
|
- if (__oclc_ISA_version >= 11000) {
|
|
- return msg_realtime();
|
|
- } else {
|
|
- return mem_realtime();
|
|
- }
|
|
+ return __builtin_amdgcn_s_memrealtime();
|
|
}
|
|
|
|
diff --git a/ockl/src/wait.cl b/ockl/src/wait.cl
|
|
index 49b038e..b249599 100644
|
|
--- a/ockl/src/wait.cl
|
|
+++ b/ockl/src/wait.cl
|
|
@@ -10,47 +10,47 @@
|
|
#include "ockl.h"
|
|
#include "oclc.h"
|
|
|
|
-void
|
|
+__attribute__((target("s-memrealtime"))) void
|
|
OCKL_MANGLE_T(rtcwait,u32)(uint ticks)
|
|
{
|
|
- ulong now = __ockl_steadyctr_u64();
|
|
+ ulong now = __builtin_amdgcn_s_memrealtime();
|
|
ulong end = now + __builtin_amdgcn_readfirstlane(ticks);
|
|
|
|
if (__oclc_ISA_version >= 9000) {
|
|
while (end > now + 1625) {
|
|
__builtin_amdgcn_s_sleep(127);
|
|
- now = __ockl_steadyctr_u64();
|
|
+ now = __builtin_amdgcn_s_memrealtime();
|
|
}
|
|
|
|
while (end > now + 806) {
|
|
__builtin_amdgcn_s_sleep(63);
|
|
- now = __ockl_steadyctr_u64();
|
|
+ now = __builtin_amdgcn_s_memrealtime();
|
|
}
|
|
|
|
while (end > now + 396) {
|
|
__builtin_amdgcn_s_sleep(31);
|
|
- now = __ockl_steadyctr_u64();
|
|
+ now = __builtin_amdgcn_s_memrealtime();
|
|
}
|
|
}
|
|
|
|
while (end > now + 192) {
|
|
__builtin_amdgcn_s_sleep(15);
|
|
- now = __ockl_steadyctr_u64();
|
|
+ now = __builtin_amdgcn_s_memrealtime();
|
|
}
|
|
|
|
while (end > now + 89) {
|
|
__builtin_amdgcn_s_sleep(7);
|
|
- now = __ockl_steadyctr_u64();
|
|
+ now = __builtin_amdgcn_s_memrealtime();
|
|
}
|
|
|
|
while (end > now + 38) {
|
|
__builtin_amdgcn_s_sleep(3);
|
|
- now = __ockl_steadyctr_u64();
|
|
+ now = __builtin_amdgcn_s_memrealtime();
|
|
}
|
|
|
|
while (end > now) {
|
|
__builtin_amdgcn_s_sleep(1);
|
|
- now = __ockl_steadyctr_u64();
|
|
+ now = __builtin_amdgcn_s_memrealtime();
|
|
}
|
|
}
|
|
|
|
--
|
|
2.34.1
|
|
|