Update block caching feature to use member in thread.

Abstract tls handling routine in thread to decouple thread from knowing
the GC's TLS specific data layout.
This commit is contained in:
Steven Schveighoffer 2024-11-15 21:12:42 -05:00 committed by Nicholas Wilson
parent 5f64a6baaf
commit 2c420d94af
9 changed files with 107 additions and 96 deletions

View file

@ -71,6 +71,7 @@ COPY=\
$(IMPDIR)\core\internal\elf\io.d \
\
$(IMPDIR)\core\internal\gc\bits.d \
$(IMPDIR)\core\internal\gc\blkcache.d \
$(IMPDIR)\core\internal\gc\os.d \
$(IMPDIR)\core\internal\gc\pooltable.d \
$(IMPDIR)\core\internal\gc\proxy.d \

View file

@ -528,6 +528,7 @@ DOCS=\
$(DOCDIR)\etc_linux_memoryerror.html \
\
$(DOCDIR)\core_internal_gc_bits.html \
$(DOCDIR)\core_internal_gc_blkcache.html \
$(DOCDIR)\core_internal_gc_os.html \
$(DOCDIR)\core_internal_gc_pooltable.html \
$(DOCDIR)\core_internal_gc_proxy.html \

View file

@ -74,6 +74,7 @@ SRCS=\
src\core\internal\elf\io.d \
\
src\core\internal\gc\bits.d \
src\core\internal\gc\blkcache.d \
src\core\internal\gc\os.d \
src\core\internal\gc\pooltable.d \
src\core\internal\gc\proxy.d \

View file

@ -6,6 +6,7 @@ Note: this used to be in rt.lifetime, but was moved here to allow GCs to take ov
module core.internal.gc.blkcache;
import core.memory;
import core.attribute;
alias BlkInfo = GC.BlkInfo;
alias BlkAttr = GC.BlkAttr;
@ -13,12 +14,12 @@ alias BlkAttr = GC.BlkAttr;
/**
cache for the lookup of the block info
*/
private enum N_CACHE_BLOCKS=8;
private enum N_CACHE_BLOCKS = 8;
// note this is TLS, so no need to sync.
BlkInfo *__blkcache_storage;
static if (N_CACHE_BLOCKS==1)
static if (N_CACHE_BLOCKS == 1)
{
version=single_cache;
}
@ -43,47 +44,72 @@ else
{
import core.stdc.stdlib;
import core.stdc.string;
import core.thread.threadbase;
auto tBase = ThreadBase.getThis();
if (tBase is null)
// if we don't have a thread object, this is a detached thread, and
// this won't be properly maintained by the GC.
return null;
// allocate the block cache for the first time
immutable size = BlkInfo.sizeof * N_CACHE_BLOCKS;
__blkcache_storage = cast(BlkInfo *)malloc(size);
memset(__blkcache_storage, 0, size);
// use C alloc, because this may become a detached thread, and the GC
// would then clean up the cache without zeroing this pointer.
__blkcache_storage = cast(BlkInfo*) calloc(size, 1);
tBase.tlsGCData = __blkcache_storage;
}
return __blkcache_storage;
}
// called when thread is exiting.
static ~this()
// free the allocation on thread exit.
@standalone static ~this()
{
// free the blkcache
if (__blkcache_storage)
{
import core.stdc.stdlib;
import core.thread.threadbase;
auto tBase = ThreadBase.getThis();
if (tBase !is null)
tBase.tlsGCData = null;
free(__blkcache_storage);
__blkcache_storage = null;
}
}
// we expect this to be called with the lock in place
void processGCMarks(BlkInfo* cache, scope rt.tlsgc.IsMarkedDg isMarked) nothrow
/**
* Indicates whether an address has been marked by the GC.
*/
enum IsMarked : int
{
no, /// Address is not marked.
yes, /// Address is marked.
unknown, /// Address is not managed by the GC.
}
alias IsMarkedDg = IsMarked delegate(void* addr) nothrow; /// The isMarked callback function.
// we expect this to be called with the lock in place
void processGCMarks(void* data, scope IsMarkedDg isMarked) nothrow
{
if (!data)
return;
auto cache = cast(BlkInfo*) data;
// called after the mark routine to eliminate block cache data when it
// might be ready to sweep
debug(PRINTF) printf("processing GC Marks, %x\n", cache);
if (cache)
debug(PRINTF) foreach (i; 0 .. N_CACHE_BLOCKS)
{
debug(PRINTF) foreach (i; 0 .. N_CACHE_BLOCKS)
printf("cache entry %d has base ptr %x\tsize %d\tflags %x\n", i, cache[i].base, cache[i].size, cache[i].attr);
}
auto cache_end = cache + N_CACHE_BLOCKS;
for (;cache < cache_end; ++cache)
{
if (cache.base != null && isMarked(cache.base) == IsMarked.no)
{
printf("cache entry %d has base ptr %x\tsize %d\tflags %x\n", i, cache[i].base, cache[i].size, cache[i].attr);
}
auto cache_end = cache + N_CACHE_BLOCKS;
for (;cache < cache_end; ++cache)
{
if (cache.base != null && !isMarked(cache.base))
{
debug(PRINTF) printf("clearing cache entry at %x\n", cache.base);
cache.base = null; // clear that data.
}
debug(PRINTF) printf("clearing cache entry at %x\n", cache.base);
cache.base = null; // clear that data.
}
}
}
@ -100,18 +126,21 @@ unittest
Get the cached block info of an interior pointer. Returns null if the
interior pointer's block is not cached.
NOTE: The base ptr in this struct can be cleared asynchronously by the GC,
NOTE: The following note was not valid, but is retained for historical
purposes. The data cannot be cleared because the stack contains a
reference to the affected block (e.g. through `interior`). Therefore,
the element will not be collected, and the data will remain valid.
ORIGINAL: The base ptr in this struct can be cleared asynchronously by the GC,
so any use of the returned BlkInfo should copy it and then check the
base ptr of the copy before actually using it.
TODO: Change this function so the caller doesn't have to be aware of this
issue. Either return by value and expect the caller to always check
the base ptr as an indication of whether the struct is valid, or set
the BlkInfo as a side-effect and return a bool to indicate success.
*/
BlkInfo *__getBlkInfo(void *interior) nothrow
{
BlkInfo *ptr = __blkcache;
if (ptr is null)
// if for some reason we don't have a cache, return null.
return null;
version (single_cache)
{
if (ptr.base && ptr.base <= interior && (interior - ptr.base) < ptr.size)
@ -148,9 +177,15 @@ BlkInfo *__getBlkInfo(void *interior) nothrow
void __insertBlkInfoCache(BlkInfo bi, BlkInfo *curpos) nothrow
{
auto cache = __blkcache;
if (cache is null)
// no cache to use.
return;
version (single_cache)
{
*__blkcache = bi;
*cache = bi;
return;
}
else
{
@ -165,7 +200,7 @@ void __insertBlkInfoCache(BlkInfo bi, BlkInfo *curpos) nothrow
// cache block info. This means that the ordering of the cache
// doesn't mean anything. Certain patterns of allocation may
// render the cache near-useless.
__blkcache[__nextBlkIdx] = bi;
cache[__nextBlkIdx] = bi;
__nextBlkIdx = (__nextBlkIdx+1) & (N_CACHE_BLOCKS - 1);
}
}
@ -174,7 +209,6 @@ void __insertBlkInfoCache(BlkInfo bi, BlkInfo *curpos) nothrow
// strategy: if the block currently is in the cache, move the
// current block index to the a random element and evict that
// element.
auto cache = __blkcache;
if (!curpos)
{
__nextBlkIdx = (__nextRndNum = 1664525 * __nextRndNum + 1013904223) & (N_CACHE_BLOCKS - 1);
@ -193,7 +227,6 @@ void __insertBlkInfoCache(BlkInfo bi, BlkInfo *curpos) nothrow
// the head element. Otherwise, move the head element up by one,
// and insert it there.
//
auto cache = __blkcache;
if (!curpos)
{
__nextBlkIdx = (__nextBlkIdx+1) & (N_CACHE_BLOCKS - 1);

View file

@ -41,6 +41,7 @@ import core.gc.gcinterface;
import core.internal.container.treap;
import core.internal.spinlock;
import core.internal.gc.pooltable;
import core.internal.gc.blkcache;
import cstdlib = core.stdc.stdlib : calloc, free, malloc, realloc;
import core.stdc.string : memcpy, memset, memmove;
@ -2873,7 +2874,7 @@ struct Gcx
markProcPid = 0;
// process GC marks then sweep
thread_suspendAll();
thread_processGCMarks(&isMarked);
thread_processTLSGCData(&clearBlkCacheData);
thread_resumeAll();
break;
case ChildStatus.running:
@ -3108,7 +3109,7 @@ Lmark:
markAll!(markConservative!false)();
}
thread_processGCMarks(&isMarked);
thread_processTLSGCData(&clearBlkCacheData);
thread_resumeAll();
isFinal = false;
}
@ -3161,13 +3162,27 @@ Lmark:
return freedPages;
}
/**
* Clear the block cache data if it exists, given the data which is the
* block info cache.
*
* Warning! This should only be called while the world is stopped inside
* the fullcollect function after all live objects have been marked, but
* before sweeping.
*/
void *clearBlkCacheData(void* data) scope nothrow
{
processGCMarks(data, &isMarked);
return data;
}
/**
* Returns true if the addr lies within a marked block.
*
* Warning! This should only be called while the world is stopped inside
* the fullcollect function after all live objects have been marked, but before sweeping.
*/
int isMarked(void *addr) scope nothrow
IsMarked isMarked(void *addr) scope nothrow
{
// first, we find the Pool this block is in, then check to see if the
// mark bit is clear.

View file

@ -1171,7 +1171,7 @@ private extern (D) ThreadBase attachThread(ThreadBase _thisThread) @nogc nothrow
atomicStore!(MemoryOrder.raw)(thisThread.toThread.m_isRunning, true);
}
thisThread.m_isDaemon = true;
thisThread.tlsGCdataInit();
thisThread.tlsRTdataInit();
Thread.setThis( thisThread );
version (Darwin)
@ -1246,7 +1246,7 @@ version (Windows)
if ( addr == GetCurrentThreadId() )
{
thisThread.m_hndl = GetCurrentThreadHandle();
thisThread.tlsGCdataInit();
thisThread.tlsRTdataInit();
Thread.setThis( thisThread );
}
else
@ -1254,7 +1254,7 @@ version (Windows)
thisThread.m_hndl = OpenThreadHandle( addr );
impersonate_thread(addr,
{
thisThread.tlsGCdataInit();
thisThread.tlsRTdataInit();
Thread.setThis( thisThread );
});
}

View file

@ -29,9 +29,6 @@ private
alias ScanDg = void delegate(void* pstart, void* pend) nothrow;
alias rt_tlsgc_scan =
externDFunc!("rt.tlsgc.scan", void function(void*, scope ScanDg) nothrow);
alias rt_tlsgc_processGCMarks =
externDFunc!("rt.tlsgc.processGCMarks", void function(void*, scope IsMarkedDg) nothrow);
}
@ -128,9 +125,14 @@ class ThreadBase
return (no_context || not_registered);
}
package void tlsGCdataInit() nothrow @nogc
ref void* tlsGCData() nothrow @nogc
{
m_tlsgcdata = rt_tlsgc_init();
return m_tlsgcdata;
}
package void tlsRTdataInit() nothrow @nogc
{
m_tlsrtdata = rt_tlsgc_init();
}
package void initDataStorage() nothrow
@ -139,18 +141,18 @@ class ThreadBase
m_main.bstack = getStackBottom();
m_main.tstack = m_main.bstack;
tlsGCdataInit();
tlsRTdataInit();
}
package void destroyDataStorage() nothrow @nogc
{
rt_tlsgc_destroy(m_tlsgcdata);
m_tlsgcdata = null;
rt_tlsgc_destroy(m_tlsrtdata);
m_tlsrtdata = null;
}
package void destroyDataStorageIfAvail() nothrow @nogc
{
if (m_tlsgcdata)
if (m_tlsrtdata)
destroyDataStorage();
}
@ -473,6 +475,7 @@ package(core.thread):
StackContext* m_curr;
bool m_lock;
private void* m_tlsgcdata;
private void* m_tlsrtdata;
///////////////////////////////////////////////////////////////////////////
// Thread Context and GC Scanning Support
@ -1108,8 +1111,8 @@ private void scanAllTypeImpl(scope ScanAllThreadsTypeFn scan, void* curStackTop)
scanWindowsOnly(scan, t);
}
if (t.m_tlsgcdata !is null)
rt_tlsgc_scan(t.m_tlsgcdata, (p1, p2) => scan(ScanType.tls, p1, p2));
if (t.m_tlsrtdata !is null)
rt_tlsgc_scan(t.m_tlsrtdata, (p1, p2) => scan(ScanType.tls, p1, p2));
}
}
@ -1159,43 +1162,15 @@ package void onThreadError(string msg) nothrow @nogc
}
/**
* Indicates whether an address has been marked by the GC.
*/
enum IsMarked : int
{
no, /// Address is not marked.
yes, /// Address is marked.
unknown, /// Address is not managed by the GC.
}
// GC-specific processing of TLSGC data.
alias ProcessTLSGCDataDg = void* delegate(void* data) nothrow;
alias IsMarkedDg = int delegate(void* addr) nothrow; /// The isMarked callback function.
/**
* This routine allows the runtime to process any special per-thread handling
* for the GC. This is needed for taking into account any memory that is
* referenced by non-scanned pointers but is about to be freed. That currently
* means the array append cache.
*
* Params:
* isMarked = The function used to check if $(D addr) is marked.
*
* In:
* This routine must be called just prior to resuming all threads.
*/
extern(C) void thread_processGCMarks(scope IsMarkedDg isMarked) nothrow
void thread_processTLSGCData(ProcessTLSGCDataDg dg) nothrow
{
for (ThreadBase t = ThreadBase.sm_tbeg; t; t = t.next)
{
/* Can be null if collection was triggered between adding a
* thread and calling rt_tlsgc_init.
*/
if (t.m_tlsgcdata !is null)
rt_tlsgc_processGCMarks(t.m_tlsgcdata, isMarked);
}
t.m_tlsgcdata = dg(t.m_tlsgcdata);
}
/**
* Returns the stack top of the currently active stack within the calling
* thread.

View file

@ -15,6 +15,7 @@ module rt.lifetime;
import core.attribute : weak;
import core.internal.array.utils : __arrayStart, __arrayClearPad;
import core.memory;
import core.internal.gc.blkcache;
debug(PRINTF) import core.stdc.stdio;
static import rt.tlsgc;

View file

@ -23,7 +23,6 @@ static import rt.lifetime, rt.sections;
struct Data
{
typeof(rt.sections.initTLSRanges()) tlsRanges;
rt.lifetime.BlkInfo** blockInfoCache;
}
/**
@ -39,8 +38,6 @@ void* init() nothrow @nogc
// do module specific initialization
data.tlsRanges = rt.sections.initTLSRanges();
data.blockInfoCache = &rt.lifetime.__blkcache_storage;
return data;
}
@ -67,16 +64,3 @@ void scan(void* data, scope ScanDg dg) nothrow
// do module specific marking
rt.sections.scanTLSRanges((cast(Data*)data).tlsRanges, dg);
}
alias int delegate(void* addr) nothrow IsMarkedDg;
/**
* GC sweep hook, called FOR each thread. Can be used to free
* additional thread local memory or associated data structures. Note
* that only memory allocated from the GC can have marks.
*/
void processGCMarks(void* data, scope IsMarkedDg dg) nothrow
{
// do module specific sweeping
rt.lifetime.processGCMarks(*(cast(Data*)data).blockInfoCache, dg);
}