mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 20:51:29 +11:00
22507dea0e
coarse.comp knows the maximum stack depth, and can pre-allocate scratch space for kernel4.comp. Kernel4 no longer contains allocations nor control barriers. The invocation local blend stack is gone as well; it didn't seem to make any difference in performance to always use global memory for pushing and popping. Signed-off-by: Elias Naur <mail@eliasnaur.com>
143 lines
3.8 KiB
C
143 lines
3.8 KiB
C
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
|
|
|
layout(set = 0, binding = 0) buffer Memory {
|
|
// offset into memory of the next allocation, initialized by the user.
|
|
uint mem_offset;
|
|
// mem_error tracks the status of memory accesses, initialized to NO_ERROR
|
|
// by the user. ERR_MALLOC_FAILED is reported for insufficient memory.
|
|
// If MEM_DEBUG is defined the following errors are reported:
|
|
// - ERR_OUT_OF_BOUNDS is reported for out of bounds writes.
|
|
// - ERR_UNALIGNED_ACCESS for memory access not aligned to 32-bit words.
|
|
uint mem_error;
|
|
uint[] memory;
|
|
};
|
|
|
|
// Uncomment this line to add the size field to Alloc and enable memory checks.
|
|
// Note that the Config struct in setup.h grows size fields as well.
|
|
//#define MEM_DEBUG
|
|
|
|
#define NO_ERROR 0
|
|
#define ERR_MALLOC_FAILED 1
|
|
#define ERR_OUT_OF_BOUNDS 2
|
|
#define ERR_UNALIGNED_ACCESS 3
|
|
|
|
#ifdef MEM_DEBUG
|
|
#define Alloc_size 16
|
|
#else
|
|
#define Alloc_size 8
|
|
#endif
|
|
|
|
// Alloc represents a memory allocation.
|
|
struct Alloc {
|
|
// offset in bytes into memory.
|
|
uint offset;
|
|
#ifdef MEM_DEBUG
|
|
// size in bytes of the allocation.
|
|
uint size;
|
|
#endif
|
|
};
|
|
|
|
struct MallocResult {
|
|
Alloc alloc;
|
|
// failed is true if the allocation overflowed memory.
|
|
bool failed;
|
|
};
|
|
|
|
// new_alloc synthesizes an Alloc from an offset and size.
|
|
Alloc new_alloc(uint offset, uint size) {
|
|
Alloc a;
|
|
a.offset = offset;
|
|
#ifdef MEM_DEBUG
|
|
a.size = size;
|
|
#endif
|
|
return a;
|
|
}
|
|
|
|
// malloc allocates size bytes of memory.
|
|
MallocResult malloc(uint size) {
|
|
MallocResult r;
|
|
r.failed = false;
|
|
uint offset = atomicAdd(mem_offset, size);
|
|
r.alloc = new_alloc(offset, size);
|
|
if (offset + size > memory.length() * 4) {
|
|
r.failed = true;
|
|
atomicMax(mem_error, ERR_MALLOC_FAILED);
|
|
return r;
|
|
}
|
|
#ifdef MEM_DEBUG
|
|
if ((size & 3) != 0) {
|
|
r.failed = true;
|
|
atomicMax(mem_error, ERR_UNALIGNED_ACCESS);
|
|
return r;
|
|
}
|
|
#endif
|
|
return r;
|
|
}
|
|
|
|
// touch_mem checks whether access to the memory word at offset is valid.
|
|
// If MEM_DEBUG is defined, touch_mem returns false if offset is out of bounds.
|
|
// Offset is in words.
|
|
bool touch_mem(Alloc alloc, uint offset) {
|
|
#ifdef MEM_DEBUG
|
|
if (offset < alloc.offset/4 || offset >= (alloc.offset + alloc.size)/4) {
|
|
atomicMax(mem_error, ERR_OUT_OF_BOUNDS);
|
|
return false;
|
|
}
|
|
#endif
|
|
return true;
|
|
}
|
|
|
|
// write_mem writes val to memory at offset.
|
|
// Offset is in words.
|
|
void write_mem(Alloc alloc, uint offset, uint val) {
|
|
if (!touch_mem(alloc, offset)) {
|
|
return;
|
|
}
|
|
memory[offset] = val;
|
|
}
|
|
|
|
// read_mem reads the value from memory at offset.
|
|
// Offset is in words.
|
|
uint read_mem(Alloc alloc, uint offset) {
|
|
if (!touch_mem(alloc, offset)) {
|
|
return 0;
|
|
}
|
|
uint v = memory[offset];
|
|
return v;
|
|
}
|
|
|
|
// slice_mem returns a sub-allocation inside another. Offset and size are in
|
|
// bytes, relative to a.offset.
|
|
Alloc slice_mem(Alloc a, uint offset, uint size) {
|
|
#ifdef MEM_DEBUG
|
|
if ((offset & 3) != 0 || (size & 3) != 0) {
|
|
atomicMax(mem_error, ERR_UNALIGNED_ACCESS);
|
|
return Alloc(0, 0);
|
|
}
|
|
if (offset + size > a.size) {
|
|
// slice_mem is sometimes used for slices outside bounds,
|
|
// but never written.
|
|
return Alloc(0, 0);
|
|
}
|
|
#endif
|
|
return new_alloc(a.offset + offset, size);
|
|
}
|
|
|
|
// alloc_write writes alloc to memory at offset bytes.
|
|
void alloc_write(Alloc a, uint offset, Alloc alloc) {
|
|
write_mem(a, offset >> 2, alloc.offset);
|
|
#ifdef MEM_DEBUG
|
|
write_mem(a, (offset >> 2) + 1, alloc.size);
|
|
#endif
|
|
}
|
|
|
|
// alloc_read reads an Alloc from memory at offset bytes.
|
|
Alloc alloc_read(Alloc a, uint offset) {
|
|
Alloc alloc;
|
|
alloc.offset = read_mem(a, offset >> 2);
|
|
#ifdef MEM_DEBUG
|
|
alloc.size = read_mem(a, (offset >> 2) + 1);
|
|
#endif
|
|
return alloc;
|
|
}
|