Commit 9fa4d9ea authored by David Reid

Move ma_slot_allocator into the main library.

parent 71185464
@@ -5939,6 +5939,63 @@ Helper for converting gain in decibels to a linear factor.
MA_API float ma_gain_db_to_factor(float gain);
/*
Slot Allocator
--------------
The idea of the slot allocator is for it to be used in conjunction with a fixed-sized buffer. You use the slot allocator to allocate an index that can be used
as the insertion point for an object.
Slots are reference counted to help mitigate the ABA problem in the lock-free queue we use for tracking jobs.
The slot index is stored in the low 32 bits. The reference counter is stored in the high 32 bits:
+-----------------+-----------------+
| 32 Bits | 32 Bits |
+-----------------+-----------------+
| Reference Count | Slot Index |
+-----------------+-----------------+
*/
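/*
Illustration (not part of this commit): unpacking a 64-bit slot value produced by the encoding
above. The helper names below are hypothetical and exist only for this sketch.
*/
static MA_INLINE ma_uint32 ma_slot_value_to_index_example(ma_uint64 slot)
{
    return (ma_uint32)(slot & 0xFFFFFFFF);  /* Low 32 bits hold the slot index. */
}
static MA_INLINE ma_uint32 ma_slot_value_to_refcount_example(ma_uint64 slot)
{
    return (ma_uint32)(slot >> 32);         /* High 32 bits hold the reference count. */
}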
typedef struct
{
ma_uint32 capacity; /* The number of slots to make available. */
} ma_slot_allocator_config;
MA_API ma_slot_allocator_config ma_slot_allocator_config_init(ma_uint32 capacity);
typedef struct
{
MA_ATOMIC ma_uint32 bitfield; /* Must be used atomically because the allocation and freeing routines need to make copies of this which must never be optimized away by the compiler. */
} ma_slot_allocator_group;
typedef struct
{
ma_slot_allocator_group* pGroups; /* Slots are grouped in chunks of 32. */
ma_uint32* pSlots; /* 32 bits for reference counting for ABA mitigation. */
ma_uint32 count; /* Allocation count. */
ma_uint32 capacity;
/* Memory management. */
ma_bool32 _ownsHeap;
void* _pHeap;
} ma_slot_allocator;
MA_API ma_result ma_slot_allocator_get_heap_size(const ma_slot_allocator_config* pConfig, size_t* pHeapSizeInBytes);
MA_API ma_result ma_slot_allocator_init_preallocated(const ma_slot_allocator_config* pConfig, void* pHeap, ma_slot_allocator* pAllocator);
MA_API ma_result ma_slot_allocator_init(const ma_slot_allocator_config* pConfig, const ma_allocation_callbacks* pAllocationCallbacks, ma_slot_allocator* pAllocator);
MA_API void ma_slot_allocator_uninit(ma_slot_allocator* pAllocator, const ma_allocation_callbacks* pAllocationCallbacks);
MA_API ma_result ma_slot_allocator_alloc(ma_slot_allocator* pAllocator, ma_uint64* pSlot);
MA_API ma_result ma_slot_allocator_free(ma_slot_allocator* pAllocator, ma_uint64 slot);
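/*
Usage sketch (not part of this commit): initialize with the default heap path, allocate a slot,
use the low 32 bits as an index into the caller's fixed-sized buffer, then free. Error handling
is abbreviated and the function name is hypothetical.
*/
static void slot_allocator_usage_example(void)
{
    ma_slot_allocator_config config = ma_slot_allocator_config_init(1024);  /* 1024 slots available. */
    ma_slot_allocator allocator;
    ma_uint64 slot;
    if (ma_slot_allocator_init(&config, NULL, &allocator) != MA_SUCCESS) {
        return; /* Failed to initialize. */
    }
    if (ma_slot_allocator_alloc(&allocator, &slot) == MA_SUCCESS) {
        ma_uint32 slotIndex = (ma_uint32)(slot & 0xFFFFFFFF); /* Insertion point for an object. */
        /* ... place an object at slotIndex in the caller's buffer ... */
        ma_slot_allocator_free(&allocator, slot);
    }
    ma_slot_allocator_uninit(&allocator, NULL);
}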
/**************************************************************************************************
Data Source
**************************************************************************************************/
typedef void ma_data_source;
typedef struct
@@ -8815,6 +8872,21 @@ static MA_INLINE ma_uint32 ma_gcf_u32(ma_uint32 a, ma_uint32 b)
}
static ma_uint32 ma_ffs_32(ma_uint32 x)
{
ma_uint32 i;
/* A naive implementation just to get things working for now. Will optimize this later. */
for (i = 0; i < 32; i += 1) {
if ((x & (1 << i)) != 0) {
return i;
}
}
return i;
}
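/*
A possible later optimization for ma_ffs_32() (sketch only, not part of this commit): use a
compiler intrinsic where available. __builtin_ctz() is undefined for 0, so the zero case must
be handled explicitly to match the loop above, which returns 32 when no bit is set.
*/
#if defined(__GNUC__) || defined(__clang__)
static ma_uint32 ma_ffs_32_intrinsic_example(ma_uint32 x)
{
    if (x == 0) {
        return 32; /* Match the naive version: no set bit. */
    }
    return (ma_uint32)__builtin_ctz(x); /* Count of trailing zeros == index of the lowest set bit. */
}
#endif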
/*
Random Number Generation
@@ -34707,6 +34779,259 @@ MA_API float ma_gain_db_to_factor(float gain)
}
MA_API ma_slot_allocator_config ma_slot_allocator_config_init(ma_uint32 capacity)
{
ma_slot_allocator_config config;
MA_ZERO_OBJECT(&config);
config.capacity = capacity;
return config;
}
static MA_INLINE ma_uint32 ma_slot_allocator_calculate_group_capacity(ma_uint32 slotCapacity)
{
ma_uint32 cap = slotCapacity / 32;
if ((slotCapacity % 32) != 0) {
cap += 1;
}
return cap;
}
static MA_INLINE ma_uint32 ma_slot_allocator_group_capacity(const ma_slot_allocator* pAllocator)
{
return ma_slot_allocator_calculate_group_capacity(pAllocator->capacity);
}
typedef struct
{
size_t sizeInBytes;
size_t groupsOffset;
size_t slotsOffset;
} ma_slot_allocator_heap_layout;
static ma_result ma_slot_allocator_get_heap_layout(const ma_slot_allocator_config* pConfig, ma_slot_allocator_heap_layout* pHeapLayout)
{
MA_ASSERT(pHeapLayout != NULL);
MA_ZERO_OBJECT(pHeapLayout);
if (pConfig == NULL) {
return MA_INVALID_ARGS;
}
if (pConfig->capacity == 0) {
return MA_INVALID_ARGS;
}
pHeapLayout->sizeInBytes = 0;
/* Groups. */
pHeapLayout->groupsOffset = pHeapLayout->sizeInBytes;
pHeapLayout->sizeInBytes += ma_align_64(ma_slot_allocator_calculate_group_capacity(pConfig->capacity) * sizeof(ma_slot_allocator_group));
/* Slots. */
pHeapLayout->slotsOffset = pHeapLayout->sizeInBytes;
pHeapLayout->sizeInBytes += ma_align_64(pConfig->capacity * sizeof(ma_uint32));
return MA_SUCCESS;
}
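/*
Worked example (not part of this commit), assuming sizeof(ma_slot_allocator_group) is 4 bytes
and ma_align_64() rounds up to a multiple of 64. For capacity = 100:

    groups: ceil(100/32) = 4 groups -> 4 * 4 =  16 bytes -> aligned to  64
    slots:  100 * sizeof(ma_uint32) =          400 bytes -> aligned to 448

    total heap size = 64 + 448 = 512 bytes
*/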
MA_API ma_result ma_slot_allocator_get_heap_size(const ma_slot_allocator_config* pConfig, size_t* pHeapSizeInBytes)
{
ma_result result;
ma_slot_allocator_heap_layout layout;
if (pHeapSizeInBytes == NULL) {
return MA_INVALID_ARGS;
}
*pHeapSizeInBytes = 0;
result = ma_slot_allocator_get_heap_layout(pConfig, &layout);
if (result != MA_SUCCESS) {
return result;
}
*pHeapSizeInBytes = layout.sizeInBytes;
return result;
}
MA_API ma_result ma_slot_allocator_init_preallocated(const ma_slot_allocator_config* pConfig, void* pHeap, ma_slot_allocator* pAllocator)
{
ma_result result;
ma_slot_allocator_heap_layout heapLayout;
if (pAllocator == NULL) {
return MA_INVALID_ARGS;
}
MA_ZERO_OBJECT(pAllocator);
if (pHeap == NULL) {
return MA_INVALID_ARGS;
}
result = ma_slot_allocator_get_heap_layout(pConfig, &heapLayout);
if (result != MA_SUCCESS) {
return result;
}
pAllocator->_pHeap = pHeap;
pAllocator->pGroups = (ma_slot_allocator_group*)ma_offset_ptr(pHeap, heapLayout.groupsOffset);
pAllocator->pSlots = (ma_uint32*)ma_offset_ptr(pHeap, heapLayout.slotsOffset);
pAllocator->capacity = pConfig->capacity;
return MA_SUCCESS;
}
MA_API ma_result ma_slot_allocator_init(const ma_slot_allocator_config* pConfig, const ma_allocation_callbacks* pAllocationCallbacks, ma_slot_allocator* pAllocator)
{
ma_result result;
size_t heapSizeInBytes;
void* pHeap;
result = ma_slot_allocator_get_heap_size(pConfig, &heapSizeInBytes);
if (result != MA_SUCCESS) {
return result; /* Failed to retrieve the size of the heap allocation. */
}
if (heapSizeInBytes > 0) {
pHeap = ma_malloc(heapSizeInBytes, pAllocationCallbacks);
if (pHeap == NULL) {
return MA_OUT_OF_MEMORY;
}
} else {
pHeap = NULL;
}
result = ma_slot_allocator_init_preallocated(pConfig, pHeap, pAllocator);
if (result != MA_SUCCESS) {
ma_free(pHeap, pAllocationCallbacks);
return result;
}
pAllocator->_ownsHeap = MA_TRUE;
return MA_SUCCESS;
}
MA_API void ma_slot_allocator_uninit(ma_slot_allocator* pAllocator, const ma_allocation_callbacks* pAllocationCallbacks)
{
if (pAllocator == NULL) {
return;
}
if (pAllocator->_ownsHeap) {
ma_free(pAllocator->_pHeap, pAllocationCallbacks);
}
}
MA_API ma_result ma_slot_allocator_alloc(ma_slot_allocator* pAllocator, ma_uint64* pSlot)
{
ma_uint32 iAttempt;
const ma_uint32 maxAttempts = 2; /* The number of iterations to perform until returning MA_OUT_OF_MEMORY if no slots can be found. */
if (pAllocator == NULL || pSlot == NULL) {
return MA_INVALID_ARGS;
}
for (iAttempt = 0; iAttempt < maxAttempts; iAttempt += 1) {
/* We need to acquire a suitable bitfield first. This is a bitfield that has an available slot within it. */
ma_uint32 iGroup;
for (iGroup = 0; iGroup < ma_slot_allocator_group_capacity(pAllocator); iGroup += 1) {
/* CAS */
for (;;) {
ma_uint32 oldBitfield;
ma_uint32 newBitfield;
ma_uint32 bitOffset;
oldBitfield = c89atomic_load_32(&pAllocator->pGroups[iGroup].bitfield); /* <-- This copy must happen. The compiler must not optimize this away. */
/* Fast check to see if anything is available. */
if (oldBitfield == 0xFFFFFFFF) {
break; /* No available bits in this bitfield. */
}
bitOffset = ma_ffs_32(~oldBitfield);
MA_ASSERT(bitOffset < 32);
newBitfield = oldBitfield | (1 << bitOffset);
if (c89atomic_compare_and_swap_32(&pAllocator->pGroups[iGroup].bitfield, oldBitfield, newBitfield) == oldBitfield) {
ma_uint32 slotIndex;
/* Increment the counter as soon as possible to have other threads report out-of-memory sooner than later. */
c89atomic_fetch_add_32(&pAllocator->count, 1);
/* The slot index is required for constructing the output value. */
slotIndex = (iGroup << 5) + bitOffset; /* iGroup << 5 = iGroup * 32 */
/* Increment the reference count before constructing the output value. */
pAllocator->pSlots[slotIndex] += 1;
/* Construct the output value. */
*pSlot = ((ma_uint64)pAllocator->pSlots[slotIndex] << 32 | slotIndex);
return MA_SUCCESS;
}
}
}
/* We weren't able to find a slot. If it's because we've reached our capacity we need to return MA_OUT_OF_MEMORY. Otherwise we need to do another iteration and try again. */
if (pAllocator->count < pAllocator->capacity) {
ma_yield();
} else {
return MA_OUT_OF_MEMORY;
}
}
/* We couldn't find a slot within the maximum number of attempts. */
return MA_OUT_OF_MEMORY;
}
MA_API ma_result ma_slot_allocator_free(ma_slot_allocator* pAllocator, ma_uint64 slot)
{
ma_uint32 iGroup;
ma_uint32 iBit;
if (pAllocator == NULL) {
return MA_INVALID_ARGS;
}
iGroup = (slot & 0xFFFFFFFF) >> 5; /* slot / 32 */
iBit = (slot & 0xFFFFFFFF) & 31; /* slot % 32 */
if (iGroup >= ma_slot_allocator_group_capacity(pAllocator)) {
return MA_INVALID_ARGS;
}
MA_ASSERT(iBit < 32); /* This must be true due to the logic we used to actually calculate it. */
while (pAllocator->count > 0) {
/* CAS */
ma_uint32 oldBitfield;
ma_uint32 newBitfield;
oldBitfield = c89atomic_load_32(&pAllocator->pGroups[iGroup].bitfield); /* <-- This copy must happen. The compiler must not optimize this away. */
newBitfield = oldBitfield & ~(1 << iBit);
if (c89atomic_compare_and_swap_32(&pAllocator->pGroups[iGroup].bitfield, oldBitfield, newBitfield) == oldBitfield) {
c89atomic_fetch_sub_32(&pAllocator->count, 1);
return MA_SUCCESS;
}
}
/* Getting here means there are no allocations available for freeing. */
return MA_INVALID_OPERATION;
}
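/*
ABA illustration (hypothetical values, not part of this commit): two slot values with the same
index but different reference counts compare unequal as 64-bit integers, so a stale
compare-and-swap in the job queue fails instead of succeeding incorrectly.
*/
static void slot_aba_example(void)
{
    ma_uint64 v1 = ((ma_uint64)5 << 32) | 7;  /* Reference count 5, index 7 (first allocation).     */
    ma_uint64 v2 = ((ma_uint64)6 << 32) | 7;  /* Reference count 6, index 7 (after free + realloc). */
    MA_ASSERT((v1 & 0xFFFFFFFF) == (v2 & 0xFFFFFFFF)); /* Same slot index...             */
    MA_ASSERT(v1 != v2);                               /* ...but distinguishable values. */
}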
/**************************************************************************************************************************************************************
Format Conversion
@@ -1100,57 +1100,6 @@ typedef struct ma_resource_manager_data_source ma_resource_manager_data_source
#define MA_RESOURCE_MANAGER_JOB_CUSTOM 0x00000100 /* Number your custom job codes as (MA_RESOURCE_MANAGER_JOB_CUSTOM + 0), (MA_RESOURCE_MANAGER_JOB_CUSTOM + 1), etc. */
/*
The idea of the slot allocator is for it to be used in conjunction with a fixed-sized buffer. You use the slot allocator to allocate an index that can be used
as the insertion point for an object.
Slots are reference counted to help mitigate the ABA problem in the lock-free queue we use for tracking jobs.
The slot index is stored in the low 32 bits. The reference counter is stored in the high 32 bits:
+-----------------+-----------------+
| 32 Bits | 32 Bits |
+-----------------+-----------------+
| Reference Count | Slot Index |
+-----------------+-----------------+
*/
typedef struct
{
ma_uint32 capacity; /* The number of slots to make available. */
} ma_slot_allocator_config;
MA_API ma_slot_allocator_config ma_slot_allocator_config_init(ma_uint32 capacity);
typedef struct
{
MA_ATOMIC ma_uint32 bitfield; /* Must be used atomically because the allocation and freeing routines need to make copies of this which must never be optimized away by the compiler. */
} ma_slot_allocator_group;
typedef struct
{
ma_slot_allocator_group* pGroups; /* Slots are grouped in chunks of 32. */
ma_uint32* pSlots; /* 32 bits for reference counting for ABA mitigation. */
ma_uint32 count; /* Allocation count. */
ma_uint32 capacity;
/* Memory management. */
ma_bool32 _ownsHeap;
void* _pHeap;
} ma_slot_allocator;
MA_API ma_result ma_slot_allocator_get_heap_size(const ma_slot_allocator_config* pConfig, size_t* pHeapSizeInBytes);
MA_API ma_result ma_slot_allocator_init_preallocated(const ma_slot_allocator_config* pConfig, void* pHeap, ma_slot_allocator* pAllocator);
MA_API ma_result ma_slot_allocator_init(const ma_slot_allocator_config* pConfig, const ma_allocation_callbacks* pAllocationCallbacks, ma_slot_allocator* pAllocator);
MA_API void ma_slot_allocator_uninit(ma_slot_allocator* pAllocator, const ma_allocation_callbacks* pAllocationCallbacks);
MA_API ma_result ma_slot_allocator_alloc(ma_slot_allocator* pAllocator, ma_uint64* pSlot);
MA_API ma_result ma_slot_allocator_free(ma_slot_allocator* pAllocator, ma_uint64 slot);
/*
Notification callback for asynchronous operations.
@@ -4651,21 +4600,6 @@ MA_API void ma_splitter_node_uninit(ma_splitter_node* pSplitterNode, const ma_allocation_callbacks* pAllocationCallbacks)
#endif
static ma_uint32 ma_ffs_32(ma_uint32 x)
{
ma_uint32 i;
/* A naive implementation just to get things working for now. Will optimize this later. */
for (i = 0; i < 32; i += 1) {
if ((x & (1 << i)) != 0) {
return i;
}
}
return i;
}
static void ma_clip_samples_u8(ma_uint8* pDst, const ma_int16* pSrc, ma_uint64 count)
{
ma_uint64 iSample;
@@ -4849,258 +4783,6 @@ static void ma_volume_and_clip_pcm_frames(void* pDst, const void* pSrc, ma_uint64
MA_API ma_slot_allocator_config ma_slot_allocator_config_init(ma_uint32 capacity)
{
ma_slot_allocator_config config;
MA_ZERO_OBJECT(&config);
config.capacity = capacity;
return config;
}
static MA_INLINE ma_uint32 ma_slot_allocator_calculate_group_capacity(ma_uint32 slotCapacity)
{
ma_uint32 cap = slotCapacity / 32;
if ((slotCapacity % 32) != 0) {
cap += 1;
}
return cap;
}
static MA_INLINE ma_uint32 ma_slot_allocator_group_capacity(const ma_slot_allocator* pAllocator)
{
return ma_slot_allocator_calculate_group_capacity(pAllocator->capacity);
}
typedef struct
{
size_t sizeInBytes;
size_t groupsOffset;
size_t slotsOffset;
} ma_slot_allocator_heap_layout;
static ma_result ma_slot_allocator_get_heap_layout(const ma_slot_allocator_config* pConfig, ma_slot_allocator_heap_layout* pHeapLayout)
{
MA_ASSERT(pHeapLayout != NULL);
MA_ZERO_OBJECT(pHeapLayout);
if (pConfig == NULL) {
return MA_INVALID_ARGS;
}
if (pConfig->capacity == 0) {
return MA_INVALID_ARGS;
}
pHeapLayout->sizeInBytes = 0;
/* Groups. */
pHeapLayout->groupsOffset = pHeapLayout->sizeInBytes;
pHeapLayout->sizeInBytes += ma_align_64(ma_slot_allocator_calculate_group_capacity(pConfig->capacity) * sizeof(ma_slot_allocator_group));
/* Slots. */
pHeapLayout->slotsOffset = pHeapLayout->sizeInBytes;
pHeapLayout->sizeInBytes += ma_align_64(pConfig->capacity * sizeof(ma_uint32));
return MA_SUCCESS;
}
MA_API ma_result ma_slot_allocator_get_heap_size(const ma_slot_allocator_config* pConfig, size_t* pHeapSizeInBytes)
{
ma_result result;
ma_slot_allocator_heap_layout layout;
if (pHeapSizeInBytes == NULL) {
return MA_INVALID_ARGS;
}
*pHeapSizeInBytes = 0;
result = ma_slot_allocator_get_heap_layout(pConfig, &layout);
if (result != MA_SUCCESS) {
return result;
}
*pHeapSizeInBytes = layout.sizeInBytes;
return result;
}
MA_API ma_result ma_slot_allocator_init_preallocated(const ma_slot_allocator_config* pConfig, void* pHeap, ma_slot_allocator* pAllocator)
{
ma_result result;
ma_slot_allocator_heap_layout heapLayout;
if (pAllocator == NULL) {
return MA_INVALID_ARGS;
}
MA_ZERO_OBJECT(pAllocator);
if (pHeap == NULL) {
return MA_INVALID_ARGS;
}
result = ma_slot_allocator_get_heap_layout(pConfig, &heapLayout);
if (result != MA_SUCCESS) {
return result;
}
pAllocator->_pHeap = pHeap;
pAllocator->pGroups = (ma_slot_allocator_group*)ma_offset_ptr(pHeap, heapLayout.groupsOffset);
pAllocator->pSlots = (ma_uint32*)ma_offset_ptr(pHeap, heapLayout.slotsOffset);
pAllocator->capacity = pConfig->capacity;
return MA_SUCCESS;
}
MA_API ma_result ma_slot_allocator_init(const ma_slot_allocator_config* pConfig, const ma_allocation_callbacks* pAllocationCallbacks, ma_slot_allocator* pAllocator)
{
ma_result result;
size_t heapSizeInBytes;
void* pHeap;
result = ma_slot_allocator_get_heap_size(pConfig, &heapSizeInBytes);
if (result != MA_SUCCESS) {
return result; /* Failed to retrieve the size of the heap allocation. */
}
if (heapSizeInBytes > 0) {
pHeap = ma_malloc(heapSizeInBytes, pAllocationCallbacks);
if (pHeap == NULL) {
return MA_OUT_OF_MEMORY;
}
} else {
pHeap = NULL;
}
result = ma_slot_allocator_init_preallocated(pConfig, pHeap, pAllocator);
if (result != MA_SUCCESS) {
ma_free(pHeap, pAllocationCallbacks);
return result;
}
pAllocator->_ownsHeap = MA_TRUE;
return MA_SUCCESS;
}
MA_API void ma_slot_allocator_uninit(ma_slot_allocator* pAllocator, const ma_allocation_callbacks* pAllocationCallbacks)
{
if (pAllocator == NULL) {
return;
}
if (pAllocator->_ownsHeap) {
ma_free(pAllocator->_pHeap, pAllocationCallbacks);
}
}
MA_API ma_result ma_slot_allocator_alloc(ma_slot_allocator* pAllocator, ma_uint64* pSlot)
{
ma_uint32 iAttempt;
const ma_uint32 maxAttempts = 2; /* The number of iterations to perform until returning MA_OUT_OF_MEMORY if no slots can be found. */
if (pAllocator == NULL || pSlot == NULL) {
return MA_INVALID_ARGS;
}
for (iAttempt = 0; iAttempt < maxAttempts; iAttempt += 1) {
/* We need to acquire a suitable bitfield first. This is a bitfield that has an available slot within it. */
ma_uint32 iGroup;
for (iGroup = 0; iGroup < ma_slot_allocator_group_capacity(pAllocator); iGroup += 1) {
/* CAS */
for (;;) {
ma_uint32 oldBitfield;
ma_uint32 newBitfield;
ma_uint32 bitOffset;
oldBitfield = c89atomic_load_32(&pAllocator->pGroups[iGroup].bitfield); /* <-- This copy must happen. The compiler must not optimize this away. */
/* Fast check to see if anything is available. */
if (oldBitfield == 0xFFFFFFFF) {
break; /* No available bits in this bitfield. */
}
bitOffset = ma_ffs_32(~oldBitfield);
MA_ASSERT(bitOffset < 32);
newBitfield = oldBitfield | (1 << bitOffset);
if (c89atomic_compare_and_swap_32(&pAllocator->pGroups[iGroup].bitfield, oldBitfield, newBitfield) == oldBitfield) {
ma_uint32 slotIndex;
/* Increment the counter as soon as possible to have other threads report out-of-memory sooner than later. */
c89atomic_fetch_add_32(&pAllocator->count, 1);
/* The slot index is required for constructing the output value. */
slotIndex = (iGroup << 5) + bitOffset; /* iGroup << 5 = iGroup * 32 */
/* Increment the reference count before constructing the output value. */
pAllocator->pSlots[slotIndex] += 1;
/* Construct the output value. */
*pSlot = ((ma_uint64)pAllocator->pSlots[slotIndex] << 32 | slotIndex);
return MA_SUCCESS;
}
}
}
/* We weren't able to find a slot. If it's because we've reached our capacity we need to return MA_OUT_OF_MEMORY. Otherwise we need to do another iteration and try again. */
if (pAllocator->count < pAllocator->capacity) {
ma_yield();
} else {
return MA_OUT_OF_MEMORY;
}
}
/* We couldn't find a slot within the maximum number of attempts. */
return MA_OUT_OF_MEMORY;
}
MA_API ma_result ma_slot_allocator_free(ma_slot_allocator* pAllocator, ma_uint64 slot)
{
ma_uint32 iGroup;
ma_uint32 iBit;
if (pAllocator == NULL) {
return MA_INVALID_ARGS;
}
iGroup = (slot & 0xFFFFFFFF) >> 5; /* slot / 32 */
iBit = (slot & 0xFFFFFFFF) & 31; /* slot % 32 */
if (iGroup >= ma_slot_allocator_group_capacity(pAllocator)) {
return MA_INVALID_ARGS;
}
MA_ASSERT(iBit < 32); /* This must be true due to the logic we used to actually calculate it. */
while (pAllocator->count > 0) {
/* CAS */
ma_uint32 oldBitfield;
ma_uint32 newBitfield;
oldBitfield = c89atomic_load_32(&pAllocator->pGroups[iGroup].bitfield); /* <-- This copy must happen. The compiler must not optimize this away. */
newBitfield = oldBitfield & ~(1 << iBit);
if (c89atomic_compare_and_swap_32(&pAllocator->pGroups[iGroup].bitfield, oldBitfield, newBitfield) == oldBitfield) {
c89atomic_fetch_sub_32(&pAllocator->count, 1);
return MA_SUCCESS;
}
}
/* Getting here means there are no allocations available for freeing. */
return MA_INVALID_OPERATION;
}
MA_API ma_result ma_async_notification_signal(ma_async_notification* pNotification)
{
ma_async_notification_callbacks* pNotificationCallbacks = (ma_async_notification_callbacks*)pNotification;