Posts Tagged ‘memory allocator’

h1

ultra performance go!!!!!

November 3, 2008

This is awesome!!!  I just now implemented a memory block manager for all the stuff that gets allocated / deallocated hundreds if not thousands of times an iteration, which essentially recycles all blocks of that size’s cache deallocated until cleared.  There is a list of optimal sizes which all go evenly in to the chunks, which are propagated on out of free blocks.  It is basically just never deallocating anything ’till the end, but since we recycle everything, there are no leaks and it’s faster.  Twenty times faster, according to Monsieur Framerate, and we don’t argue with him.  If you want the full code for all my memory management used in Rat Physics, I’m going to put up articles about it.  But in the meantime, here’s the plain ol’ code.  But wait, credit where due: Erin Catto is responsible for his idea to use both stack allocators and block recyclers in Box2D.  That is where I directly ripped the idea off from.  The code, however, is my own.  Here it is now:

memory.h

#ifndef RAT_PHYSICS_MEMORY
#define RAT_PHYSICS_MEMORY

#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

// main alloc/dealloc functions, just calls dlmalloc|malloc/dlfree|free

void rat_set_alloc_fn(void *(*alloc_fn)(size_t));
void rat_set_realloc_fn(void *(*realloc_fn)(void *,size_t));
void rat_set_dealloc_fn(void (*dealloc_fn)(void *));

void *rat_alloc(size_t size);
void *rat_realloc(void *mem,size_t newsize);
void rat_dealloc(void *mem);

unsigned int rat_blocks_alloced();
unsigned int rat_mem_alloced();

void rat_mem_alloced_string(char *str);


// a simple stack allocator
// must push/pop memory allocations; faster than
// arbitrary malloc/free by a int shot.

struct rat_stack_allocator;
typedef struct rat_stack_allocator rat_stack_allocator;

extern const size_t rat_max_stack_cells;
extern const size_t rat_stack_size;

rat_stack_allocator *rat_stack_allocator_create();
void rat_stack_allocator_destroy(rat_stack_allocator *sal);

void *rat_stack_alloc(rat_stack_allocator *sal,size_t size);
void rat_stack_dealloc(rat_stack_allocator *sal,void *mem);

unsigned int rat_stack_most_alloced(rat_stack_allocator *sal);


// a simple small block allocator for objects of about
// the same size being alloced/dealloced on a regular
// basis.  it is very fast at recycling

struct rat_block_allocator;
typedef struct rat_block_allocator rat_block_allocator;

extern const unsigned int rat_chunk_size;
extern const unsigned int rat_max_block_size;
extern const unsigned int rat_block_sizes;
extern const unsigned int rat_chunk_arr_inc;

rat_block_allocator *rat_block_allocator_create();
void rat_block_allocator_destroy(rat_block_allocator *bal);
void rat_block_allocator_clear(rat_block_allocator *bal);

void *rat_block_alloc(rat_block_allocator *bal,size_t size);
void *rat_block_realloc(rat_block_allocator *bal,void *mem,size_t oldsize,size_t size);
void rat_block_dealloc(rat_block_allocator *bal,void *mem,size_t size);

#ifdef __cplusplus
}
#endif

#endif

memory.c

#include "common.h"

#define RAT_MEMORY_GUARD_WORD 0x52415453 // spells "RATS" in ascii 😛

#include "memory.h"
#ifndef RAT_PHYSICS_MSVC
#	include "dlmalloc.h"
#endif

static unsigned int blocksalloced=0;
static unsigned int amtmemalloced=0;
static unsigned int extrafootprint=0;

#ifdef RAT_PHYSICS_MSVC
	static void *(*rat_alloc_fn)(size_t)=malloc;
	static void *(*rat_realloc_fn)(void *,size_t)=realloc;
	static void (*rat_dealloc_fn)(void *)=free;
#else
	static void *(*rat_alloc_fn)(size_t)=dlmalloc;
	static void *(*rat_realloc_fn)(void *,size_t)=dlrealloc;
	static void (*rat_dealloc_fn)(void *)=dlfree;
#endif

void rat_set_alloc_fn(void *(*alloc_fn)(size_t))
{
	rat_alloc_fn=alloc_fn;
}

void rat_set_realloc_fn(void *(*realloc_fn)(void *,size_t))
{
	rat_realloc_fn=realloc_fn;
}

void rat_set_dealloc_fn(void (*dealloc_fn)(void *))
{
	rat_dealloc_fn=dealloc_fn;
}

void *rat_alloc(size_t size)
{
	size_t *ptr;
	void *mem=rat_alloc_fn(size+sizeof(size_t)*2);
	if (mem==NULL) return NULL;
	ptr=mem;

	ptr[0]=RAT_MEMORY_GUARD_WORD;
	ptr[1]=size;
	amtmemalloced+=size;

	blocksalloced++;
	extrafootprint=blocksalloced*sizeof(size_t)*2;
	return (void *)(ptr+2);
}

void *rat_realloc(void *mem,size_t newsize)
{
	size_t *ptr=mem;
	if (mem==NULL) return rat_alloc(newsize);
	if (ptr[-2]!=RAT_MEMORY_GUARD_WORD) return NULL;

	amtmemalloced+=newsize-ptr[-1];
	ptr=(size_t *)rat_realloc_fn((void *)(ptr-2),newsize+sizeof(size_t)*2);

	ptr[0]=RAT_MEMORY_GUARD_WORD;
	ptr[1]=newsize;
	return (void *)(ptr+2);
}

void rat_dealloc(void *mem)
{
	size_t *ptr=mem;
	if (mem==NULL) return;
	if (ptr[-2]!=RAT_MEMORY_GUARD_WORD) return;
	amtmemalloced-=ptr[-1];
	blocksalloced--;
	extrafootprint=blocksalloced*sizeof(size_t)*2;
	rat_dealloc_fn((void *)(ptr-2));
}

unsigned int rat_blocks_alloced()	{return blocksalloced;}
unsigned int rat_mem_alloced()		{return amtmemalloced+extrafootprint;}

void rat_mem_alloced_string(char *str)
{
	size_t malc=amtmemalloced+extrafootprint;
	if (malc<1024)
		sprintf(str,"%04u Bytes",malc);
	else if (malc>=1024&&malc<1048576)
	{
		rat_real kb=(float)malc/1024.0f;
		sprintf(str,"%04.4f KB",kb);
	}
	else if (malc>=1048576&&malc<1073741824)
	{
		rat_real mb=(float)malc/1048576.0f;
		sprintf(str,"%04.4f MB",mb);
	}
	else
		sprintf(str,"overflowed");
}

// stack allocator...

const size_t rat_max_stack_cells=128;	// 128 stack cells
const size_t rat_stack_size=512*1024;	// 512KB total memory

typedef struct rat_stack_cell
{
	char *data;
	size_t size;
	int nonstackalloc;
} rat_stack_cell;

struct rat_stack_allocator
{
	unsigned int idx;
	char *data;

	unsigned int alloced;
	unsigned int most_alloced;

	unsigned int numcells;
	rat_stack_cell *cells;
};

rat_stack_allocator *rat_stack_allocator_create()
{
	rat_stack_allocator *sal=(rat_stack_allocator *)rat_alloc_fn(sizeof(rat_stack_allocator));
	memset(sal,0,sizeof(rat_stack_allocator));

	sal->alloced=0;
	sal->most_alloced=0;
	sal->idx=0;
	sal->numcells=0;

	sal->data=(char *)rat_alloc_fn(rat_stack_size);
	sal->cells=(rat_stack_cell *)rat_alloc_fn(sizeof(rat_stack_cell)*rat_max_stack_cells);

	return sal;
}

void rat_stack_allocator_destroy(rat_stack_allocator *sal)
{
	if (sal->idx||sal->numcells)
	{
		fprintf(stderr,"stack destroyed with memory still allocated!!!\n");
		fflush(stderr);
	}

	rat_dealloc_fn((void *)sal->data);
	rat_dealloc_fn((void *)sal->cells);
	rat_dealloc_fn((void *)sal);
}

void *rat_stack_alloc(rat_stack_allocator *sal,size_t size)
{
	rat_stack_cell *cell;

	if (sal->numcells>=rat_max_stack_cells)
	{
		fprintf(stderr,"stack allocator overflow!!! only %u cells allowed.\n",rat_max_stack_cells);
		fflush(stderr);
		return NULL;
	}

	cell=sal->cells+sal->numcells;
	cell->size=size;
	if (sal->idx+size>rat_stack_size)
	{
		cell->data=(char *)rat_alloc_fn(size);
		cell->nonstackalloc=1; // we did not use memory from the stack
	}
	else
	{
		cell->data=sal->data+sal->idx;
		cell->nonstackalloc=0; // we used memory from the stack
		sal->idx+=cell->size;
	}

	sal->alloced+=size;
	sal->most_alloced=sal->alloced>sal->most_alloced?sal->alloced:sal->most_alloced;
	sal->numcells++;

	amtmemalloced+=size;
	blocksalloced++;

	return (void *)cell->data;
}

void rat_stack_dealloc(rat_stack_allocator *sal,void *mem)
{
	rat_stack_cell *cell;

	if (sal->numcells==0)
	{
		fprintf(stderr,"there's nothing left to deallocate!!!\n");
		fflush(stderr);
		return;
	}

	cell=sal->cells+sal->numcells-1;
	if (mem!=(void *)cell->data)
	{
		fprintf(stderr,
			"memory at 0x%08x is not at top of stack; deallocate fail! 0x%08x is next.\n",
			(unsigned int)mem,(unsigned int)cell->data);
		fflush(stderr);
		return;
	}

	if (cell->nonstackalloc)
		rat_dealloc_fn((void *)mem);
	else
		sal->idx-=cell->size;

	sal->alloced-=cell->size;
	sal->numcells--;

	amtmemalloced-=cell->size;
	blocksalloced--;
}

unsigned int rat_stack_most_alloced(rat_stack_allocator *sal)
{
	return sal->most_alloced;
}

// block allocator...

const unsigned int rat_chunk_size=32768;
const unsigned int rat_max_block_size=16384;
const unsigned int rat_block_sizes=20;
const unsigned int rat_chunk_arr_inc=128;

typedef struct rat_block
{
	struct rat_block *next;
} rat_block;

typedef struct rat_chunk
{
	unsigned int block_size;
	rat_block *blocks;
} rat_chunk;

static unsigned int block_sizes[]={16,32,64,96,128,160,192,224,256,320,384,448,512,640,1024,2048,4096,8192,16384};

static unsigned char *block_size_lookup;
static int is_block_size_lookup_init=0;

struct rat_block_allocator
{
	unsigned int numchunks;
	rat_chunk *chunks;

	unsigned int chunk_space;
	rat_block **recycling_lists;
};

void free_bsl(void)	// we use (void) to conform to that retarded standard
					// and get rid of the associated warning message
{
	free((void *)block_size_lookup);
}

rat_block_allocator *rat_block_allocator_create()
{
	rat_block_allocator *bal=(rat_block_allocator *)rat_alloc(sizeof(rat_block_allocator));
	memset(bal,0,sizeof(rat_block_allocator));

	bal->chunk_space=rat_chunk_arr_inc;
	
	bal->numchunks=0;
	bal->chunks=(rat_chunk *)rat_alloc(sizeof(rat_chunk)*bal->chunk_space);
	bal->recycling_lists=(rat_block **)rat_alloc(sizeof(rat_block *)*rat_block_sizes);
	
	memset(bal->chunks,0,sizeof(rat_chunk)*bal->chunk_space);
	memset(bal->recycling_lists,0,sizeof(rat_block *)*rat_block_sizes);

	if (!is_block_size_lookup_init)
	{
		register unsigned int i,j=0;
		block_size_lookup=(char *)malloc(rat_max_block_size+1);
		atexit(free_bsl);
		
		for (i=1; i<rat_max_block_size+1; i++)
		{
			if (j>=rat_block_sizes)
			{
				fprintf(stderr,"blocks size list exceeds block sizes!\n");
				fflush(stderr);
			}
			if (i<=block_sizes&#91;j&#93;)
				block_size_lookup&#91;i&#93;=(unsigned char)j;
			else
				block_size_lookup&#91;i&#93;=(unsigned char)++j;
		}
		is_block_size_lookup_init=1;
	}

	return bal;
}

void rat_block_allocator_destroy(rat_block_allocator *bal)
{
	register unsigned int i;

	for (i=0; i<bal->numchunks; i++)
		rat_dealloc((void *)bal->chunks[i].blocks);
	
	rat_dealloc((void *)bal->chunks);
	rat_dealloc((void *)bal->recycling_lists);
	rat_dealloc((void *)bal);
}

void rat_block_allocator_clear(rat_block_allocator *bal)
{
	register unsigned int i;

	for (i=0; i<bal->numchunks; i++)
		rat_dealloc((void *)bal->chunks[i].blocks);
	bal->numchunks=0;

	memset(bal->chunks,0,sizeof(rat_chunk)*bal->chunk_space);
	memset(bal->recycling_lists,0,sizeof(rat_block *)*rat_block_sizes);
}

void *rat_block_alloc(rat_block_allocator *bal,size_t size)
{
	register unsigned int i,idx;

	if (size==0) return NULL;
	if (size>rat_max_block_size)
	{
		fprintf(stderr,"this block is too fucking big! you can't allocate more than %u bytes!\n",rat_max_block_size);
		fflush(stderr);
		return NULL;
	}

	idx=block_size_lookup[size];
	if (idx<0||rat_block_sizes<idx)
	{
		fprintf(stderr,"block size lookup failed!\n");
		fflush(stderr);
		return NULL;
	}

	if (bal->recycling_lists[idx]) // this is our fast memory recycler!!! look at that ultra speed!!!
	{
		rat_block *block=bal->recycling_lists[idx];
		bal->recycling_lists[idx]=block->next;

		return block;
	}
	else
	{
		register unsigned int block_size,numblocks;
		rat_chunk *chunk;
		rat_block *last;

		if (bal->numchunks==bal->chunk_space)
		{
			rat_chunk *old_chunks=bal->chunks;

			bal->chunk_space+=rat_chunk_arr_inc;
			bal->chunks=(rat_chunk *)rat_alloc(sizeof(rat_chunk)*bal->chunk_space);

			memcpy(bal->chunks,old_chunks,sizeof(rat_chunk)*bal->numchunks);
			memset(bal->chunks+bal->numchunks,0,sizeof(rat_chunk)*rat_chunk_arr_inc);
			rat_dealloc((void *)old_chunks);
		}

		chunk=bal->chunks+bal->numchunks;
		chunk->blocks=(rat_block *)rat_alloc(rat_chunk_size);

		block_size=block_sizes[idx];
		chunk->block_size=block_size;
		numblocks=rat_chunk_size/block_size;

		if (numblocks*block_size>rat_chunk_size)
		{
			fprintf(stderr,"internal memory block listing error!\n");
			fflush(stderr);
			return NULL;
		}

		for (i=0; i<numblocks-1; i++)
		{
			rat_block *thisblock=(rat_block *)((char *)chunk->blocks+block_size*i);
			rat_block *nextblock=(rat_block *)((char *)chunk->blocks+block_size*(i+1));
			thisblock->next=nextblock;
		}
		last=(rat_block *)((char *)chunk->blocks+block_size*(numblocks-1));
		last->next=NULL;

		bal->recycling_lists[idx]=chunk->blocks->next;
		bal->numchunks++;

		return chunk->blocks;
	}
}

void *rat_block_realloc(rat_block_allocator *bal,void *mem,size_t oldsize,size_t size)
{
	void *newmem;
	size_t roldsize=block_sizes[block_size_lookup[oldsize]];
	size_t rsize=block_sizes[block_size_lookup[size]];

	if (rsize==roldsize)
		return mem;
	else if (rsize>roldsize)
	{
		newmem=rat_block_alloc(bal,size);
		memcpy(newmem,mem,roldsize);
		rat_block_dealloc(bal,mem,oldsize);
	}
	else
	{
		newmem=rat_block_alloc(bal,size);
		memcpy(newmem,mem,rsize);
		rat_block_dealloc(bal,mem,oldsize);
	}

	return newmem;
}

void rat_block_dealloc(rat_block_allocator *bal,void *mem,size_t size)
{
	register unsigned int idx;
	rat_block *block;

	if (size==0||mem==NULL) return;
	if (size<0||size>rat_max_block_size)
	{
		fprintf(stderr,"block size out of range; impossible dealloc size!\n");
		fflush(stderr);
		return;
	}

	idx=block_size_lookup[size];
	if (idx<0||rat_block_sizes<idx)
	{
		fprintf(stderr,"block size lookup failed!\n");
		fflush(stderr);
		return;
	}

	block=(rat_block *)mem;
	block->next=bal->recycling_lists[idx];
	bal->recycling_lists[idx]=block;
}
Advertisements