Posts Tagged ‘memory management’

h1

threading the memory management and future dev plans

July 2, 2009

So, I finally got the memory management system threaded and defeated the only race condition, which could be found on the close down because of the order it waited for shit in. I fixed it with a shut down event that would poke when both mutexes were released in a timeout waiting loop so there was no wrong order to wait for them in. Anyway, I’m not here to show you that, but instead my threading library. It’s very bare bones, and only has a Win32 implementation for now, but could easily be implemented in another threading library, and I will do so with pthreads sometime in the future.

But I want to show it. I will show it now. SHOWING!

It makes use of factories and singletons, but those should be obvious in terms of their functionality. Factory inherits Factory<Singleton > as well. I might change this if it becomes more efficient to create multiple factory instances for different creation modes (i.e.) each has a different set of creation parameters.

threading.h
#ifndef THREADING_H
#define THREADING_H

#include
#include
#include
#include
#include
#include

#include “dlmalloc_object.h”
#include “factory.h”

namespace SpaceSim
{
class WaitingObject: public DLMallocAlloced
{
protected:
static std::list *wobjs;
public:
WaitingObject();
~WaitingObject();

static bool WaitForAll(std::list *objs = NULL);

virtual bool Wait() = 0;
};

class Mutex: public WaitingObject
{
public:
static Mutex *Create(bool is_global = false, unsigned long spincount = 0x80000400);

virtual bool Lock() = 0;
virtual void Unlock() = 0;

virtual unsigned long SpinCount() const = 0;
};

class GlobalMutex: public Mutex
{
protected:
HANDLE mutex;
public:
GlobalMutex();
~GlobalMutex();

virtual bool Lock();
virtual void Unlock();

virtual bool Wait();

virtual unsigned long SpinCount() const;
};

class LocalMutex: public Mutex
{
protected:
unsigned long spincount;
CRITICAL_SECTION mutex;

public:
LocalMutex(unsigned long spincount);
~LocalMutex();

virtual bool Lock();
virtual void Unlock();

virtual bool Wait();

virtual unsigned long SpinCount() const;
};

class Event: public WaitingObject
{
protected:
HANDLE event;

public:
static Event *Create();

Event();
~Event();

bool Wait();
void Poke();
void Reset();
};

typedef int (*ThreadMainProc)(void *args);
typedef unsigned long ThreadID;

class Thread: public WaitingObject
{
protected:
void *args;
HANDLE thread;
ThreadMainProc proc;
bool is_running;

public:
static Thread *Create(ThreadMainProc main_proc, void *args);
static ThreadID GetID();

Thread(ThreadMainProc main_proc, void *args);
~Thread();

bool Run();
bool Break();
bool Wait();

bool IsRunning() const;
};
};

#endif

threading.cpp
#include “threading.h”
#include “logger.h”

namespace SpaceSim
{
std::list *WaitingObject::wobjs = NULL;

WaitingObject::WaitingObject()
{
if (!wobjs)
wobjs = new std::list;

wobjs->push_back(this);
}

WaitingObject::~WaitingObject()
{
wobjs->remove(this);

if (wobjs->empty())
{
delete wobjs;
wobjs = NULL;
}
}

bool WaitingObject::WaitForAll(std::list *objs)
{
bool success = true;
if ((!objs) || objs->empty())
objs = wobjs;

std::list::iterator it;
for (it = objs->begin(); it != objs->end(); ++it)
success = success && (*it)->Wait();

return success;
}

class MutexFactoryParameters: public FactoryParameters
{
public:
bool is_global;
unsigned long spincount;

MutexFactoryParameters(bool is_global = false, unsigned long spincount = 0x80000400):
is_global(is_global), spincount(spincount) {}
};

template<> class Factory: public Singleton >
{
public:
virtual Mutex *Create(const FactoryParameters &params)
{
const MutexFactoryParameters &mparams = param_cast(params);

if (mparams.is_global)
return new GlobalMutex();
else
return new LocalMutex(mparams.spincount);
}
};

Mutex *Mutex::Create(bool is_global, unsigned long spincount)
{
return Factory::GetSingleton().Create(MutexFactoryParameters(is_global, spincount));
}

GlobalMutex::GlobalMutex()
{
static unsigned long mtctr = 0;
char name[256], cvbuf[16];

ltoa(mtctr++, cvbuf, 16);
strcpy(name, “SPACESIM_MUTEX_0x”);
strcat(name, cvbuf);

assert(mutex = CreateMutex(NULL, FALSE, name));
}

GlobalMutex::~GlobalMutex()
{
CloseHandle(mutex);
}

bool GlobalMutex::Lock()
{
return WaitForSingleObject(mutex, INFINITE) == WAIT_OBJECT_0;
}

void GlobalMutex::Unlock()
{
ReleaseMutex(mutex);
}

bool GlobalMutex::Wait()
{
bool ls = Lock();
Unlock();
return ls;
}

unsigned long GlobalMutex::SpinCount() const {return 0;}

LocalMutex::LocalMutex(unsigned long spincount): spincount(spincount)
{
assert(InitializeCriticalSectionAndSpinCount(&mutex, spincount));
}

LocalMutex::~LocalMutex()
{
DeleteCriticalSection(&mutex);
}

bool LocalMutex::Lock()
{
EnterCriticalSection(&mutex);

return true;
}

void LocalMutex::Unlock()
{
LeaveCriticalSection(&mutex);
}

bool LocalMutex::Wait()
{
bool ls = Lock();
Unlock();
return ls;
}

unsigned long LocalMutex::SpinCount() const {return spincount;}

Event *Event::Create()
{
return Factory::GetSingleton().Create(FactoryParameters());
}

Event::Event()
{
static unsigned long evctr = 0;
char name[256], cvbuf[16];

ltoa(evctr++, cvbuf, 16);
strcpy(name, “SPACESIM_EVENT_0x”);
strcat(name, cvbuf);

assert(event = CreateEvent(NULL, TRUE, FALSE, name));
}

Event::~Event()
{
CloseHandle(event);
}

bool Event::Wait()
{
return WaitForSingleObject(event, INFINITE) == WAIT_OBJECT_0;
}

void Event::Poke()
{
SetEvent(event);
}

void Event::Reset()
{
ResetEvent(event);
}

class ThreadFactoryParameters: public FactoryParameters
{
public:
void *args;
ThreadMainProc proc;

ThreadFactoryParameters(ThreadMainProc proc, void *args): proc(proc), args(args) {}
};

template<> class Factory: public Singleton >
{
public:
virtual Thread *Create(const FactoryParameters &params)
{
const ThreadFactoryParameters &mparams = param_cast(params);

return new Thread(mparams.proc, mparams.args);
}
};

Thread *Thread::Create(ThreadMainProc main_proc, void *args)
{
return Factory::GetSingleton().Create(ThreadFactoryParameters(main_proc, args));
}

struct proc_attribs
{
ThreadMainProc proc;
void *args;
Event *mevent;
};

static DWORD WINAPI __ThreadMainProc(LPVOID param)
{
proc_attribs &pa = *((proc_attribs *)param);

ThreadMainProc proc = pa.proc;
void *args = pa.args;

pa.mevent->Poke();

return proc(args);
}

ThreadID Thread::GetID() {return GetCurrentThreadId();}

Thread::Thread(ThreadMainProc main_proc, void *args): proc(main_proc), args(args) {}

Thread::~Thread()
{
if (is_running) Break();
}

bool Thread::Run()
{
if (is_running) return false;

DWORD temp;

proc_attribs pa;
pa.args = args;
pa.proc = proc;
pa.mevent = Event::Create();

thread = CreateThread(NULL, 0,
__ThreadMainProc, (void *)&pa, 0, &temp);

is_running = (bool)thread;

pa.mevent->Wait();
return is_running;
}

bool Thread::Break()
{
if (!is_running) return false;

bool success = CloseHandle(thread);
is_running = !success;
return success;
}

bool Thread::Wait()
{
if (!is_running) return false;

bool success = (WaitForSingleObject(thread, INFINITE) == WAIT_OBJECT_0);
return success && Break();
}

bool Thread::IsRunning() const {return is_running;}
};

Now, for stuff more interesting to the consumer who buys the final game (yes… the game will be sold, and I don’t care if people see snippets of the code). The Actor and StaticObject classes will extend the SceneObject class, and all that good stuff will be Lua bound. Actors will be partly virtual and will have to have trigger functions such as being told where to go by the controller filled in. This would be in certain implementations, especially ships and the like. Anyway, that about says it and all that good stuff. I’m making tracks, and it is AWESOME.

Advertisements
h1

reference counter based memory management + bitching about life

June 8, 2009

The bitching about life part comes first.  I believe the DCFC song “No Sunlight” sums up my rant in that regard nicely, so I won’t waste the space here.  Okay I’m done with that.

Now, on to the not depressing stuff.  I was thinking of ways I could potentially speed up memory management with threads.  I was basically thinking along these lines:  I should queue my object deletions so that I can delete them on the side while the next iteration is in progress.  So I came up with an interesting system.  I haven’t implemented the threading yet, but I have seperated object deletion from the main loop entirely and it’s just tacked on the end for now.

Basically I implemented a reference counter object management system.  Each ManagedObject has a grab() and drop() method, used to count references.  When the number of references reaches zero, the drop() method calls the queue_deletion() method in the ObjectManager singleton.  The ObjectManager also keeps track of ManagedObject’s that are still kicking, so that they can be cleaned up at the very end if they weren’t properly dropped.  This should keep memory leaks from being as bad, though that doesn’t mean I won’t bust my ass trying to find all of them.  So the current code for updating is as follows:

void Engine::RunMainLoop(double timestep)
{
while (running)
{
if (!Update(timestep)) running = false;
if (!Render()) running = false;
ObjectManager::Singleton().CleanUp();
}
}

The ObjectManager deletes all things queued for deletion in CleanUp(). What I could do is simply start a cleanup thread that cleans up every second or so. For the threaded version it will lock the mutex for the queue to copy it over to a private queue and clear the public one so that it can be used again while the cleanup iterator goes through and deletes everything added previously. If you want the full classes, I shall have t3h c0dez in an article on custom memory management for C++ classes shortly.

EDIT: Okay, article’s up. It’s called “C++ custom memory management primer”. Link under My Articles in the sidebar.

h1

ultra performance go!!!!!

November 3, 2008

This is awesome!!!  I just now implemented a memory block manager for all the stuff that gets allocated / deallocated hundreds if not thousands of times an iteration, which essentially recycles all blocks of that size’s cache deallocated until cleared.  There is a list of optimal sizes which all go evenly in to the chunks, which are propagated on out of free blocks.  It is basically just never deallocating anything ’till the end, but since we recycle everything, there are no leaks and it’s faster.  Twenty times faster, according to Monsieur Framerate, and we don’t argue with him.  If you want the full code for all my memory management used in Rat Physics, I’m going to put up articles about it.  But in the meantime, here’s the plain ol’ code.  But wait, credit where due: Erin Catto is responsible for his idea to use both stack allocators and block recyclers in Box2D.  That is where I directly ripped the idea off from.  The code, however, is my own.  Here it is now:

memory.h

#ifndef RAT_PHYSICS_MEMORY
#define RAT_PHYSICS_MEMORY

#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

// main alloc/dealloc functions, just calls dlmalloc|malloc/dlfree|free

void rat_set_alloc_fn(void *(*alloc_fn)(size_t));
void rat_set_realloc_fn(void *(*realloc_fn)(void *,size_t));
void rat_set_dealloc_fn(void (*dealloc_fn)(void *));

void *rat_alloc(size_t size);
void *rat_realloc(void *mem,size_t newsize);
void rat_dealloc(void *mem);

unsigned int rat_blocks_alloced();
unsigned int rat_mem_alloced();

void rat_mem_alloced_string(char *str);


// a simple stack allocator
// must push/pop memory allocations; faster than
// arbitrary malloc/free by a int shot.

struct rat_stack_allocator;
typedef struct rat_stack_allocator rat_stack_allocator;

extern const size_t rat_max_stack_cells;
extern const size_t rat_stack_size;

rat_stack_allocator *rat_stack_allocator_create();
void rat_stack_allocator_destroy(rat_stack_allocator *sal);

void *rat_stack_alloc(rat_stack_allocator *sal,size_t size);
void rat_stack_dealloc(rat_stack_allocator *sal,void *mem);

unsigned int rat_stack_most_alloced(rat_stack_allocator *sal);


// a simple small block allocator for objects of about
// the same size being alloced/dealloced on a regular
// basis.  it is very fast at recycling

struct rat_block_allocator;
typedef struct rat_block_allocator rat_block_allocator;

extern const unsigned int rat_chunk_size;
extern const unsigned int rat_max_block_size;
extern const unsigned int rat_block_sizes;
extern const unsigned int rat_chunk_arr_inc;

rat_block_allocator *rat_block_allocator_create();
void rat_block_allocator_destroy(rat_block_allocator *bal);
void rat_block_allocator_clear(rat_block_allocator *bal);

void *rat_block_alloc(rat_block_allocator *bal,size_t size);
void *rat_block_realloc(rat_block_allocator *bal,void *mem,size_t oldsize,size_t size);
void rat_block_dealloc(rat_block_allocator *bal,void *mem,size_t size);

#ifdef __cplusplus
}
#endif

#endif

memory.c

#include "common.h"

#define RAT_MEMORY_GUARD_WORD 0x52415453 // spells "RATS" in ascii 😛

#include "memory.h"
#ifndef RAT_PHYSICS_MSVC
#	include "dlmalloc.h"
#endif

static unsigned int blocksalloced=0;
static unsigned int amtmemalloced=0;
static unsigned int extrafootprint=0;

#ifdef RAT_PHYSICS_MSVC
	static void *(*rat_alloc_fn)(size_t)=malloc;
	static void *(*rat_realloc_fn)(void *,size_t)=realloc;
	static void (*rat_dealloc_fn)(void *)=free;
#else
	static void *(*rat_alloc_fn)(size_t)=dlmalloc;
	static void *(*rat_realloc_fn)(void *,size_t)=dlrealloc;
	static void (*rat_dealloc_fn)(void *)=dlfree;
#endif

void rat_set_alloc_fn(void *(*alloc_fn)(size_t))
{
	rat_alloc_fn=alloc_fn;
}

void rat_set_realloc_fn(void *(*realloc_fn)(void *,size_t))
{
	rat_realloc_fn=realloc_fn;
}

void rat_set_dealloc_fn(void (*dealloc_fn)(void *))
{
	rat_dealloc_fn=dealloc_fn;
}

void *rat_alloc(size_t size)
{
	size_t *ptr;
	void *mem=rat_alloc_fn(size+sizeof(size_t)*2);
	if (mem==NULL) return NULL;
	ptr=mem;

	ptr[0]=RAT_MEMORY_GUARD_WORD;
	ptr[1]=size;
	amtmemalloced+=size;

	blocksalloced++;
	extrafootprint=blocksalloced*sizeof(size_t)*2;
	return (void *)(ptr+2);
}

void *rat_realloc(void *mem,size_t newsize)
{
	size_t *ptr=mem;
	if (mem==NULL) return rat_alloc(newsize);
	if (ptr[-2]!=RAT_MEMORY_GUARD_WORD) return NULL;

	amtmemalloced+=newsize-ptr[-1];
	ptr=(size_t *)rat_realloc_fn((void *)(ptr-2),newsize+sizeof(size_t)*2);

	ptr[0]=RAT_MEMORY_GUARD_WORD;
	ptr[1]=newsize;
	return (void *)(ptr+2);
}

void rat_dealloc(void *mem)
{
	size_t *ptr=mem;
	if (mem==NULL) return;
	if (ptr[-2]!=RAT_MEMORY_GUARD_WORD) return;
	amtmemalloced-=ptr[-1];
	blocksalloced--;
	extrafootprint=blocksalloced*sizeof(size_t)*2;
	rat_dealloc_fn((void *)(ptr-2));
}

unsigned int rat_blocks_alloced()	{return blocksalloced;}
unsigned int rat_mem_alloced()		{return amtmemalloced+extrafootprint;}

void rat_mem_alloced_string(char *str)
{
	size_t malc=amtmemalloced+extrafootprint;
	if (malc<1024)
		sprintf(str,"%04u Bytes",malc);
	else if (malc>=1024&&malc<1048576)
	{
		rat_real kb=(float)malc/1024.0f;
		sprintf(str,"%04.4f KB",kb);
	}
	else if (malc>=1048576&&malc<1073741824)
	{
		rat_real mb=(float)malc/1048576.0f;
		sprintf(str,"%04.4f MB",mb);
	}
	else
		sprintf(str,"overflowed");
}

// stack allocator...

const size_t rat_max_stack_cells=128;	// 128 stack cells
const size_t rat_stack_size=512*1024;	// 512KB total memory

typedef struct rat_stack_cell
{
	char *data;
	size_t size;
	int nonstackalloc;
} rat_stack_cell;

struct rat_stack_allocator
{
	unsigned int idx;
	char *data;

	unsigned int alloced;
	unsigned int most_alloced;

	unsigned int numcells;
	rat_stack_cell *cells;
};

rat_stack_allocator *rat_stack_allocator_create()
{
	rat_stack_allocator *sal=(rat_stack_allocator *)rat_alloc_fn(sizeof(rat_stack_allocator));
	memset(sal,0,sizeof(rat_stack_allocator));

	sal->alloced=0;
	sal->most_alloced=0;
	sal->idx=0;
	sal->numcells=0;

	sal->data=(char *)rat_alloc_fn(rat_stack_size);
	sal->cells=(rat_stack_cell *)rat_alloc_fn(sizeof(rat_stack_cell)*rat_max_stack_cells);

	return sal;
}

void rat_stack_allocator_destroy(rat_stack_allocator *sal)
{
	if (sal->idx||sal->numcells)
	{
		fprintf(stderr,"stack destroyed with memory still allocated!!!\n");
		fflush(stderr);
	}

	rat_dealloc_fn((void *)sal->data);
	rat_dealloc_fn((void *)sal->cells);
	rat_dealloc_fn((void *)sal);
}

void *rat_stack_alloc(rat_stack_allocator *sal,size_t size)
{
	rat_stack_cell *cell;

	if (sal->numcells>=rat_max_stack_cells)
	{
		fprintf(stderr,"stack allocator overflow!!! only %u cells allowed.\n",rat_max_stack_cells);
		fflush(stderr);
		return NULL;
	}

	cell=sal->cells+sal->numcells;
	cell->size=size;
	if (sal->idx+size>rat_stack_size)
	{
		cell->data=(char *)rat_alloc_fn(size);
		cell->nonstackalloc=1; // we did not use memory from the stack
	}
	else
	{
		cell->data=sal->data+sal->idx;
		cell->nonstackalloc=0; // we used memory from the stack
		sal->idx+=cell->size;
	}

	sal->alloced+=size;
	sal->most_alloced=sal->alloced>sal->most_alloced?sal->alloced:sal->most_alloced;
	sal->numcells++;

	amtmemalloced+=size;
	blocksalloced++;

	return (void *)cell->data;
}

void rat_stack_dealloc(rat_stack_allocator *sal,void *mem)
{
	rat_stack_cell *cell;

	if (sal->numcells==0)
	{
		fprintf(stderr,"there's nothing left to deallocate!!!\n");
		fflush(stderr);
		return;
	}

	cell=sal->cells+sal->numcells-1;
	if (mem!=(void *)cell->data)
	{
		fprintf(stderr,
			"memory at 0x%08x is not at top of stack; deallocate fail! 0x%08x is next.\n",
			(unsigned int)mem,(unsigned int)cell->data);
		fflush(stderr);
		return;
	}

	if (cell->nonstackalloc)
		rat_dealloc_fn((void *)mem);
	else
		sal->idx-=cell->size;

	sal->alloced-=cell->size;
	sal->numcells--;

	amtmemalloced-=cell->size;
	blocksalloced--;
}

unsigned int rat_stack_most_alloced(rat_stack_allocator *sal)
{
	return sal->most_alloced;
}

// block allocator...

const unsigned int rat_chunk_size=32768;
const unsigned int rat_max_block_size=16384;
const unsigned int rat_block_sizes=20;
const unsigned int rat_chunk_arr_inc=128;

typedef struct rat_block
{
	struct rat_block *next;
} rat_block;

typedef struct rat_chunk
{
	unsigned int block_size;
	rat_block *blocks;
} rat_chunk;

static unsigned int block_sizes[]={16,32,64,96,128,160,192,224,256,320,384,448,512,640,1024,2048,4096,8192,16384};

static unsigned char *block_size_lookup;
static int is_block_size_lookup_init=0;

struct rat_block_allocator
{
	unsigned int numchunks;
	rat_chunk *chunks;

	unsigned int chunk_space;
	rat_block **recycling_lists;
};

void free_bsl(void)	// we use (void) to conform to that retarded standard
					// and get rid of the associated warning message
{
	free((void *)block_size_lookup);
}

rat_block_allocator *rat_block_allocator_create()
{
	rat_block_allocator *bal=(rat_block_allocator *)rat_alloc(sizeof(rat_block_allocator));
	memset(bal,0,sizeof(rat_block_allocator));

	bal->chunk_space=rat_chunk_arr_inc;
	
	bal->numchunks=0;
	bal->chunks=(rat_chunk *)rat_alloc(sizeof(rat_chunk)*bal->chunk_space);
	bal->recycling_lists=(rat_block **)rat_alloc(sizeof(rat_block *)*rat_block_sizes);
	
	memset(bal->chunks,0,sizeof(rat_chunk)*bal->chunk_space);
	memset(bal->recycling_lists,0,sizeof(rat_block *)*rat_block_sizes);

	if (!is_block_size_lookup_init)
	{
		register unsigned int i,j=0;
		block_size_lookup=(char *)malloc(rat_max_block_size+1);
		atexit(free_bsl);
		
		for (i=1; i<rat_max_block_size+1; i++)
		{
			if (j>=rat_block_sizes)
			{
				fprintf(stderr,"blocks size list exceeds block sizes!\n");
				fflush(stderr);
			}
			if (i<=block_sizes&#91;j&#93;)
				block_size_lookup&#91;i&#93;=(unsigned char)j;
			else
				block_size_lookup&#91;i&#93;=(unsigned char)++j;
		}
		is_block_size_lookup_init=1;
	}

	return bal;
}

void rat_block_allocator_destroy(rat_block_allocator *bal)
{
	register unsigned int i;

	for (i=0; i<bal->numchunks; i++)
		rat_dealloc((void *)bal->chunks[i].blocks);
	
	rat_dealloc((void *)bal->chunks);
	rat_dealloc((void *)bal->recycling_lists);
	rat_dealloc((void *)bal);
}

void rat_block_allocator_clear(rat_block_allocator *bal)
{
	register unsigned int i;

	for (i=0; i<bal->numchunks; i++)
		rat_dealloc((void *)bal->chunks[i].blocks);
	bal->numchunks=0;

	memset(bal->chunks,0,sizeof(rat_chunk)*bal->chunk_space);
	memset(bal->recycling_lists,0,sizeof(rat_block *)*rat_block_sizes);
}

void *rat_block_alloc(rat_block_allocator *bal,size_t size)
{
	register unsigned int i,idx;

	if (size==0) return NULL;
	if (size>rat_max_block_size)
	{
		fprintf(stderr,"this block is too fucking big! you can't allocate more than %u bytes!\n",rat_max_block_size);
		fflush(stderr);
		return NULL;
	}

	idx=block_size_lookup[size];
	if (idx<0||rat_block_sizes<idx)
	{
		fprintf(stderr,"block size lookup failed!\n");
		fflush(stderr);
		return NULL;
	}

	if (bal->recycling_lists[idx]) // this is our fast memory recycler!!! look at that ultra speed!!!
	{
		rat_block *block=bal->recycling_lists[idx];
		bal->recycling_lists[idx]=block->next;

		return block;
	}
	else
	{
		register unsigned int block_size,numblocks;
		rat_chunk *chunk;
		rat_block *last;

		if (bal->numchunks==bal->chunk_space)
		{
			rat_chunk *old_chunks=bal->chunks;

			bal->chunk_space+=rat_chunk_arr_inc;
			bal->chunks=(rat_chunk *)rat_alloc(sizeof(rat_chunk)*bal->chunk_space);

			memcpy(bal->chunks,old_chunks,sizeof(rat_chunk)*bal->numchunks);
			memset(bal->chunks+bal->numchunks,0,sizeof(rat_chunk)*rat_chunk_arr_inc);
			rat_dealloc((void *)old_chunks);
		}

		chunk=bal->chunks+bal->numchunks;
		chunk->blocks=(rat_block *)rat_alloc(rat_chunk_size);

		block_size=block_sizes[idx];
		chunk->block_size=block_size;
		numblocks=rat_chunk_size/block_size;

		if (numblocks*block_size>rat_chunk_size)
		{
			fprintf(stderr,"internal memory block listing error!\n");
			fflush(stderr);
			return NULL;
		}

		for (i=0; i<numblocks-1; i++)
		{
			rat_block *thisblock=(rat_block *)((char *)chunk->blocks+block_size*i);
			rat_block *nextblock=(rat_block *)((char *)chunk->blocks+block_size*(i+1));
			thisblock->next=nextblock;
		}
		last=(rat_block *)((char *)chunk->blocks+block_size*(numblocks-1));
		last->next=NULL;

		bal->recycling_lists[idx]=chunk->blocks->next;
		bal->numchunks++;

		return chunk->blocks;
	}
}

void *rat_block_realloc(rat_block_allocator *bal,void *mem,size_t oldsize,size_t size)
{
	void *newmem;
	size_t roldsize=block_sizes[block_size_lookup[oldsize]];
	size_t rsize=block_sizes[block_size_lookup[size]];

	if (rsize==roldsize)
		return mem;
	else if (rsize>roldsize)
	{
		newmem=rat_block_alloc(bal,size);
		memcpy(newmem,mem,roldsize);
		rat_block_dealloc(bal,mem,oldsize);
	}
	else
	{
		newmem=rat_block_alloc(bal,size);
		memcpy(newmem,mem,rsize);
		rat_block_dealloc(bal,mem,oldsize);
	}

	return newmem;
}

void rat_block_dealloc(rat_block_allocator *bal,void *mem,size_t size)
{
	register unsigned int idx;
	rat_block *block;

	if (size==0||mem==NULL) return;
	if (size<0||size>rat_max_block_size)
	{
		fprintf(stderr,"block size out of range; impossible dealloc size!\n");
		fflush(stderr);
		return;
	}

	idx=block_size_lookup[size];
	if (idx<0||rat_block_sizes<idx)
	{
		fprintf(stderr,"block size lookup failed!\n");
		fflush(stderr);
		return;
	}

	block=(rat_block *)mem;
	block->next=bal->recycling_lists[idx];
	bal->recycling_lists[idx]=block;
}