/* Automatically generated by
	SmartSyntaxPluginCodeGenerator VMMaker.oscog-eem.2493 uuid: d7437cc5-75c7-4a43-af6b-0f85bf0cebe6
   from
	BitBltSimulation VMMaker.oscog-eem.2493 uuid: d7437cc5-75c7-4a43-af6b-0f85bf0cebe6
 */
static char __buildInfo[] = "BitBltSimulation VMMaker.oscog-eem.2493 uuid: d7437cc5-75c7-4a43-af6b-0f85bf0cebe6 " __DATE__ ;



#include "config.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Default EXPORT macro that does nothing (see comment in sq.h): */
#define EXPORT(returnType) returnType

/* Do not include the entire sq.h file but just those parts needed. */
#include "sqConfig.h"			/* Configuration options */
#include "sqVirtualMachine.h"	/*  The virtual machine proxy definition */
#include "sqPlatformSpecific.h"	/* Platform specific definitions */

#define true 1
#define false 0
#define null 0  /* using 'null' because nil is predefined in Think C */
#ifdef SQUEAK_BUILTIN_PLUGIN
# undef EXPORT
# define EXPORT(returnType) static returnType
#endif

#include "sqAssert.h"
#ifdef ENABLE_FAST_BLT
#include "BitBltDispatch.h"
#else
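// Without ENABLE_FAST_BLT, BitBltDispatch.h (which presumably declares operation_t) is not included;
// define operation_t here so the prototype of copyBitsFallback() further down still compiles.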
#define operation_t void
#endif
#include "sqMemoryAccess.h"


/*** Constants ***/
#define AllOnes 0xFFFFFFFFU
#define AlphaIndex 3
#define BBClipHeightIndex 13
#define BBClipWidthIndex 12
#define BBClipXIndex 10
#define BBClipYIndex 11
#define BBColorMapIndex 14
#define BBDestFormIndex 0
#define BBDestXIndex 4
#define BBDestYIndex 5
#define BBHalftoneFormIndex 2
#define BBHeightIndex 7
#define BBRuleIndex 3
#define BBSourceFormIndex 1
#define BBSourceXIndex 8
#define BBSourceYIndex 9
#define BBWarpBase 15
#define BBWidthIndex 6
#define BEBitBltIndex 2
#define BinaryPoint 14
#define BlueIndex 2
#define ColorMapFixedPart 2
#define ColorMapIndexedPart 4
#define ColorMapNewStyle 8
#define ColorMapPresent 1
#define FixedPt1 0x4000
#define FormBitsIndex 0
#define FormDepthIndex 3
#define FormHeightIndex 2
#define FormWidthIndex 1
#define GreenIndex 1
#define OpTableSize 43
#define PrimErrBadArgument 3
#define PrimErrCallbackError 20
#define PrimErrObjectMoved 18
#define RedIndex 0


/*** Function Prototypes ***/
static unsigned int addWordwith(unsigned int sourceWord, unsigned int destinationWord);
static sqInt affectedBottom(void);
static sqInt affectedLeft(void);
static sqInt affectedRight(void);
static sqInt affectedTop(void);
static unsigned int alphaBlendConstwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int alphaBlendConstwithpaintMode(unsigned int sourceWord, unsigned int destinationWord, sqInt paintMode);
static unsigned int alphaBlendScaledwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int alphaBlendwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int alphaPaintConstwith(unsigned int sourceWord, unsigned int destinationWord);
static sqInt alphaSourceBlendBits16(void);
static sqInt alphaSourceBlendBits32(void);
static sqInt alphaSourceBlendBits8(void);
static unsigned int bitAndInvertwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int bitAndwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int bitInvertAndInvertwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int bitInvertAndwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int bitInvertDestinationwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int bitInvertOrInvertwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int bitInvertOrwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int bitInvertSourcewith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int bitInvertXorwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int bitOrInvertwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int bitOrwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int bitXorwith(unsigned int sourceWord, unsigned int destinationWord);
static sqInt checkSourceOverlap(void);
static unsigned int clearWordwith(unsigned int sourceWord, unsigned int destinationWord);
static sqInt clipRange(void);
EXPORT(sqInt) copyBits(void);
static sqInt copyBitsFastPathSpecialised(void);
EXPORT(sqInt) copyBitsFromtoat(sqInt startX, sqInt stopX, sqInt yValue);
static sqInt copyBitsLockedAndClipped(void);
static sqInt copyBitsRule41Test(void);
extern void copyBitsFallback(operation_t *op, unsigned int flags);
static sqInt copyLoop(void);
static sqInt copyLoopNoSource(void);
static sqInt copyLoopPixMap(void);
static unsigned int * default8To32Table(void);
static sqInt deltaFromtonSteps(sqInt x1, sqInt x2, sqInt n);
static unsigned int destinationWordwith(unsigned int sourceWord, unsigned int destinationWord);
static sqInt destMaskAndPointerInit(void);
static unsigned int dither32To16threshold(unsigned int srcWord, sqInt ditherValue);
static sqInt drawLoopXY(sqInt xDelta, sqInt yDelta);
static unsigned int expensiveDither32To16threshold(unsigned int srcWord, sqInt ditherValue);
static sqInt fetchIntOrFloatofObject(sqInt fieldIndex, sqInt objectPointer);
static sqInt fetchIntOrFloatofObjectifNil(sqInt fieldIndex, sqInt objectPointer, sqInt defaultValue);
static unsigned int fixAlphawith(unsigned int sourceWord, unsigned int destinationWord);
EXPORT(const char*) getModuleName(void);
static sqInt ignoreSourceOrHalftone(sqInt formPointer);
static sqInt initBBOpTable(void);
static sqInt initDither8Lookup(void);
EXPORT(sqInt) initialiseModule(void);
static sqInt isIdentityMapwith(int *shifts, unsigned int *masks);
static sqInt loadBitBltDestForm(void);
EXPORT(sqInt) loadBitBltFrom(sqInt bbObj);
static sqInt loadBitBltFromwarping(sqInt bbObj, sqInt aBool);
static sqInt loadBitBltSourceForm(void);
static sqInt loadColorMap(void);
static void * loadColorMapShiftOrMaskFrom(sqInt mapOop);
static sqInt loadHalftoneForm(void);
static sqInt loadSurfacePlugin(void);
static sqInt loadWarpBltFrom(sqInt bbObj);
static sqInt lockSurfaces(void);
static sqInt mapPixelflags(sqInt sourcePixel, sqInt mapperFlags);
static unsigned int mergewith(unsigned int sourceWord, unsigned int destinationWord);
EXPORT(sqInt) moduleUnloaded(char *aModuleName);
static sqInt OLDrgbDiffwith(sqInt sourceWord, sqInt destinationWord);
static sqInt OLDtallyIntoMapwith(sqInt sourceWord, sqInt destinationWord);
static unsigned int partitionedAddtonBitscomponentMaskcarryOverflowMask(unsigned int word1, unsigned int word2, sqInt nBits, unsigned int componentMask, unsigned int carryOverflowMask);
static unsigned int partitionedANDtonBitsnPartitions(unsigned int word1, unsigned int word2, sqInt nBits, sqInt nParts);
static unsigned int partitionedMaxwithnBitsnPartitions(unsigned int word1, unsigned int word2, sqInt nBits, sqInt nParts);
static unsigned int partitionedMinwithnBitsnPartitions(unsigned int word1, unsigned int word2, sqInt nBits, sqInt nParts);
static unsigned int partitionedMulwithnBitsnPartitions(unsigned int word1, unsigned int word2, sqInt nBits, sqInt nParts);
static unsigned int partitionedRgbComponentAlphadestnBitsnPartitions(unsigned int sourceWord, unsigned int destWord, sqInt nBits, sqInt nParts);
static unsigned int partitionedSubfromnBitsnPartitions(unsigned int word1, unsigned int word2, sqInt nBits, sqInt nParts);
static sqInt performCopyLoop(void);
static unsigned int pickSourcePixelsflagssrcMaskdestMasksrcShiftIncdstShiftInc(sqInt nPixels, sqInt mapperFlags, sqInt srcMask, sqInt dstMask, sqInt srcShiftInc, sqInt dstShiftInc);
static unsigned int pickWarpPixelAtXy(sqInt xx, sqInt yy);
static unsigned int pixClearwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int pixMaskwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int pixPaintwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int pixSwapwith(unsigned int sourceWord, unsigned int destWord);
EXPORT(sqInt) primitiveCompareColors(void);
EXPORT(sqInt) primitiveCopyBits(void);
EXPORT(sqInt) primitiveDisplayString(void);
EXPORT(sqInt) primitiveDrawLoop(void);
EXPORT(sqInt) primitivePixelValueAt(void);
EXPORT(sqInt) primitiveWarpBits(void);
static sqInt reloadDestAndSourceForms(void);
static unsigned int rgbAddwith(unsigned int sourceWord, unsigned int destinationWord);
static sqInt rgbComponentAlpha16(void);
static sqInt rgbComponentAlpha32(void);
static sqInt rgbComponentAlpha32with(sqInt sourceWord, sqInt destinationWord);
static sqInt rgbComponentAlpha8(void);
static sqInt rgbComponentAlphawith(sqInt sourceWord, sqInt destinationWord);
static unsigned int rgbDiffwith(unsigned int sourceWord, unsigned int destinationWord);
static sqInt rgbMap16To32(sqInt sourcePixel);
static sqInt rgbMap32To32(sqInt sourcePixel);
static sqInt rgbMapPixelflags(sqInt sourcePixel, sqInt mapperFlags);
static sqInt rgbMapfromto(sqInt sourcePixel, sqInt nBitsIn, sqInt nBitsOut);
static unsigned int rgbMaxwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int rgbMinInvertwith(unsigned int wordToInvert, unsigned int destinationWord);
static unsigned int rgbMinwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int rgbMulwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int rgbSubwith(unsigned int sourceWord, unsigned int destinationWord);
EXPORT(sqInt) setInterpreter(struct VirtualMachine *anInterpreter);
static sqInt setupColorMasks(void);
static sqInt setupColorMasksFromto(sqInt srcBits, sqInt targetBits);
static sqInt showDisplayBits(void);
static sqInt sourceSkewAndPointerInit(void);
static unsigned int sourceWordwith(unsigned int sourceWord, unsigned int destinationWord);
static sqInt sqAssert(sqInt aBool);
static unsigned int subWordwith(unsigned int sourceWord, unsigned int destinationWord);
static unsigned int tallyIntoMapwith(unsigned int sourceWord, unsigned int destinationWord);
static sqInt tryCopyingBitsQuickly(void);
static sqInt unlockSurfaces(void);
static sqInt warpBits(void);
static sqInt warpLoop(void);
static sqInt warpLoopSetup(void);
static sqInt warpPickSmoothPixelsxDeltahyDeltahxDeltavyDeltavsourceMapsmoothingdstShiftInc(sqInt nPixels, sqInt xDeltah, sqInt yDeltah, sqInt xDeltav, sqInt yDeltav, sqInt sourceMap, sqInt n, sqInt dstShiftInc);
static sqInt warpPickSourcePixelsxDeltahyDeltahxDeltavyDeltavdstShiftIncflags(sqInt nPixels, sqInt xDeltah, sqInt yDeltah, sqInt xDeltav, sqInt yDeltav, sqInt dstShiftInc, sqInt mapperFlags);


/*** Variables ***/
static sqInt affectedB;
static sqInt affectedL;
static sqInt affectedR;
static sqInt affectedT;
static int bbH;
static int bbW;
static sqInt bitBltIsReceiver;
static sqInt bitBltOop;
static sqInt bitCount;
static sqInt clipHeight;
static sqInt clipWidth;
static sqInt clipX;
static sqInt clipY;
static sqInt cmBitsPerColor;
static sqInt cmFlags;
static unsigned int * cmLookupTable;
static sqInt cmMask;
static unsigned int * cmMaskTable;
static int * cmShiftTable;
static sqInt combinationRule;
static sqInt componentAlphaModeAlpha;
static sqInt componentAlphaModeColor;
static sqInt destBits;
static sqInt destDelta;
static int destDepth;
static sqInt destForm;
static int destHeight;
static usqInt destIndex;
static sqInt destMask;
static int destMSB;
static int destPitch;
static sqInt destPPW;
static int destWidth;
static sqInt destX;
static sqInt destY;
/* Byte lookup table used by the inlined dither32To16:threshold: code in
   alphaSourceBlendBits16. It is read at index (ditherThreshold << 8) + c,
   where c is one 8-bit colour component of a 32-bit pixel; the three values
   fetched for a pixel are packed 5 bits apart to form a 16-bit pixel.
   NOTE(review): presumably filled in by initDither8Lookup() - confirm. */
static  unsigned char dither8Lookup[4096];
/* 4x4 dither threshold matrix containing each of the values 0..15 once.
   alphaSourceBlendBits16 indexes it with (dstY & 3) * 4 + a column counter
   kept modulo 4, and uses the entry as the ditherThreshold above. */
static const int ditherMatrix4x4[16] = {
0,	8,	2,	10,
12,	4,	14,	6,
3,	11,	1,	9,
15,	7,	13,	5
};
/* Eight ascending threshold values. Not referenced in the part of the file
   that precedes alphaSourceBlendBits8.
   NOTE(review): presumably used when building dither8Lookup - confirm. */
static const int ditherThresholds16[8] = { 0, 2, 4, 6, 8, 12, 14, 16 };
/* Thirty-two values: entry 0 is 0 and entry i (i >= 1) is i - 1. Not
   referenced in the part of the file that precedes alphaSourceBlendBits8.
   NOTE(review): presumably used when building dither8Lookup - confirm. */
static const int ditherValues16[32] = {
0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
};
static sqInt dstBitShift;
static int dx;
static int dy;
static usqInt endOfDestination;
static usqInt endOfSource;
static unsigned char * gammaLookupTable;
static sqInt halftoneBase;
static sqInt halftoneForm;
static sqInt halftoneHeight;
static sqInt hasSurfaceLock;
static sqInt hDir;
static sqInt height;

#if !defined(SQUEAK_BUILTIN_PLUGIN)
static sqInt (*byteSizeOf)(sqInt oop);
static sqInt (*failed)(void);
static sqInt (*fetchIntegerofObject)(sqInt fieldIndex, sqInt objectPointer);
static sqInt (*fetchLong32ofObject)(sqInt fieldIndex, sqInt oop);
static sqInt (*fetchPointerofObject)(sqInt index, sqInt oop);
static void * (*firstIndexableField)(sqInt oop);
static double (*floatValueOf)(sqInt oop);
static sqInt (*integerObjectOf)(sqInt value);
static sqInt (*integerValueOf)(sqInt oop);
static void * (*ioLoadFunctionFrom)(char *functionName, char *moduleName);
static sqInt (*isArray)(sqInt oop);
static sqInt (*isBytes)(sqInt oop);
static sqInt (*isIntegerObject)(sqInt objectPointer);
static sqInt (*isPointers)(sqInt oop);
static sqInt (*isPositiveMachineIntegerObject)(sqInt oop);
static sqInt (*isWords)(sqInt oop);
static sqInt (*isWordsOrBytes)(sqInt oop);
static sqInt (*methodArgumentCount)(void);
static sqInt (*methodReturnInteger)(sqInt integer);
static sqInt (*methodReturnReceiver)(void);
static sqInt (*nilObject)(void);
static sqInt (*pop)(sqInt nItems);
static void (*popthenPush)(sqInt nItems, sqInt oop);
static sqInt (*positive32BitIntegerFor)(unsigned int integerValue);
static usqInt (*positive32BitValueOf)(sqInt oop);
static usqLong (*positive64BitValueOf)(sqInt oop);
static sqInt (*primitiveFail)(void);
static sqInt (*primitiveFailFor)(sqInt reasonCode);
static sqInt (*showDisplayBitsLeftTopRightBottom)(sqInt aForm, sqInt l, sqInt t, sqInt r, sqInt b);
static sqInt (*slotSizeOf)(sqInt oop);
static sqInt (*stackIntegerValue)(sqInt offset);
static sqInt (*stackObjectValue)(sqInt offset);
static sqInt (*stackValue)(sqInt offset);
static sqInt (*statNumGCs)(void);
static sqInt (*storeIntegerofObjectwithValue)(sqInt index, sqInt oop, sqInt integer);
#else /* !defined(SQUEAK_BUILTIN_PLUGIN) */
extern sqInt byteSizeOf(sqInt oop);
extern sqInt failed(void);
extern sqInt fetchIntegerofObject(sqInt fieldIndex, sqInt objectPointer);
extern sqInt fetchLong32ofObject(sqInt fieldIndex, sqInt oop);
extern sqInt fetchPointerofObject(sqInt index, sqInt oop);
extern void * firstIndexableField(sqInt oop);
extern double floatValueOf(sqInt oop);
extern sqInt integerObjectOf(sqInt value);
extern sqInt integerValueOf(sqInt oop);
extern void * ioLoadFunctionFrom(char *functionName, char *moduleName);
extern sqInt isArray(sqInt oop);
extern sqInt isBytes(sqInt oop);
#if !defined(isIntegerObject)
extern sqInt isIntegerObject(sqInt objectPointer);
#endif
extern sqInt isPointers(sqInt oop);
#if VM_PROXY_MAJOR > 1 || (VM_PROXY_MAJOR == 1 && VM_PROXY_MINOR >= 15)
extern sqInt isPositiveMachineIntegerObject(sqInt oop);
#else
# define isPositiveMachineIntegerObject(oop) 0
#endif
extern sqInt isWords(sqInt oop);
extern sqInt isWordsOrBytes(sqInt oop);
extern sqInt methodArgumentCount(void);
extern sqInt methodReturnInteger(sqInt integer);
extern sqInt methodReturnReceiver(void);
extern sqInt nilObject(void);
extern sqInt pop(sqInt nItems);
extern void popthenPush(sqInt nItems, sqInt oop);
extern sqInt positive32BitIntegerFor(unsigned int integerValue);
extern usqInt positive32BitValueOf(sqInt oop);
extern usqLong positive64BitValueOf(sqInt oop);
extern sqInt primitiveFail(void);
extern sqInt primitiveFailFor(sqInt reasonCode);
extern sqInt showDisplayBitsLeftTopRightBottom(sqInt aForm, sqInt l, sqInt t, sqInt r, sqInt b);
extern sqInt slotSizeOf(sqInt oop);
extern sqInt stackIntegerValue(sqInt offset);
extern sqInt stackObjectValue(sqInt offset);
extern sqInt stackValue(sqInt offset);
#if VM_PROXY_MAJOR > 1 || (VM_PROXY_MAJOR == 1 && VM_PROXY_MINOR >= 14)
extern sqInt statNumGCs(void);
#else
# define statNumGCs() 0
#endif
extern sqInt storeIntegerofObjectwithValue(sqInt index, sqInt oop, sqInt integer);
extern
#endif
struct VirtualMachine* interpreterProxy;
static sqInt isWarping;
static sqIntptr_t (*lockSurfaceFn)(sqIntptr_t, int*, int, int, int, int);
static sqInt mask1;
static sqInt mask2;
/* Pixel masks indexed by bit depth (0..32). The entries for depths 1, 2, 4,
   5, 8 and 16 hold (1 << depth) - 1, the entry for depth 32 holds -1 (all
   bits set), and every other entry is 0.
   alphaBlendConstwithpaintMode reads maskTable[destDepth] to isolate one
   pixel of a destination word. */
static int maskTable[33] = {
0, 1, 3, 0, 15, 31, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 65535,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1
};
/* Name and version string of this plugin. The suffix is "(i)" when the file
   is compiled with SQUEAK_BUILTIN_PLUGIN defined and "(e)" otherwise.
   NOTE(review): presumably the value answered by getModuleName() - confirm. */
static const char *moduleName =
#ifdef SQUEAK_BUILTIN_PLUGIN
	"BitBltPlugin VMMaker.oscog-eem.2493 (i)"
#else
	"BitBltPlugin VMMaker.oscog-eem.2493 (e)"
#endif
;
static sqInt noHalftone;
static sqInt noSource;
static sqInt numGCsOnInvocation;
static sqInt nWords;
static void *opTable[43];
static sqInt preload;
static int (*querySurfaceFn)(sqIntptr_t, int*, int*, int*, int*);
static sqInt skew;
static sqInt sourceAlpha;
static sqInt sourceBits;
static sqInt sourceDelta;
static int sourceDepth;
static sqInt sourceForm;
static int sourceHeight;
static usqInt sourceIndex;
static int sourceMSB;
static int sourcePitch;
static sqInt sourcePPW;
static int sourceWidth;
static sqInt sourceX;
static sqInt sourceY;
static sqInt srcBitShift;
static int sx;
static int sy;
static unsigned char * ungammaLookupTable;
static int (*unlockSurfaceFn)(sqIntptr_t, int, int, int, int);
static sqInt vDir;
static sqInt warpAlignMask;
static sqInt warpAlignShift;
static int warpBitShiftTable[32];
static sqInt warpSrcMask;
static sqInt warpSrcShift;
static sqInt width;


	/* BitBltSimulation>>#addWord:with: */
/* Answer the sum of sourceWord and destinationWord. The addition is carried
   out in unsigned int arithmetic, so a sum that overflows wraps around. */
static unsigned int
addWordwith(unsigned int sourceWord, unsigned int destinationWord)
{
	const unsigned int sum = destinationWord + sourceWord;

	return sum;
}

	/* BitBltSimulation>>#affectedBottom */
/* Accessor: answer the current value of the file-scope variable affectedB. */
static sqInt
affectedBottom(void)
{
	const sqInt bottomEdge = affectedB;

	return bottomEdge;
}

	/* BitBltSimulation>>#affectedLeft */
/* Accessor: answer the current value of the file-scope variable affectedL. */
static sqInt
affectedLeft(void)
{
	const sqInt leftEdge = affectedL;

	return leftEdge;
}

	/* BitBltSimulation>>#affectedRight */
/* Accessor: answer the current value of the file-scope variable affectedR. */
static sqInt
affectedRight(void)
{
	const sqInt rightEdge = affectedR;

	return rightEdge;
}

	/* BitBltSimulation>>#affectedTop */
/* Accessor: answer the current value of the file-scope variable affectedT. */
static sqInt
affectedTop(void)
{
	const sqInt topEdge = affectedT;

	return topEdge;
}

	/* BitBltSimulation>>#alphaBlendConst:with: */
static unsigned int
alphaBlendConstwith(unsigned int sourceWord, unsigned int destinationWord)
{
	return alphaBlendConstwithpaintMode(sourceWord, destinationWord, 0);
}


/*	Blend sourceWord with destinationWord using the constant alpha held in
	the file-scope variable sourceAlpha (0 meaning 0.0, 255 meaning 1.0).
	Every colour component is computed independently as
	alpha*source + (1.0-alpha)*dest.
	A true paintMode means: leave the destination pixel alone whenever the
	source pixel value is zero. */
/*	Only destination depths of 16 and more are handled; for anything
	shallower destinationWord is answered unchanged.
	With one pixel per word (destPPW = 1) all four 8-bit components, alpha
	included, are blended. Otherwise every pixel of the word is blended as
	three 5-bit components, and pixels whose bits are not selected by
	destMask are left untouched.
	Normal color mapping allows sources of lower depths, and results can be
	mapped directly by truncation, so no extra color maps are needed.
	Storing into any depth would require two further colormaps, as is the
	case with WarpBlt.
 */

	/* BitBltSimulation>>#alphaBlendConst:with:paintMode: */
static unsigned int
alphaBlendConstwithpaintMode(unsigned int sourceWord, unsigned int destinationWord, sqInt paintMode)
{
	unsigned int alphaGreen;
	sqInt bitsPerComponent;
	unsigned int channel;
	sqInt component;
	unsigned int componentMask;
	unsigned int composed;
	unsigned int dstPixel;
	unsigned int dstRemaining;
	unsigned int inverseAlpha;
	unsigned int maskRemaining;
	unsigned int outWord;
	sqInt pixelIndex;
	unsigned int pixelMask;
	sqInt pixelShift;
	unsigned int redBlue;
	sqInt shift;
	unsigned int srcPixel;
	unsigned int srcRemaining;

	if (destDepth < 16) {
		return destinationWord;
	}
	inverseAlpha = 0xFF - sourceAlpha;

	if (destPPW == 1) {

		/* 32bpp blends include alpha */
		if (paintMode && (sourceWord == 0)) {

			/* painting a transparent pixel: destination stays as it is */
			return destinationWord;
		}

		/* red and blue, each in its own 16-bit lane */
		redBlue = (((sourceWord & 0xFF00FF) * sourceAlpha) + ((destinationWord & 0xFF00FF) * inverseAlpha)) + 0xFF00FF;

		/* alpha and green, moved down 8 bits into the same two lanes */
		alphaGreen = ((((((usqInt) sourceWord) >> 8) & 0xFF00FF) * sourceAlpha) + (((((usqInt) destinationWord) >> 8) & 0xFF00FF) * inverseAlpha)) + 0xFF00FF;

		/* divide both lanes by 255 */
		redBlue = (((usqInt) (redBlue + ((((usqInt) (redBlue - 0x10001)) >> 8) & 0xFF00FF))) >> 8) & 0xFF00FF;
		alphaGreen = (((usqInt) (alphaGreen + ((((usqInt) (alphaGreen - 0x10001)) >> 8) & 0xFF00FF))) >> 8) & 0xFF00FF;
		return redBlue | (((usqInt)(alphaGreen) << 8));
	}

	/* Several pixels per word: walk them from the low bits upwards */
	outWord = destinationWord;
	pixelMask = maskTable[destDepth];
	bitsPerComponent = 5;
	componentMask = 0x1F;
	maskRemaining = destMask;
	dstRemaining = destinationWord;
	srcRemaining = sourceWord;
	for (pixelIndex = 0; pixelIndex < destPPW; pixelIndex += 1) {
		srcPixel = srcRemaining & pixelMask;
		if (((maskRemaining & pixelMask) != 0)
		 && !(paintMode && (srcPixel == 0))) {
			dstPixel = dstRemaining & pixelMask;
			composed = 0;
			for (component = 0; component < 3; component += 1) {
				shift = component * bitsPerComponent;

				/* the + 0xFE rounds the division by 255 upwards */
				channel = ((((((((usqInt) srcPixel) >> shift) & componentMask) * sourceAlpha) + (((((usqInt) dstPixel) >> shift) & componentMask) * inverseAlpha)) + 0xFE) / 0xFF) & componentMask;
				composed = composed | (((usqInt)(channel) << shift));
			}

			/* replace this pixel's bits in the result word */
			pixelShift = pixelIndex * 16;
			outWord = (outWord & ((unsigned int)~(((usqInt)(pixelMask) << pixelShift)))) | (((usqInt)(composed) << pixelShift));
		}
		maskRemaining = ((usqInt) maskRemaining) >> destDepth;
		srcRemaining = ((usqInt) srcRemaining) >> destDepth;
		dstRemaining = ((usqInt) dstRemaining) >> destDepth;
	}
	return outWord;
}


/*	Blend sourceWord over destinationWord using the alpha found in the high
	8 bits of sourceWord (0 meaning 0.0, 255 meaning 1.0).
	In contrast to alphaBlend:with: the color produced is

	srcColor + (1-srcAlpha) * dstColor

	i.e. the source color is assumed to be scaled by its alpha already.
	The destination share is scaled with a shift by 8 (a division by 256),
	and any component whose sum no longer fits in 8 bits is saturated to 255.
 */

	/* BitBltSimulation>>#alphaBlendScaled:with: */
static unsigned int
alphaBlendScaledwith(unsigned int sourceWord, unsigned int destinationWord)
{
	/* two 8-bit components held 16 bits apart */
	const unsigned int laneMask = 0xFF00FF;
	/* bit 8 of each lane, i.e. the carry out of an 8-bit component */
	const unsigned int carryBits = 0x1000100;
	unsigned int alphaGreen;
	unsigned int inverseAlpha;
	unsigned int redBlue;

	/* Do NOT inline this into optimized loops */

	/* High 8 bits of source pixel is source opacity (ARGB format) */
	inverseAlpha = 0xFF - (sourceWord >> 24);

	/* red and blue: scaled destination plus source */
	redBlue = ((((destinationWord & laneMask) * inverseAlpha) >> 8) & laneMask)
		+ (sourceWord & laneMask);

	/* alpha and green: same computation on the word moved down 8 bits */
	alphaGreen = (((((destinationWord >> 8) & laneMask) * inverseAlpha) >> 8) & laneMask)
		+ ((sourceWord >> 8) & laneMask);

	/* a set carry bit times 0xFF fills the overflowing component with ones */
	redBlue = (redBlue & laneMask) | (((redBlue & carryBits) * 0xFF) >> 8);
	alphaGreen = ((alphaGreen & laneMask) << 8) | ((alphaGreen & carryBits) * 0xFF);
	return alphaGreen | redBlue;
}


/*	Blend sourceWord with destinationWord, assuming both are 32-bit pixels.
	The source is assumed to have 255*alpha in the high 8 bits of each pixel.
	An alpha of 0 answers destinationWord unchanged and an alpha of 255
	answers sourceWord unchanged. For every other alpha the blend produced is
	alpha*source + (1-alpha)*dest, computed independently on each 8-bit
	component. For the alpha component itself the source value is taken to
	be 255, so the high byte of the result combines full opacity (weighted by
	alpha) with the high byte of destinationWord (weighted by 1-alpha). */

	/* BitBltSimulation>>#alphaBlend:with: */
static unsigned int
alphaBlendwith(unsigned int sourceWord, unsigned int destinationWord)
{
	/* two 8-bit components held 16 bits apart */
	const unsigned int laneMask = 0xFF00FF;
	/* High 8 bits of source pixel */
	const unsigned int alpha = sourceWord >> 24;
	unsigned int alphaGreen;
	unsigned int inverseAlpha;
	unsigned int redBlue;

	switch (alpha) {
	case 0:
		/* fully transparent source */
		return destinationWord;
	case 0xFF:
		/* fully opaque source */
		return sourceWord;
	default:
		break;
	}
	inverseAlpha = 0xFF - alpha;

	/* red and blue */
	redBlue = (((sourceWord & laneMask) * alpha)
		+ ((destinationWord & laneMask) * inverseAlpha)) + laneMask;

	/* alpha and green; OR-ing in 0xFF0000 forces the source alpha lane to 255 */
	alphaGreen = (((((sourceWord >> 8) | 0xFF0000) & laneMask) * alpha)
		+ (((destinationWord >> 8) & laneMask) * inverseAlpha)) + laneMask;

	/* divide both lanes by 255 */
	redBlue = ((redBlue + (((redBlue - 0x10001) >> 8) & laneMask)) >> 8) & laneMask;
	alphaGreen = ((alphaGreen + (((alphaGreen - 0x10001) >> 8) & laneMask)) >> 8) & laneMask;
	return redBlue | (alphaGreen << 8);
}

	/* BitBltSimulation>>#alphaPaintConst:with: */
static unsigned int
alphaPaintConstwith(unsigned int sourceWord, unsigned int destinationWord)
{
	if (sourceWord == 0) {
		return destinationWord;
	}
	return alphaBlendConstwithpaintMode(sourceWord, destinationWord, 1);
}


/*	Alpha-blend a rectangle of 32-bit source pixels onto 16-bit destination
	pixels, dithering every stored pixel with ditherMatrix4x4 and
	dither8Lookup.
	This version assumes
	combinationRule = 34
	sourcePixSize = 32
	destPixSize = 16
	sourceForm ~= destForm.
	The rectangle is bbW pixels wide and bbH rows high, read from (sx, sy)
	and written to (dx, dy). Two destination pixels share one 32-bit word, so
	every store is a read-modify-write that preserves the other half-word.
	Per source pixel: alpha 255 stores the dithered source, alpha 0 leaves
	the destination alone, and anything in between blends with the existing
	destination pixel through alphaBlendScaledwith before dithering.
	Overwrites the file-scope variable mask1. Always answers 0.
	 */

	/* BitBltSimulation>>#alphaSourceBlendBits16 */
static sqInt
alphaSourceBlendBits16(void)
{
	sqInt addThreshold;
	sqInt addThreshold1;
	int deltaX;
	int deltaY;
	unsigned int destWord;
	int ditherBase;
	int ditherIndex;
	int ditherThreshold;
	sqInt dstIndex;
	unsigned int dstMask;
	sqInt dstValue;
	sqInt dstValue1;
	int dstY;
	unsigned int sourceWord;
	unsigned int srcAlpha;
	sqInt srcIndex;
	int srcShift;
	int srcY;


	/* This particular method should be optimized in itself */

	/* So we can pre-decrement */
	deltaY = bbH + 1;
	srcY = sy;
	dstY = dy;

	/* Bit position (0 or 16) derived from the parity of dx; mirrored when
	   destMSB is set */
	srcShift = (dx & 1) * 16;
	if (destMSB) {
		srcShift = 16 - srcShift;
	}

	/* This is the outer loop */
	/* mask1 selects the half-word to PRESERVE for the first pixel of each
	   row: 0xFFFF0000 when srcShift is 0, 0xFFFF when srcShift is 16 */
	mask1 = 0xFFFFU << (16 - srcShift);
	while (((deltaY -= 1)) != 0) {
		/* Byte addresses of the first source and destination word of this row */
		srcIndex = (sourceBits + (srcY * sourcePitch)) + (sx * 4);
		dstIndex = (destBits + (dstY * destPitch)) + ((dx / 2) * 4);

		/* Row of the 4x4 dither matrix used for this destination row */
		ditherBase = (dstY & 3) * 4;

		/* For pre-increment */
		ditherIndex = (sx & 3) - 1;

		/* So we can pre-decrement */
		deltaX = bbW + 1;
		dstMask = mask1;

		/* Re-derive the shift of the pixel being written from the keep-mask:
		   keeping the low half means writing the high half, and vice versa */
		if (dstMask == 0xFFFF) {
			srcShift = 16;
		}
		else {
			srcShift = 0;
		}
		while (((deltaX -= 1)) != 0) {
			/* Advance the dither column modulo 4 and fetch the threshold */
			ditherThreshold = ditherMatrix4x4[ditherBase + ((ditherIndex = (ditherIndex + 1) & 3))];
			assert((((usqInt)srcIndex)) < endOfSource);
			sourceWord = long32At(srcIndex);
			srcAlpha = ((usqInt) sourceWord) >> 24;
			if (srcAlpha == 0xFF) {

				/* Dither from 32 to 16 bit */
				/* begin dither32To16:threshold: */
				/* Each 8-bit component is looked up under the current
				   threshold and the three results are packed 5 bits apart */
				addThreshold = ((usqInt) ditherThreshold << 8);
				sourceWord = ((((usqInt) (dither8Lookup[addThreshold + ((((usqInt) sourceWord >> 16)) & 0xFF)]) << 10)) + (((usqInt) (dither8Lookup[addThreshold + ((((usqInt) sourceWord >> 8)) & 0xFF)]) << 5))) + (dither8Lookup[addThreshold + (sourceWord & 0xFF)]);

				/* A dithered value of 0 is stored as 1 instead.
				   NOTE(review): presumably because pixel value 0 means
				   transparent in a 16-bit form - confirm. */
				if (sourceWord == 0) {
					sourceWord = 1U << srcShift;
				}
				else {
					sourceWord = ((usqInt)(sourceWord) << srcShift);
				}
				/* begin dstLongAt:put:mask: */
				/* Keep the neighbouring pixel, replace the current one */
				assert((((usqInt)dstIndex)) < endOfDestination);
				dstValue = long32At(dstIndex);
				dstValue = dstValue & dstMask;
				dstValue = dstValue | sourceWord;
				/* begin dstLongAt:put: */
				long32Atput(dstIndex, dstValue);
			}
			else {

				/* srcAlpha ~= 255 */
				if (!(srcAlpha == 0)) {

					/* 0 < srcAlpha < 255 */
					/* If we have to mix colors then just copy a single word */
					/* Isolate the current destination pixel and move it to
					   the low 16 bits */
					assert((((usqInt)dstIndex)) < endOfDestination);
					destWord = long32At(dstIndex);
					destWord = destWord & ((unsigned int)~dstMask);

					/* Expand from 16 to 32 bit by adding zero bits */
					destWord = ((usqInt) destWord) >> srcShift;

					/* Mix colors */
					/* The three 5-bit components go to the top of their 8-bit
					   slots and the alpha byte is set to 0xFF */
					destWord = ((((usqInt) (destWord & 0x7C00) << 9)) | (((usqInt) (destWord & 0x3E0) << 6))) | ((((usqInt) (destWord & 0x1F) << 3)) | 0xFF000000U);

					/* And dither */
					sourceWord = alphaBlendScaledwith(sourceWord, destWord);
					/* begin dither32To16:threshold: */
					addThreshold1 = ((usqInt) ditherThreshold << 8);
					sourceWord = ((((usqInt) (dither8Lookup[addThreshold1 + ((((usqInt) sourceWord >> 16)) & 0xFF)]) << 10)) + (((usqInt) (dither8Lookup[addThreshold1 + ((((usqInt) sourceWord >> 8)) & 0xFF)]) << 5))) + (dither8Lookup[addThreshold1 + (sourceWord & 0xFF)]);

					/* Same substitution of 1 for 0 as in the opaque case */
					if (sourceWord == 0) {
						sourceWord = 1U << srcShift;
					}
					else {
						sourceWord = ((usqInt)(sourceWord) << srcShift);
					}
					/* begin dstLongAt:put:mask: */
					assert((((usqInt)dstIndex)) < endOfDestination);
					dstValue1 = long32At(dstIndex);
					dstValue1 = dstValue1 & dstMask;
					dstValue1 = dstValue1 | sourceWord;
					/* begin dstLongAt:put: */
					long32Atput(dstIndex, dstValue1);
				}
			}
			srcIndex += 4;

			/* Step to the next destination word once its second pixel has
			   been handled: that is the shift-0 pixel when destMSB is set
			   and the shift-16 pixel otherwise */
			if (destMSB) {
				if (srcShift == 0) {
					dstIndex += 4;
				}
			}
			else {
				if (!(srcShift == 0)) {
					dstIndex += 4;
				}
			}

			/* Toggle between 0 and 16 */
			srcShift = srcShift ^ 16;

			/* The keep-mask flips to the other half-word as well */
			dstMask = (unsigned int)~dstMask;
		}
		srcY += 1;
		dstY += 1;
	}
	return 0;
}


/*	Alpha-blend a rectangle of 32-bit source pixels onto 32-bit destination
	pixels.
	This version assumes
	combinationRule = 34
	sourcePixSize = destPixSize = 32
	sourceForm ~= destForm.
	The rectangle is bbW pixels wide and bbH rows high, read from (sx, sy)
	and written to (dx, dy).
	Note: The inner loop has been optimized for dealing
	with the special cases of srcAlpha = 0.0 and srcAlpha = 1.0:
	runs of opaque pixels are copied and runs of transparent pixels are
	skipped in tight loops; only pixels with an alpha in between go through
	alphaBlendScaledwith. Always answers 0.
	 */

	/* BitBltSimulation>>#alphaSourceBlendBits32 */
static sqInt
alphaSourceBlendBits32(void)
{
	int deltaX;
	int deltaY;
	unsigned int destWord;
	sqInt dstIndex;
	int dstY;
	unsigned int sourceWord;
	unsigned int srcAlpha;
	sqInt srcIndex;
	int srcY;


	/* This particular method should be optimized in itself */
	/* Give the compiler a couple of hints */
	/* The following should be declared as pointers so the compiler will
	   notice that they're used for accessing memory locations
	   (good to know on an Intel architecture) but then the increments
	   would be different between ST code and C code so must hope the
	   compiler notices what happens (MS Visual C does) */

	/* So we can pre-decrement */
	deltaY = bbH + 1;
	srcY = sy;

	/* This is the outer loop */
	dstY = dy;
	while (((deltaY -= 1)) != 0) {
		/* Byte addresses of the first source and destination pixel of this row */
		srcIndex = (sourceBits + (srcY * sourcePitch)) + (sx * 4);
		dstIndex = (destBits + (dstY * destPitch)) + (dx * 4);

		/* So we can pre-decrement */
		/* This is the inner loop */
		deltaX = bbW + 1;
		while (((deltaX -= 1)) != 0) {
			assert((((usqInt)srcIndex)) < endOfSource);
			sourceWord = long32At(srcIndex);
			srcAlpha = ((usqInt) sourceWord) >> 24;
			if (srcAlpha == 0xFF) {
				/* Opaque: the source pixel replaces the destination pixel.
				   NOTE(review): unlike the blending branch below, this
				   store is not preceded by an assert on dstIndex. */
				long32Atput(dstIndex, sourceWord);
				srcIndex += 4;

				/* Now copy as many words as possible with alpha = 255 */
				dstIndex += 4;
				while ((((deltaX -= 1)) != 0)
				 && ((((usqInt) ((assert((((usqInt)srcIndex)) < endOfSource),
				(sourceWord = long32At(srcIndex))))) >> 24) == 0xFF)) {
					long32Atput(dstIndex, sourceWord);
					srcIndex += 4;
					dstIndex += 4;
				}

				/* Undo the last decrement: either the row is finished (the
				   enclosing loop test then brings deltaX back to 0) or the
				   pixel that ended the run is handled by the next pass */
				deltaX += 1;
			}
			else {

				/* srcAlpha ~= 255 */
				if (srcAlpha == 0) {
					/* Transparent: the destination pixel is left untouched */
					srcIndex += 4;

					/* Now skip as many words as possible, */
					dstIndex += 4;
					while ((((deltaX -= 1)) != 0)
					 && ((((usqInt) ((assert((((usqInt)srcIndex)) < endOfSource),
					(sourceWord = long32At(srcIndex))))) >> 24) == 0)) {
						srcIndex += 4;
						dstIndex += 4;
					}

					/* Same compensation as after the opaque run above */
					deltaX += 1;
				}
				else {

					/* 0 < srcAlpha < 255 */
					/* If we have to mix colors then just copy a single word */
					assert((((usqInt)dstIndex)) < endOfDestination);
					destWord = long32At(dstIndex);
					destWord = alphaBlendScaledwith(sourceWord, destWord);
					long32Atput(dstIndex, destWord);
					srcIndex += 4;
					dstIndex += 4;
				}
			}
		}
		srcY += 1;
		dstY += 1;
	}
	return 0;
}


/*	This version assumes 
	combinationRule = 34
	sourcePixSize = 32
	destPixSize = 8
	sourceForm ~= destForm.
	Note: This is not real blending since we don't have the source colors
	available.  */

	/* BitBltSimulation>>#alphaSourceBlendBits8 */
/* Alpha-blend a 32-bit source rectangle into an 8-bit destination, one
   pixel per inner-loop iteration.
   Reads the rectangle from the globals sx, sy, dx, dy, bbW and bbH and
   addresses memory through sourceBits/sourcePitch and destBits/destPitch.
   Overwrites the globals mask1 (bit shift of the first destination pixel
   inside its word) and mask2 (mask keeping every other pixel of that word).
   Always returns 0. */
static sqInt
alphaSourceBlendBits8(void)
{
	sqInt adjust;
	int deltaX;
	int deltaY;
	unsigned int destWord;
	sqInt dstIndex;
	unsigned int dstMask;
	sqInt dstValue;
	int dstY;
	sqInt mapperFlags;
	unsigned int *mappingTable;
	sqInt pv;
	unsigned int sourceWord;
	unsigned int srcAlpha;
	sqInt srcIndex;
	sqInt srcShift;
	int srcY;
	/* 256-entry table indexed by an 8-bit destination pixel value; the entry
	   is the 32-bit word handed to alphaBlendScaledwith() as the destination
	   colour (presumably the default 8-bit palette -- confirm). */
	static unsigned int theTable[256] = { 
0x0, 0xFF000001, 0xFFFFFFFF, 0xFF808080, 0xFFFF0000, 0xFF00FF00, 0xFF0000FF, 0xFF00FFFF, 
0xFFFFFF00, 0xFFFF00FF, 0xFF202020, 0xFF404040, 0xFF606060, 0xFF9F9F9F, 0xFFBFBFBF, 0xFFDFDFDF, 
0xFF080808, 0xFF101010, 0xFF181818, 0xFF282828, 0xFF303030, 0xFF383838, 0xFF484848, 0xFF505050, 
0xFF585858, 0xFF686868, 0xFF707070, 0xFF787878, 0xFF878787, 0xFF8F8F8F, 0xFF979797, 0xFFA7A7A7, 
0xFFAFAFAF, 0xFFB7B7B7, 0xFFC7C7C7, 0xFFCFCFCF, 0xFFD7D7D7, 0xFFE7E7E7, 0xFFEFEFEF, 0xFFF7F7F7, 
0xFF000001, 0xFF003300, 0xFF006600, 0xFF009900, 0xFF00CC00, 0xFF00FF00, 0xFF000033, 0xFF003333, 
0xFF006633, 0xFF009933, 0xFF00CC33, 0xFF00FF33, 0xFF000066, 0xFF003366, 0xFF006666, 0xFF009966, 
0xFF00CC66, 0xFF00FF66, 0xFF000099, 0xFF003399, 0xFF006699, 0xFF009999, 0xFF00CC99, 0xFF00FF99, 
0xFF0000CC, 0xFF0033CC, 0xFF0066CC, 0xFF0099CC, 0xFF00CCCC, 0xFF00FFCC, 0xFF0000FF, 0xFF0033FF, 
0xFF0066FF, 0xFF0099FF, 0xFF00CCFF, 0xFF00FFFF, 0xFF330000, 0xFF333300, 0xFF336600, 0xFF339900, 
0xFF33CC00, 0xFF33FF00, 0xFF330033, 0xFF333333, 0xFF336633, 0xFF339933, 0xFF33CC33, 0xFF33FF33, 
0xFF330066, 0xFF333366, 0xFF336666, 0xFF339966, 0xFF33CC66, 0xFF33FF66, 0xFF330099, 0xFF333399, 
0xFF336699, 0xFF339999, 0xFF33CC99, 0xFF33FF99, 0xFF3300CC, 0xFF3333CC, 0xFF3366CC, 0xFF3399CC, 
0xFF33CCCC, 0xFF33FFCC, 0xFF3300FF, 0xFF3333FF, 0xFF3366FF, 0xFF3399FF, 0xFF33CCFF, 0xFF33FFFF, 
0xFF660000, 0xFF663300, 0xFF666600, 0xFF669900, 0xFF66CC00, 0xFF66FF00, 0xFF660033, 0xFF663333, 
0xFF666633, 0xFF669933, 0xFF66CC33, 0xFF66FF33, 0xFF660066, 0xFF663366, 0xFF666666, 0xFF669966, 
0xFF66CC66, 0xFF66FF66, 0xFF660099, 0xFF663399, 0xFF666699, 0xFF669999, 0xFF66CC99, 0xFF66FF99, 
0xFF6600CC, 0xFF6633CC, 0xFF6666CC, 0xFF6699CC, 0xFF66CCCC, 0xFF66FFCC, 0xFF6600FF, 0xFF6633FF, 
0xFF6666FF, 0xFF6699FF, 0xFF66CCFF, 0xFF66FFFF, 0xFF990000, 0xFF993300, 0xFF996600, 0xFF999900, 
0xFF99CC00, 0xFF99FF00, 0xFF990033, 0xFF993333, 0xFF996633, 0xFF999933, 0xFF99CC33, 0xFF99FF33, 
0xFF990066, 0xFF993366, 0xFF996666, 0xFF999966, 0xFF99CC66, 0xFF99FF66, 0xFF990099, 0xFF993399, 
0xFF996699, 0xFF999999, 0xFF99CC99, 0xFF99FF99, 0xFF9900CC, 0xFF9933CC, 0xFF9966CC, 0xFF9999CC, 
0xFF99CCCC, 0xFF99FFCC, 0xFF9900FF, 0xFF9933FF, 0xFF9966FF, 0xFF9999FF, 0xFF99CCFF, 0xFF99FFFF, 
0xFFCC0000, 0xFFCC3300, 0xFFCC6600, 0xFFCC9900, 0xFFCCCC00, 0xFFCCFF00, 0xFFCC0033, 0xFFCC3333, 
0xFFCC6633, 0xFFCC9933, 0xFFCCCC33, 0xFFCCFF33, 0xFFCC0066, 0xFFCC3366, 0xFFCC6666, 0xFFCC9966, 
0xFFCCCC66, 0xFFCCFF66, 0xFFCC0099, 0xFFCC3399, 0xFFCC6699, 0xFFCC9999, 0xFFCCCC99, 0xFFCCFF99, 
0xFFCC00CC, 0xFFCC33CC, 0xFFCC66CC, 0xFFCC99CC, 0xFFCCCCCC, 0xFFCCFFCC, 0xFFCC00FF, 0xFFCC33FF, 
0xFFCC66FF, 0xFFCC99FF, 0xFFCCCCFF, 0xFFCCFFFF, 0xFFFF0000, 0xFFFF3300, 0xFFFF6600, 0xFFFF9900, 
0xFFFFCC00, 0xFFFFFF00, 0xFFFF0033, 0xFFFF3333, 0xFFFF6633, 0xFFFF9933, 0xFFFFCC33, 0xFFFFFF33, 
0xFFFF0066, 0xFFFF3366, 0xFFFF6666, 0xFFFF9966, 0xFFFFCC66, 0xFFFFFF66, 0xFFFF0099, 0xFFFF3399, 
0xFFFF6699, 0xFFFF9999, 0xFFFFCC99, 0xFFFFFF99, 0xFFFF00CC, 0xFFFF33CC, 0xFFFF66CC, 0xFFFF99CC, 
0xFFFFCCCC, 0xFFFFFFCC, 0xFFFF00FF, 0xFFFF33FF, 0xFFFF66FF, 0xFFFF99FF, 0xFFFFCCFF, 0xFFFFFFFF};;
	sqInt val;

	mappingTable = theTable;
	/* The colour-map flags are used with the ColorMapNewStyle bit cleared. */
	mapperFlags = cmFlags & ((unsigned int)~ColorMapNewStyle);

	/* So we can pre-decrement */
	deltaY = bbH + 1;
	srcY = sy;
	dstY = dy;
	/* mask1 becomes the bit offset of pixel dx inside its 32-bit word
	   (four 8-bit pixels per word); mirrored when the destination is MSB-first. */
	mask1 = (dx & 3) * 8;
	if (destMSB) {
		mask1 = 24 - mask1;
	}
	/* mask2 keeps the three other pixels of that word. */
	mask2 = AllOnes ^ (0xFFU << mask1);
	/* adjust alternates between 0 and 522133279 (0x1F1F1F1F) from pixel to
	   pixel and from row to row, starting from the parity of dx and dy.
	   Presumably a checkerboard dither -- confirm. */
	if ((dx & 1) == 0) {
		adjust = 0;
	}
	else {
		adjust = 522133279;
	}
	if ((dy & 1) == 0) {
		adjust = adjust ^ 522133279;
	}
	while (((deltaY -= 1)) != 0) {
		adjust = adjust ^ 522133279;
		srcIndex = (sourceBits + (srcY * sourcePitch)) + (sx * 4);
		dstIndex = (destBits + (dstY * destPitch)) + ((dx / 4) * 4);

		/* So we can pre-decrement */
		deltaX = bbW + 1;
		srcShift = mask1;

		/* This is the inner loop */
		dstMask = mask2;
		while (((deltaX -= 1)) != 0) {
			/* Fetch the source pixel; where adjust is non-zero the low five
			   bits of every byte are forced to ones. */
			sourceWord = (((assert((((usqInt)srcIndex)) < endOfSource),
long32At(srcIndex))) & ((unsigned int)~adjust)) + adjust;
			srcAlpha = ((usqInt) sourceWord) >> 24;
			if (srcAlpha > 0x1F) {

				/* Everything below 31 is transparent */
				if (srcAlpha < 224) {

					/* Everything above 224 is opaque */
					/* Partially transparent: extract the current destination
					   pixel, expand it through the table and blend. */
					assert((((usqInt)dstIndex)) < endOfDestination);
					destWord = long32At(dstIndex);
					destWord = destWord & ((unsigned int)~dstMask);
					destWord = ((usqInt) destWord) >> srcShift;
					destWord = mappingTable[destWord];
					sourceWord = alphaBlendScaledwith(sourceWord, destWord);
				}
				/* begin mapPixel:flags: */
				/* Map the 32-bit result back to a destination pixel value:
				   optional per-channel shift/mask stage, then optional table lookup. */
				pv = sourceWord;
				if ((mapperFlags & ColorMapPresent) != 0) {
					if ((mapperFlags & ColorMapFixedPart) != 0) {
						/* begin rgbMapPixel:flags: */
						/* A negative shift-table entry means shift right by its magnitude. */
						val = (((((int) (cmShiftTable[0]))) < 0) ? ((usqInt) (sourceWord & (cmMaskTable[0])) >> -(((int) (cmShiftTable[0])))) : ((usqInt) (sourceWord & (cmMaskTable[0])) << (((int) (cmShiftTable[0])))));
						val = val | ((((((int) (cmShiftTable[1]))) < 0) ? ((usqInt) (sourceWord & (cmMaskTable[1])) >> -(((int) (cmShiftTable[1])))) : ((usqInt) (sourceWord & (cmMaskTable[1])) << (((int) (cmShiftTable[1]))))));
						val = val | ((((((int) (cmShiftTable[2]))) < 0) ? ((usqInt) (sourceWord & (cmMaskTable[2])) >> -(((int) (cmShiftTable[2])))) : ((usqInt) (sourceWord & (cmMaskTable[2])) << (((int) (cmShiftTable[2]))))));
						pv = val | ((((((int) (cmShiftTable[3]))) < 0) ? ((usqInt) (sourceWord & (cmMaskTable[3])) >> -(((int) (cmShiftTable[3])))) : ((usqInt) (sourceWord & (cmMaskTable[3])) << (((int) (cmShiftTable[3]))))));
						/* A non-zero source must not map to pixel value 0. */
						if ((pv == 0)
						 && (sourceWord != 0)) {
							pv = 1;
						}
					}
					if ((mapperFlags & ColorMapIndexedPart) != 0) {
						pv = cmLookupTable[pv & cmMask];
					}
				}
				sourceWord = pv;

				/* Store back */
				sourceWord = ((usqInt)(sourceWord) << srcShift);
				/* begin dstLongAt:put:mask: */
				/* Read-modify-write: keep the bits selected by dstMask, OR in the new pixel. */
				assert((((usqInt)dstIndex)) < endOfDestination);
				dstValue = long32At(dstIndex);
				dstValue = dstValue & dstMask;
				dstValue = dstValue | sourceWord;
				/* begin dstLongAt:put: */
				long32Atput(dstIndex, dstValue);
			}
			srcIndex += 4;
			/* Advance to the next 8-bit destination slot; after the last slot
			   of a word, step dstIndex to the next word and reset shift and mask. */
			if (destMSB) {
				if (srcShift == 0) {
					dstIndex += 4;
					srcShift = 24;
					dstMask = 0xFFFFFF;
				}
				else {
					srcShift -= 8;
					dstMask = (((usqInt) dstMask) >> 8) | 0xFF000000U;
				}
			}
			else {
				if (srcShift == 24) {
					dstIndex += 4;
					srcShift = 0;
					dstMask = 0xFFFFFF00U;
				}
				else {
					srcShift += 8;
					dstMask = (((usqInt)(dstMask) << 8)) | 0xFF;
				}
			}
			adjust = adjust ^ 522133279;
		}
		srcY += 1;
		dstY += 1;
	}
	return 0;
}

	/* BitBltSimulation>>#bitAndInvert:with: */
/* Combination function: the source word ANDed with the complement of the
   destination word. */
static unsigned int
bitAndInvertwith(unsigned int sourceWord, unsigned int destinationWord)
{
	const unsigned int invertedDestination = ~destinationWord;

	return invertedDestination & sourceWord;
}

	/* BitBltSimulation>>#bitAnd:with: */
/* Combination function: bitwise AND of the source and destination words. */
static unsigned int
bitAndwith(unsigned int sourceWord, unsigned int destinationWord)
{
	unsigned int combined = destinationWord;

	combined &= sourceWord;
	return combined;
}

	/* BitBltSimulation>>#bitInvertAndInvert:with: */
/* Combination function: complement of the source ANDed with the complement
   of the destination (written here in its De Morgan form, NOR). */
static unsigned int
bitInvertAndInvertwith(unsigned int sourceWord, unsigned int destinationWord)
{
	return (unsigned int)~(sourceWord | destinationWord);
}

	/* BitBltSimulation>>#bitInvertAnd:with: */
/* Combination function: the complement of the source word ANDed with the
   destination word. */
static unsigned int
bitInvertAndwith(unsigned int sourceWord, unsigned int destinationWord)
{
	const unsigned int invertedSource = ~sourceWord;

	return destinationWord & invertedSource;
}

	/* BitBltSimulation>>#bitInvertDestination:with: */
/* Combination function: the complement of the destination word.
   sourceWord is accepted for signature compatibility and is not used. */
static unsigned int
bitInvertDestinationwith(unsigned int sourceWord, unsigned int destinationWord)
{
	(void)sourceWord;
	return destinationWord ^ 0xFFFFFFFFU;
}

	/* BitBltSimulation>>#bitInvertOrInvert:with: */
/* Combination function: complement of the source ORed with the complement
   of the destination (written here in its De Morgan form, NAND). */
static unsigned int
bitInvertOrInvertwith(unsigned int sourceWord, unsigned int destinationWord)
{
	return (unsigned int)~(sourceWord & destinationWord);
}

	/* BitBltSimulation>>#bitInvertOr:with: */
/* Combination function: the complement of the source word ORed with the
   destination word. */
static unsigned int
bitInvertOrwith(unsigned int sourceWord, unsigned int destinationWord)
{
	const unsigned int invertedSource = ~sourceWord;

	return destinationWord | invertedSource;
}

	/* BitBltSimulation>>#bitInvertSource:with: */
/* Combination function: the complement of the source word.
   destinationWord is accepted for signature compatibility and is not used. */
static unsigned int
bitInvertSourcewith(unsigned int sourceWord, unsigned int destinationWord)
{
	(void)destinationWord;
	return sourceWord ^ 0xFFFFFFFFU;
}

	/* BitBltSimulation>>#bitInvertXor:with: */
/* Combination function: the complement of the source word XORed with the
   destination word (equivalently, the complement of source XOR destination). */
static unsigned int
bitInvertXorwith(unsigned int sourceWord, unsigned int destinationWord)
{
	return (unsigned int)~(sourceWord ^ destinationWord);
}

	/* BitBltSimulation>>#bitOrInvert:with: */
/* Combination function: the source word ORed with the complement of the
   destination word. */
static unsigned int
bitOrInvertwith(unsigned int sourceWord, unsigned int destinationWord)
{
	const unsigned int invertedDestination = ~destinationWord;

	return invertedDestination | sourceWord;
}

	/* BitBltSimulation>>#bitOr:with: */
/* Combination function: bitwise OR of the source and destination words. */
static unsigned int
bitOrwith(unsigned int sourceWord, unsigned int destinationWord)
{
	unsigned int combined = destinationWord;

	combined |= sourceWord;
	return combined;
}

	/* BitBltSimulation>>#bitXor:with: */
/* Combination function: bitwise exclusive OR of the source and destination
   words. */
static unsigned int
bitXorwith(unsigned int sourceWord, unsigned int destinationWord)
{
	unsigned int combined = destinationWord;

	combined ^= sourceWord;
	return combined;
}


/*	check for possible overlap of source and destination */
/*	ar 10/19/1999: This method requires surfaces to be locked. */

	/* BitBltSimulation>>#checkSourceOverlap */
/* When source and destination are the same form and the destination lies
   below the source, or on the same row to its right, reverse the copy
   direction so the transfer does not overwrite source pixels it has yet
   to read.  Updates vDir or hDir, moves sx/sy/dx/dy to the new starting
   corner, swaps mask1 and mask2 for a multi-word horizontal reversal, and
   recomputes destIndex and destDelta.  Always returns 0. */
static sqInt
checkSourceOverlap(void)
{
	sqInt savedMask;

	/* Different forms, or a destination above the source: nothing to adjust. */
	if ((sourceForm != destForm)
	 || (dy < sy)) {
		return 0;
	}
	if (dy > sy) {

		/* destination is lower: start from the bottom row and work upwards */
		vDir = -1;
		sy = (sy + bbH) - 1;
		dy = (dy + bbH) - 1;
	}
	else if (dx > sx) {

		/* same row (dy == sy here) with the destination to the right:
		   start from the right-hand edge and work leftwards */
		hDir = -1;
		sx = (sx + bbW) - 1;
		dx = (dx + bbW) - 1;

		/* first and last word masks trade places when there is more than one word */
		if (nWords > 1) {
			savedMask = mask1;
			mask1 = mask2;
			mask2 = savedMask;
		}
	}

	/* Recompute the destination start address and end-of-row delta for the
	   (possibly changed) start corner and directions. */
	destIndex = (destBits + (dy * destPitch)) + ((dx / destPPW) * 4);
	destDelta = (destPitch * vDir) - (4 * (nWords * hDir));
	return 0;
}

	/* BitBltSimulation>>#clearWord:with: */
/* Combination function: always yields zero; both arguments are accepted
   for signature compatibility and are not used. */
static unsigned int
clearWordwith(unsigned int sourceWord, unsigned int destinationWord)
{
	(void)sourceWord;
	(void)destinationWord;
	return 0U;
}


/*	clip and adjust source origin and extent appropriately */
/*	first in x */

	/* BitBltSimulation>>#clipRange */
/* Clip the requested transfer against the clip rectangle and, when a source
   is in use, against the bounds of the source form.
   Reads destX/destY, sourceX/sourceY, width/height, clipX/clipY,
   clipWidth/clipHeight and sourceWidth/sourceHeight; writes sx, sy, dx, dy,
   bbW and bbH.  bbW or bbH can come out zero or negative when nothing is
   left to copy.  Returns null when noSource is set, otherwise 0. */
static sqInt
clipRange(void)
{
	/* Horizontal: left edge of the clip rectangle. */
	if (destX < clipX) {

		/* skip the columns left of the clip rectangle in both forms */
		sx = sourceX + (clipX - destX);
		bbW = width - (clipX - destX);
		dx = clipX;
	}
	else {
		sx = sourceX;
		dx = destX;
		bbW = width;
	}

	/* Horizontal: right edge of the clip rectangle. */
	if ((clipX + clipWidth) < (dx + bbW)) {
		bbW = (clipX + clipWidth) - dx;
	}

	/* Vertical: top edge of the clip rectangle. */
	if (destY < clipY) {
		sy = (sourceY + clipY) - destY;
		bbH = height - (clipY - destY);
		dy = clipY;
	}
	else {
		sy = sourceY;
		dy = destY;
		bbH = height;
	}

	/* Vertical: bottom edge of the clip rectangle. */
	if ((clipY + clipHeight) < (dy + bbH)) {
		bbH = (clipY + clipHeight) - dy;
	}

	/* Without a source there are no source bounds to respect. */
	if (noSource) {
		return null;
	}

	/* Source bounds, horizontally: a negative sx moves the destination right. */
	if (sx < 0) {
		dx -= sx;
		bbW += sx;
		sx = 0;
	}
	if (sourceWidth < (sx + bbW)) {
		bbW = sourceWidth - sx;
	}

	/* Source bounds, vertically. */
	if (sy < 0) {
		dy -= sy;
		bbH += sy;
		sy = 0;
	}
	if (sourceHeight < (sy + bbH)) {
		bbH = sourceHeight - sy;
	}
	return 0;
}


/*	This function is exported for the Balloon engine */

	/* BitBltSimulation>>#copyBits */
/* Clip the pending operation, lock the surfaces, run the copy and unlock.
   Returns null (with an all-zero affected rectangle) when clipping leaves
   nothing to do, the result of primitiveFail() when the surfaces cannot be
   locked, and 0 otherwise. */
EXPORT(sqInt)
copyBits(void)
{
	clipRange();
	if (!((bbW > 0)
	 && (bbH > 0))) {

		/* empty after clipping: nothing is touched */
		affectedB = 0;
		affectedT = 0;
		affectedR = 0;
		affectedL = 0;
		return null;
	}
	if (!(lockSurfaces())) {
		return primitiveFail();
	}
	
#  if ENABLE_FAST_BLT

	/* The fast path may only be called when its supporting code is linked in. */
	copyBitsFastPathSpecialised();
#  else /* ENABLE_FAST_BLT */
	copyBitsLockedAndClipped();
#  endif /* ENABLE_FAST_BLT */
	unlockSurfaces();
	return 0;
}


/*	Perform the actual copyBits operation using the fast path specialised
	code; fail some cases by falling back to normal code.
	Assume: Surfaces have been locked and clipping was performed.
 */

	/* BitBltSimulation>>#copyBitsFastPathSpecialised */
/* Package the current operation into an operation_t and hand it to
   copyBitsDispatch().  The whole body is compiled only when ENABLE_FAST_BLT
   is set; otherwise the function just returns 0.
   Returns the result of primitiveFail() when the rule-41 argument check
   fails or when rules 30/31 are not given exactly one argument in the
   range 0..255; otherwise returns 0. */
static sqInt
copyBitsFastPathSpecialised(void)
{
	
#  if ENABLE_FAST_BLT
	operation_t op;

	/* Start from an empty affected rectangle. */
	affectedB = 0;
	affectedT = 0;
	affectedR = 0;
	affectedL = 0;

	copyBitsRule41Test();
	if (failed()) {
		return primitiveFail();
	}

	/* Rules 30 and 31 take the blend alpha as the single primitive argument. */
	if ((combinationRule == 30) || (combinationRule == 0x1F)) {
		if ((methodArgumentCount()) != 1) {
			return primitiveFail();
		}
		sourceAlpha = stackIntegerValue(0);
		if ((failed())
		 || (sourceAlpha < 0)
		 || (sourceAlpha > 0xFF)) {
			return primitiveFail();
		}
	}

	/* Rules 22 and 32 keep the empty affected rectangle set above; every
	   other rule reports the clipped destination rectangle. */
	if (!((combinationRule == 22) || (combinationRule == 32))) {
		affectedT = dy;
		affectedB = dy + bbH;
		affectedL = dx;
		affectedR = dx + bbW;
	}

	/* Rule and extent. */
	op.combinationRule = combinationRule;
	op.noSource = noSource;
	op.width = bbW;
	op.height = bbH;

	/* Destination form. */
	op.dest.bits = (void *) destBits;
	op.dest.pitch = destPitch;
	op.dest.depth = destDepth;
	op.dest.msb = destMSB;
	op.dest.x = dx;
	op.dest.y = dy;

	/* Source form. */
	op.src.bits = (void *) sourceBits;
	op.src.pitch = sourcePitch;
	op.src.depth = sourceDepth;
	op.src.msb = sourceMSB;
	op.src.x = sx;
	op.src.y = sy;

	/* Colour map. */
	op.cmFlags = cmFlags;
	op.cmMask = cmMask;
	op.cmShiftTable = (void *) cmShiftTable;
	op.cmMaskTable = (void *) cmMaskTable;
	op.cmLookupTable = (void *) cmLookupTable;

	/* Halftone. */
	op.noHalftone = noHalftone;
	op.halftoneHeight = halftoneHeight;
	op.halftoneBase = (void *) halftoneBase;

	/* Rule-specific extras. */
	switch (combinationRule) {
	case 30:
	case 31:
		op.opt.sourceAlpha = sourceAlpha;
		break;
	case 41:
		op.opt.componentAlpha.componentAlphaModeColor = componentAlphaModeColor;
		op.opt.componentAlpha.componentAlphaModeAlpha = componentAlphaModeAlpha;
		op.opt.componentAlpha.gammaLookupTable = (void *) gammaLookupTable;
		op.opt.componentAlpha.ungammaLookupTable = (void *) ungammaLookupTable;
		break;
	default:
		break;
	}

	/* Hand over to the specialised implementation. */
	copyBitsDispatch(&op);
#  endif /* ENABLE_FAST_BLT */
	return 0;
}


/*	Support for the balloon engine. */

	/* BitBltSimulation>>#copyBitsFrom:to:at: */
/* Copy the horizontal span from startX up to stopX on row yValue, with the
   same x used for source and destination, then push the affected rectangle
   to the display.  Always returns 0. */
EXPORT(sqInt)
copyBitsFromtoat(sqInt startX, sqInt stopX, sqInt yValue)
{
	/* Describe the span in the globals copyBits() works from. */
	width = stopX - startX;
	sourceX = startX;
	destX = startX;
	destY = yValue;
	copyBits();

	/* begin showDisplayBits */
	/* The forms are reloaded first if the GC count changed since invocation. */
	if ((statNumGCs()) != numGCsOnInvocation) {
		reloadDestAndSourceForms();
	}
	showDisplayBitsLeftTopRightBottom(destForm, affectedL, affectedT, affectedR, affectedB);
	return 0;
}


/*	Perform the actual copyBits operation.
	Assume: Surfaces have been locked and clipping was performed. */

	/* BitBltSimulation>>#copyBitsLockedAndClipped */
static sqInt
copyBitsLockedAndClipped(void)
{
	sqInt dxLowBits;
	sqInt endBits;
	unsigned int m1;
	sqInt pixPerM1;
	sqInt pixPerM11;
	sqInt startBits;
	sqInt startBits1;
	sqInt sxLowBits;
	sqInt t;

	copyBitsRule41Test();
	if (failed()) {
		return primitiveFail();
	}
	
	/* inline tryCopyingBitsQuickly */
	if (noSource) {
		goto l1;
	}
	if (!((combinationRule == 34)
		 || (combinationRule == 41))) {
		goto l1;
	}
	if (!(sourceDepth == 32)) {
		goto l1;
	}
	if (sourceForm == destForm) {
		goto l1;
	}
	if (combinationRule == 41) {
		if (destDepth == 32) {
			rgbComponentAlpha32();
			affectedL = dx;
			affectedR = dx + bbW;
			affectedT = dy;
			affectedB = dy + bbH;
			return null;
		}
		if (destDepth == 16) {
			rgbComponentAlpha16();
			affectedL = dx;
			affectedR = dx + bbW;
			affectedT = dy;
			affectedB = dy + bbH;
			return null;
		}
		if (destDepth == 8) {
			rgbComponentAlpha8();
			affectedL = dx;
			affectedR = dx + bbW;
			affectedT = dy;
			affectedB = dy + bbH;
			return null;
		}
		goto l1;
	}
	if (destDepth < 8) {
		goto l1;
	}
	if ((destDepth == 8)
	 && ((cmFlags & ColorMapPresent) == 0)) {
		goto l1;
	}
	if (destDepth == 32) {
		alphaSourceBlendBits32();
	}
	if (destDepth == 16) {
		alphaSourceBlendBits16();
	}
	if (destDepth == 8) {
		alphaSourceBlendBits8();
	}
	affectedL = dx;
	affectedR = dx + bbW;
	affectedT = dy;
	affectedB = dy + bbH;
	return null;
	l1:	;
	if (((combinationRule >= 30) && (combinationRule <= 0x1F))) {

		/* Check and fetch source alpha parameter for alpha blend */
		if (!((methodArgumentCount()) == 1)) {
			return primitiveFail();
		}
		sourceAlpha = stackIntegerValue(0);
		if ((failed())
		 || ((sourceAlpha < 0)
		 || (sourceAlpha > 0xFF))) {
			return primitiveFail();
		}
	}

	/* Choose and perform the actual copy loop. */
	bitCount = 0;
	/* begin performCopyLoop */
	
	/* A mask, assuming power of two */
	/* how many pixels in first word */
	pixPerM1 = destPPW - 1;

	/* how many pixels in last word */
	startBits = destPPW - (dx & pixPerM1);
	endBits = (((dx + bbW) - 1) & pixPerM1) + 1;
	if (destMSB) {
		mask1 = ((usqInt) AllOnes) >> (32 - (startBits * destDepth));
		mask2 = ((usqInt)(AllOnes) << (32 - (endBits * destDepth)));
	}
	else {
		mask1 = ((usqInt)(AllOnes) << (32 - (startBits * destDepth)));
		mask2 = ((usqInt) AllOnes) >> (32 - (endBits * destDepth));
	}
	if (bbW < startBits) {
		mask1 = mask1 & mask2;
		mask2 = 0;
		nWords = 1;
	}
	else {
		nWords = (((bbW - startBits) + pixPerM1) / destPPW) + 1;
	}

	/* defaults for no overlap with source */
	/* calculate byte addr and delta, based on first word of data */
	/* Note pitch is bytes and nWords is longs, not bytes */
	hDir = (vDir = 1);
	destIndex = (destBits + (dy * destPitch)) + ((dx / destPPW) * 4);

	/* byte addr delta */
	destDelta = (destPitch * vDir) - (4 * (nWords * hDir));
	if (noSource) {

		/* Simple fill loop */
		copyLoopNoSource();
	}
	else {

		/* Loop using source and dest */
		/* begin checkSourceOverlap */
		if ((sourceForm == destForm)
		 && (dy >= sy)) {
			if (dy > sy) {

				/* have to start at bottom */
				vDir = -1;
				sy = (sy + bbH) - 1;
				dy = (dy + bbH) - 1;
			}
			else {
				if ((dy == sy) && (dx > sx)) {

					/* y's are equal, but x's are backward */
					hDir = -1;

					/* start at right */
					sx = (sx + bbW) - 1;

					/* and fix up masks */
					dx = (dx + bbW) - 1;
					if (nWords > 1) {
						t = mask1;
						mask1 = mask2;
						mask2 = t;
					}
				}
			}
			destIndex = (destBits + (dy * destPitch)) + ((dx / destPPW) * 4);
			destDelta = (destPitch * vDir) - (4 * (nWords * hDir));
		}
		if ((sourceDepth != destDepth)
		 || ((cmFlags != 0)
		 || (sourceMSB != destMSB))) {

			/* If we must convert between pixel depths or use
			   color lookups or swap pixels use the general version */
			copyLoopPixMap();
		}
		else {

			/* Otherwise we simply copy pixels and can use a faster version */
			/* begin sourceSkewAndPointerInit */
			assert((destPPW == sourcePPW)
			 && ((destMSB == sourceMSB)
			 && (destDepth == sourceDepth)));

			/* A mask, assuming power of two */
			pixPerM11 = destPPW - 1;
			sxLowBits = sx & pixPerM11;

			/* how many pixels in first word */
			dxLowBits = dx & pixPerM11;
			startBits1 = (hDir > 0
				? sourcePPW - (sx & pixPerM11)
				: (((sx + bbW) - 1) & pixPerM11) + 1);
			m1 = (destMSB
				? ((usqInt) AllOnes) >> (32 - (startBits1 * destDepth))
				: ((usqInt)(AllOnes) << (32 - (startBits1 * destDepth))));

			/* i.e. there are some missing bits */
			/* calculate right-shift skew from source to dest */
			preload = (m1 & mask1) != mask1;

			/* -32..32 */
			skew = destDepth * ((sourceMSB
	? sxLowBits - dxLowBits
	: dxLowBits - sxLowBits));
			if (preload) {
				skew = (skew < 0
					? skew + 32
					: skew - 32);
			}

			/* calculate increments from end of 1 line to start of next */
			sourceIndex = (sourceBits + (sy * sourcePitch)) + ((sx / (32 / sourceDepth)) * 4);
			sourceDelta = (sourcePitch * vDir) - (4 * (nWords * hDir));
			if (preload) {

				/* Compensate for extra source word fetched */
				sourceDelta -= 4 * hDir;
			}
			assert(!((preload
 && (skew == 0))));
			assert(((skew >= -31) && (skew <= 0x1F)));
			copyLoop();
		}
	}
	if (((combinationRule >= 30) && (combinationRule <= 0x1F))) {

		/* zero width and height; just return the count */
		affectedL = (affectedR = (affectedT = (affectedB = 0)));
	}
	else {
		if (hDir > 0) {
			affectedL = dx;
			affectedR = dx + bbW;
		}
		else {
			affectedL = (dx - bbW) + 1;
			affectedR = dx + 1;
		}
		if (vDir > 0) {
			affectedT = dy;
			affectedB = dy + bbH;
		}
		else {
			affectedT = (dy - bbH) + 1;
			affectedB = dy + 1;
		}
	}
	return 0;
}


/*	Test possible use of rule 41, rgbComponentAlpha:with: Nothing to return,
	just set up some variables
 */

	/* BitBltSimulation>>#copyBitsRule41Test */
static sqInt
copyBitsRule41Test(void)
{
	sqInt gammaLookupTableOop;
	sqInt ungammaLookupTableOop;

	if (combinationRule == 41) {

		/* fetch the forecolor into componentAlphaModeColor. */
		componentAlphaModeAlpha = 0xFF;
		componentAlphaModeColor = 0xFFFFFF;
		gammaLookupTable = null;
		ungammaLookupTable = null;
		if ((methodArgumentCount()) >= 2) {
			componentAlphaModeAlpha = stackIntegerValue((methodArgumentCount()) - 2);
			if (failed()) {
				return primitiveFail();
			}
			componentAlphaModeColor = stackIntegerValue((methodArgumentCount()) - 1);
			if (failed()) {
				return primitiveFail();
			}
			if ((methodArgumentCount()) == 4) {
				gammaLookupTableOop = stackObjectValue(1);
				if (isBytes(gammaLookupTableOop)) {
					gammaLookupTable = firstIndexableField(gammaLookupTableOop);
				}
				ungammaLookupTableOop = stackObjectValue(0);
				if (isBytes(ungammaLookupTableOop)) {
					ungammaLookupTable = firstIndexableField(ungammaLookupTableOop);
				}
			}
		}
		else {
			if ((methodArgumentCount()) == 1) {
				componentAlphaModeColor = stackIntegerValue(0);
				if (failed()) {
					return primitiveFail();
				}
			}
			else {
				return primitiveFail();
			}
		}
	}
	return 0;
}


/*	Recover from the fast path specialised code saying Help-I-cant-cope */

	/* BitBltSimulation>>#copyBits:Fallback: */
/* Run an operation described by *op through the generic code in this file.
   The fields of op are copied back into the plugin's globals, destPPW and
   cmBitsPerColor are derived from them, the 32-bit-source quick paths are
   tried, and otherwise the general copy loop is set up and run.
   op    - operation description; read only.
   flags - not referenced in this body.
   The whole body is compiled only when ENABLE_FAST_BLT is set; without it
   the function does nothing. */
void
copyBitsFallback(operation_t *op, unsigned int flags)
{
	sqInt done;
	sqInt dxLowBits;
	sqInt endBits;
	unsigned int m1;
	sqInt pixPerM1;
	sqInt pixPerM11;
	sqInt startBits;
	sqInt startBits1;
	sqInt sxLowBits;
	sqInt t;

	
#  if ENABLE_FAST_BLT

	/* recover values from the operation struct used by the fast ARM code */
	
	combinationRule = op->combinationRule;
	noSource = op->noSource;
	sourceBits = (sqInt) op->src.bits;
	sourcePitch = op->src.pitch;
	sourceDepth = op->src.depth;
	sourceMSB = op->src.msb;
	sx = op->src.x;
	sy = op->src.y;
	destBits = (sqInt) op->dest.bits;
	destPitch = op->dest.pitch;
	destDepth = op->dest.depth;
	destMSB = op->dest.msb;
	dx = op->dest.x;
	dy = op->dest.y;
	bbW = op->width;
	bbH = op->height;
	cmFlags = op->cmFlags;
	cmShiftTable = (void *) op->cmShiftTable;
	cmMaskTable = (void *) op->cmMaskTable;
	cmMask = op->cmMask;
	cmLookupTable = (void *) op->cmLookupTable;
	noHalftone = op->noHalftone;
	halftoneHeight = op->halftoneHeight;
	halftoneBase = (sqInt) op->halftoneBase;
	/* Rule-specific extras: constant alpha for rules 30/31, component-alpha
	   settings for rule 41. */
	if (combinationRule == 30 || combinationRule == 31) {
		sourceAlpha = op->opt.sourceAlpha;
	}
	if (combinationRule == 41) {
		componentAlphaModeColor = op->opt.componentAlpha.componentAlphaModeColor;
		componentAlphaModeAlpha = op->opt.componentAlpha.componentAlphaModeAlpha;
		gammaLookupTable = (void *) op->opt.componentAlpha.gammaLookupTable;
		ungammaLookupTable = (void *) op->opt.componentAlpha.ungammaLookupTable;
	}
	/* Derived values that are not carried in the operation struct. */
	destPPW = 32 / destDepth;
	/* cmBitsPerColor is inferred from the colour-map mask: 9, 12 or 15 mask
	   bits give 3, 4 or 5; any other mask leaves it at 0. */
	cmBitsPerColor = 0;
	if (cmMask == 0x1FF) {
		cmBitsPerColor = 3;
	}
	if (cmMask == 0xFFF) {
		cmBitsPerColor = 4;
	}
	if (cmMask == 0x7FFF) {
		cmBitsPerColor = 5;
	}
	/* begin tryCopyingBitsQuickly */
	/* The quick paths need a 32-bit source distinct from the destination and
	   rule 34 or 41; done records whether one of them handled the copy. */
	if (noSource) {
		done = 0;
		goto l1;
	}
	if (!((combinationRule == 34)
		 || (combinationRule == 41))) {
		done = 0;
		goto l1;
	}
	if (!(sourceDepth == 32)) {
		done = 0;
		goto l1;
	}
	if (sourceForm == destForm) {
		done = 0;
		goto l1;
	}
	if (combinationRule == 41) {
		if (destDepth == 32) {
			rgbComponentAlpha32();
			affectedL = dx;
			affectedR = dx + bbW;
			affectedT = dy;
			affectedB = dy + bbH;
			done = 1;
			goto l1;
		}
		if (destDepth == 16) {
			rgbComponentAlpha16();
			affectedL = dx;
			affectedR = dx + bbW;
			affectedT = dy;
			affectedB = dy + bbH;
			done = 1;
			goto l1;
		}
		if (destDepth == 8) {
			rgbComponentAlpha8();
			affectedL = dx;
			affectedR = dx + bbW;
			affectedT = dy;
			affectedB = dy + bbH;
			done = 1;
			goto l1;
		}
		done = 0;
		goto l1;
	}
	/* Rule 34: destinations shallower than 8 bits, and 8-bit destinations
	   without a colour map, are left to the general loop. */
	if (destDepth < 8) {
		done = 0;
		goto l1;
	}
	if ((destDepth == 8)
	 && ((cmFlags & ColorMapPresent) == 0)) {
		done = 0;
		goto l1;
	}
	if (destDepth == 32) {
		alphaSourceBlendBits32();
	}
	if (destDepth == 16) {
		alphaSourceBlendBits16();
	}
	if (destDepth == 8) {
		alphaSourceBlendBits8();
	}
	affectedL = dx;
	affectedR = dx + bbW;
	affectedT = dy;
	affectedB = dy + bbH;
	done = 1;
	l1:	/* end tryCopyingBitsQuickly */;
	if (done) {
		return;
	}

	/* Choose and perform the actual copy loop. */
	bitCount = 0;
	/* begin performCopyLoop */
	
	/* A mask, assuming power of two */
	/* how many pixels in first word */
	pixPerM1 = destPPW - 1;

	/* how many pixels in last word */
	startBits = destPPW - (dx & pixPerM1);
	endBits = (((dx + bbW) - 1) & pixPerM1) + 1;
	/* mask1 selects the pixels written in the first word of a row, mask2
	   those in the last word; which end of the word they sit at depends on
	   destMSB. */
	if (destMSB) {
		mask1 = ((usqInt) AllOnes) >> (32 - (startBits * destDepth));
		mask2 = ((usqInt)(AllOnes) << (32 - (endBits * destDepth)));
	}
	else {
		mask1 = ((usqInt)(AllOnes) << (32 - (startBits * destDepth)));
		mask2 = ((usqInt) AllOnes) >> (32 - (endBits * destDepth));
	}
	if (bbW < startBits) {
		/* the whole row fits inside the first word: one combined mask */
		mask1 = mask1 & mask2;
		mask2 = 0;
		nWords = 1;
	}
	else {
		nWords = (((bbW - startBits) + pixPerM1) / destPPW) + 1;
	}

	/* defaults for no overlap with source */
	/* calculate byte addr and delta, based on first word of data */
	/* Note pitch is bytes and nWords is longs, not bytes */
	hDir = (vDir = 1);
	destIndex = (destBits + (dy * destPitch)) + ((dx / destPPW) * 4);

	/* byte addr delta */
	destDelta = (destPitch * vDir) - (4 * (nWords * hDir));
	if (noSource) {

		/* Simple fill loop */
		copyLoopNoSource();
	}
	else {

		/* Loop using source and dest */
		/* begin checkSourceOverlap */
		/* Same form with the destination below, or on the same row to the
		   right of, the source: run the copy backwards. */
		if ((sourceForm == destForm)
		 && (dy >= sy)) {
			if (dy > sy) {

				/* have to start at bottom */
				vDir = -1;
				sy = (sy + bbH) - 1;
				dy = (dy + bbH) - 1;
			}
			else {
				if ((dy == sy) && (dx > sx)) {

					/* y's are equal, but x's are backward */
					hDir = -1;

					/* start at right */
					sx = (sx + bbW) - 1;

					/* and fix up masks */
					dx = (dx + bbW) - 1;
					if (nWords > 1) {
						t = mask1;
						mask1 = mask2;
						mask2 = t;
					}
				}
			}
			destIndex = (destBits + (dy * destPitch)) + ((dx / destPPW) * 4);
			destDelta = (destPitch * vDir) - (4 * (nWords * hDir));
		}
		if ((sourceDepth != destDepth)
		 || ((cmFlags != 0)
		 || (sourceMSB != destMSB))) {

			/* If we must convert between pixel depths or use
			   color lookups or swap pixels use the general version */
			copyLoopPixMap();
		}
		else {

			/* Otherwise we simply copy pixels and can use a faster version */
			/* begin sourceSkewAndPointerInit */
			assert((destPPW == sourcePPW)
			 && ((destMSB == sourceMSB)
			 && (destDepth == sourceDepth)));

			/* A mask, assuming power of two */
			pixPerM11 = destPPW - 1;
			sxLowBits = sx & pixPerM11;

			/* how many pixels in first word */
			dxLowBits = dx & pixPerM11;
			/* NOTE(review): when hDir < 0, sx was already moved to the
			   right-hand edge above; confirm that adding bbW - 1 again in
			   the second arm is intended. */
			startBits1 = (hDir > 0
				? sourcePPW - (sx & pixPerM11)
				: (((sx + bbW) - 1) & pixPerM11) + 1);
			m1 = (destMSB
				? ((usqInt) AllOnes) >> (32 - (startBits1 * destDepth))
				: ((usqInt)(AllOnes) << (32 - (startBits1 * destDepth))));

			/* i.e. there are some missing bits */
			/* calculate right-shift skew from source to dest */
			preload = (m1 & mask1) != mask1;

			/* -32..32 */
			skew = destDepth * ((sourceMSB
	? sxLowBits - dxLowBits
	: dxLowBits - sxLowBits));
			if (preload) {
				skew = (skew < 0
					? skew + 32
					: skew - 32);
			}

			/* calculate increments from end of 1 line to start of next */
			sourceIndex = (sourceBits + (sy * sourcePitch)) + ((sx / (32 / sourceDepth)) * 4);
			sourceDelta = (sourcePitch * vDir) - (4 * (nWords * hDir));
			if (preload) {

				/* Compensate for extra source word fetched */
				sourceDelta -= 4 * hDir;
			}
			assert(!((preload
 && (skew == 0))));
			assert(((skew >= -31) && (skew <= 0x1F)));
			copyLoop();
		}
	}
#  endif /* ENABLE_FAST_BLT */
}


/*	This version of the inner loop assumes noSource = false. */

	/* BitBltSimulation>>#copyLoop */
/* Word-at-a-time copy loop with a source.
   Each destination word is assembled from two neighbouring source words
   (prevWord and thisWord) shifted by unskew and skew, ANDed with the
   halftone word, and then either stored directly (combinationRule 3) or
   passed with the destination word to the function selected from opTable.
   The first word of a row is written under mask1, interior words in full,
   and the last word (when nWords > 1) under mask2.
   Reads the globals skew, preload, hDir, vDir, nWords, bbH, dy, mask1, mask2,
   sourceDelta and destDelta; advances sourceIndex and destIndex; writes
   destMask; sets halftoneHeight to 0 when noHalftone is set.
   Always returns 0. */
static sqInt
copyLoop(void)
{
	unsigned int destWord;
	unsigned int halftoneWord;
	sqInt hInc;
	sqInt i;
	unsigned int (*mergeFnwith)(unsigned int, unsigned int);
	unsigned int mergeWord;
	unsigned int notSkewMask;
	unsigned int prevWord;
	unsigned int skewMask;
	unsigned int skewWord;
	unsigned int thisWord;
	int unskew;
	sqInt word;
	int y;


	/* unskew is a bitShift and MUST remain signed, while skewMask is unsigned. */
	/* The merge function is looked up at index combinationRule + 1. */
	mergeFnwith = ((unsigned int (*)(unsigned int, unsigned int)) (opTable[combinationRule + 1]));
	assert(!((preload
 && (skew == 0))));
	assert(((skew >= -31) && (skew <= 0x1F)));

	/* Byte delta */
	hInc = hDir * 4;
	/* skewMask selects the bits of thisWord that are shifted by skew;
	   notSkewMask selects the bits of prevWord that are shifted by unskew,
	   the complementary shift in the opposite direction. */
	if (skew < 0) {
		unskew = skew + 32;
		skewMask = ((usqInt)(AllOnes) << (0 - skew));
	}
	else {
		if (skew == 0) {
			unskew = 0;
			skewMask = AllOnes;
		}
		else {
			unskew = skew - 32;
			skewMask = ((usqInt) AllOnes) >> skew;
		}
	}
	notSkewMask = (unsigned int)~skewMask;
	if (noHalftone) {
		halftoneWord = AllOnes;
		/* zeroing the height disables the per-row halftone fetch below */
		halftoneHeight = 0;
	}
	else {
		halftoneWord = long32At(halftoneBase + ((0 % halftoneHeight) * 4));
	}

	/* Here is the vertical loop, in two versions, one for the combinationRule = 3 copy mode, one for the general case. */
	y = dy;
	if (combinationRule == 3) {
		for (i = 1; i <= bbH; i += 1) {

			/* here is the vertical loop for combinationRule = 3 copy mode; no need to call merge */
			if (halftoneHeight > 1) {

				/* Otherwise, its always the same */
				halftoneWord = long32At(halftoneBase + ((y % halftoneHeight) * 4));
				y += vDir;
			}
			if (preload) {

				/* load the 64-bit shifter */
				assert((((usqInt)sourceIndex)) < endOfSource);
				prevWord = long32At(sourceIndex);
				sourceIndex += hInc;
			}
			else {
				prevWord = 0;
			}
			/* first word of the row: written under mask1 */
			destMask = mask1;

			/* pick up next word */
			assert((((usqInt)sourceIndex)) < endOfSource);
			thisWord = long32At(sourceIndex);
			sourceIndex += hInc;

			/* 32-bit rotate */
			skewWord = (((unskew < 0) ? ((usqInt) (prevWord & notSkewMask) >> -unskew) : ((usqInt) (prevWord & notSkewMask) << unskew))) | (((skew < 0) ? ((usqInt) (thisWord & skewMask) >> -skew) : ((usqInt) (thisWord & skewMask) << skew)));
			prevWord = thisWord;
			assert((((usqInt)destIndex)) < endOfDestination);
			destWord = long32At(destIndex);
			destWord = (destMask & (skewWord & halftoneWord)) | (destWord & ((unsigned int)~destMask));
			long32Atput(destIndex, destWord);
			destIndex += hInc;
			/* interior words are written in full */
			destMask = AllOnes;
			if ((skew == 0)
			 && (halftoneWord == AllOnes)) {

				/* Very special inner loop for STORE mode with no skew -- just move words */
				if (preload
				 && (hDir == 1)) {
					for (word = 2; word < nWords; word += 1) {

						/* Note loop starts with prevWord loaded (due to preload) */
						long32Atput(destIndex, prevWord);
						destIndex += hInc;
						assert((((usqInt)sourceIndex)) < endOfSource);
						prevWord = long32At(sourceIndex);
						sourceIndex += hInc;
					}
				}
				else {
					for (word = 2; word < nWords; word += 1) {
						assert((((usqInt)sourceIndex)) < endOfSource);
						thisWord = long32At(sourceIndex);
						sourceIndex += hInc;
						long32Atput(destIndex, thisWord);
						destIndex += hInc;
					}
					prevWord = thisWord;
				}
			}
			else {
				for (word = 2; word < nWords; word += 1) {
					assert((((usqInt)sourceIndex)) < endOfSource);
					thisWord = long32At(sourceIndex);
					sourceIndex += hInc;

					/* 32-bit rotate */
					skewWord = (((unskew < 0) ? ((usqInt) (prevWord & notSkewMask) >> -unskew) : ((usqInt) (prevWord & notSkewMask) << unskew))) | (((skew < 0) ? ((usqInt) (thisWord & skewMask) >> -skew) : ((usqInt) (thisWord & skewMask) << skew)));
					prevWord = thisWord;
					long32Atput(destIndex, skewWord & halftoneWord);
					destIndex += hInc;
				}
			}
			if (nWords > 1) {
				/* last word of the row: written under mask2 */
				destMask = mask2;

				/* pick up next word */
				assert((((usqInt)sourceIndex)) < endOfSource);
				thisWord = long32At(sourceIndex);
				sourceIndex += hInc;

				/* 32-bit rotate */
				skewWord = (((unskew < 0) ? ((usqInt) (prevWord & notSkewMask) >> -unskew) : ((usqInt) (prevWord & notSkewMask) << unskew))) | (((skew < 0) ? ((usqInt) (thisWord & skewMask) >> -skew) : ((usqInt) (thisWord & skewMask) << skew)));
				assert((((usqInt)destIndex)) < endOfDestination);
				destWord = long32At(destIndex);
				destWord = (destMask & (skewWord & halftoneWord)) | (destWord & ((unsigned int)~destMask));
				long32Atput(destIndex, destWord);
				destIndex += hInc;
			}
			/* step both pointers from the end of this row to the start of the next */
			sourceIndex += sourceDelta;
			destIndex += destDelta;
		}
	}
	else {
		for (i = 1; i <= bbH; i += 1) {

			/* here is the vertical loop for the general case (combinationRule ~= 3) */
			if (halftoneHeight > 1) {

				/* Otherwise, its always the same */
				halftoneWord = long32At(halftoneBase + ((y % halftoneHeight) * 4));
				y += vDir;
			}
			if (preload) {

				/* load the 64-bit shifter */
				assert((((usqInt)sourceIndex)) < endOfSource);
				prevWord = long32At(sourceIndex);
				sourceIndex += hInc;
			}
			else {
				prevWord = 0;
			}
			/* first word of the row: merged, then written under mask1 */
			destMask = mask1;

			/* pick up next word */
			assert((((usqInt)sourceIndex)) < endOfSource);
			thisWord = long32At(sourceIndex);
			sourceIndex += hInc;

			/* 32-bit rotate */
			skewWord = (((unskew < 0) ? ((usqInt) (prevWord & notSkewMask) >> -unskew) : ((usqInt) (prevWord & notSkewMask) << unskew))) | (((skew < 0) ? ((usqInt) (thisWord & skewMask) >> -skew) : ((usqInt) (thisWord & skewMask) << skew)));
			prevWord = thisWord;
			assert((((usqInt)destIndex)) < endOfDestination);
			destWord = long32At(destIndex);
			mergeWord = mergeFnwith(skewWord & halftoneWord, destWord);
			destWord = (destMask & mergeWord) | (destWord & ((unsigned int)~destMask));
			long32Atput(destIndex, destWord);
			destIndex += hInc;
			/* interior words: the merge result is stored in full */
			destMask = AllOnes;
			for (word = 2; word < nWords; word += 1) {

				/* Normal inner loop does merge: */

				/* pick up next word */
				assert((((usqInt)sourceIndex)) < endOfSource);
				thisWord = long32At(sourceIndex);
				sourceIndex += hInc;

				/* 32-bit rotate */
				skewWord = (((unskew < 0) ? ((usqInt) (prevWord & notSkewMask) >> -unskew) : ((usqInt) (prevWord & notSkewMask) << unskew))) | (((skew < 0) ? ((usqInt) (thisWord & skewMask) >> -skew) : ((usqInt) (thisWord & skewMask) << skew)));
				prevWord = thisWord;
				mergeWord = mergeFnwith(skewWord & halftoneWord, (assert((((usqInt)destIndex)) < endOfDestination),
				long32At(destIndex)));
				long32Atput(destIndex, mergeWord);
				destIndex += hInc;
			}
			if (nWords > 1) {
				/* last word of the row: merged, then written under mask2 */
				destMask = mask2;

				/* pick up next word */
				assert((((usqInt)sourceIndex)) < endOfSource);
				thisWord = long32At(sourceIndex);
				sourceIndex += hInc;

				/* 32-bit rotate */
				skewWord = (((unskew < 0) ? ((usqInt) (prevWord & notSkewMask) >> -unskew) : ((usqInt) (prevWord & notSkewMask) << unskew))) | (((skew < 0) ? ((usqInt) (thisWord & skewMask) >> -skew) : ((usqInt) (thisWord & skewMask) << skew)));
				assert((((usqInt)destIndex)) < endOfDestination);
				destWord = long32At(destIndex);
				mergeWord = mergeFnwith(skewWord & halftoneWord, destWord);
				destWord = (destMask & mergeWord) | (destWord & ((unsigned int)~destMask));
				long32Atput(destIndex, destWord);
				destIndex += hInc;
			}
			/* step both pointers from the end of this row to the start of the next */
			sourceIndex += sourceDelta;
			destIndex += destDelta;
		}
	}
	return 0;
}


/*	Faster copyLoop when source not used. hDir and vDir are both
	positive, and preload and skew are unused */

	/* BitBltSimulation>>#copyLoopNoSource */
static sqInt
copyLoopNoSource(void)
{
	unsigned int destWord;
	unsigned int halftoneWord;
	sqInt i;
	unsigned int (*mergeFnwith)(unsigned int, unsigned int);
	unsigned int mergeWord;
	sqInt word;

	halftoneWord = 0;
	mergeFnwith = ((unsigned int (*)(unsigned int, unsigned int)) (opTable[combinationRule + 1]));
	if (noHalftone) {
		halftoneWord = AllOnes;
	}
	for (i = 1; i <= bbH; i += 1) {

		/* here is the vertical loop */
		if (!noHalftone) {
			halftoneWord = long32At(halftoneBase + ((((dy + i) - 1) % halftoneHeight) * 4));
		}
		destMask = mask1;
		assert((((usqInt)destIndex)) < endOfDestination);
		destWord = long32At(destIndex);
		mergeWord = mergeFnwith(halftoneWord, destWord);
		destWord = (destMask & mergeWord) | (destWord & ((unsigned int)~destMask));
		long32Atput(destIndex, destWord);
		destIndex += 4;
		destMask = AllOnes;
		if (combinationRule == 3) {

			/* Special inner loop for STORE */
			destWord = halftoneWord;
			for (word = 2; word < nWords; word += 1) {
				long32Atput(destIndex, destWord);
				destIndex += 4;
			}
		}
		else {

			/* Normal inner loop does merge */
			for (word = 2; word < nWords; word += 1) {

				/* Normal inner loop does merge */
				assert((((usqInt)destIndex)) < endOfDestination);
				destWord = long32At(destIndex);
				mergeWord = mergeFnwith(halftoneWord, destWord);
				long32Atput(destIndex, mergeWord);
				destIndex += 4;
			}
		}
		if (nWords > 1) {
			destMask = mask2;
			assert((((usqInt)destIndex)) < endOfDestination);
			destWord = long32At(destIndex);
			mergeWord = mergeFnwith(halftoneWord, destWord);
			destWord = (destMask & mergeWord) | (destWord & ((unsigned int)~destMask));
			long32Atput(destIndex, destWord);
			destIndex += 4;
		}
		destIndex += destDelta;
	}
	return 0;
}


/*	This version of the inner loop maps source pixels
	to a destination form with different depth. Because it is already
	unwieldy, the loop is not unrolled as in the other versions.
	Preload, skew and skewMask are all ignored, since pickSourcePixels
	delivers its destination word already properly aligned.
	Note that pickSourcePixels could be copied in-line at the top of
	the horizontal loop, and some of its inits moved out of the loop. */
/*	ar 12/7/1999:
	The loop has been rewritten to use only one pickSourcePixels call.
	The idea is that the call itself could be inlined. If we decide not
	to inline pickSourcePixels we could optimize the loop instead. */
/*	In this generated code pickSourcePixels (and mapPixel / rgbMapPixel
	inside it) has been expanded in place; the "begin ..." comments mark
	where each expansion starts. The function reads and updates the
	file-level blit state (sourceIndex, destIndex, destMask, srcBitShift,
	dstBitShift, sourcePPW, sourceDelta) and always answers 0. */

	/* BitBltSimulation>>#copyLoopPixMap */
static sqInt
copyLoopPixMap(void)
{
	sqInt destPix;
	unsigned int destPixMask;
	unsigned int destWord;
	unsigned int destWord1;
	sqInt dstShift;
	sqInt dstShift1;
	int dstShiftInc;
	sqInt dstShiftLeft;
	sqInt endBits;
	unsigned int halftoneWord;
	sqInt i;
	sqInt mapperFlags;
	unsigned int (*mergeFnwith)(unsigned int, unsigned int);
	unsigned int mergeWord;
	sqInt nPix;
	sqInt nPix1;
	sqInt nSourceIncs;
	sqInt pv;
	sqInt scrStartBits;
	unsigned int skewWord;
	sqInt sourcePix;
	unsigned int sourcePixMask;
	unsigned int sourceWord;
	sqInt srcShift;
	sqInt srcShift1;
	int srcShiftInc;
	sqInt startBits;
	sqInt val;
	sqInt words;

	halftoneWord = 0;

	/* merge function for the current combination rule */
	mergeFnwith = ((unsigned int (*)(unsigned int, unsigned int)) (opTable[combinationRule + 1]));

	/* pixels per 32-bit source word, and the per-pixel masks for both depths */
	sourcePPW = 32 / sourceDepth;
	sourcePixMask = maskTable[sourceDepth];
	destPixMask = maskTable[destDepth];

	/* colour map flags with the ColorMapNewStyle bit cleared */
	mapperFlags = cmFlags & ((unsigned int)~ColorMapNewStyle);

	/* address of the source word holding pixel (sx, sy) */
	sourceIndex = (sourceBits + (sy * sourcePitch)) + ((sx / sourcePPW) * 4);

	/* pixels available in the first source word, starting at sx */
	scrStartBits = sourcePPW - (sx & (sourcePPW - 1));

	/* number of times sourceIndex is advanced by one word while reading a row */
	if (bbW < scrStartBits) {
		nSourceIncs = 0;
	}
	else {
		nSourceIncs = ((bbW - scrStartBits) / sourcePPW) + 1;
	}

	/* Note following two items were already calculated in destmask setup! */
	/* byte adjustment that takes sourceIndex from the end of one row to the start of the next */
	sourceDelta = sourcePitch - (nSourceIncs * 4);

	/* pixel counts for the first and last destination word of a row */
	startBits = destPPW - (dx & (destPPW - 1));
	endBits = (((dx + bbW) - 1) & (destPPW - 1)) + 1;
	if (bbW < startBits) {
		startBits = bbW;
	}

	/* bit offsets of the first pixel within its source / destination word,
	   and the per-pixel step of those offsets */
	srcShift = (sx & (sourcePPW - 1)) * sourceDepth;
	dstShift = (dx & (destPPW - 1)) * destDepth;
	srcShiftInc = sourceDepth;
	dstShiftInc = destDepth;
	dstShiftLeft = 0;

	/* for MSB-first forms the offset counts down from the top of the word,
	   so flip the starting shift and negate the step */
	if (sourceMSB) {
		srcShift = (32 - sourceDepth) - srcShift;
		srcShiftInc = 0 - srcShiftInc;
	}
	if (destMSB) {
		dstShift = (32 - destDepth) - dstShift;
		dstShiftInc = 0 - dstShiftInc;
		dstShiftLeft = 32 - destDepth;
	}
	if (noHalftone) {
		halftoneWord = AllOnes;
	}
	for (i = 1; i <= bbH; i += 1) {

		/* here is the vertical loop */
		if (!noHalftone) {
			halftoneWord = long32At(halftoneBase + ((((dy + i) - 1) % halftoneHeight) * 4));
		}

		/* every row starts from the same bit offsets and the first-word mask */
		srcBitShift = srcShift;
		dstBitShift = dstShift;
		destMask = mask1;

		/* Here is the horizontal loop... */
		/* one iteration per destination word; nPix is the number of pixels to gather for it */
		nPix = startBits;
		words = nWords;
		do {
			/* begin pickSourcePixels:flags:srcMask:destMask:srcShiftInc:dstShiftInc: */
			/* the destination word is assembled here, one pixel at a time */
			destWord1 = 0;

			/* Hint: Keep in register */
			srcShift1 = srcBitShift;

			/* Hint: Keep in register */
			dstShift1 = dstBitShift;

			/* always > 0 so we can use do { } while(--nPix); */
			nPix1 = nPix;
			if (mapperFlags == (ColorMapPresent | ColorMapIndexedPart)) {

				/* a little optimization for (pretty crucial) blits using indexed lookups only */
				/* grab, colormap and mix in pixel */
				do {
					assert((((usqInt)sourceIndex)) < endOfSource);
					sourceWord = long32At(sourceIndex);
					sourcePix = (((usqInt) sourceWord) >> srcShift1) & sourcePixMask;
					destPix = cmLookupTable[sourcePix & cmMask];

					/* place the mapped pixel at the current destination bit offset */
					destWord1 = destWord1 | (((sqInt)((usqInt)((destPix & destPixMask)) << dstShift1)));

					/* step both bit offsets to the next pixel */
					dstShift1 += dstShiftInc;

					/* when the source offset leaves 0..31, wrap it by 32 and move on to the next source word */
					if (!((((srcShift1 += srcShiftInc)) & 0xFFFFFFE0U) == 0)) {
						srcShift1 = (sourceMSB
							? srcShift1 + 32
							: srcShift1 - 32);
						/* begin incSrcIndex: */
						sourceIndex += 4;
					}
				} while(!(((nPix1 -= 1)) == 0));
			}
			else {

				/* general case: grab, colormap and mix in pixel */
				do {
					assert((((usqInt)sourceIndex)) < endOfSource);
					sourceWord = long32At(sourceIndex);
					sourcePix = (((usqInt) sourceWord) >> srcShift1) & sourcePixMask;
					/* begin mapPixel:flags: */
					/* without a colour map the pixel value passes through unchanged */
					pv = sourcePix;
					if ((mapperFlags & ColorMapPresent) != 0) {
						if ((mapperFlags & ColorMapFixedPart) != 0) {
							/* begin rgbMapPixel:flags: */
							/* mask and shift each of the four components (a negative shift
							   count means shift right) and OR the results together */
							val = (((((int) (cmShiftTable[0]))) < 0) ? ((usqInt) (sourcePix & (cmMaskTable[0])) >> -(((int) (cmShiftTable[0])))) : ((usqInt) (sourcePix & (cmMaskTable[0])) << (((int) (cmShiftTable[0])))));
							val = val | ((((((int) (cmShiftTable[1]))) < 0) ? ((usqInt) (sourcePix & (cmMaskTable[1])) >> -(((int) (cmShiftTable[1])))) : ((usqInt) (sourcePix & (cmMaskTable[1])) << (((int) (cmShiftTable[1]))))));
							val = val | ((((((int) (cmShiftTable[2]))) < 0) ? ((usqInt) (sourcePix & (cmMaskTable[2])) >> -(((int) (cmShiftTable[2])))) : ((usqInt) (sourcePix & (cmMaskTable[2])) << (((int) (cmShiftTable[2]))))));
							pv = val | ((((((int) (cmShiftTable[3]))) < 0) ? ((usqInt) (sourcePix & (cmMaskTable[3])) >> -(((int) (cmShiftTable[3])))) : ((usqInt) (sourcePix & (cmMaskTable[3])) << (((int) (cmShiftTable[3]))))));

							/* a non-zero source pixel is never mapped to 0; it becomes 1 instead */
							if ((pv == 0)
							 && (sourcePix != 0)) {
								pv = 1;
							}
						}
						if ((mapperFlags & ColorMapIndexedPart) != 0) {
							pv = cmLookupTable[pv & cmMask];
						}
					}
					destPix = pv;

					/* place the mapped pixel at the current destination bit offset */
					destWord1 = destWord1 | (((sqInt)((usqInt)((destPix & destPixMask)) << dstShift1)));

					/* step both bit offsets to the next pixel */
					dstShift1 += dstShiftInc;

					/* when the source offset leaves 0..31, wrap it by 32 and move on to the next source word */
					if (!((((srcShift1 += srcShiftInc)) & 0xFFFFFFE0U) == 0)) {
						srcShift1 = (sourceMSB
							? srcShift1 + 32
							: srcShift1 - 32);
						/* begin incSrcIndex: */
						sourceIndex += 4;
					}
				} while(!(((nPix1 -= 1)) == 0));
			}

			/* Store back */
			/* remember where the source stopped; later destination words of the
			   row start at the first pixel position (dstShiftLeft) */
			srcBitShift = srcShift1;
			skewWord = destWord1;
			dstBitShift = dstShiftLeft;
			if (destMask == AllOnes) {

				/* full word: the merged result replaces the whole destination word */
				mergeWord = mergeFnwith(skewWord & halftoneWord, (assert((((usqInt)destIndex)) < endOfDestination),
				long32At(destIndex)));
				long32Atput(destIndex, destMask & mergeWord);
			}
			else {

				/* General version using dest masking */
				/* bits outside destMask keep their previous destination value */
				assert((((usqInt)destIndex)) < endOfDestination);
				destWord = long32At(destIndex);
				mergeWord = mergeFnwith(skewWord & halftoneWord, destWord & destMask);
				destWord = (destMask & mergeWord) | (destWord & ((unsigned int)~destMask));
				long32Atput(destIndex, destWord);
			}
			destIndex += 4;
			if (words == 2) {

				/* e.g., is the next word the last word? */
				/* set mask for last word in this row */
				destMask = mask2;
				nPix = endBits;
			}
			else {

				/* use fullword mask for inner loop */
				destMask = AllOnes;
				nPix = destPPW;
			}
		} while(!(((words -= 1)) == 0));

		/* step both pointers to the start of the next row */
		sourceIndex += sourceDelta;
		destIndex += destDelta;
	}
	return 0;
}


/*	Answer the default translation table from 1..8 bit indexed colors to
	32 bit pixel values. The table holds 256 entries and lives in static
	storage, so every call answers the same pointer. */
/*	The table has been generated by the following statements */
/*	| pvs hex |
	String streamContents:[:s|
	s nextPutAll:'static unsigned int theTable[256] = { '.
	pvs := (Color colorMapIfNeededFrom: 8 to: 32) asArray.
	1 to: pvs size do:[:i|
	i > 1 ifTrue:[s nextPutAll:', '].
	(i-1 \\ 8) = 0 ifTrue:[s cr].
	s nextPutAll:'0x'.
	hex := (pvs at: i) printStringBase: 16.
	s nextPutAll: (hex copyFrom: 4 to: hex size).
	].
	s nextPutAll:'};'.
	]. */

	/* BitBltSimulation>>#default8To32Table */
static unsigned int *
default8To32Table(void)
{
	/* eight entries per row; entry index = 8 * row + column */
	static unsigned int indexedTo32[256] = {
		0x0, 0xFF000001, 0xFFFFFFFF, 0xFF808080, 0xFFFF0000, 0xFF00FF00, 0xFF0000FF, 0xFF00FFFF,
		0xFFFFFF00, 0xFFFF00FF, 0xFF202020, 0xFF404040, 0xFF606060, 0xFF9F9F9F, 0xFFBFBFBF, 0xFFDFDFDF,
		0xFF080808, 0xFF101010, 0xFF181818, 0xFF282828, 0xFF303030, 0xFF383838, 0xFF484848, 0xFF505050,
		0xFF585858, 0xFF686868, 0xFF707070, 0xFF787878, 0xFF878787, 0xFF8F8F8F, 0xFF979797, 0xFFA7A7A7,
		0xFFAFAFAF, 0xFFB7B7B7, 0xFFC7C7C7, 0xFFCFCFCF, 0xFFD7D7D7, 0xFFE7E7E7, 0xFFEFEFEF, 0xFFF7F7F7,
		0xFF000001, 0xFF003300, 0xFF006600, 0xFF009900, 0xFF00CC00, 0xFF00FF00, 0xFF000033, 0xFF003333,
		0xFF006633, 0xFF009933, 0xFF00CC33, 0xFF00FF33, 0xFF000066, 0xFF003366, 0xFF006666, 0xFF009966,
		0xFF00CC66, 0xFF00FF66, 0xFF000099, 0xFF003399, 0xFF006699, 0xFF009999, 0xFF00CC99, 0xFF00FF99,
		0xFF0000CC, 0xFF0033CC, 0xFF0066CC, 0xFF0099CC, 0xFF00CCCC, 0xFF00FFCC, 0xFF0000FF, 0xFF0033FF,
		0xFF0066FF, 0xFF0099FF, 0xFF00CCFF, 0xFF00FFFF, 0xFF330000, 0xFF333300, 0xFF336600, 0xFF339900,
		0xFF33CC00, 0xFF33FF00, 0xFF330033, 0xFF333333, 0xFF336633, 0xFF339933, 0xFF33CC33, 0xFF33FF33,
		0xFF330066, 0xFF333366, 0xFF336666, 0xFF339966, 0xFF33CC66, 0xFF33FF66, 0xFF330099, 0xFF333399,
		0xFF336699, 0xFF339999, 0xFF33CC99, 0xFF33FF99, 0xFF3300CC, 0xFF3333CC, 0xFF3366CC, 0xFF3399CC,
		0xFF33CCCC, 0xFF33FFCC, 0xFF3300FF, 0xFF3333FF, 0xFF3366FF, 0xFF3399FF, 0xFF33CCFF, 0xFF33FFFF,
		0xFF660000, 0xFF663300, 0xFF666600, 0xFF669900, 0xFF66CC00, 0xFF66FF00, 0xFF660033, 0xFF663333,
		0xFF666633, 0xFF669933, 0xFF66CC33, 0xFF66FF33, 0xFF660066, 0xFF663366, 0xFF666666, 0xFF669966,
		0xFF66CC66, 0xFF66FF66, 0xFF660099, 0xFF663399, 0xFF666699, 0xFF669999, 0xFF66CC99, 0xFF66FF99,
		0xFF6600CC, 0xFF6633CC, 0xFF6666CC, 0xFF6699CC, 0xFF66CCCC, 0xFF66FFCC, 0xFF6600FF, 0xFF6633FF,
		0xFF6666FF, 0xFF6699FF, 0xFF66CCFF, 0xFF66FFFF, 0xFF990000, 0xFF993300, 0xFF996600, 0xFF999900,
		0xFF99CC00, 0xFF99FF00, 0xFF990033, 0xFF993333, 0xFF996633, 0xFF999933, 0xFF99CC33, 0xFF99FF33,
		0xFF990066, 0xFF993366, 0xFF996666, 0xFF999966, 0xFF99CC66, 0xFF99FF66, 0xFF990099, 0xFF993399,
		0xFF996699, 0xFF999999, 0xFF99CC99, 0xFF99FF99, 0xFF9900CC, 0xFF9933CC, 0xFF9966CC, 0xFF9999CC,
		0xFF99CCCC, 0xFF99FFCC, 0xFF9900FF, 0xFF9933FF, 0xFF9966FF, 0xFF9999FF, 0xFF99CCFF, 0xFF99FFFF,
		0xFFCC0000, 0xFFCC3300, 0xFFCC6600, 0xFFCC9900, 0xFFCCCC00, 0xFFCCFF00, 0xFFCC0033, 0xFFCC3333,
		0xFFCC6633, 0xFFCC9933, 0xFFCCCC33, 0xFFCCFF33, 0xFFCC0066, 0xFFCC3366, 0xFFCC6666, 0xFFCC9966,
		0xFFCCCC66, 0xFFCCFF66, 0xFFCC0099, 0xFFCC3399, 0xFFCC6699, 0xFFCC9999, 0xFFCCCC99, 0xFFCCFF99,
		0xFFCC00CC, 0xFFCC33CC, 0xFFCC66CC, 0xFFCC99CC, 0xFFCCCCCC, 0xFFCCFFCC, 0xFFCC00FF, 0xFFCC33FF,
		0xFFCC66FF, 0xFFCC99FF, 0xFFCCCCFF, 0xFFCCFFFF, 0xFFFF0000, 0xFFFF3300, 0xFFFF6600, 0xFFFF9900,
		0xFFFFCC00, 0xFFFFFF00, 0xFFFF0033, 0xFFFF3333, 0xFFFF6633, 0xFFFF9933, 0xFFFFCC33, 0xFFFFFF33,
		0xFFFF0066, 0xFFFF3366, 0xFFFF6666, 0xFFFF9966, 0xFFFFCC66, 0xFFFFFF66, 0xFFFF0099, 0xFFFF3399,
		0xFFFF6699, 0xFFFF9999, 0xFFFFCC99, 0xFFFFFF99, 0xFFFF00CC, 0xFFFF33CC, 0xFFFF66CC, 0xFFFF99CC,
		0xFFFFCCCC, 0xFFFFFFCC, 0xFFFF00FF, 0xFFFF33FF, 0xFFFF66FF, 0xFFFF99FF, 0xFFFFCCFF, 0xFFFFFFFF
	};

	return indexedTo32;
}


/*	Utility routine for computing Warp increments.
	Answers 0 when x1 and x2 are equal. Otherwise the magnitude of the
	result is ((|x2 - x1| + FixedPt1) / (n + 1)) + 1, and the result is
	positive when x2 lies above x1 and negative when it lies below. */

	/* BitBltSimulation>>#deltaFrom:to:nSteps: */
static sqInt
deltaFromtonSteps(sqInt x1, sqInt x2, sqInt n)
{
	sqInt span;

	if (x2 == x1) {
		return 0;
	}
	if (x2 > x1) {
		span = x2 - x1;
		return ((span + FixedPt1) / (n + 1)) + 1;
	}
	span = x1 - x2;
	return 0 - (((span + FixedPt1) / (n + 1)) + 1);
}

	/* BitBltSimulation>>#destinationWord:with: */
/*	Merge function that leaves the destination untouched: the source word
	is ignored and the destination word is answered as is. */
static unsigned int
destinationWordwith(unsigned int sourceWord, unsigned int destinationWord)
{
	/* the source takes no part in this rule */
	(void) sourceWord;

	return destinationWord;
}


/*	Compute the masks for the left (mask1) and right (mask2) destination
	words of a row, the number of destination words per row (nWords), the
	address of the first destination word (destIndex) and the byte step
	from the end of one row to the start of the next (destDelta).
	hDir and vDir are both set to 1. Always answers 0. */

	/* BitBltSimulation>>#destMaskAndPointerInit */
static sqInt
destMaskAndPointerInit(void)
{
	sqInt firstPixels;
	sqInt firstShift;
	sqInt lastPixels;
	sqInt lastShift;
	sqInt ppwMask;

	/* destPPW - 1 works as a bit mask, assuming destPPW is a power of two */
	ppwMask = destPPW - 1;

	/* how many pixels fall in the first and in the last word of a row */
	firstPixels = destPPW - (dx & ppwMask);
	lastPixels = (((dx + bbW) - 1) & ppwMask) + 1;

	/* number of bits of each edge word that lie outside the blit */
	firstShift = 32 - (firstPixels * destDepth);
	lastShift = 32 - (lastPixels * destDepth);
	if (destMSB) {
		mask1 = ((usqInt) AllOnes) >> firstShift;
		mask2 = ((usqInt)(AllOnes) << lastShift);
	}
	else {
		mask1 = ((usqInt)(AllOnes) << firstShift);
		mask2 = ((usqInt) AllOnes) >> lastShift;
	}

	if (bbW >= firstPixels) {
		nWords = (((bbW - firstPixels) + ppwMask) / destPPW) + 1;
	}
	else {

		/* the whole row fits inside one word: both edge masks apply to it */
		mask1 = mask1 & mask2;
		mask2 = 0;
		nWords = 1;
	}

	/* defaults for no overlap with source */
	vDir = 1;
	hDir = vDir;

	/* byte address of the first destination word; note that the pitch is in
	   bytes while nWords counts 32-bit words */
	destIndex = (destBits + (dy * destPitch)) + ((dx / destPPW) * 4);

	/* byte addr delta */
	destDelta = (destPitch * vDir) - (4 * (nWords * hDir));
	return 0;
}


/*	Dither the given 32bit word to 16 bit. Ignore alpha.
	Each of the three low bytes of srcWord is looked up in dither8Lookup,
	in the block of 256 entries selected by ditherValue, and the three
	results are combined at bit offsets 10 (bits 16..23 of srcWord),
	5 (bits 8..15) and 0 (bits 0..7). */

	/* BitBltSimulation>>#dither32To16:threshold: */
static unsigned int
dither32To16threshold(unsigned int srcWord, sqInt ditherValue)
{
	sqInt rowBase;
	usqInt highPart;
	usqInt lowPart;
	usqInt midPart;

	/* start of the lookup block for this dither value */
	rowBase = ((usqInt) ditherValue << 8);

	highPart = ((usqInt) (dither8Lookup[rowBase + ((((usqInt) srcWord >> 16)) & 0xFF)]) << 10);
	midPart = ((usqInt) (dither8Lookup[rowBase + ((((usqInt) srcWord >> 8)) & 0xFF)]) << 5);
	lowPart = dither8Lookup[rowBase + (srcWord & 0xFF)];
	return (highPart + midPart) + lowPart;
}


/*	This is the primitive implementation of the line-drawing loop.
	See the comments in BitBlt>>drawLoopX:Y:
	The loop steps destX/destY along the longer of the two deltas, using an
	error term to decide when to step along the shorter one as well, and
	calls copyBits after every step except the last. The rectangles
	affected by the individual blits are accumulated; once the accumulated
	area exceeds 4000 the display is updated and the accumulation restarts.
	Answers null as soon as a copyBits has failed; otherwise stores the
	final destX/destY back into the BitBlt object and answers 0. */

	/* BitBltSimulation>>#drawLoopX:Y: */
static sqInt
drawLoopXY(sqInt xDelta, sqInt yDelta)
{
	sqInt absX;
	sqInt absY;
	sqInt boundsB;
	sqInt boundsL;
	sqInt boundsR;
	sqInt boundsT;
	sqInt err;
	sqInt majorLen;
	sqInt minorLen;
	sqInt mostlyHorizontal;
	sqInt step;
	sqInt stepX;
	sqInt stepY;

	/* unit steps: the sign of each delta */
	stepX = ((xDelta > 0) ? 1 : ((xDelta == 0) ? 0 : -1));
	stepY = ((yDelta > 0) ? 1 : ((yDelta == 0) ? 0 : -1));
	absY = SQABS(yDelta);
	absX = SQABS(xDelta);

	/* the axis with the larger extent drives the loop */
	if (absX > absY) {
		mostlyHorizontal = 1;
		majorLen = absX;
		minorLen = absY;
	}
	else {
		mostlyHorizontal = 0;
		majorLen = absY;
		minorLen = absX;
	}

	/* init null rectangle */
	boundsL = (boundsT = 9999);
	boundsR = (boundsB = -9999);

	err = majorLen / 2;
	for (step = 1; step <= majorLen; step += 1) {

		/* always advance along the major axis */
		if (mostlyHorizontal) {
			destX += stepX;
		}
		else {
			destY += stepY;
		}

		/* advance along the minor axis whenever the error term underflows */
		if (((err -= minorLen)) < 0) {
			if (mostlyHorizontal) {
				destY += stepY;
			}
			else {
				destX += stepX;
			}
			err += majorLen;
		}

		/* nothing is drawn on the final step */
		if (step >= majorLen) {
			continue;
		}
		copyBits();
		if (failed()) {
			return null;
		}

		/* ignore blits that affected nothing */
		if (!((affectedL < affectedR)
			 && (affectedT < affectedB))) {
			continue;
		}

		/* Affected rectangle grows along the line */
		if (affectedL < boundsL) {
			boundsL = affectedL;
		}
		if (boundsR < affectedR) {
			boundsR = affectedR;
		}
		if (affectedT < boundsT) {
			boundsT = affectedT;
		}
		if (boundsB < affectedB) {
			boundsB = affectedB;
		}
		if (((boundsR - boundsL) * (boundsB - boundsT)) <= 4000) {
			continue;
		}

		/* If affected rectangle gets large, update it in chunks */
		affectedL = boundsL;
		affectedR = boundsR;
		affectedT = boundsT;
		affectedB = boundsB;
		/* begin showDisplayBits */
		if (numGCsOnInvocation != (statNumGCs())) {
			reloadDestAndSourceForms();
		}
		showDisplayBitsLeftTopRightBottom(destForm, affectedL, affectedT, affectedR, affectedB);

		/* init null rectangle */
		boundsL = (boundsT = 9999);
		boundsR = (boundsB = -9999);
	}

	/* publish whatever has accumulated since the last display update */
	affectedL = boundsL;
	affectedR = boundsR;
	affectedT = boundsT;
	affectedB = boundsB;

	/* store destX, Y back */
	storeIntegerofObjectwithValue(BBDestXIndex, bitBltOop, destX);
	storeIntegerofObjectwithValue(BBDestYIndex, bitBltOop, destY);
	return 0;
}


/*	Dither the given 32bit word to 16 bit. Ignore alpha.
	Each of the three low bytes of srcWord is handled the same way: its
	low 3 bits select an entry of ditherThresholds16, its upper 5 bits
	select an entry of ditherValues16, and that value is bumped by one when
	ditherValue lies below the threshold. The three results are combined at
	bit offsets 0 (bits 0..7 of srcWord), 5 (bits 8..15) and
	10 (bits 16..23). */

	/* BitBltSimulation>>#expensiveDither32To16:threshold: */
static unsigned int
expensiveDither32To16threshold(unsigned int srcWord, sqInt ditherValue)
{
	unsigned int channel;
	int level;
	int limit;
	int packed;

	/* bits 0..7 */
	channel = srcWord & 0xFF;
	limit = ditherThresholds16[channel & 7];
	level = ditherValues16[((usqInt) channel >> 3)];
	if (ditherValue < limit) {
		level = level + 1;
	}
	packed = level;

	/* bits 8..15 */
	channel = (((usqInt) srcWord >> 8)) & 0xFF;
	limit = ditherThresholds16[channel & 7];
	level = ditherValues16[((usqInt) channel >> 3)];
	if (ditherValue < limit) {
		level = level + 1;
	}
	packed = packed | (((usqInt) level << 5));

	/* bits 16..23 */
	channel = (((usqInt) srcWord >> 16)) & 0xFF;
	limit = ditherThresholds16[channel & 7];
	level = ditherValues16[((usqInt) channel >> 3)];
	if (ditherValue < limit) {
		level = level + 1;
	}
	packed = packed | (((usqInt) level << 10));
	return packed;
}


/*	Return the integer value of the given field of the given object.
	If the field holds an integer object, answer its value directly.
	Otherwise the field is read through floatValueOf and truncated to an
	integer; the primitive is failed and 0 answered unless that value lies
	within -2147483648.0 .. 2147483647.0 (a NaN is rejected as well).
	NOTE(review): failing for fields that hold neither an integer nor a
	Float is presumably done inside floatValueOf -- confirm.
 */

	/* BitBltSimulation>>#fetchIntOrFloat:ofObject: */
static sqInt
fetchIntOrFloatofObject(sqInt fieldIndex, sqInt objectPointer)
{
	double asDouble;
	sqInt slot;

	slot = fetchPointerofObject(fieldIndex, objectPointer);
	if (!(isIntegerObject(slot))) {
		asDouble = floatValueOf(slot);
		if ((-2.147483648e9 <= asDouble)
		 && (asDouble <= 2.147483647e9)) {
			return ((sqInt)asDouble);
		}

		/* out of range, or not comparable at all */
		primitiveFail();
		return 0;
	}
	return integerValueOf(slot);
}


/*	Return the integer value of the given field of the given object, or
	defaultValue when the field holds nil.
	An integer object answers its value directly. Any other non-nil field
	is read through floatValueOf and truncated to an integer; the primitive
	is failed and 0 answered unless that value lies within
	-2147483648.0 .. 2147483647.0 (a NaN is rejected as well).
	NOTE(review): failing for fields that hold neither an integer, nil nor
	a Float is presumably done inside floatValueOf -- confirm.
 */

	/* BitBltSimulation>>#fetchIntOrFloat:ofObject:ifNil: */
static sqInt
fetchIntOrFloatofObjectifNil(sqInt fieldIndex, sqInt objectPointer, sqInt defaultValue)
{
	double asDouble;
	sqInt slot;

	slot = fetchPointerofObject(fieldIndex, objectPointer);
	if (isIntegerObject(slot)) {
		return integerValueOf(slot);
	}
	if (slot == (nilObject())) {
		return defaultValue;
	}
	asDouble = floatValueOf(slot);
	if ((-2.147483648e9 <= asDouble)
	 && (asDouble <= 2.147483647e9)) {
		return ((sqInt)asDouble);
	}

	/* out of range, or not comparable at all */
	primitiveFail();
	return 0;
}


/*	For a non-zero destinationWord whose alpha byte (the top 8 bits) is
	zero, answer destinationWord with the alpha byte of sourceWord filled
	in. In every other case, and whenever destDepth is not 32, answer
	destinationWord unchanged. Intended for fixing alpha channels left at
	zero during 16->32 bpp conversions.
 */

	/* BitBltSimulation>>#fixAlpha:with: */
static unsigned int
fixAlphawith(unsigned int sourceWord, unsigned int destinationWord)
{
	unsigned int needsAlpha;

	needsAlpha = (destDepth == 32)
		&& (destinationWord != 0)
		&& ((destinationWord & 0xFF000000U) == 0);
	if (needsAlpha) {
		return destinationWord | (sourceWord & 0xFF000000U);
	}
	return destinationWord;
}


/*	Answer the name of this module, i.e. the file-level moduleName.
	Note: This is hardcoded so it can be run from Squeak.
	The module name is used for validating a module *after*
	it is loaded, to check that it really does contain the module
	we think it contains. This is important! */

	/* InterpreterPlugin>>#getModuleName */
EXPORT(const char*)
getModuleName(void)
{
	return moduleName;
}

	/* BitBltSimulation>>#ignoreSourceOrHalftone: */
/*	Answer 1 when the given form is to be ignored by the current blit:
	either formPointer is nil, or combinationRule is one of 0, 5, 10 or 15.
	Answer 0 otherwise. */
static sqInt
ignoreSourceOrHalftone(sqInt formPointer)
{
	if (formPointer == (nilObject())) {
		return 1;
	}
	switch (combinationRule) {
	case 0:
	case 5:
	case 10:
	case 15:
		return 1;
	default:
		return 0;
	}
}

	/* BitBltSimulation>>#initBBOpTable */
/*	Fill opTable with the merge function of every combination rule.
	The function for rule N is stored at opTable[N + 1], for rules 0
	through 41. Always answers 0. */
static sqInt
initBBOpTable(void)
{
	/* merge functions in combination-rule order; the position in this list is the rule number */
	static void *const mergeFns[] = {
		(void *)clearWordwith,				/* 0 */
		(void *)bitAndwith,					/* 1 */
		(void *)bitAndInvertwith,			/* 2 */
		(void *)sourceWordwith,				/* 3 */
		(void *)bitInvertAndwith,			/* 4 */
		(void *)destinationWordwith,		/* 5 */
		(void *)bitXorwith,					/* 6 */
		(void *)bitOrwith,					/* 7 */
		(void *)bitInvertAndInvertwith,		/* 8 */
		(void *)bitInvertXorwith,			/* 9 */
		(void *)bitInvertDestinationwith,	/* 10 */
		(void *)bitOrInvertwith,			/* 11 */
		(void *)bitInvertSourcewith,		/* 12 */
		(void *)bitInvertOrwith,			/* 13 */
		(void *)bitInvertOrInvertwith,		/* 14 */
		(void *)destinationWordwith,		/* 15 */
		(void *)destinationWordwith,		/* 16 */
		(void *)destinationWordwith,		/* 17 */
		(void *)addWordwith,				/* 18 */
		(void *)subWordwith,				/* 19 */
		(void *)rgbAddwith,					/* 20 */
		(void *)rgbSubwith,					/* 21 */
		(void *)OLDrgbDiffwith,				/* 22 */
		(void *)OLDtallyIntoMapwith,		/* 23 */
		(void *)alphaBlendwith,				/* 24 */
		(void *)pixPaintwith,				/* 25 */
		(void *)pixMaskwith,				/* 26 */
		(void *)rgbMaxwith,					/* 27 */
		(void *)rgbMinwith,					/* 28 */
		(void *)rgbMinInvertwith,			/* 29 */
		(void *)alphaBlendConstwith,		/* 30 */
		(void *)alphaPaintConstwith,		/* 31 */
		(void *)rgbDiffwith,				/* 32 */
		(void *)tallyIntoMapwith,			/* 33 */
		(void *)alphaBlendScaledwith,		/* 34 */
		(void *)alphaBlendScaledwith,		/* 35 */
		(void *)alphaBlendScaledwith,		/* 36 */
		(void *)rgbMulwith,					/* 37 */
		(void *)pixSwapwith,				/* 38 */
		(void *)pixClearwith,				/* 39 */
		(void *)fixAlphawith,				/* 40 */
		(void *)rgbComponentAlphawith		/* 41 */
	};
	sqInt rule;

	for (rule = 0; rule < ((sqInt) (sizeof(mergeFns) / sizeof(mergeFns[0]))); rule += 1) {
		opTable[rule + 1] = mergeFns[rule];
	}
	return 0;
}

	/* BitBltSimulation>>#initDither8Lookup */
static sqInt
initDither8Lookup(void)
{
	sqInt b;
	int out;
	unsigned int pv;
	sqInt t;
	int threshold;
	unsigned int value;
	int value1;

	for (b = 0; b <= 0xFF; b += 1) {
		for (t = 0; t <= 15; t += 1) {
			/* begin expensiveDither32To16:threshold: */
			pv = (((unsigned int) b)) & 0xFF;
			threshold = ditherThresholds16[pv & 7];
			value1 = ditherValues16[((usqInt) pv >> 3)];
			if (t < threshold) {
				out = value1 + 1;
			}
			else {
				out = value1;
			}
			pv = (((usqInt) (((unsigned int) b)) >> 8)) & 0xFF;
			threshold = ditherThresholds16[pv & 7];
			value1 = ditherValues16[((usqInt) pv >> 3)];
			if (t < threshold) {
				out = out | (((usqInt) (value1 + 1) << 5));
			}
			else {
				out = out | (((usqInt) value1 << 5));
			}
			pv = (((usqInt) (((unsigned int) b)) >> 16)) & 0xFF;
			threshold = ditherThresholds16[pv & 7];
			value1 = ditherValues16[((usqInt) pv >> 3)];
			if (t < threshold) {
				out = out | (((usqInt) (value1 + 1) << 10));
			}
			else {
				out = out | (((usqInt) value1 << 10));
			}
			value = out;
			dither8Lookup[(((sqInt)((usqInt)(t) << 8))) + b] = value;
		}
	}
	return 0;
}

	/* BitBltSimulation>>#initialiseModule */
/*	Module initialisation entry point: fill in the combination-rule table
	(initBBOpTable) and the dither lookup table (initDither8Lookup) and,
	when ENABLE_FAST_BLT is defined non-zero, also call
	initialiseCopyBits(). Always answers 1. */
EXPORT(sqInt)
initialiseModule(void)
{
	initBBOpTable();
	initDither8Lookup();
	
#  if ENABLE_FAST_BLT
	initialiseCopyBits();
#  endif /* ENABLE_FAST_BLT */
	return 1;
}


/*	Return true if shiftTable/maskTable define an identity mapping.
	A missing (null) shift or mask table counts as identity. Otherwise all
	four shifts must be 0 and the masks must be 0xFF0000 (red),
	0xFF00 (green), 0xFF (blue) and 0xFF000000 (alpha). */

	/* BitBltSimulation>>#isIdentityMap:with: */
static sqInt
isIdentityMapwith(int *shifts, unsigned int *masks)
{
	if ((shifts == null)
	 || (masks == null)) {
		return 1;
	}

	/* any non-zero shift moves a component */
	if ((shifts[RedIndex]) != 0) {
		return 0;
	}
	if ((shifts[GreenIndex]) != 0) {
		return 0;
	}
	if ((shifts[BlueIndex]) != 0) {
		return 0;
	}
	if ((shifts[AlphaIndex]) != 0) {
		return 0;
	}

	/* every mask must select exactly its own byte */
	if ((masks[RedIndex]) != 0xFF0000) {
		return 0;
	}
	if ((masks[GreenIndex]) != 0xFF00) {
		return 0;
	}
	if ((masks[BlueIndex]) != 0xFF) {
		return 0;
	}
	if ((masks[AlphaIndex]) != 0xFF000000U) {
		return 0;
	}
	return 1;
}


/*	Load the dest form for BitBlt. Answer false if anything is wrong, true
	otherwise.
	Reads bits, width, height and depth from destForm and sets destBits,
	destWidth, destHeight, destDepth, destMSB, destPPW and destPitch.
	A negative depth in the form means destMSB is false; destDepth is then
	the negated value. If the bits field is an integer object it is treated
	as a surface handle: the surface is queried for its actual dimensions
	and destBits/destPitch are left at 0. Otherwise the bits must be a
	words-or-bytes object large enough for destPitch * destHeight bytes.
	The depth must lie in 1..32 before it is used as a divisor: a depth of
	0 would divide by zero, and a depth above 32 would make destPPW zero,
	which is itself used as a divisor.
 */

	/* BitBltSimulation>>#loadBitBltDestForm */
static sqInt
loadBitBltDestForm(void)
{
	sqInt destBitsSize;

	if (!((isPointers(destForm))
		 && ((slotSizeOf(destForm)) >= 4))) {
		return 0;
	}
	destBits = fetchPointerofObject(FormBitsIndex, destForm);
	destWidth = fetchIntegerofObject(FormWidthIndex, destForm);
	destHeight = fetchIntegerofObject(FormHeightIndex, destForm);
	if (!((destWidth >= 0)
		 && (destHeight >= 0))) {
		return 0;
	}
	destDepth = fetchIntegerofObject(FormDepthIndex, destForm);

	/* a negative depth marks a form that is not MSB-first; keep the magnitude */
	if (!((destMSB = destDepth > 0))) {
		destDepth = 0 - destDepth;
	}
	if (isIntegerObject(destBits)) {

		/* Query for actual surface dimensions */
		if (querySurfaceFn == 0) {
			if (!(loadSurfacePlugin())) {
				return 0;
			}
		}
		if (!(querySurfaceFn(integerValueOf(destBits), (&destWidth), (&destHeight), (&destDepth), (&destMSB)))) {
			primitiveFailFor(PrimErrCallbackError);
			return 0;
		}

		/* the surface reported the depth; refuse values that cannot be divided into 32 */
		if ((destDepth < 1)
		 || (destDepth > 32)) {
			return 0;
		}
		destPPW = 32 / destDepth;
		destBits = (destPitch = 0);
	}
	else {
		if (!(isWordsOrBytes(destBits))) {
			return 0;
		}

		/* refuse depths that would make 32 / destDepth, or the division by destPPW, fault */
		if ((destDepth < 1)
		 || (destDepth > 32)) {
			return 0;
		}
		destPPW = 32 / destDepth;

		/* bytes per row, rounded up to whole 32-bit words */
		destPitch = ((destWidth + (destPPW - 1)) / destPPW) * 4;
		destBitsSize = byteSizeOf(destBits);
		if (!(destBitsSize >= (destPitch * destHeight))) {
			return 0;
		}
		destBits = oopForPointer(firstIndexableField(destBits));
	}
	return 1;
}


/*	Load BitBlt from the oop.
	This function is exported for the Balloon engine.
	It simply forwards to loadBitBltFromwarping with the warping flag
	set to 0 and answers that call's result. */

	/* BitBltSimulation>>#loadBitBltFrom: */
EXPORT(sqInt)
loadBitBltFrom(sqInt bbObj)
{
	return loadBitBltFromwarping(bbObj, 0);
}


/*	Load context from BitBlt instance. Return false if anything is amiss */
/*	NOTE this should all be changed to minX/maxX coordinates for simpler
	clipping -- once it works! */
/*	bbObj is the BitBlt oop; aBool is stored in isWarping.
	Steps, in order:
	  - record the oop, the warping flag and the GC count on entry;
	  - fetch and range-check the combination rule (rules 16 and 17 are
	    rejected);
	  - decide noSource / noHalftone (nil form, or rule 0, 5, 10 or 15);
	  - load the dest form (inlined loadBitBltDestForm);
	  - fetch destX/destY/width/height;
	  - unless noSource, load the source form, the color map and
	    sourceX/sourceY;
	  - load the halftone form;
	  - fetch the clip rectangle and clamp it to the dest form;
	  - fail with PrimErrObjectMoved if a GC happened meanwhile.
	Form depths outside 1..32 are rejected because 32 / depth and the
	division by pixels-per-word would otherwise divide by zero. */

	/* BitBltSimulation>>#loadBitBltFrom:warping: */
static sqInt
loadBitBltFromwarping(sqInt bbObj, sqInt aBool)
{
	sqInt cmOop;
	sqInt cmSize;
	sqInt destBitsSize;
	sqInt fieldOop;
	sqInt fieldOop1;
	double floatValue;
	double floatValue1;
	sqInt halftoneBits;
	sqInt mapOop;
	sqInt mapOop1;
	sqInt ok;
	sqInt oldStyle;
	sqInt oop;
	sqInt sourceBitsSize;

	bitBltOop = bbObj;
	isWarping = aBool;
	bitBltIsReceiver = bbObj == (stackValue(methodArgumentCount()));
	numGCsOnInvocation = statNumGCs();
	combinationRule = fetchIntegerofObject(BBRuleIndex, bitBltOop);
	if ((failed())
	 || ((combinationRule < 0)
	 || (combinationRule > (OpTableSize - 2)))) {
		return 0;
	}
	if ((combinationRule >= 16)
	 && (combinationRule <= 17)) {
		return 0;
	}
	sourceForm = fetchPointerofObject(BBSourceFormIndex, bitBltOop);
	/* begin ignoreSourceOrHalftone: */
	noSource = (sourceForm == (nilObject()))
	 || (combinationRule == 0)
	 || (combinationRule == 5)
	 || (combinationRule == 10)
	 || (combinationRule == 15);
	/* end ignoreSourceOrHalftone: */
	halftoneForm = fetchPointerofObject(BBHalftoneFormIndex, bitBltOop);
	/* begin ignoreSourceOrHalftone: */
	noHalftone = (halftoneForm == (nilObject()))
	 || (combinationRule == 0)
	 || (combinationRule == 5)
	 || (combinationRule == 10)
	 || (combinationRule == 15);
	/* end ignoreSourceOrHalftone: */
	destForm = fetchPointerofObject(BBDestFormIndex, bbObj);
	/* begin loadBitBltDestForm */
	if (!((isPointers(destForm))
		 && ((slotSizeOf(destForm)) >= 4))) {
		ok = 0;
		goto l3;
	}
	destBits = fetchPointerofObject(FormBitsIndex, destForm);
	destWidth = fetchIntegerofObject(FormWidthIndex, destForm);
	destHeight = fetchIntegerofObject(FormHeightIndex, destForm);
	if (!((destWidth >= 0)
		 && (destHeight >= 0))) {
		ok = 0;
		goto l3;
	}
	destDepth = fetchIntegerofObject(FormDepthIndex, destForm);
	if (!((destMSB = destDepth > 0))) {
		destDepth = 0 - destDepth;
	}
	if (isIntegerObject(destBits)) {

		/* Query for actual surface dimensions */
		if (querySurfaceFn == 0) {
			if (!(loadSurfacePlugin())) {
				ok = 0;
				goto l3;
			}
		}
		if (!(querySurfaceFn(integerValueOf(destBits), (&destWidth), (&destHeight), (&destDepth), (&destMSB)))) {
			primitiveFailFor(PrimErrCallbackError);
			ok = 0;
			goto l3;
		}

		/* The query may have rewritten destDepth; never divide by zero */
		if ((destDepth < 1)
		 || (destDepth > 32)) {
			ok = 0;
			goto l3;
		}
		destPPW = 32 / destDepth;
		destBits = (destPitch = 0);
	}
	else {
		if (!(isWordsOrBytes(destBits))) {
			ok = 0;
			goto l3;
		}

		/* depth 0 makes 32 / destDepth trap; depth > 32 makes destPPW 0 */
		if ((destDepth < 1)
		 || (destDepth > 32)) {
			ok = 0;
			goto l3;
		}
		destPPW = 32 / destDepth;
		destPitch = ((destWidth + (destPPW - 1)) / destPPW) * 4;
		destBitsSize = byteSizeOf(destBits);
		if (!(destBitsSize >= (destPitch * destHeight))) {
			ok = 0;
			goto l3;
		}
		destBits = oopForPointer(firstIndexableField(destBits));
	}
	ok = 1;
	l3:	/* end loadBitBltDestForm */;
	if (!ok) {
		return 0;
	}
	destX = fetchIntOrFloatofObjectifNil(BBDestXIndex, bitBltOop, 0);
	destY = fetchIntOrFloatofObjectifNil(BBDestYIndex, bitBltOop, 0);
	width = fetchIntOrFloatofObjectifNil(BBWidthIndex, bitBltOop, destWidth);
	height = fetchIntOrFloatofObjectifNil(BBHeightIndex, bitBltOop, destHeight);
	if (failed()) {
		return 0;
	}
	if (noSource) {
		sourceX = (sourceY = 0);
	}
	else {
		/* begin loadBitBltSourceForm */
		if (!((isPointers(sourceForm))
			 && ((slotSizeOf(sourceForm)) >= 4))) {
			ok = 0;
			goto l7;
		}
		sourceBits = fetchPointerofObject(FormBitsIndex, sourceForm);
		/* begin fetchIntOrFloat:ofObject: */
		fieldOop = fetchPointerofObject(FormWidthIndex, sourceForm);
		if (isIntegerObject(fieldOop)) {
			sourceWidth = integerValueOf(fieldOop);
			goto l5;
		}
		floatValue = floatValueOf(fieldOop);
		if (!((-2.147483648e9 <= floatValue)
			 && (floatValue <= 2.147483647e9))) {
			primitiveFail();
			sourceWidth = 0;
			goto l5;
		}
		sourceWidth = ((sqInt)floatValue);
	l5:	/* end fetchIntOrFloat:ofObject: */;
		/* begin fetchIntOrFloat:ofObject: */
		fieldOop1 = fetchPointerofObject(FormHeightIndex, sourceForm);
		if (isIntegerObject(fieldOop1)) {
			sourceHeight = integerValueOf(fieldOop1);
			goto l6;
		}
		floatValue1 = floatValueOf(fieldOop1);
		if (!((-2.147483648e9 <= floatValue1)
			 && (floatValue1 <= 2.147483647e9))) {
			primitiveFail();
			sourceHeight = 0;
			goto l6;
		}
		sourceHeight = ((sqInt)floatValue1);
	l6:	/* end fetchIntOrFloat:ofObject: */;
		if (!((sourceWidth >= 0)
			 && (sourceHeight >= 0))) {
			ok = 0;
			goto l7;
		}
		sourceDepth = fetchIntegerofObject(FormDepthIndex, sourceForm);
		if (!((sourceMSB = sourceDepth > 0))) {
			sourceDepth = 0 - sourceDepth;
		}
		if (isIntegerObject(sourceBits)) {

			/* Query for actual surface dimensions */
			if (querySurfaceFn == 0) {
				if (!(loadSurfacePlugin())) {
					ok = 0;
					goto l7;
				}
			}
			if (!(querySurfaceFn(integerValueOf(sourceBits), (&sourceWidth), (&sourceHeight), (&sourceDepth), (&sourceMSB)))) {
				primitiveFailFor(PrimErrCallbackError);
				ok = 0;
				goto l7;
			}

			/* The query may have rewritten sourceDepth; never divide by zero */
			if ((sourceDepth < 1)
			 || (sourceDepth > 32)) {
				ok = 0;
				goto l7;
			}
			sourcePPW = 32 / sourceDepth;
			sourceBits = (sourcePitch = 0);
		}
		else {
			if (!(isWordsOrBytes(sourceBits))) {
				ok = 0;
				goto l7;
			}

			/* depth 0 makes 32 / sourceDepth trap; depth > 32 makes sourcePPW 0 */
			if ((sourceDepth < 1)
			 || (sourceDepth > 32)) {
				ok = 0;
				goto l7;
			}
			sourcePPW = 32 / sourceDepth;
			sourcePitch = ((sourceWidth + (sourcePPW - 1)) / sourcePPW) * 4;
			sourceBitsSize = byteSizeOf(sourceBits);
			if (!(sourceBitsSize >= (sourcePitch * sourceHeight))) {
				ok = 0;
				goto l7;
			}
			sourceBits = oopForPointer(firstIndexableField(sourceBits));
		}
		ok = 1;
	l7:	/* end loadBitBltSourceForm */;
		if (!ok) {
			return 0;
		}
		/* begin loadColorMap */
		cmFlags = (cmMask = (cmBitsPerColor = 0));
		cmShiftTable = null;
		cmMaskTable = null;
		cmLookupTable = null;
		cmOop = fetchPointerofObject(BBColorMapIndex, bitBltOop);
		if (cmOop == (nilObject())) {
			ok = 1;
			goto l10;
		}

		/* even if identity or somesuch - may be cleared later */
		cmFlags = ColorMapPresent;
		oldStyle = 0;
		if (isWords(cmOop)) {

			/* This is an old-style color map (indexed only, with implicit RGBA conversion) */
			cmSize = slotSizeOf(cmOop);
			cmLookupTable = firstIndexableField(cmOop);
			oldStyle = 1;
		}
		else {

			/* A new-style color map (fully qualified) */
			if (!((isPointers(cmOop))
				 && ((slotSizeOf(cmOop)) >= 3))) {
				ok = 0;
				goto l10;
			}
			/* begin loadColorMapShiftOrMaskFrom: */
			mapOop = fetchPointerofObject(0, cmOop);
			if (mapOop == (nilObject())) {
				cmShiftTable = ((void *) null);
				goto l8;
			}
			if (!((isWords(mapOop))
				 && ((slotSizeOf(mapOop)) == 4))) {
				primitiveFail();
				cmShiftTable = ((void *) null);
				goto l8;
			}
			cmShiftTable = ((void *) (firstIndexableField(mapOop)));
	l8:	/* end loadColorMapShiftOrMaskFrom: */;
			/* begin loadColorMapShiftOrMaskFrom: */
			mapOop1 = fetchPointerofObject(1, cmOop);
			if (mapOop1 == (nilObject())) {
				cmMaskTable = ((void *) null);
				goto l9;
			}
			if (!((isWords(mapOop1))
				 && ((slotSizeOf(mapOop1)) == 4))) {
				primitiveFail();
				cmMaskTable = ((void *) null);
				goto l9;
			}
			cmMaskTable = ((void *) (firstIndexableField(mapOop1)));
	l9:	/* end loadColorMapShiftOrMaskFrom: */;
			oop = fetchPointerofObject(2, cmOop);
			if (oop == (nilObject())) {
				cmSize = 0;
			}
			else {
				if (!(isWords(oop))) {
					ok = 0;
					goto l10;
				}
				cmSize = slotSizeOf(oop);
				cmLookupTable = firstIndexableField(oop);
			}
			cmFlags = cmFlags | ColorMapNewStyle;
		}

		/* the lookup table size must be zero or a power of two */
		if (!((cmSize & (cmSize - 1)) == 0)) {
			ok = 0;
			goto l10;
		}
		cmMask = cmSize - 1;
		cmBitsPerColor = 0;
		if (cmSize == 512) {
			cmBitsPerColor = 3;
		}
		if (cmSize == 4096) {
			cmBitsPerColor = 4;
		}
		if (cmSize == 32768) {
			cmBitsPerColor = 5;
		}
		if (cmSize == 0) {
			cmLookupTable = null;
			cmMask = 0;
		}
		else {
			cmFlags = cmFlags | ColorMapIndexedPart;
		}
		if (oldStyle) {

			/* needs implicit conversion */
			setupColorMasks();
		}
		if (isIdentityMapwith(cmShiftTable, cmMaskTable)) {
			cmMaskTable = null;
			cmShiftTable = null;
		}
		else {
			cmFlags = cmFlags | ColorMapFixedPart;
		}
		ok = 1;
	l10:	/* end loadColorMap */;
		if (!ok) {
			return 0;
		}
		if ((cmFlags & ColorMapNewStyle) == 0) {
			setupColorMasks();
		}
		sourceX = fetchIntOrFloatofObjectifNil(BBSourceXIndex, bitBltOop, 0);
		sourceY = fetchIntOrFloatofObjectifNil(BBSourceYIndex, bitBltOop, 0);
	}
	/* begin loadHalftoneForm */
	if (noHalftone) {
		halftoneBase = null;
		ok = 1;
		goto l4;
	}
	if ((isPointers(halftoneForm))
	 && ((slotSizeOf(halftoneForm)) >= 4)) {

		/* Old-style 32xN monochrome halftone Forms */
		halftoneBits = fetchPointerofObject(FormBitsIndex, halftoneForm);
		halftoneHeight = fetchIntegerofObject(FormHeightIndex, halftoneForm);
		if (!(isWords(halftoneBits))) {

			/* Ignore the halftone; do not derive a base address from an
			   object that just failed the isWords check */
			noHalftone = 1;
			halftoneBase = null;
			ok = 1;
			goto l4;
		}
	}
	else {

		/* New spec accepts, basically, a word array */
		if (!(isWords(halftoneForm))) {
			ok = 0;
			goto l4;
		}
		halftoneBits = halftoneForm;
		halftoneHeight = slotSizeOf(halftoneBits);
	}
	halftoneBase = oopForPointer(firstIndexableField(halftoneBits));
	ok = 1;
	l4:	/* end loadHalftoneForm */;
	if (!ok) {
		return 0;
	}
	clipX = fetchIntOrFloatofObjectifNil(BBClipXIndex, bitBltOop, 0);
	clipY = fetchIntOrFloatofObjectifNil(BBClipYIndex, bitBltOop, 0);
	clipWidth = fetchIntOrFloatofObjectifNil(BBClipWidthIndex, bitBltOop, destWidth);
	clipHeight = fetchIntOrFloatofObjectifNil(BBClipHeightIndex, bitBltOop, destHeight);
	if (failed()) {
		return 0;
	}

	/* Clamp the clip rectangle to the bounds of the dest form */
	if (clipX < 0) {
		clipWidth += clipX;
		clipX = 0;
	}
	if (clipY < 0) {
		clipHeight += clipY;
		clipY = 0;
	}
	if ((clipX + clipWidth) > destWidth) {
		clipWidth = destWidth - clipX;
	}
	if ((clipY + clipHeight) > destHeight) {
		clipHeight = destHeight - clipY;
	}
	if (numGCsOnInvocation != (statNumGCs())) {

		/* querySurface could be a callback in loadSourceFor: and loadDestForm: */
		primitiveFailFor(PrimErrObjectMoved);
		return 0;
	}
	return 1;
}


/*	Load the source form for BitBlt. Return false if anything is wrong, true
	otherwise.

	Reads the bits, width, height and depth slots of sourceForm into the
	source* globals and derives sourcePPW (32 / sourceDepth) and sourcePitch
	(bytes per row, rows padded to whole 32-bit words). Width and height may
	be SmallIntegers or floats; a float outside the 32-bit signed range fails
	the primitive and is read as 0. A depth slot that is not positive clears
	sourceMSB and is negated before use.

	If the bits slot is a SmallInteger it is taken as a surface handle: the
	surface plugin is queried for the real width/height/depth/MSB and
	sourceBits and sourcePitch are left 0. Otherwise the bits must be a words
	or bytes object of at least sourcePitch * sourceHeight bytes.

	Answers 0 when the form is malformed, when the depth is outside 1..32
	(32 / sourceDepth or the division by sourcePPW would otherwise divide by
	zero), when the surface query fails (also fails the primitive with
	PrimErrCallbackError), or when the bits object is too small.
 */

	/* BitBltSimulation>>#loadBitBltSourceForm */
static sqInt
loadBitBltSourceForm(void)
{
	sqInt fieldOop;
	sqInt fieldOop1;
	double floatValue;
	double floatValue1;
	sqInt sourceBitsSize;

	if (!((isPointers(sourceForm))
		 && ((slotSizeOf(sourceForm)) >= 4))) {
		return 0;
	}
	sourceBits = fetchPointerofObject(FormBitsIndex, sourceForm);
	/* begin fetchIntOrFloat:ofObject: */
	fieldOop = fetchPointerofObject(FormWidthIndex, sourceForm);
	if (isIntegerObject(fieldOop)) {
		sourceWidth = integerValueOf(fieldOop);
	}
	else {
		floatValue = floatValueOf(fieldOop);
		if ((-2.147483648e9 <= floatValue)
		 && (floatValue <= 2.147483647e9)) {
			sourceWidth = ((sqInt)floatValue);
		}
		else {

			/* out of range (or NaN): fail and read as 0 */
			primitiveFail();
			sourceWidth = 0;
		}
	}
	/* end fetchIntOrFloat:ofObject: */
	/* begin fetchIntOrFloat:ofObject: */
	fieldOop1 = fetchPointerofObject(FormHeightIndex, sourceForm);
	if (isIntegerObject(fieldOop1)) {
		sourceHeight = integerValueOf(fieldOop1);
	}
	else {
		floatValue1 = floatValueOf(fieldOop1);
		if ((-2.147483648e9 <= floatValue1)
		 && (floatValue1 <= 2.147483647e9)) {
			sourceHeight = ((sqInt)floatValue1);
		}
		else {

			/* out of range (or NaN): fail and read as 0 */
			primitiveFail();
			sourceHeight = 0;
		}
	}
	/* end fetchIntOrFloat:ofObject: */
	if (!((sourceWidth >= 0)
		 && (sourceHeight >= 0))) {
		return 0;
	}
	sourceDepth = fetchIntegerofObject(FormDepthIndex, sourceForm);
	if (!((sourceMSB = sourceDepth > 0))) {
		sourceDepth = 0 - sourceDepth;
	}
	if (isIntegerObject(sourceBits)) {

		/* Query for actual surface dimensions */
		if (querySurfaceFn == 0) {
			if (!(loadSurfacePlugin())) {
				return 0;
			}
		}
		if (!(querySurfaceFn(integerValueOf(sourceBits), (&sourceWidth), (&sourceHeight), (&sourceDepth), (&sourceMSB)))) {
			primitiveFailFor(PrimErrCallbackError);
			return 0;
		}

		/* The query may have rewritten sourceDepth; never divide by zero */
		if ((sourceDepth < 1)
		 || (sourceDepth > 32)) {
			return 0;
		}
		sourcePPW = 32 / sourceDepth;
		sourceBits = (sourcePitch = 0);
	}
	else {
		if (!(isWordsOrBytes(sourceBits))) {
			return 0;
		}

		/* depth 0 makes 32 / sourceDepth trap; depth > 32 makes sourcePPW 0 */
		if ((sourceDepth < 1)
		 || (sourceDepth > 32)) {
			return 0;
		}
		sourcePPW = 32 / sourceDepth;
		sourcePitch = ((sourceWidth + (sourcePPW - 1)) / sourcePPW) * 4;
		sourceBitsSize = byteSizeOf(sourceBits);
		if (!(sourceBitsSize >= (sourcePitch * sourceHeight))) {
			return 0;
		}
		sourceBits = oopForPointer(firstIndexableField(sourceBits));
	}
	return 1;
}


/*	ColorMap, if not nil, must be longWords, and
	2^N long, where N = sourceDepth for 1, 2, 4, 8 bits,
	or N = 9, 12, or 15 (3, 4, 5 bits per color) for 16 or 32 bits. */
/*	Resets the cm* globals, then reads the color map slot of bitBltOop.
	A words object is an old-style (indexed only) map; otherwise the map must
	be a pointers object with at least 3 slots holding the shift table, the
	mask table and the (optional) lookup table. Answers 0 if the map is
	malformed or the lookup table size is not zero or a power of two,
	1 otherwise. A shift or mask table that is neither nil nor a 4-element
	words object fails the primitive but loading continues. */

	/* BitBltSimulation>>#loadColorMap */
static sqInt
loadColorMap(void)
{
	sqInt colorMapOop;
	sqInt isOldStyle;
	sqInt lookupOop;
	sqInt maskOop;
	sqInt shiftOop;
	sqInt tableSize;

	cmFlags = (cmMask = (cmBitsPerColor = 0));
	cmShiftTable = null;
	cmMaskTable = null;
	cmLookupTable = null;
	colorMapOop = fetchPointerofObject(BBColorMapIndex, bitBltOop);
	if (colorMapOop == (nilObject())) {
		return 1;
	}

	/* even if identity or somesuch - may be cleared later */
	cmFlags = ColorMapPresent;
	isOldStyle = (isWords(colorMapOop)) ? 1 : 0;
	if (isOldStyle) {

		/* This is an old-style color map (indexed only, with implicit RGBA conversion) */
		tableSize = slotSizeOf(colorMapOop);
		cmLookupTable = firstIndexableField(colorMapOop);
	}
	else {

		/* A new-style color map (fully qualified) */
		if ((!(isPointers(colorMapOop)))
		 || ((slotSizeOf(colorMapOop)) < 3)) {
			return 0;
		}

		/* slot 0: shift table (nil, or exactly four words) */
		shiftOop = fetchPointerofObject(0, colorMapOop);
		if (shiftOop == (nilObject())) {
			cmShiftTable = ((void *) null);
		}
		else if ((isWords(shiftOop))
		 && ((slotSizeOf(shiftOop)) == 4)) {
			cmShiftTable = ((void *) (firstIndexableField(shiftOop)));
		}
		else {
			primitiveFail();
			cmShiftTable = ((void *) null);
		}

		/* slot 1: mask table (nil, or exactly four words) */
		maskOop = fetchPointerofObject(1, colorMapOop);
		if (maskOop == (nilObject())) {
			cmMaskTable = ((void *) null);
		}
		else if ((isWords(maskOop))
		 && ((slotSizeOf(maskOop)) == 4)) {
			cmMaskTable = ((void *) (firstIndexableField(maskOop)));
		}
		else {
			primitiveFail();
			cmMaskTable = ((void *) null);
		}

		/* slot 2: indexed lookup table (nil, or a words object) */
		lookupOop = fetchPointerofObject(2, colorMapOop);
		if (lookupOop == (nilObject())) {
			tableSize = 0;
		}
		else {
			if (!(isWords(lookupOop))) {
				return 0;
			}
			tableSize = slotSizeOf(lookupOop);
			cmLookupTable = firstIndexableField(lookupOop);
		}
		cmFlags = cmFlags | ColorMapNewStyle;
	}

	/* the table size must be zero or a power of two */
	if ((tableSize & (tableSize - 1)) != 0) {
		return 0;
	}
	cmMask = tableSize - 1;
	switch (tableSize) {
	case 512:
		cmBitsPerColor = 3;
		break;
	case 4096:
		cmBitsPerColor = 4;
		break;
	case 32768:
		cmBitsPerColor = 5;
		break;
	default:
		cmBitsPerColor = 0;
		break;
	}
	if (tableSize == 0) {
		cmLookupTable = null;
		cmMask = 0;
	}
	else {
		cmFlags = cmFlags | ColorMapIndexedPart;
	}
	if (isOldStyle) {

		/* needs implicit conversion */
		setupColorMasks();
	}
	if (isIdentityMapwith(cmShiftTable, cmMaskTable)) {
		cmMaskTable = null;
		cmShiftTable = null;
	}
	else {
		cmFlags = cmFlags | ColorMapFixedPart;
	}
	return 1;
}

	/* BitBltSimulation>>#loadColorMapShiftOrMaskFrom: */
/*	Answer a pointer to the first field of mapOop when it is a words object
	with exactly four slots. Answer null for nil; for any other object fail
	the primitive and answer null. */
static void *
loadColorMapShiftOrMaskFrom(sqInt mapOop)
{
	if (mapOop != (nilObject())) {
		if ((isWords(mapOop))
		 && ((slotSizeOf(mapOop)) == 4)) {
			return firstIndexableField(mapOop);
		}
		primitiveFail();
	}
	return null;
}


/*	Load the halftone form */
/*	Sets halftoneBase and halftoneHeight from halftoneForm.
	  - noHalftone already set: halftoneBase becomes null, answer 1.
	  - halftoneForm is a pointers object with at least 4 slots (old-style
	    Form): its bits and height slots are read. If the bits are not a
	    words object the halftone is ignored: noHalftone is set,
	    halftoneBase becomes null and the answer is 1.
	  - otherwise halftoneForm itself must be a words object (answer 0 if
	    not); its slot count is the halftone height.
	Answers 1 on success, 0 if the halftone is unusable. */

	/* BitBltSimulation>>#loadHalftoneForm */
static sqInt
loadHalftoneForm(void)
{
	sqInt halftoneBits;

	if (noHalftone) {
		halftoneBase = null;
		return 1;
	}
	if ((isPointers(halftoneForm))
	 && ((slotSizeOf(halftoneForm)) >= 4)) {

		/* Old-style 32xN monochrome halftone Forms */
		halftoneBits = fetchPointerofObject(FormBitsIndex, halftoneForm);
		halftoneHeight = fetchIntegerofObject(FormHeightIndex, halftoneForm);
		if (!(isWords(halftoneBits))) {

			/* Ignore the halftone; do not derive a base address from an
			   object that just failed the isWords check */
			noHalftone = 1;
			halftoneBase = null;
			return 1;
		}
	}
	else {

		/* New spec accepts, basically, a word array */
		if (!(isWords(halftoneForm))) {
			return 0;
		}
		halftoneBits = halftoneForm;
		halftoneHeight = slotSizeOf(halftoneBits);
	}
	halftoneBase = oopForPointer(firstIndexableField(halftoneBits));
	return 1;
}


/*	Load the surface support plugin */
/*	Looks up the query, lock and unlock entry points in "SurfacePlugin" and
	stores them in the corresponding *Fn globals. All three lookups are always
	performed. Answers 1 only if every one of them was found, 0 otherwise. */

	/* BitBltSimulation>>#loadSurfacePlugin */
static sqInt
loadSurfacePlugin(void)
{
	querySurfaceFn = ioLoadFunctionFrom("ioGetSurfaceFormat", "SurfacePlugin");
	lockSurfaceFn = ioLoadFunctionFrom("ioLockSurface", "SurfacePlugin");
	unlockSurfaceFn = ioLoadFunctionFrom("ioUnlockSurface", "SurfacePlugin");
	if (querySurfaceFn == 0) {
		return 0;
	}
	if (lockSurfaceFn == 0) {
		return 0;
	}
	return unlockSurfaceFn != 0;
}

	/* BitBltSimulation>>#loadWarpBltFrom: */
/*	Load the BitBlt state from the given BitBlt oop with warping enabled.
	Answers whatever loadBitBltFromwarping answers. */
static sqInt
loadWarpBltFrom(sqInt bbObj)
{
	const sqInt warping = 1;

	return loadBitBltFromwarping(bbObj, warping);
}


/*	Get a pointer to the bits of any OS surfaces. */
/*	Notes: 
	* For equal source/dest handles only one locking operation is performed.
	This is to prevent locking of overlapping areas which does not work with
	certain APIs (as an example, DirectDraw prevents locking of overlapping
	areas). 
	A special case for non-overlapping but equal source/dest handle would 
	be possible but we would have to transfer this information over to 
	unlockSurfaces somehow (currently, only one unlock operation is 
	performed for equal source and dest handles). Also, this would require
	a change in the notion of ioLockSurface() which is right now interpreted
	as a hint and not as a requirement to lock only the specific portion of
	the surface.
	
	* The arguments in ioLockSurface() provide the implementation with
	an explicit hint what area is affected. It can be very useful to
	know the max. affected area beforehand if getting the bits requires
	expensive copy operations (e.g., like a roundtrip to the X server or a
	glReadPixel op).
	However, the returned pointer *MUST* point to the virtual origin of the
	surface and not to the beginning of the rectangle. The promise made by
	BitBlt is to never access data outside the given rectangle (aligned to
	4byte boundaries!)
	so it is okay to return a pointer to the virtual origin that is actually
	outside the valid memory area.
	
	* The area provided in ioLockSurface() is already clipped (e.g., it will
	always be inside the source and dest boundingBox) but it is not aligned to
	word boundaries
	yet. It is up to the support code to compute accurate alignment if
	necessary. 
	* Warping always requires the entire source surface to be locked because
	there is no beforehand knowledge about what area will actually be
	traversed. 
	* Fail if a GC has occurred since the primitive started (presumably in the
	lockSurface function), because one or more of the primitives' parameters
	may have been moved.
	
 */

	/* BitBltSimulation>>#lockSurfaces */
static sqInt
lockSurfaces(void)
{
	sqInt b;
	sqInt destHandle;
	sqInt l;
	sqInt r;
	sqInt sourceHandle;
	sqInt t;
	sqInt v;

	v = 0;
	assert(numGCsOnInvocation == (statNumGCs()));
	hasSurfaceLock = 0;
	if (destBits == 0) {

		/* Blitting *to* OS surface */
		if (lockSurfaceFn == 0) {
			if (!(loadSurfacePlugin())) {
				return 0;
			}
		}
		destHandle = fetchIntegerofObject(FormBitsIndex, destForm);
		if (!((sourceBits != 0)
			 || (noSource))) {

			/* Handle the special case of equal source and dest handles */
			sourceHandle = fetchIntegerofObject(FormBitsIndex, sourceForm);
			if (sourceHandle == destHandle) {

				/* If we have overlapping source/dest we lock the entire area
				   so that there is only one area transmitted */
				if (isWarping) {

					/* Otherwise use overlapping area */
					l = ((sx < dx) ? sx : dx);
					r = (((sx < dx) ? dx : sx)) + bbW;
					t = ((sy < dy) ? sy : dy);
					b = (((sy < dy) ? dy : sy)) + bbH;
					sourceBits = lockSurfaceFn(sourceHandle, (&sourcePitch), l, t, r - l, b - t);
				}
				else {

					/* When warping we always need the entire surface for the source */
					sourceBits = lockSurfaceFn(sourceHandle, (&sourcePitch), 0, 0, sourceWidth, sourceHeight);
				}
				destBits = sourceBits;
				destPitch = sourcePitch;
				hasSurfaceLock = 1;
				if (numGCsOnInvocation != (statNumGCs())) {
					unlockSurfaces();
					primitiveFailFor(PrimErrObjectMoved);
					return 0;
				}
				if (destBits == 0) {
					unlockSurfaces();
					primitiveFailFor(PrimErrCallbackError);
					return 0;
				}
				endOfDestination = (endOfSource = sourceBits + (sourcePitch * sourceHeight));
				return 1;
			}
		}
		destBits = lockSurfaceFn(destHandle, (&destPitch), dx, dy, bbW, bbH);
		hasSurfaceLock = 1;
		if (numGCsOnInvocation != (statNumGCs())) {
			unlockSurfaces();
			primitiveFailFor(PrimErrObjectMoved);
			return 0;
		}
		if (destBits == 0) {
			primitiveFailFor(PrimErrCallbackError);
		}
	}
	if (!((sourceBits != 0)
		 || (noSource))) {

		/* Blitting *from* OS surface */
		sourceHandle = fetchIntegerofObject(FormBitsIndex, sourceForm);
		if (failed()) {
			return 0;
		}
		if (lockSurfaceFn == 0) {
			if (!(loadSurfacePlugin())) {
				return 0;
			}
		}
		if (isWarping) {
			sourceBits = lockSurfaceFn(sourceHandle, (&sourcePitch), 0, 0, sourceWidth, sourceHeight);
		}
		else {
			sourceBits = lockSurfaceFn(sourceHandle, (&sourcePitch), sx, sy, bbW, bbH);
		}
		hasSurfaceLock = 1;
		if (numGCsOnInvocation != (statNumGCs())) {
			unlockSurfaces();
			primitiveFailFor(PrimErrObjectMoved);
			return 0;
		}
		if (sourceBits == 0) {
			primitiveFailFor(PrimErrCallbackError);
		}
	}
	endOfSource = (noSource
	 || (sourceBits == 0)
		? 0
		: sourceBits + (sourcePitch * sourceHeight));
	endOfDestination = destBits + (destPitch * destHeight);
	return (destBits != 0)
	 && ((sourceBits != 0)
	 || (noSource));
}


/*	Color map the given source pixel. */
/*	mapperFlags selects the stages to apply:
	  - without ColorMapPresent the pixel is answered unchanged;
	  - with ColorMapFixedPart each of the four mask/shift table entries
	    masks the pixel and shifts it (right for a negative shift, left
	    otherwise) and the results are OR-ed together; a non-zero pixel that
	    maps to zero is answered as 1;
	  - with ColorMapIndexedPart the value is then looked up in
	    cmLookupTable, masked by cmMask. */

	/* BitBltSimulation>>#mapPixel:flags: */
static sqInt
mapPixelflags(sqInt sourcePixel, sqInt mapperFlags)
{
	sqInt component;
	sqInt mapped;
	sqInt merged;

	if ((mapperFlags & ColorMapPresent) == 0) {
		return sourcePixel;
	}
	mapped = sourcePixel;
	if ((mapperFlags & ColorMapFixedPart) != 0) {
		/* begin rgbMapPixel:flags: */
		merged = 0;
		for (component = 0; component < 4; component += 1) {
			if ((((int) (cmShiftTable[component]))) < 0) {
				merged = merged | ((usqInt) (sourcePixel & (cmMaskTable[component])) >> -(((int) (cmShiftTable[component]))));
			}
			else {
				merged = merged | ((usqInt) (sourcePixel & (cmMaskTable[component])) << (((int) (cmShiftTable[component]))));
			}
		}
		mapped = merged;
		if ((mapped == 0)
		 && (sourcePixel != 0)) {

			/* keep a non-zero source pixel distinguishable from zero */
			mapped = 1;
		}
	}
	if ((mapperFlags & ColorMapIndexedPart) != 0) {
		mapped = cmLookupTable[mapped & cmMask];
	}
	return mapped;
}


/*	Sender warpLoop is too big to include this in-line */
/*	Apply the merge function stored at opTable[combinationRule + 1] to the
	two words and answer its result. */

	/* BitBltSimulation>>#merge:with: */
static unsigned int
mergewith(unsigned int sourceWord, unsigned int destinationWord)
{
	return ((unsigned int (*)(unsigned int, unsigned int)) (opTable[combinationRule + 1]))(sourceWord, destinationWord);
}


/*	The module with the given name was just unloaded.
	Make sure we have no dangling references. */
/*	Only "SurfacePlugin" matters here: its three cached entry points are
	cleared. Always answers 0. */

	/* BitBltSimulation>>#moduleUnloaded: */
EXPORT(sqInt)
moduleUnloaded(char *aModuleName)
{
	if ((strcmp(aModuleName, "SurfacePlugin")) != 0) {
		return 0;
	}

	/* The surface plugin just shut down. How nasty. */
	unlockSurfaceFn = 0;
	lockSurfaceFn = 0;
	querySurfaceFn = 0;
	return 0;
}


/*	Subtract the pixels in the source and destination, color by color,
	and accumulate the sum of the absolute value of all the differences
	in bitCount.
	For non-rgb, XOR the two and count the number of differing pixels.
	Always answers destinationWord unchanged.
	Note that the region is not clipped to bit boundaries, but only to the
	nearest (enclosing) word. This is because copyLoop does not do
	pre-merge masking. For accurate results, you must subtract the
	values obtained from the left and right fringes. */

	/* BitBltSimulation>>#OLDrgbDiff:with: */
static sqInt
OLDrgbDiffwith(sqInt sourceWord, sqInt destinationWord)
{
	sqInt delta;
	int onePixel;

	if (destDepth < 16) {

		/* Just xor and count differing pixels if not RGB */
		onePixel = maskTable[destDepth];
		for (delta = sourceWord ^ destinationWord; delta != 0; delta = ((usqInt) delta) >> destDepth) {
			if ((delta & onePixel) != 0) {
				bitCount += 1;
			}
		}
		return destinationWord;
	}
	if (destDepth != 16) {

		/* one pixel per word: three 8-bit components */
		delta = partitionedSubfromnBitsnPartitions(sourceWord, destinationWord, 8, 3);
		bitCount = ((bitCount + (delta & 0xFF)) + ((((usqInt) delta) >> 8) & 0xFF)) + ((((usqInt) delta) >> 16) & 0xFF);
		return destinationWord;
	}

	/* two 16-bit pixels per word: low half first, then high half,
	   three 5-bit components each */
	delta = partitionedSubfromnBitsnPartitions(sourceWord, destinationWord, 5, 3);
	bitCount = ((bitCount + (delta & 0x1F)) + ((((usqInt) delta) >> 5) & 0x1F)) + ((((usqInt) delta) >> 10) & 0x1F);
	delta = partitionedSubfromnBitsnPartitions(((usqInt) sourceWord) >> 16, ((usqInt) destinationWord) >> 16, 5, 3);
	bitCount = ((bitCount + (delta & 0x1F)) + ((((usqInt) delta) >> 5) & 0x1F)) + ((((usqInt) delta) >> 10) & 0x1F);
	return destinationWord;
}


/*	Private helper for OLDtallyIntoMapwith (the rgbMap:from:to: logic that
	was expanded in-line there): convert a pixel whose three color
	components are nBitsIn bits wide into an index with cmBitsPerColor bits
	per component. Expansion zero-fills the low bits; compression truncates
	and answers 1 instead of 0 for a non-zero pixel. */
static sqInt
oldTallyMapIndex(sqInt pixel, int nBitsIn)
{
	sqInt delta;
	sqInt fieldMask;
	sqInt packed;
	sqInt shifted;

	delta = cmBitsPerColor - nBitsIn;
	if (delta > 0) {

		/* Expand to more bits by zero-fill */

		/* Transfer mask */
		fieldMask = (1U << nBitsIn) - 1;
		shifted = ((sqInt)((usqInt)(pixel) << delta));
		fieldMask = ((sqInt)((usqInt)(fieldMask) << delta));
		packed = shifted & fieldMask;
		fieldMask = ((sqInt)((usqInt)(fieldMask) << cmBitsPerColor));
		shifted = ((sqInt)((usqInt)(shifted) << delta));
		return (packed + (shifted & fieldMask)) + ((((sqInt)((usqInt)(shifted) << delta))) & (((sqInt)((usqInt)(fieldMask) << cmBitsPerColor))));
	}

	/* Compress to fewer bits by truncation */
	if (delta == 0) {
		if (nBitsIn == 5) {

			/* Sometimes called with 16 bits, though pixel is 15,
			   but we must never return more than 15. */
			return pixel & 0x7FFF;
		}
		if (nBitsIn == 8) {

			/* Sometimes called with 32 bits, though pixel is 24,
			   but we must never return more than 24. */
			return pixel & 0xFFFFFF;
		}
		return pixel;
	}
	if (pixel == 0) {
		return pixel;
	}
	delta = nBitsIn - cmBitsPerColor;

	/* Transfer mask */
	fieldMask = (1U << cmBitsPerColor) - 1;
	shifted = ((usqInt) pixel) >> delta;
	packed = shifted & fieldMask;
	fieldMask = ((sqInt)((usqInt)(fieldMask) << cmBitsPerColor));
	shifted = ((usqInt) shifted) >> delta;
	packed = (packed + (shifted & fieldMask)) + ((((usqInt) shifted) >> delta) & (((sqInt)((usqInt)(fieldMask) << cmBitsPerColor))));
	if (packed == 0) {
		return 1;
	}
	return packed;
}


/*	Tally pixels into the color map. Note that the source should be 
	specified = destination, in order for the proper color map checks 
	to be performed at setup.
	Does nothing unless cmFlags has both ColorMapPresent and
	ColorMapIndexedPart. Every pixel of destinationWord increments its
	cmLookupTable entry; destinationWord is always answered unchanged.
	Note that the region is not clipped to bit boundaries, but only to the
	nearest (enclosing) word. This is because copyLoop does not do
	pre-merge masking. For accurate results, you must subtract the
	values obtained from the left and right fringes. */

	/* BitBltSimulation>>#OLDtallyIntoMap:with: */
static sqInt
OLDtallyIntoMapwith(sqInt sourceWord, sqInt destinationWord)
{
	sqInt mapIndex;
	sqInt pixMask;
	sqInt remaining;
	sqInt shiftWord;

	if ((cmFlags & (ColorMapPresent | ColorMapIndexedPart)) != (ColorMapPresent | ColorMapIndexedPart)) {
		return destinationWord;
	}
	if (destDepth < 16) {

		/* loop through all packed pixels. */
		pixMask = (maskTable[destDepth]) & cmMask;
		shiftWord = destinationWord;
		for (remaining = destPPW; remaining >= 1; remaining -= 1) {
			mapIndex = shiftWord & pixMask;
			/* begin tallyMapAt:put: */
			cmLookupTable[mapIndex & cmMask] = (cmLookupTable[mapIndex & cmMask]) + 1;
			shiftWord = ((usqInt) shiftWord) >> destDepth;
		}
		return destinationWord;
	}
	if (destDepth == 16) {

		/* Two pixels  Tally the right half... */
		mapIndex = oldTallyMapIndex(destinationWord & 0xFFFF, 5);
		/* begin tallyMapAt:put: */
		cmLookupTable[mapIndex & cmMask] = (cmLookupTable[mapIndex & cmMask]) + 1;

		/* ... then the left half */
		mapIndex = oldTallyMapIndex(((usqInt) destinationWord) >> 16, 5);
		/* begin tallyMapAt:put: */
		cmLookupTable[mapIndex & cmMask] = (cmLookupTable[mapIndex & cmMask]) + 1;
	}
	else {

		/* Just one pixel. */
		mapIndex = oldTallyMapIndex(destinationWord, 8);
		/* begin tallyMapAt:put: */
		cmLookupTable[mapIndex & cmMask] = (cmLookupTable[mapIndex & cmMask]) + 1;
	}
	return destinationWord;
}


/*	Add word1 to word2 as nParts partitions of nBits each.
	This is useful for packed pixels, or packed colors */
/*	Use unsigned int everywhere because it has a well known arithmetic model
	without undefined behavior w.r.t. overflow and shifts
 */
/*	carryOverflowMask selects the high bit of every component and
	componentMask is the mask of a single component. A component whose sum
	overflows is saturated: all of its bits are set in the result. */

	/* BitBltSimulation>>#partitionedAdd:to:nBits:componentMask:carryOverflowMask: */
static unsigned int
partitionedAddtonBitscomponentMaskcarryOverflowMask(unsigned int word1, unsigned int word2, sqInt nBits, unsigned int componentMask, unsigned int carryOverflowMask)
{
	/* high bit of each component, taken out of both operands */
	const unsigned int high1 = word1 & carryOverflowMask;
	const unsigned int high2 = word2 & carryOverflowMask;

	/* sum without high bit to avoid overflowing over next component */
	const unsigned int lowSum = (word1 ^ high1) + (word2 ^ high2);

	/* detect overflow condition for saturating */
	const unsigned int overflowed = (high1 & high2) | ((high1 | high2) & lowSum);

	/* put the high bits back, without carrying into the next component */
	const unsigned int wrapped = (lowSum ^ high1) ^ high2;

	return wrapped | ((((usqInt) overflowed) >> (nBits - 1)) * componentMask);
}


/*	Field-wise AND of word1 into word2, with nParts fields of nBits each.
	A field of word2 is kept only where the matching field of word1 is all
	ones; every other field of the result is zero.
	Used for erasing, eg, brush shapes prior to ORing in a color */

	/* BitBltSimulation>>#partitionedAND:to:nBits:nPartitions: */
static unsigned int
partitionedANDtonBitsnPartitions(unsigned int word1, unsigned int word2, sqInt nBits, sqInt nParts)
{
	/* mask of the right-most field; walked leftwards below */
	sqInt fieldMask = maskTable[nBits];
	unsigned int kept = 0;
	sqInt remaining;

	for (remaining = nParts; remaining > 0; remaining -= 1) {
		if ((word1 & fieldMask) == fieldMask) {
			kept = kept | (word2 & fieldMask);
		}
		fieldMask = ((sqInt)((usqInt)(fieldMask) << nBits));
	}
	return kept;
}


/*	Field-wise maximum of word1 and word2, with nParts fields of nBits each */
/*	The words and the mask are declared unsigned int because the result
	depends on a magnitude comparison, which would give wrong answers for
	signed operands with the top bit set (jmv)
 */

	/* BitBltSimulation>>#partitionedMax:with:nBits:nPartitions: */
static unsigned int
partitionedMaxwithnBitsnPartitions(unsigned int word1, unsigned int word2, sqInt nBits, sqInt nParts)
{
	/* mask of the right-most field; walked leftwards below */
	unsigned int fieldMask = maskTable[nBits];
	unsigned int merged = 0;
	sqInt remaining;

	for (remaining = nParts; remaining > 0; remaining -= 1) {
		const unsigned int field1 = word1 & fieldMask;
		const unsigned int field2 = word2 & fieldMask;

		/* fields are compared in place; no need to right-justify them */
		merged |= (field2 < field1) ? field1 : field2;
		fieldMask = ((usqInt)(fieldMask) << nBits);
	}
	return merged;
}


/*	Field-wise minimum of word1 and word2, with nParts fields of nBits each */
/*	The words and the mask are declared unsigned int because the result
	depends on a magnitude comparison, which would give wrong answers for
	signed operands with the top bit set (jmv)
 */

	/* BitBltSimulation>>#partitionedMin:with:nBits:nPartitions: */
static unsigned int
partitionedMinwithnBitsnPartitions(unsigned int word1, unsigned int word2, sqInt nBits, sqInt nParts)
{
	/* mask of the right-most field; walked leftwards below */
	unsigned int fieldMask = maskTable[nBits];
	unsigned int merged = 0;
	sqInt remaining;

	for (remaining = nParts; remaining > 0; remaining -= 1) {
		const unsigned int field1 = word1 & fieldMask;
		const unsigned int field2 = word2 & fieldMask;

		/* fields are compared in place; no need to right-justify them */
		merged |= (field2 < field1) ? field2 : field1;
		fieldMask = ((usqInt)(fieldMask) << nBits);
	}
	return merged;
}


/*	Field-wise multiply of word1 and word2, with fields of nBits each.
	This is useful for packed pixels, or packed colors.
	Each pair of fields a, b yields the upper nBits of ((a+1) * (b+1)) - 1.
	The code is unrolled and handles at most four fields; the original note
	says a loop version had a bug with a non-white background. */
/*	Signed multiplication is not a concern here: the operands are masked
	to nBits, so the sign bit of the factors is always zero (jmv)
 */

	/* BitBltSimulation>>#partitionedMul:with:nBits:nPartitions: */
static unsigned int
partitionedMulwithnBitsnPartitions(unsigned int word1, unsigned int word2, sqInt nBits, sqInt nParts)
{
	/* sMask selects a right-justified field; dMask selects the upper half
	   of the double-width product */
	unsigned int sMask = maskTable[nBits];
	unsigned int dMask = ((usqInt)(sMask) << nBits);
	unsigned int scaled;
	unsigned int packed;
	usqInt factor1;
	usqInt factor2;

	/* field 0: operands are already right-justified; the product's upper
	   half is moved down into field 0 */
	scaled = ((((word1 & sMask) + 1) * ((word2 & sMask) + 1)) - 1) & dMask;
	packed = ((usqInt) scaled) >> nBits;
	if (nParts == 1) {
		return packed;
	}

	/* field 1: the product's upper half already sits in field 1 */
	factor1 = (((usqInt) word1) >> nBits) & sMask;
	factor2 = (((usqInt) word2) >> nBits) & sMask;
	scaled = (((factor1 + 1) * (factor2 + 1)) - 1) & dMask;
	packed = packed | scaled;
	if (nParts == 2) {
		return packed;
	}

	/* field 2: move the product's upper half up by one field */
	factor1 = (((usqInt) word1) >> (2 * nBits)) & sMask;
	factor2 = (((usqInt) word2) >> (2 * nBits)) & sMask;
	scaled = (((factor1 + 1) * (factor2 + 1)) - 1) & dMask;
	packed = packed | (((usqInt)(scaled) << nBits));
	if (nParts == 3) {
		return packed;
	}

	/* field 3: move the product's upper half up by two fields */
	factor1 = (((usqInt) word1) >> (3 * nBits)) & sMask;
	factor2 = (((usqInt) word2) >> (3 * nBits)) & sMask;
	scaled = (((factor1 + 1) * (factor2 + 1)) - 1) & dMask;
	packed = packed | (((usqInt)(scaled) << (2 * nBits)));
	return packed;
}

	/* BitBltSimulation>>#partitionedRgbComponentAlpha:dest:nBits:nPartitions: */
/*	Apply rgbComponentAlpha32with() to each of the nParts pixels of nBits
	packed in sourceWord and destWord, and answer the re-packed result.
	For nBits other than 32 each pixel is first widened to a 32 bit value
	with the top byte forced to 0xFF, blended, and then narrowed again by an
	inlined copy of rgbMap:from:to: (called with from = 32, to = nBits). */
static unsigned int
partitionedRgbComponentAlphadestnBitsnPartitions(unsigned int sourceWord, unsigned int destWord, sqInt nBits, sqInt nParts)
{
	sqInt d;
	sqInt destPix;
	sqInt i;
	unsigned int mask;
	sqInt mask3;
	unsigned int p1;
	unsigned int p2;
	unsigned int result;
	sqInt srcPix;
	sqInt v;


	/* partition mask starts at the right */
	mask = maskTable[nBits];
	result = 0;
	for (i = 1; i <= nParts; i += 1) {
		/* isolate partition i of each word and right-justify it */
		p1 = ((usqInt) (sourceWord & mask)) >> ((i - 1) * nBits);
		p2 = ((usqInt) (destWord & mask)) >> ((i - 1) * nBits);
		if (!(nBits == 32)) {
			if (nBits == 16) {
				/* spread the three 5 bit fields into the upper 5 bits of
				   three 8 bit fields and force the top byte to 0xFF */
				p1 = (((((usqInt)((p1 & 0x1F)) << 3)) | (((usqInt)((p1 & 0x3E0)) << 6))) | (((usqInt)((p1 & 0x7C00)) << 9))) | 0xFF000000U;
				p2 = (((((usqInt)((p2 & 0x1F)) << 3)) | (((usqInt)((p2 & 0x3E0)) << 6))) | (((usqInt)((p2 & 0x7C00)) << 9))) | 0xFF000000U;
			}
			else {
				p1 = (rgbMapfromto(p1, nBits, 32)) | 0xFF000000U;
				p2 = (rgbMapfromto(p2, nBits, 32)) | 0xFF000000U;
			}
		}
		v = rgbComponentAlpha32with(p1, p2);
		if (!(nBits == 32)) {
			/* begin rgbMap:from:to: */
			/* inlined with from = 32 and to = nBits; the tests against the
			   constant 32 below are left over from that inlining */
			if (((d = nBits - 32)) > 0) {

				/* Expand to more bits by zero-fill */

				/* Transfer mask */
				mask3 = (1ULL << 32) - 1;
				srcPix = ((sqInt)((usqInt)(v) << d));
				mask3 = ((sqInt)((usqInt)(mask3) << d));
				destPix = srcPix & mask3;
				mask3 = ((sqInt)((usqInt)(mask3) << nBits));
				srcPix = ((sqInt)((usqInt)(srcPix) << d));
				v = (destPix + (srcPix & mask3)) + ((((sqInt)((usqInt)(srcPix) << d))) & (((sqInt)((usqInt)(mask3) << nBits))));
				goto l1;
			}
			else {

				/* Compress to fewer bits by truncation */
				if (d == 0) {
					/* d == 0 cannot happen here because nBits != 32 was
					   tested above; both constant comparisons are false */
					if (32 == 5) {

						/* Sometimes called with 16 bits, though pixel is 15,
						   but we must never return more than 15. */
						v = v & 0x7FFF;
						goto l1;
					}
					if (32 == 8) {

						/* Sometimes called with 32 bits, though pixel is 24,
						   but we must never return more than 24. */
						v = v & 0xFFFFFF;
						goto l1;
					}
					goto l1;
				}
				/* zero stays zero */
				if (v == 0) {
					goto l1;
				}
				d = 32 - nBits;

				/* Transfer mask */
				mask3 = (1U << nBits) - 1;
				srcPix = ((usqInt) v) >> d;
				destPix = srcPix & mask3;
				mask3 = ((sqInt)((usqInt)(mask3) << nBits));
				srcPix = ((usqInt) srcPix) >> d;
				destPix = (destPix + (srcPix & mask3)) + ((((usqInt) srcPix) >> d) & (((sqInt)((usqInt)(mask3) << nBits))));
				/* a non-zero input never maps to zero: answer 1 instead */
				if (destPix == 0) {
					v = 1;
					goto l1;
				}
				v = destPix;
				goto l1;
			}
	l1:	/* end rgbMap:from:to: */;
		}
		/* put the blended pixel back at the position of partition i */
		result = result | (((sqInt)((usqInt)(v) << ((i - 1) * nBits))));

		/* slide left to next partition */
		mask = ((usqInt)(mask) << nBits);
	}
	return result;
}


/*	Field-wise absolute difference of word1 and word2, with nParts fields
	of nBits each.
	This is useful for packed pixels, or packed colors */
/*	The words and the mask are declared unsigned int because the result
	depends on a magnitude comparison, which would give wrong answers for
	signed operands with the top bit set (jmv)
 */

	/* BitBltSimulation>>#partitionedSub:from:nBits:nPartitions: */
static unsigned int
partitionedSubfromnBitsnPartitions(unsigned int word1, unsigned int word2, sqInt nBits, sqInt nParts)
{
	/* mask of the right-most field; walked leftwards below */
	unsigned int fieldMask = maskTable[nBits];
	unsigned int diff = 0;
	sqInt remaining;

	for (remaining = nParts; remaining > 0; remaining -= 1) {
		const unsigned int field1 = word1 & fieldMask;
		const unsigned int field2 = word2 & fieldMask;

		/* always subtract the smaller from the larger, so the result is
		   the absolute value of the difference and never borrows */
		diff |= (field1 < field2) ? (field2 - field1) : (field1 - field2);
		fieldMask = ((usqInt)(fieldMask) << nBits);
	}
	return diff;
}


/*	Based on the values provided during setup choose and
	perform the appropriate inner loop function:
	copyLoopNoSource() when noSource is set, copyLoopPixMap() when depth,
	color map or pixel order differ between source and destination, and
	copyLoop() otherwise.
	Sets up the edge masks, word count, directions, indices and deltas
	(mask1, mask2, nWords, hDir, vDir, destIndex, destDelta and, for
	copyLoop, preload, skew, sourceIndex, sourceDelta) beforehand.
	Always answers 0. */
/*	Should be inlined into caller for speed */

	/* BitBltSimulation>>#performCopyLoop */
static sqInt
performCopyLoop(void)
{
	sqInt dxLowBits;
	sqInt endBits;
	unsigned int m1;
	sqInt pixPerM1;
	sqInt pixPerM11;
	sqInt startBits;
	sqInt startBits1;
	sqInt sxLowBits;
	sqInt t;

	/* begin destMaskAndPointerInit */

	/* A mask, assuming power of two */
	/* how many pixels in first word */
	pixPerM1 = destPPW - 1;

	/* how many pixels in last word */
	startBits = destPPW - (dx & pixPerM1);
	endBits = (((dx + bbW) - 1) & pixPerM1) + 1;
	/* mask1 covers the pixels of the first word, mask2 those of the last
	   word; which end of the word they occupy depends on destMSB */
	if (destMSB) {
		mask1 = ((usqInt) AllOnes) >> (32 - (startBits * destDepth));
		mask2 = ((usqInt)(AllOnes) << (32 - (endBits * destDepth)));
	}
	else {
		mask1 = ((usqInt)(AllOnes) << (32 - (startBits * destDepth)));
		mask2 = ((usqInt) AllOnes) >> (32 - (endBits * destDepth));
	}
	if (bbW < startBits) {
		/* everything fits in a single word: combine both masks */
		mask1 = mask1 & mask2;
		mask2 = 0;
		nWords = 1;
	}
	else {
		nWords = (((bbW - startBits) + pixPerM1) / destPPW) + 1;
	}

	/* defaults for no overlap with source */
	/* calculate byte addr and delta, based on first word of data */
	/* Note pitch is bytes and nWords is longs, not bytes */
	hDir = (vDir = 1);
	destIndex = (destBits + (dy * destPitch)) + ((dx / destPPW) * 4);

	/* byte addr delta */
	destDelta = (destPitch * vDir) - (4 * (nWords * hDir));
	if (noSource) {

		/* Simple fill loop */
		copyLoopNoSource();
	}
	else {

		/* Loop using source and dest */
		/* begin checkSourceOverlap */
		/* when copying within one form, pick a direction that does not
		   overwrite source pixels before they have been read */
		if ((sourceForm == destForm)
		 && (dy >= sy)) {
			if (dy > sy) {

				/* have to start at bottom */
				vDir = -1;
				sy = (sy + bbH) - 1;
				dy = (dy + bbH) - 1;
			}
			else {
				if ((dy == sy) && (dx > sx)) {

					/* y's are equal, but x's are backward */
					hDir = -1;

					/* start at right */
					sx = (sx + bbW) - 1;

					/* and fix up masks */
					dx = (dx + bbW) - 1;
					if (nWords > 1) {
						t = mask1;
						mask1 = mask2;
						mask2 = t;
					}
				}
			}
			/* recompute, since dx, dy, hDir or vDir may have changed */
			destIndex = (destBits + (dy * destPitch)) + ((dx / destPPW) * 4);
			destDelta = (destPitch * vDir) - (4 * (nWords * hDir));
		}
		if ((sourceDepth != destDepth)
		 || ((cmFlags != 0)
		 || (sourceMSB != destMSB))) {

			/* If we must convert between pixel depths or use
			   color lookups or swap pixels use the general version */
			copyLoopPixMap();
		}
		else {

			/* Otherwise we simply copy pixels and can use a faster version */
			/* begin sourceSkewAndPointerInit */
			assert((destPPW == sourcePPW)
			 && ((destMSB == sourceMSB)
			 && (destDepth == sourceDepth)));

			/* A mask, assuming power of two */
			pixPerM11 = destPPW - 1;
			sxLowBits = sx & pixPerM11;

			/* how many pixels in first word */
			dxLowBits = dx & pixPerM11;
			startBits1 = (hDir > 0
				? sourcePPW - (sx & pixPerM11)
				: (((sx + bbW) - 1) & pixPerM11) + 1);
			/* m1 is the first-word mask computed for the source position */
			m1 = (destMSB
				? ((usqInt) AllOnes) >> (32 - (startBits1 * destDepth))
				: ((usqInt)(AllOnes) << (32 - (startBits1 * destDepth))));

			/* i.e. there are some missing bits */
			/* calculate right-shift skew from source to dest */
			preload = (m1 & mask1) != mask1;

			/* -32..32 */
			skew = destDepth * ((sourceMSB
	? sxLowBits - dxLowBits
	: dxLowBits - sxLowBits));
			if (preload) {
				skew = (skew < 0
					? skew + 32
					: skew - 32);
			}

			/* calculate increments from end of 1 line to start of next */
			sourceIndex = (sourceBits + (sy * sourcePitch)) + ((sx / (32 / sourceDepth)) * 4);
			sourceDelta = (sourcePitch * vDir) - (4 * (nWords * hDir));
			if (preload) {

				/* Compensate for extra source word fetched */
				sourceDelta -= 4 * hDir;
			}
			assert(!((preload
 && (skew == 0))));
			assert(((skew >= -31) && (skew <= 0x1F)));
			copyLoop();
		}
	}
	return 0;
}


/*	Pick nPixels pixels from the source, starting at the word addressed by
	sourceIndex and the bit position srcBitShift, map them according to
	mapperFlags (fixed shift/mask part and/or lookup table part), and pack
	them into the answered destWord starting at bit position dstBitShift.
	srcMask/dstMask select one source/destination pixel; srcShiftInc and
	dstShiftInc are added to the respective shift after every pixel.
	Side effects: advances sourceIndex by 4 whenever the source shift leaves
	the range 0..31, and stores the final source shift back to srcBitShift.
	nPixels must be > 0 because the loops test at the bottom.
 */

	/* BitBltSimulation>>#pickSourcePixels:flags:srcMask:destMask:srcShiftInc:dstShiftInc: */
static unsigned int
pickSourcePixelsflagssrcMaskdestMasksrcShiftIncdstShiftInc(sqInt nPixels, sqInt mapperFlags, sqInt srcMask, sqInt dstMask, sqInt srcShiftInc, sqInt dstShiftInc)
{
	sqInt destPix;
	unsigned int destWord;
	sqInt dstShift;
	sqInt nPix;
	sqInt pv;
	sqInt sourcePix;
	unsigned int sourceWord;
	sqInt srcShift;
	sqInt val;


	/* oh please */
	destWord = 0;

	/* Hint: Keep in register */
	srcShift = srcBitShift;

	/* Hint: Keep in register */
	dstShift = dstBitShift;

	/* always > 0 so we can use do { } while(--nPix); */
	nPix = nPixels;
	if (mapperFlags == (ColorMapPresent | ColorMapIndexedPart)) {

		/* a little optimization for (pretty crucial) blits using indexed lookups only */
		/* grab, colormap and mix in pixel */
		do {
			assert((((usqInt)sourceIndex)) < endOfSource);
			sourceWord = long32At(sourceIndex);
			sourcePix = (((usqInt) sourceWord) >> srcShift) & srcMask;
			destPix = cmLookupTable[sourcePix & cmMask];

			/* adjust dest pix index */
			destWord = destWord | (((sqInt)((usqInt)((destPix & dstMask)) << dstShift)));

			/* adjust source pix index */
			dstShift += dstShiftInc;
			/* any bit outside the low five set means the shift left 0..31 */
			if (!((((srcShift += srcShiftInc)) & 0xFFFFFFE0U) == 0)) {
				srcShift = (sourceMSB
					? srcShift + 32
					: srcShift - 32);
				/* begin incSrcIndex: */
				sourceIndex += 4;
			}
		} while(!(((nPix -= 1)) == 0));
	}
	else {

		/* grab, colormap and mix in pixel */
		do {
			assert((((usqInt)sourceIndex)) < endOfSource);
			sourceWord = long32At(sourceIndex);
			sourcePix = (((usqInt) sourceWord) >> srcShift) & srcMask;
			/* begin mapPixel:flags: */
			pv = sourcePix;
			if ((mapperFlags & ColorMapPresent) != 0) {
				if ((mapperFlags & ColorMapFixedPart) != 0) {
					/* begin rgbMapPixel:flags: */
					/* four mask-and-shift steps, one per cmMaskTable/cmShiftTable
					   entry; a negative shift entry means shift right */
					val = (((((int) (cmShiftTable[0]))) < 0) ? ((usqInt) (sourcePix & (cmMaskTable[0])) >> -(((int) (cmShiftTable[0])))) : ((usqInt) (sourcePix & (cmMaskTable[0])) << (((int) (cmShiftTable[0])))));
					val = val | ((((((int) (cmShiftTable[1]))) < 0) ? ((usqInt) (sourcePix & (cmMaskTable[1])) >> -(((int) (cmShiftTable[1])))) : ((usqInt) (sourcePix & (cmMaskTable[1])) << (((int) (cmShiftTable[1]))))));
					val = val | ((((((int) (cmShiftTable[2]))) < 0) ? ((usqInt) (sourcePix & (cmMaskTable[2])) >> -(((int) (cmShiftTable[2])))) : ((usqInt) (sourcePix & (cmMaskTable[2])) << (((int) (cmShiftTable[2]))))));
					pv = val | ((((((int) (cmShiftTable[3]))) < 0) ? ((usqInt) (sourcePix & (cmMaskTable[3])) >> -(((int) (cmShiftTable[3])))) : ((usqInt) (sourcePix & (cmMaskTable[3])) << (((int) (cmShiftTable[3]))))));
					/* a non-zero pixel never maps to zero: use 1 instead */
					if ((pv == 0)
					 && (sourcePix != 0)) {
						pv = 1;
					}
				}
				if ((mapperFlags & ColorMapIndexedPart) != 0) {
					pv = cmLookupTable[pv & cmMask];
				}
			}
			destPix = pv;

			/* adjust dest pix index */
			destWord = destWord | (((sqInt)((usqInt)((destPix & dstMask)) << dstShift)));

			/* adjust source pix index */
			dstShift += dstShiftInc;
			/* any bit outside the low five set means the shift left 0..31 */
			if (!((((srcShift += srcShiftInc)) & 0xFFFFFFE0U) == 0)) {
				srcShift = (sourceMSB
					? srcShift + 32
					: srcShift - 32);
				/* begin incSrcIndex: */
				sourceIndex += 4;
			}
		} while(!(((nPix -= 1)) == 0));
	}

	/* Store back */
	srcBitShift = srcShift;
	return destWord;
}


/*	Fetch one source pixel for WarpBlt.
	xx and yy are fixed point coordinates with BinaryPoint fraction bits.
	Answers 0 when the point lies outside the source form; otherwise answers
	the pixel and leaves its bit offset in srcBitShift.
	Note: This method is crucial for WarpBlt speed w/o smoothing
	and still relatively important when smoothing is used. */

	/* BitBltSimulation>>#pickWarpPixelAtX:y: */
static unsigned int
pickWarpPixelAtXy(sqInt xx, sqInt yy)
{
	sqInt column;
	sqInt row;
	sqInt srcIndex;
	unsigned int packedWord;

	/* reject points outside the source, one test at a time */
	if (xx < 0) {
		return 0;
	}
	if (yy < 0) {
		return 0;
	}
	column = ((usqInt) xx) >> BinaryPoint;
	if (column >= sourceWidth) {
		return 0;
	}
	row = ((usqInt) yy) >> BinaryPoint;
	if (row >= sourceHeight) {
		return 0;
	}

	/* byte address of the 32 bit word that holds the pixel */
	srcIndex = (sourceBits + (row * sourcePitch)) + ((((usqInt) column) >> warpAlignShift) * 4);
	assert((((usqInt)srcIndex)) < endOfSource);
	packedWord = long32At(srcIndex);

	/* position of the pixel inside that word, then extract it */
	srcBitShift = warpBitShiftTable[column & warpAlignMask];
	return (((usqInt) packedWord) >> srcBitShift) & warpSrcMask;
}


/*	Answer destinationWord with every pixel zeroed whose value equals the
	pixel at the same position in sourceWord; all other pixels are kept.
	Used to clear areas of some constant color to zero.
 */

	/* BitBltSimulation>>#pixClear:with: */
static unsigned int
pixClearwith(unsigned int sourceWord, unsigned int destinationWord)
{
	unsigned int pixelMask;
	unsigned int kept;
	int depth;
	sqInt n;

	/* one pixel per word: compare the words directly */
	if (destDepth == 32) {
		return (sourceWord == destinationWord) ? 0 : destinationWord;
	}
	depth = destDepth;

	/* mask of the right-most pixel; walked leftwards below */
	pixelMask = maskTable[depth];
	kept = 0;
	for (n = 0; n < destPPW; n += 1) {
		const unsigned int destPixel = destinationWord & pixelMask;

		if ((sourceWord & pixelMask) != destPixel) {
			kept |= destPixel;
		}
		pixelMask = ((usqInt)(pixelMask) << depth);
	}
	return kept;
}

	/* BitBltSimulation>>#pixMask:with: */
/*	Answer destinationWord with only those pixels kept whose matching
	pixel in sourceWord is all zero bits (i.e. all ones once inverted). */
static unsigned int
pixMaskwith(unsigned int sourceWord, unsigned int destinationWord)
{
	const unsigned int invertedSource = (unsigned int)~sourceWord;

	return partitionedANDtonBitsnPartitions(invertedSource, destinationWord, destDepth, destPPW);
}

	/* BitBltSimulation>>#pixPaint:with: */
/*	Paint sourceWord over destinationWord: destination pixels are kept
	only where the source pixel is all zero bits, then the source is ORed in.
	An all-zero source word leaves the destination unchanged. */
static unsigned int
pixPaintwith(unsigned int sourceWord, unsigned int destinationWord)
{
	unsigned int survivingDest;

	if (sourceWord != 0) {
		survivingDest = partitionedANDtonBitsnPartitions((unsigned int)~sourceWord, destinationWord, destDepth, destPPW);
		return sourceWord | survivingDest;
	}
	return destinationWord;
}


/*	Swap the pixels in destWord */

	/* BitBltSimulation>>#pixSwap:with: */
static unsigned int
pixSwapwith(unsigned int sourceWord, unsigned int destWord)
{
	unsigned int highMask;
	sqInt i;
	unsigned int lowMask;
	unsigned int result;
	int shift;

	if (destPPW == 1) {
		return destWord;
	}
	result = 0;

	/* mask low pixel */
	lowMask = (1U << destDepth) - 1;

	/* mask high pixel */
	highMask = ((usqInt)(lowMask) << ((destPPW - 1) * destDepth));
	shift = 32 - destDepth;
	result = result | ((((usqInt)((destWord & lowMask)) << shift)) | (((usqInt) (destWord & highMask)) >> shift));
	if (destPPW <= 2) {
		return result;
	}
	for (i = 2; i <= (destPPW / 2); i += 1) {
		lowMask = ((usqInt)(lowMask) << destDepth);
		highMask = ((usqInt) highMask) >> destDepth;
		shift -= destDepth * 2;
		result = result | ((((usqInt)((destWord & lowMask)) << shift)) | (((usqInt) (destWord & highMask)) >> shift));
	}
	return result;
}


/*	Invoke the pixel color comparing primitive. Only applicable if compiling
	with ENABLE_FAST_BLT; otherwise the primitive always fails.
	Stack on entry: receiver (a BitBlt) at 3, colorA at 2, colorB at 1,
	testID at 0. colorA and colorB must be positive machine integers and
	testID an integer object, else the primitive fails with
	PrimErrBadArgument.
	On success the four stack entries are replaced by the value answered by
	compareColorsDispatch(), converted with positive32BitIntegerFor(). */

	/* BitBltSimulation>>#primitiveCompareColorA:to:test: */
EXPORT(sqInt)
primitiveCompareColors(void)
{
	usqIntptr_t colorA;
	usqIntptr_t colorB;
	sqInt rcvr;
	sqInt testID;
	sqInt val;
	sqInt _return_value;

	val = 0;
	if (!((isPositiveMachineIntegerObject(stackValue(2)))
		 && ((isPositiveMachineIntegerObject(stackValue(1)))
		 && (isIntegerObject(stackValue(0)))))) {
		primitiveFailFor(PrimErrBadArgument);
		return null;
	}
	/* the conversion routine is chosen by the size of an oop */
	colorA = (BytesPerOop == 4
		? positive32BitValueOf(stackValue(2))
		: positive64BitValueOf(stackValue(2)));
	colorB = (BytesPerOop == 4
		? positive32BitValueOf(stackValue(1))
		: positive64BitValueOf(stackValue(1)));
	testID = stackIntegerValue(0);
	rcvr = stackValue(3);
	if (failed()) {
		return null;
	}
	
#  if ENABLE_FAST_BLT
	if (!(loadBitBltFromwarping(rcvr, 0))) {
		primitiveFail();
		return null;
	}
	clipRange();
	if ((bbW <= 0)
	 || (bbH <= 0)) {

		/* zero width or height; noop */
		primitiveFail();
		return null;
	}
	
	/* describe both pixel areas: A is the source form, B the dest form */
	compare_operation_t op;
	/* low two bits of testID select the match rule, bit 3 requests a tally */
	op.matchRule = testID & 3;
	op.tally = testID & (1u<<3);
	op.srcA.bits = (void *) sourceBits;
	op.srcA.pitch = sourcePitch;
	op.srcA.depth = sourceDepth;
	op.srcA.msb = sourceMSB;
	op.srcA.x = sx;
	op.srcA.y = sy;
	op.srcB.bits = (void *) destBits;
	op.srcB.pitch = destPitch;
	op.srcB.depth = destDepth;
	op.srcB.msb = destMSB;
	op.srcB.x = dx;
	op.srcB.y = dy;
	op.width = bbW;
	op.height = bbH;
	op.colorA = colorA;
	op.colorB = colorB;

	val = compareColorsDispatch(&op);
	if (!(failed())) {
		_return_value = positive32BitIntegerFor(val);
		if (!(failed())) {
			popthenPush(4, _return_value);
		}
	}
	return null;
#  else /* ENABLE_FAST_BLT */
	primitiveFail();
#  endif /* ENABLE_FAST_BLT */
	/* only reached without ENABLE_FAST_BLT, right after primitiveFail();
	   NOTE(review): presumably failed() is then true and this pop never
	   runs - confirm */
	if (!(failed())) {
		pop(3);
	}
	return null;
}


/*	The copyBits primitive: load the BitBlt state from the receiver, run
	copyBits(), and hand the affected rectangle of the destination form to
	showDisplayBitsLeftTopRightBottom().
	Answers bitCount for combination rules 22 and 32, and the receiver for
	every other rule.
 */

	/* BitBltSimulation>>#primitiveCopyBits */
EXPORT(sqInt)
primitiveCopyBits(void)
{
	sqInt bitBlt;

	/* the receiver sits below the arguments on the stack */
	bitBlt = stackValue(methodArgumentCount());
	if (!(loadBitBltFromwarping(bitBlt, 0))) {
		return primitiveFail();
	}

	copyBits();
	if (failed()) {
		return null;
	}

	/* begin showDisplayBits */
	/* the forms may have moved if a GC ran since the state was loaded */
	if (numGCsOnInvocation != (statNumGCs())) {
		reloadDestAndSourceForms();
	}
	showDisplayBitsLeftTopRightBottom(destForm, affectedL, affectedT, affectedR, affectedB);
	if (failed()) {
		return null;
	}

	switch (combinationRule) {
	case 22:
	case 32:
		methodReturnInteger(bitCount);
		break;
	default:
		methodReturnReceiver();
		break;
	}
	return 0;
}

	/* BitBltSimulation>>#primitiveDisplayString */
/*	Blit the glyphs of a byte string, one copy operation per character.
	Takes exactly six arguments; stack on entry: receiver (a BitBlt) at 6,
	sourceString at 5, startIndex at 4, stopIndex at 3, glyphMap at 2,
	xTable at 1, kernDelta at 0.
	glyphMap must be an Array of 256 slots mapping a byte to a glyph index;
	xTable must be an Array in which entries glyphIndex and glyphIndex + 1
	give the left and right x of the glyph in the source form.
	Fails for combination rules 30 and 31.
	After each glyph destX advances by the glyph width plus kernDelta; the
	final destX is stored back into the receiver and the arguments are
	popped. */
EXPORT(sqInt)
primitiveDisplayString(void)
{
	sqInt ascii;
	sqInt bbObj;
	sqInt charIndex;
	sqInt endBits;
	sqInt glyphIndex;
	sqInt glyphMap;
	sqInt kernDelta;
	sqInt left;
	sqInt maxGlyph;
	sqInt pixPerM1;
	sqInt quickBlt;
	char *sourcePtr;
	sqInt sourceString;
	sqInt startBits;
	sqInt startIndex;
	sqInt stopIndex;
	sqInt xTable;

	if (!((methodArgumentCount()) == 6)) {
		return primitiveFail();
	}
	kernDelta = stackIntegerValue(0);
	xTable = stackValue(1);
	glyphMap = stackValue(2);
	stopIndex = stackIntegerValue(3);
	startIndex = stackIntegerValue(4);
	sourceString = stackValue(5);
	bbObj = stackObjectValue(6);
	if (failed()) {
		return null;
	}
	/* validate the arguments and load the BitBlt state in one go */
	if (!((isArray(xTable))
		 && ((isArray(glyphMap))
		 && (((slotSizeOf(glyphMap)) == 256)
		 && ((isBytes(sourceString))
		 && ((startIndex > 0)
		 && ((stopIndex >= 0)
		 && ((stopIndex <= (byteSizeOf(sourceString)))
		 && ((loadBitBltFromwarping(bbObj, 0))
		 && ((combinationRule != 30)
		 && (combinationRule != 0x1F))))))))))) {
		return primitiveFail();
	}
	/* nothing to draw */
	if (stopIndex == 0) {
		return pop(6);
	}

	/* See if we can go directly into copyLoopPixMap (usually we can) */
	/* highest glyph index for which xTable also has a following entry */
	maxGlyph = (slotSizeOf(xTable)) - 2;

	/* no point using slower version */
	quickBlt = (destBits != 0)
	 && ((sourceBits != 0)
	 && ((noSource == 0)
	 && ((sourceForm != destForm)
	 && ((cmFlags != 0)
	 || ((sourceMSB != destMSB)
	 || (sourceDepth != destDepth))))));
	if (quickBlt) {
		endOfSource = sourceBits + (sourcePitch * sourceHeight);
		endOfDestination = destBits + (destPitch * destHeight);
	}
	else {
		if (!(lockSurfaces())) {
			return primitiveFail();
		}
	}
	/* NOTE(review): on the non-quickBlt path lockSurfaces() has succeeded
	   here, but the failure returns inside the loop below do not call
	   unlockSurfaces() - confirm whether the lock is released elsewhere */
	left = destX;
	sourcePtr = firstIndexableField(sourceString);
	for (charIndex = startIndex; charIndex <= stopIndex; charIndex += 1) {
		/* charIndex is 1-based */
		ascii = byteAtPointer((sourcePtr + charIndex) - 1);
		glyphIndex = fetchIntegerofObject(ascii, glyphMap);
		if ((glyphIndex < 0)
		 || (glyphIndex > maxGlyph)) {
			return primitiveFail();
		}
		sourceX = fetchIntegerofObject(glyphIndex, xTable);
		width = (fetchIntegerofObject(glyphIndex + 1, xTable)) - sourceX;
		if (failed()) {
			return null;
		}
		clipRange();
		if ((bbW > 0)
		 && (bbH > 0)) {
			if (quickBlt) {
				/* begin destMaskAndPointerInit */

				/* A mask, assuming power of two */
				/* how many pixels in first word */
				pixPerM1 = destPPW - 1;

				/* how many pixels in last word */
				startBits = destPPW - (dx & pixPerM1);
				endBits = (((dx + bbW) - 1) & pixPerM1) + 1;
				if (destMSB) {
					mask1 = ((usqInt) AllOnes) >> (32 - (startBits * destDepth));
					mask2 = ((usqInt)(AllOnes) << (32 - (endBits * destDepth)));
				}
				else {
					mask1 = ((usqInt)(AllOnes) << (32 - (startBits * destDepth)));
					mask2 = ((usqInt) AllOnes) >> (32 - (endBits * destDepth));
				}
				if (bbW < startBits) {
					mask1 = mask1 & mask2;
					mask2 = 0;
					nWords = 1;
				}
				else {
					nWords = (((bbW - startBits) + pixPerM1) / destPPW) + 1;
				}

				/* defaults for no overlap with source */
				/* calculate byte addr and delta, based on first word of data */
				/* Note pitch is bytes and nWords is longs, not bytes */
				hDir = (vDir = 1);
				destIndex = (destBits + (dy * destPitch)) + ((dx / destPPW) * 4);

				/* byte addr delta */
				destDelta = (destPitch * vDir) - (4 * (nWords * hDir));
				copyLoopPixMap();
				affectedL = dx;
				affectedR = dx + bbW;
				affectedT = dy;
				affectedB = dy + bbH;
			}
			else {
				copyBitsLockedAndClipped();
			}
		}
		if (failed()) {
			return null;
		}
		/* advance to where the next glyph starts */
		destX = (destX + width) + kernDelta;
	}
	/* report everything from the first glyph's destX onwards as affected */
	affectedL = left;
	if (!quickBlt) {
		unlockSurfaces();
	}
	/* begin showDisplayBits */
	if (numGCsOnInvocation != (statNumGCs())) {
		reloadDestAndSourceForms();
	}
	showDisplayBitsLeftTopRightBottom(destForm, affectedL, affectedT, affectedR, affectedB);
	storeIntegerofObjectwithValue(BBDestXIndex, bbObj, destX);
	pop(6);
	return 0;
}


/*	Invoke the line drawing primitive.
	Stack on entry: receiver (a BitBlt) at 2, xDelta at 1, yDelta at 0.
	Steps destX/destY along the line from the current position by
	xDelta@yDelta, calling copyBits() after every step except the last one,
	and accumulates the affected rectangle. The display is refreshed in
	chunks once the accumulated area exceeds 4000 and once more at the end.
	The final destX and destY are stored back into bitBltOop.
	Pops the two arguments unless the primitive has failed. */

	/* BitBltSimulation>>#primitiveDrawLoop */
EXPORT(sqInt)
primitiveDrawLoop(void)
{
	sqInt affB;
	sqInt affL;
	sqInt affR;
	sqInt affT;
	sqInt dx1;
	sqInt dy1;
	sqInt i;
	sqInt P;
	sqInt px;
	sqInt py;
	sqInt rcvr;
	sqInt xDelta;
	sqInt yDelta;

	rcvr = stackValue(2);
	xDelta = stackIntegerValue(1);
	yDelta = stackIntegerValue(0);
	if (!(loadBitBltFromwarping(rcvr, 0))) {
		return primitiveFail();
	}
	if (!(failed())) {
		/* begin drawLoopX:Y: */
		/* dx1 and dy1 are the signs of xDelta and yDelta */
		if (xDelta > 0) {
			dx1 = 1;
		}
		else {
			if (xDelta == 0) {
				dx1 = 0;
			}
			else {
				dx1 = -1;
			}
		}
		if (yDelta > 0) {
			dy1 = 1;
		}
		else {
			if (yDelta == 0) {
				dy1 = 0;
			}
			else {
				dy1 = -1;
			}
		}
		/* note the crossed names: px is |yDelta| and py is |xDelta| */
		px = SQABS(yDelta);
		py = SQABS(xDelta);

		/* init null rectangle */
		affL = (affT = 9999);
		affR = (affB = -9999);
		if (py > px) {

			/* more horizontal */
			/* step x every iteration; P is the error term that decides
			   when y steps as well */
			P = py / 2;
			for (i = 1; i <= py; i += 1) {
				destX += dx1;
				if (((P -= px)) < 0) {
					destY += dy1;
					P += py;
				}
				/* the last step only moves the position; nothing is drawn */
				if (i < py) {
					copyBits();
					if (failed()) {
						goto l1;
					}
					if ((affectedL < affectedR)
					 && (affectedT < affectedB)) {

						/* Affected rectangle grows along the line */
						affL = ((affL < affectedL) ? affL : affectedL);
						affR = ((affR < affectedR) ? affectedR : affR);
						affT = ((affT < affectedT) ? affT : affectedT);
						affB = ((affB < affectedB) ? affectedB : affB);
						if (((affR - affL) * (affB - affT)) > 4000) {

							/* If affected rectangle gets large, update it in chunks */
							affectedL = affL;
							affectedR = affR;
							affectedT = affT;
							affectedB = affB;
							/* begin showDisplayBits */
							if (numGCsOnInvocation != (statNumGCs())) {
								reloadDestAndSourceForms();
							}
							showDisplayBitsLeftTopRightBottom(destForm, affectedL, affectedT, affectedR, affectedB);

							/* init null rectangle */
							affL = (affT = 9999);
							affR = (affB = -9999);
						}
					}
				}
			}
		}
		else {

			/* more vertical */
			/* step y every iteration; P is the error term that decides
			   when x steps as well */
			P = px / 2;
			for (i = 1; i <= px; i += 1) {
				destY += dy1;
				if (((P -= py)) < 0) {
					destX += dx1;
					P += px;
				}
				/* the last step only moves the position; nothing is drawn */
				if (i < px) {
					copyBits();
					if (failed()) {
						goto l1;
					}
					if ((affectedL < affectedR)
					 && (affectedT < affectedB)) {

						/* Affected rectangle grows along the line */
						affL = ((affL < affectedL) ? affL : affectedL);
						affR = ((affR < affectedR) ? affectedR : affR);
						affT = ((affT < affectedT) ? affT : affectedT);
						affB = ((affB < affectedB) ? affectedB : affB);
						if (((affR - affL) * (affB - affT)) > 4000) {

							/* If affected rectangle gets large, update it in chunks */
							affectedL = affL;
							affectedR = affR;
							affectedT = affT;
							affectedB = affB;
							/* begin showDisplayBits */
							if (numGCsOnInvocation != (statNumGCs())) {
								reloadDestAndSourceForms();
							}
							showDisplayBitsLeftTopRightBottom(destForm, affectedL, affectedT, affectedR, affectedB);

							/* init null rectangle */
							affL = (affT = 9999);
							affR = (affB = -9999);
						}
					}
				}
			}
		}
		/* publish whatever part of the rectangle has not been shown yet */
		affectedL = affL;
		affectedR = affR;
		affectedT = affT;

		/* store destX, Y back */
		affectedB = affB;
		storeIntegerofObjectwithValue(BBDestXIndex, bitBltOop, destX);
		storeIntegerofObjectwithValue(BBDestYIndex, bitBltOop, destY);
	l1:	/* end drawLoopX:Y: */;
		/* a failed copyBits() jumps here, skipping the store-back above */
		/* begin showDisplayBits */
		if (numGCsOnInvocation != (statNumGCs())) {
			reloadDestAndSourceForms();
		}
		showDisplayBitsLeftTopRightBottom(destForm, affectedL, affectedT, affectedR, affectedB);
	}
	if (!(failed())) {
		pop(2);
	}
	return 0;
}


/*	Answer the single pixel at x@y of the receiver, which must look like a
	Form (a pointer object with at least 4 slots whose bits are words or
	bytes).
	Stack on entry: receiver at 2, x at 1, y at 0.
	It does not handle LSB bitmaps right now: a negative depth fails.
	If x or y are < 0, answer 0 to indicate transparent (cf
	BitBlt>bitPeekerFromForm: usage).
	Likewise if x >= width or y >= height.
	Fail if the rcvr doesn't seem to be a Form, if x or y are not integer
	objects, if the depth is not in 1..32, or if the bitmap is too small
	for width * height pixels of that depth.
	Note: overwrites the file-level variables width and height.
 */

	/* BitBltSimulation>>#primitivePixelValueAtX:y: */
EXPORT(sqInt)
primitivePixelValueAt(void)
{
	sqInt bitmap;
	sqInt bitsSize;
	sqInt depth;
	unsigned int mask;
	sqInt pixel;
	sqInt ppW;
	sqInt rcvr;
	sqInt shift;
	sqInt stride;
	sqInt word;
	sqInt xVal;
	sqInt yVal;
	sqInt _return_value;

	if (!((isIntegerObject(stackValue(1)))
		 && (isIntegerObject(stackValue(0))))) {
		primitiveFailFor(PrimErrBadArgument);
		return null;
	}
	xVal = stackIntegerValue(1);
	yVal = stackIntegerValue(0);
	rcvr = stackValue(2);
	if (failed()) {
		return null;
	}
	/* negative coordinates are "transparent", not an error */
	if ((xVal < 0)
	 || (yVal < 0)) {
		_return_value = integerObjectOf(0);
		if (!(failed())) {
			popthenPush(3, _return_value);
		}
		return null;
	}
	rcvr = stackValue(methodArgumentCount());
	if (!((isPointers(rcvr))
		 && ((slotSizeOf(rcvr)) >= 4))) {
		primitiveFail();
		return null;
	}
	bitmap = fetchPointerofObject(FormBitsIndex, rcvr);
	if (!(isWordsOrBytes(bitmap))) {
		primitiveFail();
		return null;
	}
	width = fetchIntegerofObject(FormWidthIndex, rcvr);
	height = fetchIntegerofObject(FormHeightIndex, rcvr);

	/* if width/height/depth are not integer, fail */
	depth = fetchIntegerofObject(FormDepthIndex, rcvr);
	if (failed()) {
		return null;
	}
	/* coordinates beyond the form are "transparent" as well */
	if ((xVal >= width)
	 || (yVal >= height)) {
		_return_value = integerObjectOf(0);
		if (!(failed())) {
			popthenPush(3, _return_value);
		}
		return null;
	}

	/* Reject every depth that cannot describe pixels in a 32 bit word:
	   negative depths (LSB forms) are unsupported, 0 would divide by zero
	   in 32 / depth, and anything above 32 makes ppW zero (dividing by
	   zero in the stride computation) and the shift counts negative. */
	if ((depth <= 0)
	 || (depth > 32)) {
		primitiveFail();
		return null;
	}

	/* pixels in each word */
	ppW = 32 / depth;

	/* how many words per row of pixels */
	stride = (width + (ppW - 1)) / ppW;
	bitsSize = byteSizeOf(bitmap);
	if (!(bitsSize >= ((stride * height) * 4))) {

		/* bytes per word */
		primitiveFail();
		return null;
	}

	/* load the word that contains our target */
	word = fetchLong32ofObject((yVal * stride) + (xVal / ppW), bitmap);

	/* make a mask to isolate the pixel within that word */
	mask = ((usqInt) 0xFFFFFFFFU) >> (32 - depth);

	/* this is the tricky MSB part - we mask the xVal to find how far into the word we need, then add 1 for the pixel we're looking for, then * depth to get the bit shift */
	shift = 32 - (((xVal & (ppW - 1)) + 1) * depth);

	/* shift, mask and dim the lights */
	pixel = (((usqInt) word) >> shift) & mask;
	if (!(failed())) {
		_return_value = positive32BitIntegerFor(pixel);
		if (!(failed())) {
			popthenPush(3, _return_value);
		}
	}
	return null;
}


/*	Invoke the warpBits primitive. If the destination is the display, then
	copy it to the screen.
	Loads the BitBlt state from the receiver with the warping flag set,
	runs the inlined warpBits (clip, lock surfaces, set up the destination
	masks, warpLoop(), record the affected rectangle, unlock surfaces),
	shows the affected rectangle and answers the receiver.
 */

	/* BitBltSimulation>>#primitiveWarpBits */
EXPORT(sqInt)
primitiveWarpBits(void)
{
	sqInt endBits;
	sqInt ns;
	sqInt pixPerM1;
	sqInt rcvr;
	sqInt startBits;

	rcvr = stackValue(methodArgumentCount());
	if (!(loadBitBltFromwarping(rcvr, 1))) {
		return primitiveFail();
	}
	/* begin warpBits */
	/* clipRange() is run with noSource temporarily forced to 1; the saved
	   value is restored right afterwards */
	ns = noSource;
	noSource = 1;
	clipRange();
	noSource = ns;
	if (noSource
	 || ((bbW <= 0)
	 || (bbH <= 0))) {

		/* zero width or height; noop */
		affectedL = (affectedR = (affectedT = (affectedB = 0)));
		goto l1;
	}
	if (!(lockSurfaces())) {
		primitiveFail();
		goto l1;
	}
	/* begin destMaskAndPointerInit */

	/* A mask, assuming power of two */
	/* how many pixels in first word */
	pixPerM1 = destPPW - 1;

	/* how many pixels in last word */
	startBits = destPPW - (dx & pixPerM1);
	endBits = (((dx + bbW) - 1) & pixPerM1) + 1;
	/* mask1 covers the pixels of the first word, mask2 those of the last
	   word; which end of the word they occupy depends on destMSB */
	if (destMSB) {
		mask1 = ((usqInt) AllOnes) >> (32 - (startBits * destDepth));
		mask2 = ((usqInt)(AllOnes) << (32 - (endBits * destDepth)));
	}
	else {
		mask1 = ((usqInt)(AllOnes) << (32 - (startBits * destDepth)));
		mask2 = ((usqInt) AllOnes) >> (32 - (endBits * destDepth));
	}
	if (bbW < startBits) {
		/* everything fits in a single word: combine both masks */
		mask1 = mask1 & mask2;
		mask2 = 0;
		nWords = 1;
	}
	else {
		nWords = (((bbW - startBits) + pixPerM1) / destPPW) + 1;
	}

	/* defaults for no overlap with source */
	/* calculate byte addr and delta, based on first word of data */
	/* Note pitch is bytes and nWords is longs, not bytes */
	hDir = (vDir = 1);
	destIndex = (destBits + (dy * destPitch)) + ((dx / destPPW) * 4);

	/* byte addr delta */
	destDelta = (destPitch * vDir) - (4 * (nWords * hDir));
	warpLoop();
	/* record the destination rectangle that was written, taking the
	   horizontal and vertical direction into account */
	if (hDir > 0) {
		affectedL = dx;
		affectedR = dx + bbW;
	}
	else {
		affectedL = (dx - bbW) + 1;
		affectedR = dx + 1;
	}
	if (vDir > 0) {
		affectedT = dy;
		affectedB = dy + bbH;
	}
	else {
		affectedT = (dy - bbH) + 1;
		affectedB = dy + 1;
	}
	unlockSurfaces();
	l1:	/* end warpBits */;
	if (failed()) {
		return null;
	}
	/* begin showDisplayBits */
	/* the forms may have moved if a GC ran since the state was loaded */
	if (numGCsOnInvocation != (statNumGCs())) {
		reloadDestAndSourceForms();
	}
	showDisplayBitsLeftTopRightBottom(destForm, affectedL, affectedT, affectedR, affectedB);
	if (failed()) {
		return null;
	}
	methodReturnReceiver();
	return 0;
}


/*	A GC has occurred, so destForm and sourceForm must be fetched again.
	For copyBits and warpBits the BitBlt is the receiver itself. For a
	BalloonEnginePlugin the receiver is the engine and the BitBlt is its
	slot BEBitBltIndex. The flag bitBltIsReceiver tells the two cases apart.
	Always answers 0. */

	/* BitBltSimulation>>#reloadDestAndSourceForms */
static sqInt
reloadDestAndSourceForms(void)
{
	/* the receiver sits below the arguments on the stack */
	sqInt bitBlt = stackValue(methodArgumentCount());

	if (!bitBltIsReceiver) {
		bitBlt = fetchPointerofObject(BEBitBltIndex, bitBlt);
	}
	destForm = fetchPointerofObject(BBDestFormIndex, bitBlt);
	sourceForm = fetchPointerofObject(BBSourceFormIndex, bitBlt);
	return 0;
}

	/* BitBltSimulation>>#rgbAdd:with: */
/*	Add sourceWord to destinationWord partition by partition by delegating
	to partitionedAddtonBitscomponentMaskcarryOverflowMask. The partition
	layout depends on destDepth:
		below 16:  one partition per pixel, destDepth bits wide
		16:        5 bit components (bit 15 of each half-word masked off)
		otherwise: 8 bit components */
static unsigned int
rgbAddwith(unsigned int sourceWord, unsigned int destinationWord)
{
	unsigned int partMask;
	unsigned int carryMask;

	if (destDepth == 16) {
		/* Add RGB components of each pixel separately.
		   0x42104210 == 1108361744, 0x7FFF7FFF == 2147450879 */
		partMask = 0x1F;
		carryMask = 0x42104210;
		return partitionedAddtonBitscomponentMaskcarryOverflowMask(sourceWord & 0x7FFF7FFF, destinationWord & 0x7FFF7FFF, 5, partMask, carryMask);
	}

	if (destDepth > 16) {
		/* Add RGBA components of the pixel separately.
		   0x80808080U == 2155905152U */
		partMask = 0xFF;
		carryMask = 0x80808080U;
		return partitionedAddtonBitscomponentMaskcarryOverflowMask(sourceWord, destinationWord, 8, partMask, carryMask);
	}

	/* destDepth < 16: add each pixel separately. The carry mask has the
	   top bit of every pixel set. */
	partMask = (1U << destDepth) - 1;
	carryMask = ((usqInt)((0xFFFFFFFFU / partMask)) << (destDepth - 1));
	return partitionedAddtonBitscomponentMaskcarryOverflowMask(sourceWord, destinationWord, destDepth, partMask, carryMask);
}


/*	Component-alpha blend. This version assumes
		combinationRule = 41
		sourcePixSize = 32
		destPixSize = 16
		sourceForm ~= destForm.
	Each 32 bit source word is used as a per-component mask. Where its low
	24 bits are non-zero, the matching 16 bit destination pixel is expanded
	to 32 bits, blended by rgbComponentAlpha32with, dithered back to 16 bits
	and stored. Where they are zero the destination pixel is left untouched.
	The global mask1 is overwritten as scratch. Always answers 0. */
/*	This particular method should be optimized in itself */

	/* BitBltSimulation>>#rgbComponentAlpha16 */
static sqInt
rgbComponentAlpha16(void)
{
	sqInt addThreshold;
	int deltaX;
	int deltaY;
	unsigned int destWord;
	int ditherBase;
	int ditherIndex;
	int ditherThreshold;
	sqInt dstIndex;
	unsigned int dstMask;
	sqInt dstValue;
	int dstY;
	unsigned int sourceWord;
	unsigned int srcAlpha;
	sqInt srcIndex;
	int srcShift;
	int srcY;


	/* So we can pre-decrement */
	deltaY = bbH + 1;
	srcY = sy;
	dstY = dy;

	/* Work out which half of the first destination word holds pixel dx;
	   the halves are swapped when the destination is MSB first. */
	srcShift = (dx & 1) * 16;
	if (destMSB) {
		srcShift = 16 - srcShift;
	}

	/* mask1 = the bits of the first destination word that must be
	   preserved, i.e. the half NOT being written. */
	/* This is the outer loop */
	mask1 = 0xFFFFU << (16 - srcShift);
	while (((deltaY -= 1)) != 0) {
		/* Byte addresses of the first source word and of the destination
		   word containing the first pixel of this row */
		srcIndex = (sourceBits + (srcY * sourcePitch)) + (sx * 4);
		dstIndex = (destBits + (dstY * destPitch)) + ((dx / 2) * 4);

		/* Row offset into the 4x4 dither matrix */
		ditherBase = (dstY & 3) * 4;

		/* For pre-increment */
		ditherIndex = (sx & 3) - 1;

		/* So we can pre-decrement */
		deltaX = bbW + 1;
		dstMask = mask1;

		/* srcShift is the bit offset of the written half-word: 16 when
		   the low half is preserved, 0 when the high half is preserved */
		if (dstMask == 0xFFFF) {
			srcShift = 16;
		}
		else {
			srcShift = 0;
		}
		while (((deltaX -= 1)) != 0) {
			/* Advance the dither column (wrapping at 4) and fetch the threshold */
			ditherThreshold = ditherMatrix4x4[ditherBase + ((ditherIndex = (ditherIndex + 1) & 3))];
			assert((((usqInt)srcIndex)) < endOfSource);
			sourceWord = long32At(srcIndex);

			/* Only the low 24 bits decide whether anything is drawn */
			srcAlpha = sourceWord & 0xFFFFFF;
			if (!(srcAlpha == 0)) {

				/* 0 < srcAlpha */
				/* If we have to mix colors then just copy a single word */
				/* begin dstLongAt: */
				assert((((usqInt)dstIndex)) < endOfDestination);
				destWord = ((sqInt) (long32At(dstIndex)));

				/* Isolate the destination pixel being written */
				destWord = destWord & ((unsigned int)~dstMask);

				/* Expand from 16 to 32 bit by adding zero bits */
				destWord = ((usqInt) destWord) >> srcShift;

				/* Spread the three 5 bit fields into the top 5 bits of each
				   of the three low bytes and force the top byte to 0xFF */
				/* Mix colors */
				destWord = ((((usqInt) (destWord & 0x7C00) << 9)) | (((usqInt) (destWord & 0x3E0) << 6))) | ((((usqInt) (destWord & 0x1F) << 3)) | 0xFF000000U);

				/* And dither */
				sourceWord = rgbComponentAlpha32with(sourceWord, destWord);
				/* begin dither32To16:threshold: */
				/* dither8Lookup is indexed by (threshold << 8) + 8 bit component;
				   the three results are packed at bit offsets 10, 5 and 0 */
				addThreshold = ((usqInt) ditherThreshold << 8);
				sourceWord = ((((usqInt) (dither8Lookup[addThreshold + ((((usqInt) sourceWord >> 16)) & 0xFF)]) << 10)) + (((usqInt) (dither8Lookup[addThreshold + ((((usqInt) sourceWord >> 8)) & 0xFF)]) << 5))) + (dither8Lookup[addThreshold + (sourceWord & 0xFF)]);

				/* A result of 0 is replaced by 1 before being shifted into place */
				if (sourceWord == 0) {
					sourceWord = 1U << srcShift;
				}
				else {
					sourceWord = ((usqInt)(sourceWord) << srcShift);
				}
				/* begin dstLongAt:put:mask: */
				/* Keep the bits selected by dstMask, replace the rest */
				assert((((usqInt)dstIndex)) < endOfDestination);
				dstValue = long32At(dstIndex);
				dstValue = dstValue & dstMask;
				dstValue = dstValue | sourceWord;
				/* begin dstLongAt:put: */
				long32Atput(dstIndex, dstValue);
			}
			srcIndex += 4;

			/* Step to the next destination word once the second pixel of
			   the current word has been handled: that is offset 0 when MSB
			   first, offset 16 otherwise */
			if (destMSB) {
				if (srcShift == 0) {
					dstIndex += 4;
				}
			}
			else {
				if (!(srcShift == 0)) {
					dstIndex += 4;
				}
			}

			/* Toggle between 0 and 16 */
			srcShift = srcShift ^ 16;
			dstMask = (unsigned int)~dstMask;
		}
		srcY += 1;
		dstY += 1;
	}
	return 0;
}


/*	This version assumes 
	combinationRule = 41
	sourcePixSize = destPixSize = 32
	sourceForm ~= destForm.
	Note: The inner loop has been optimized for dealing
	with the special case of aR = aG = aB = 0 
	 */

	/* BitBltSimulation>>#rgbComponentAlpha32 */
static sqInt
rgbComponentAlpha32(void)
{
	register sqInt deltaX;
	int deltaY;
	unsigned int destWord;
	register sqIntptr_t dstIndex;
	int dstY;
	unsigned int sourceWord;
	unsigned int srcAlpha;
	register sqIntptr_t srcIndex;
	int srcY;


	/* This particular method should be optimized in itself */
	/* Give the compile a couple of hints */
	/* The following should be declared as pointers so the compiler will
	   notice that they're used for accessing memory locations
	   (good to know on an Intel architecture) but then the increments
	   would be different between ST code and C code so must hope the
	   compiler notices what happens (MS Visual C does) */

	/* So we can pre-decrement */
	deltaY = bbH + 1;
	srcY = sy;

	/* This is the outer loop */
	dstY = dy;
	while (((deltaY -= 1)) != 0) {
		srcIndex = (sourceBits + (srcY * sourcePitch)) + (sx * 4);
		dstIndex = (destBits + (dstY * destPitch)) + (dx * 4);

		/* So we can pre-decrement */
		/* This is the inner loop */
		deltaX = bbW + 1;
		while (((deltaX -= 1)) != 0) {
			assert((((usqInt)srcIndex)) < endOfSource);
			sourceWord = long32At(srcIndex);
			srcAlpha = sourceWord & 0xFFFFFF;
			if (srcAlpha == 0) {
				srcIndex += 4;

				/* Now skip as many words as possible, */
				dstIndex += 4;
				while ((((deltaX -= 1)) != 0)
				 && ((((assert((((usqInt)srcIndex)) < endOfSource),
				(sourceWord = long32At(srcIndex)))) & 0xFFFFFF) == 0)) {
					srcIndex += 4;
					dstIndex += 4;
				}
				deltaX += 1;
			}
			else {

				/* 0 < srcAlpha */
				/* If we have to mix colors then just copy a single word */
				/* begin dstLongAt: */
				assert((((usqInt)dstIndex)) < endOfDestination);
				destWord = ((sqInt) (long32At(dstIndex)));
				destWord = rgbComponentAlpha32with(sourceWord, destWord);
				/* begin dstLongAt:put: */
				long32Atput(dstIndex, destWord);
				srcIndex += 4;
				dstIndex += 4;
			}
		}
		srcY += 1;
		dstY += 1;
	}
	return 0;
}


/*	Blend componentAlphaModeColor into destinationWord.
	sourceWord contains an alpha value for each component of RGB, each of
	which is encoded as 0 meaning 0.0 and 255 meaning 1.0.
	The rule is, with
		color      = componentAlphaModeColor
		colorAlpha = componentAlphaModeAlpha
		mask       = sourceWord
	as follows:
		dst.A = colorAlpha + (1 - colorAlpha) * dst.A
		dst.R = color.R * mask.R * colorAlpha + (1 - (mask.R * colorAlpha)) * dst.R
		dst.G = color.G * mask.G * colorAlpha + (1 - (mask.G * colorAlpha)) * dst.G
		dst.B = color.B * mask.B * colorAlpha + (1 - (mask.B * colorAlpha)) * dst.B
	In the code below the weight used for dst.A is the top byte of the mask
	scaled by colorAlpha. When the gamma tables are present, the colour
	components are mapped through ungammaLookupTable before mixing and
	through gammaLookupTable afterwards. A zero sourceWord answers
	destinationWord unchanged. */
/*	Do NOT inline this into optimized loops */

	/* BitBltSimulation>>#rgbComponentAlpha32:with: */
static sqInt
rgbComponentAlpha32with(sqInt sourceWord, sqInt destinationWord)
{
	sqInt weight[4];	/* mask bytes, lowest first: B, G, R, A */
	sqInt mixed[3];		/* blended components, lowest first: B, G, R */
	sqInt colorAlpha;
	sqInt colRest;
	sqInt dstRest;
	sqInt maskRest;
	sqInt dstPart;
	sqInt colPart;
	sqInt sum;
	sqInt answer;
	sqInt i;

	if (sourceWord == 0) {
		return destinationWord;
	}

	/* Split the mask into its four bytes */
	maskRest = sourceWord;
	for (i = 0; i < 4; i += 1) {
		weight[i] = maskRest & 0xFF;
		maskRest = ((usqInt) maskRest) >> 8;
	}

	/* Scale every weight by the colour's own alpha unless that is opaque */
	colorAlpha = componentAlphaModeAlpha & 0xFF;
	if (colorAlpha != 0xFF) {
		for (i = 0; i < 4; i += 1) {
			weight[i] = ((usqInt) (weight[i] * colorAlpha)) >> 8;
		}
	}

	/* Mix the three colour components, lowest byte first */
	colRest = componentAlphaModeColor;
	dstRest = destinationWord;
	for (i = 0; i < 3; i += 1) {
		dstPart = dstRest & 0xFF;
		colPart = colRest & 0xFF;
		if (ungammaLookupTable != null) {
			dstPart = ungammaLookupTable[dstPart];
			colPart = ungammaLookupTable[colPart];
		}
		sum = (((usqInt) (dstPart * (0xFF - weight[i]))) >> 8) + (((usqInt) (colPart * weight[i])) >> 8);
		if (sum > 0xFF) {
			sum = 0xFF;
		}
		if (gammaLookupTable != null) {
			sum = gammaLookupTable[sum];
		}
		mixed[i] = sum;
		dstRest = ((usqInt) dstRest) >> 8;
		colRest = ((usqInt) colRest) >> 8;
	}

	/* Alpha: no gamma correction is applied to this value */
	sum = (((usqInt) ((dstRest & 0xFF) * (0xFF - weight[3]))) >> 8) + weight[3];
	if (sum > 0xFF) {
		sum = 0xFF;
	}

	/* Reassemble as A, R, G, B from the top byte down */
	answer = ((sqInt)((usqInt)(sum) << 8)) + mixed[2];
	answer = ((sqInt)((usqInt)(answer) << 8)) + mixed[1];
	answer = ((sqInt)((usqInt)(answer) << 8)) + mixed[0];
	return answer;
}


/*	Component-alpha blend. This version assumes
		combinationRule = 41
		sourcePixSize = 32
		destPixSize = 8
		sourceForm ~= destForm.
	Note: This is not real blending since we don't have the source colors
	available.
	For each source word the three low bytes are averaged. Averages of
	0x1F or less leave the destination pixel untouched; averages above 224
	are treated as fully opaque. Otherwise the 8 bit destination pixel is
	expanded through a fixed 256 entry table, blended by
	rgbComponentAlpha32with, mapped back through the colour map (with
	ColorMapNewStyle masked out of cmFlags) and stored.
	The globals mask1 and mask2 are overwritten as scratch.
	Always answers 0. */

	/* BitBltSimulation>>#rgbComponentAlpha8 */
static sqInt
rgbComponentAlpha8(void)
{
	sqInt adjust;
	int deltaX;
	int deltaY;
	unsigned int destWord;
	sqInt dstIndex;
	unsigned int dstMask;
	sqInt dstValue;
	int dstY;
	sqInt mapperFlags;
	unsigned int *mappingTable;
	sqInt pv;
	unsigned int sourceWord;
	unsigned int srcAlpha;
	sqInt srcIndex;
	sqInt srcShift;
	int srcY;
	static unsigned int theTable[256] = { 
0x0, 0xFF000001, 0xFFFFFFFF, 0xFF808080, 0xFFFF0000, 0xFF00FF00, 0xFF0000FF, 0xFF00FFFF, 
0xFFFFFF00, 0xFFFF00FF, 0xFF202020, 0xFF404040, 0xFF606060, 0xFF9F9F9F, 0xFFBFBFBF, 0xFFDFDFDF, 
0xFF080808, 0xFF101010, 0xFF181818, 0xFF282828, 0xFF303030, 0xFF383838, 0xFF484848, 0xFF505050, 
0xFF585858, 0xFF686868, 0xFF707070, 0xFF787878, 0xFF878787, 0xFF8F8F8F, 0xFF979797, 0xFFA7A7A7, 
0xFFAFAFAF, 0xFFB7B7B7, 0xFFC7C7C7, 0xFFCFCFCF, 0xFFD7D7D7, 0xFFE7E7E7, 0xFFEFEFEF, 0xFFF7F7F7, 
0xFF000001, 0xFF003300, 0xFF006600, 0xFF009900, 0xFF00CC00, 0xFF00FF00, 0xFF000033, 0xFF003333, 
0xFF006633, 0xFF009933, 0xFF00CC33, 0xFF00FF33, 0xFF000066, 0xFF003366, 0xFF006666, 0xFF009966, 
0xFF00CC66, 0xFF00FF66, 0xFF000099, 0xFF003399, 0xFF006699, 0xFF009999, 0xFF00CC99, 0xFF00FF99, 
0xFF0000CC, 0xFF0033CC, 0xFF0066CC, 0xFF0099CC, 0xFF00CCCC, 0xFF00FFCC, 0xFF0000FF, 0xFF0033FF, 
0xFF0066FF, 0xFF0099FF, 0xFF00CCFF, 0xFF00FFFF, 0xFF330000, 0xFF333300, 0xFF336600, 0xFF339900, 
0xFF33CC00, 0xFF33FF00, 0xFF330033, 0xFF333333, 0xFF336633, 0xFF339933, 0xFF33CC33, 0xFF33FF33, 
0xFF330066, 0xFF333366, 0xFF336666, 0xFF339966, 0xFF33CC66, 0xFF33FF66, 0xFF330099, 0xFF333399, 
0xFF336699, 0xFF339999, 0xFF33CC99, 0xFF33FF99, 0xFF3300CC, 0xFF3333CC, 0xFF3366CC, 0xFF3399CC, 
0xFF33CCCC, 0xFF33FFCC, 0xFF3300FF, 0xFF3333FF, 0xFF3366FF, 0xFF3399FF, 0xFF33CCFF, 0xFF33FFFF, 
0xFF660000, 0xFF663300, 0xFF666600, 0xFF669900, 0xFF66CC00, 0xFF66FF00, 0xFF660033, 0xFF663333, 
0xFF666633, 0xFF669933, 0xFF66CC33, 0xFF66FF33, 0xFF660066, 0xFF663366, 0xFF666666, 0xFF669966, 
0xFF66CC66, 0xFF66FF66, 0xFF660099, 0xFF663399, 0xFF666699, 0xFF669999, 0xFF66CC99, 0xFF66FF99, 
0xFF6600CC, 0xFF6633CC, 0xFF6666CC, 0xFF6699CC, 0xFF66CCCC, 0xFF66FFCC, 0xFF6600FF, 0xFF6633FF, 
0xFF6666FF, 0xFF6699FF, 0xFF66CCFF, 0xFF66FFFF, 0xFF990000, 0xFF993300, 0xFF996600, 0xFF999900, 
0xFF99CC00, 0xFF99FF00, 0xFF990033, 0xFF993333, 0xFF996633, 0xFF999933, 0xFF99CC33, 0xFF99FF33, 
0xFF990066, 0xFF993366, 0xFF996666, 0xFF999966, 0xFF99CC66, 0xFF99FF66, 0xFF990099, 0xFF993399, 
0xFF996699, 0xFF999999, 0xFF99CC99, 0xFF99FF99, 0xFF9900CC, 0xFF9933CC, 0xFF9966CC, 0xFF9999CC, 
0xFF99CCCC, 0xFF99FFCC, 0xFF9900FF, 0xFF9933FF, 0xFF9966FF, 0xFF9999FF, 0xFF99CCFF, 0xFF99FFFF, 
0xFFCC0000, 0xFFCC3300, 0xFFCC6600, 0xFFCC9900, 0xFFCCCC00, 0xFFCCFF00, 0xFFCC0033, 0xFFCC3333, 
0xFFCC6633, 0xFFCC9933, 0xFFCCCC33, 0xFFCCFF33, 0xFFCC0066, 0xFFCC3366, 0xFFCC6666, 0xFFCC9966, 
0xFFCCCC66, 0xFFCCFF66, 0xFFCC0099, 0xFFCC3399, 0xFFCC6699, 0xFFCC9999, 0xFFCCCC99, 0xFFCCFF99, 
0xFFCC00CC, 0xFFCC33CC, 0xFFCC66CC, 0xFFCC99CC, 0xFFCCCCCC, 0xFFCCFFCC, 0xFFCC00FF, 0xFFCC33FF, 
0xFFCC66FF, 0xFFCC99FF, 0xFFCCCCFF, 0xFFCCFFFF, 0xFFFF0000, 0xFFFF3300, 0xFFFF6600, 0xFFFF9900, 
0xFFFFCC00, 0xFFFFFF00, 0xFFFF0033, 0xFFFF3333, 0xFFFF6633, 0xFFFF9933, 0xFFFFCC33, 0xFFFFFF33, 
0xFFFF0066, 0xFFFF3366, 0xFFFF6666, 0xFFFF9966, 0xFFFFCC66, 0xFFFFFF66, 0xFFFF0099, 0xFFFF3399, 
0xFFFF6699, 0xFFFF9999, 0xFFFFCC99, 0xFFFFFF99, 0xFFFF00CC, 0xFFFF33CC, 0xFFFF66CC, 0xFFFF99CC, 
0xFFFFCCCC, 0xFFFFFFCC, 0xFFFF00FF, 0xFFFF33FF, 0xFFFF66FF, 0xFFFF99FF, 0xFFFFCCFF, 0xFFFFFFFF};
	sqInt val;


	/* This particular method should be optimized in itself */
	/* begin default8To32Table */
	mappingTable = theTable;
	mapperFlags = cmFlags & ((unsigned int)~ColorMapNewStyle);

	/* So we can pre-decrement */
	deltaY = bbH + 1;
	srcY = sy;
	dstY = dy;

	/* mask1 = bit offset of pixel dx inside its destination word;
	   mask2 = the bits of that word to preserve (all but that byte) */
	mask1 = (dx & 3) * 8;
	if (destMSB) {
		mask1 = 24 - mask1;
	}
	mask2 = AllOnes ^ (0xFFU << mask1);

	/* adjust alternates between 0 and 0x1F1F1F1F (522133279) from pixel to
	   pixel and from row to row; it is forced into the low five bits of
	   every source byte before averaging */
	if ((dx & 1) == 0) {
		adjust = 0;
	}
	else {
		adjust = 522133279;
	}
	if ((dy & 1) == 0) {
		adjust = adjust ^ 522133279;
	}
	while (((deltaY -= 1)) != 0) {
		adjust = adjust ^ 522133279;
		srcIndex = (sourceBits + (srcY * sourcePitch)) + (sx * 4);
		dstIndex = (destBits + (dstY * destPitch)) + ((dx / 4) * 4);

		/* So we can pre-decrement */
		deltaX = bbW + 1;
		srcShift = mask1;

		/* This is the inner loop */
		dstMask = mask2;
		while (((deltaX -= 1)) != 0) {
			sourceWord = (((assert((((usqInt)srcIndex)) < endOfSource),
long32At(srcIndex))) & ((unsigned int)~adjust)) + adjust;

			/* set srcAlpha to the average of the 3 separate aR,Ag,AB values */
			srcAlpha = sourceWord & 0xFFFFFF;
			srcAlpha = (((((usqInt) srcAlpha) >> 16) + ((((usqInt) srcAlpha) >> 8) & 0xFF)) + (srcAlpha & 0xFF)) / 3;
			if (srcAlpha > 0x1F) {

				/* Everything below 31 is transparent */
				if (srcAlpha > 224) {

					/* treat everything above 224 as opaque */
					sourceWord = 0xFFFFFFFFU;
				}
				/* begin dstLongAt: */
				assert((((usqInt)dstIndex)) < endOfDestination);
				destWord = ((sqInt) (long32At(dstIndex)));

				/* Isolate the 8 bit destination pixel and expand it to 32 bits */
				destWord = destWord & ((unsigned int)~dstMask);
				destWord = ((usqInt) destWord) >> srcShift;
				destWord = mappingTable[destWord];
				sourceWord = rgbComponentAlpha32with(sourceWord, destWord);
				/* begin mapPixel:flags: */
				pv = sourceWord;
				if ((mapperFlags & ColorMapPresent) != 0) {
					if ((mapperFlags & ColorMapFixedPart) != 0) {
						/* begin rgbMapPixel:flags: */
						val = (((((int) (cmShiftTable[0]))) < 0) ? ((usqInt) (sourceWord & (cmMaskTable[0])) >> -(((int) (cmShiftTable[0])))) : ((usqInt) (sourceWord & (cmMaskTable[0])) << (((int) (cmShiftTable[0])))));
						val = val | ((((((int) (cmShiftTable[1]))) < 0) ? ((usqInt) (sourceWord & (cmMaskTable[1])) >> -(((int) (cmShiftTable[1])))) : ((usqInt) (sourceWord & (cmMaskTable[1])) << (((int) (cmShiftTable[1]))))));
						val = val | ((((((int) (cmShiftTable[2]))) < 0) ? ((usqInt) (sourceWord & (cmMaskTable[2])) >> -(((int) (cmShiftTable[2])))) : ((usqInt) (sourceWord & (cmMaskTable[2])) << (((int) (cmShiftTable[2]))))));
						pv = val | ((((((int) (cmShiftTable[3]))) < 0) ? ((usqInt) (sourceWord & (cmMaskTable[3])) >> -(((int) (cmShiftTable[3])))) : ((usqInt) (sourceWord & (cmMaskTable[3])) << (((int) (cmShiftTable[3]))))));

						/* A non-zero pixel must not map to 0 */
						if ((pv == 0)
						 && (sourceWord != 0)) {
							pv = 1;
						}
					}
					if ((mapperFlags & ColorMapIndexedPart) != 0) {
						pv = cmLookupTable[pv & cmMask];
					}
				}
				sourceWord = pv;

				/* Store back */
				sourceWord = ((usqInt)(sourceWord) << srcShift);
				/* begin dstLongAt:put:mask: */
				assert((((usqInt)dstIndex)) < endOfDestination);
				dstValue = long32At(dstIndex);
				dstValue = dstValue & dstMask;
				dstValue = dstValue | sourceWord;
				/* begin dstLongAt:put: */
				long32Atput(dstIndex, dstValue);
			}
			srcIndex += 4;
			if (destMSB) {
				/* Most significant pixel first: offsets run 24, 16, 8, 0 */
				if (srcShift == 0) {
					dstIndex += 4;
					srcShift = 24;
					dstMask = 0xFFFFFF;
				}
				else {
					srcShift -= 8;
					dstMask = (((usqInt) dstMask) >> 8) | 0xFF000000U;
				}
			}
			else {
				/* Least significant pixel first: offsets run 0, 8, 16, 24.
				   The last pixel of a word sits at offset 24, so that is
				   where we must move on to the next word. Testing for 32
				   here let one pixel in every five be processed with a
				   shift of 32 and an all-ones mask: it was never stored
				   and every later pixel landed one position early. */
				if (srcShift == 24) {
					dstIndex += 4;
					srcShift = 0;
					dstMask = 0xFFFFFF00U;
				}
				else {
					srcShift += 8;
					dstMask = (((usqInt)(dstMask) << 8)) | 0xFF;
				}
			}
			adjust = adjust ^ 522133279;
		}
		srcY += 1;
		dstY += 1;
	}
	return 0;
}


/*	Generic (any destination depth) component-alpha blend of one word.
	componentAlphaModeColor is the color,
	sourceWord contains an alpha value for each component of RGB
	each of which is encoded as 0 meaning 0.0 and 255 meaning 1.0.
	The rule is, with
		color      = componentAlphaModeColor
		colorAlpha = componentAlphaModeAlpha
		mask       = sourceWord
	as follows:
		dst.A = colorAlpha + (1 - colorAlpha) * dst.A
		dst.R = color.R * mask.R * colorAlpha + (1 - (mask.R * colorAlpha)) * dst.R
		dst.G = color.G * mask.G * colorAlpha + (1 - (mask.G * colorAlpha)) * dst.G
		dst.B = color.B * mask.B * colorAlpha + (1 - (mask.B * colorAlpha)) * dst.B
	The word is processed one pixel at a time (destPPW pixels of destDepth
	bits each); the actual mixing is done by rgbComponentAlpha32with.
	A zero sourceWord answers destinationWord unchanged. */
/*	Do NOT inline this into optimized loops */

	/* BitBltSimulation>>#rgbComponentAlpha:with: */
static sqInt
rgbComponentAlphawith(sqInt sourceWord, sqInt destinationWord)
{
	sqInt alpha;
	sqInt d;
	sqInt destPix;
	sqInt i;
	unsigned int mask;
	sqInt mask3;
	unsigned int p1;
	unsigned int p2;
	unsigned int result;
	sqInt srcPix;
	sqInt v;

	alpha = sourceWord;
	if (alpha == 0) {
		return destinationWord;
	}
	/* begin partitionedRgbComponentAlpha:dest:nBits:nPartitions: */

	/* partition mask starts at the right */
	mask = maskTable[destDepth];
	result = 0;
	for (i = 1; i <= destPPW; i += 1) {
		/* Extract pixel i of both words, right-aligned */
		p1 = ((usqInt) ((((unsigned int) sourceWord)) & mask)) >> ((i - 1) * destDepth);
		p2 = ((usqInt) ((((unsigned int) destinationWord)) & mask)) >> ((i - 1) * destDepth);
		if (!(destDepth == 32)) {
			/* Widen both pixels to 32 bits and force the top byte to 0xFF */
			if (destDepth == 16) {
				/* Move each 5 bit field into the top 5 bits of its byte */
				p1 = (((((usqInt)((p1 & 0x1F)) << 3)) | (((usqInt)((p1 & 0x3E0)) << 6))) | (((usqInt)((p1 & 0x7C00)) << 9))) | 0xFF000000U;
				p2 = (((((usqInt)((p2 & 0x1F)) << 3)) | (((usqInt)((p2 & 0x3E0)) << 6))) | (((usqInt)((p2 & 0x7C00)) << 9))) | 0xFF000000U;
			}
			else {
				p1 = (rgbMapfromto(p1, destDepth, 32)) | 0xFF000000U;
				p2 = (rgbMapfromto(p2, destDepth, 32)) | 0xFF000000U;
			}
		}
		v = rgbComponentAlpha32with(p1, p2);
		if (!(destDepth == 32)) {
			/* Convert the blended value back to destDepth. What follows is
			   an inlined copy of rgbMap: v from: 32 to: destDepth; every
			   exit jumps to the label l1 below. */
			/* begin rgbMap:from:to: */
			if (((d = destDepth - 32)) > 0) {
				/* Only reached if destDepth exceeds 32 */

				/* Expand to more bits by zero-fill */

				/* Transfer mask */
				mask3 = (1ULL << 32) - 1;
				srcPix = ((sqInt)((usqInt)(v) << d));
				mask3 = ((sqInt)((usqInt)(mask3) << d));
				destPix = srcPix & mask3;
				mask3 = ((sqInt)((usqInt)(mask3) << destDepth));
				srcPix = ((sqInt)((usqInt)(srcPix) << d));
				v = (destPix + (srcPix & mask3)) + ((((sqInt)((usqInt)(srcPix) << d))) & (((sqInt)((usqInt)(mask3) << destDepth))));
				goto l1;
			}
			else {

				/* Compress to fewer bits by truncation */
				if (d == 0) {
					/* d == 0 means destDepth == 32, which the enclosing
					   test excludes; the two constant comparisons below are
					   left over from inlining with nBitsIn = 32 and are
					   always false */
					if (32 == 5) {

						/* Sometimes called with 16 bits, though pixel is 15,
						   but we must never return more than 15. */
						v = v & 0x7FFF;
						goto l1;
					}
					if (32 == 8) {

						/* Sometimes called with 32 bits, though pixel is 24,
						   but we must never return more than 24. */
						v = v & 0xFFFFFF;
						goto l1;
					}
					goto l1;
				}
				/* Zero stays zero */
				if (v == 0) {
					goto l1;
				}
				d = 32 - destDepth;

				/* Transfer mask */
				mask3 = (1U << destDepth) - 1;
				srcPix = ((usqInt) v) >> d;
				destPix = srcPix & mask3;
				mask3 = ((sqInt)((usqInt)(mask3) << destDepth));
				srcPix = ((usqInt) srcPix) >> d;
				destPix = (destPix + (srcPix & mask3)) + ((((usqInt) srcPix) >> d) & (((sqInt)((usqInt)(mask3) << destDepth))));

				/* A non-zero input must not be answered as 0 */
				if (destPix == 0) {
					v = 1;
					goto l1;
				}
				v = destPix;
				goto l1;
			}
	l1:	/* end rgbMap:from:to: */;
		}
		/* Put the blended pixel back at position i */
		result = result | (((sqInt)((usqInt)(v) << ((i - 1) * destDepth))));

		/* slide left to next partition */
		mask = ((usqInt)(mask) << destDepth);
	}
	return result;
}


/*	Subract the pixels in the source and destination, color by color,
	and return the sum of the absolute value of all the differences.
	For non-rgb, return the number of differing pixels. */

	/* BitBltSimulation>>#rgbDiff:with: */
static unsigned int
rgbDiffwith(unsigned int sourceWord, unsigned int destinationWord)
{
	sqInt bitsPerColor;
	unsigned int destPixVal;
	unsigned int destShifted;
	sqInt diff;
	sqInt i;
	unsigned int maskShifted;
	unsigned int pixMask;
	unsigned int rgbMask;
	unsigned int sourcePixVal;
	unsigned int sourceShifted;

	pixMask = maskTable[destDepth];
	if (destDepth == 16) {
		bitsPerColor = 5;
		rgbMask = 0x1F;
	}
	else {
		bitsPerColor = 8;
		rgbMask = 0xFF;
	}
	maskShifted = destMask;
	destShifted = destinationWord;
	sourceShifted = sourceWord;
	for (i = 1; i <= destPPW; i += 1) {
		if ((maskShifted & pixMask) > 0) {

			/* Only tally pixels within the destination rectangle */
			destPixVal = destShifted & pixMask;
			sourcePixVal = sourceShifted & pixMask;
			if (destDepth < 16) {
				if (sourcePixVal == destPixVal) {
					diff = 0;
				}
				else {
					diff = 1;
				}
			}
			else {
				diff = partitionedSubfromnBitsnPartitions(sourcePixVal, destPixVal, bitsPerColor, 3);
				diff = ((diff & rgbMask) + ((((usqInt) diff) >> bitsPerColor) & rgbMask)) + ((((usqInt) (((usqInt) diff) >> bitsPerColor)) >> bitsPerColor) & rgbMask);
			}
			bitCount += diff;
		}
		maskShifted = ((usqInt) maskShifted) >> destDepth;
		sourceShifted = ((usqInt) sourceShifted) >> destDepth;
		destShifted = ((usqInt) destShifted) >> destDepth;
	}
	return destinationWord;
}


/*	Convert the given 16bit pixel value to a 32bit RGBA value.
	Each of the three 5 bit fields is moved into the top five bits of its
	own byte; the low three bits of every byte and the whole top byte are
	left zero.
	Note: This method is intended to deal with different source formats. */

	/* BitBltSimulation>>#rgbMap16To32: */
static sqInt
rgbMap16To32(sqInt sourcePixel)
{
	usqInt lowField = ((usqInt)((sourcePixel & 0x1F))) << 3;
	usqInt midField = ((usqInt)((sourcePixel & 0x3E0))) << 6;
	usqInt highField = ((usqInt)((sourcePixel & 0x7C00))) << 9;

	return ((sqInt) lowField) | ((sqInt) midField) | ((sqInt) highField);
}


/*	Convert the given 32bit pixel value to a 32bit RGBA value.
	No conversion is performed here: the pixel is answered unchanged.
	Note: This method is intended to deal with different source formats. */

	/* BitBltSimulation>>#rgbMap32To32: */
static sqInt
rgbMap32To32(sqInt sourcePixel)
{
	return sourcePixel;
}


/*	Perform the RGBA conversion for the given source pixel.
	For each of the four entries of cmMaskTable / cmShiftTable the pixel is
	masked and then shifted: left for a positive shift, right for a
	negative one. The four results are OR-ed together.
	mapperFlags is not consulted. */

	/* BitBltSimulation>>#rgbMapPixel:flags: */
static sqInt
rgbMapPixelflags(sqInt sourcePixel, sqInt mapperFlags)
{
	usqInt mapped = 0;
	int shift;
	int part;

	for (part = 0; part < 4; part += 1) {
		shift = ((int) (cmShiftTable[part]));
		if (shift < 0) {
			mapped |= ((usqInt) (sourcePixel & (cmMaskTable[part]))) >> -shift;
		}
		else {
			mapped |= ((usqInt) (sourcePixel & (cmMaskTable[part]))) << shift;
		}
	}
	return mapped;
}


/*	Convert the given pixel value with nBitsIn bits for each color component
	to a pixel value with nBitsOut bits for each color component. Typical
	values for nBitsIn/nBitsOut are 3, 5, or 8.
	Three cases:
		same width - answer the pixel, clipped to 15 bits when nBitsIn is 5
		             and to 24 bits when nBitsIn is 8
		expanding  - zero-fill the new low bits of each component
		shrinking  - truncate each component; a non-zero pixel that would
		             truncate to 0 is answered as 1
 */

	/* BitBltSimulation>>#rgbMap:from:to: */
static sqInt
rgbMapfromto(sqInt sourcePixel, sqInt nBitsIn, sqInt nBitsOut)
{
	sqInt shift = nBitsOut - nBitsIn;
	sqInt fieldMask;
	sqInt moved;
	sqInt result;

	if (shift == 0) {
		switch (nBitsIn) {
		case 5:
			/* Sometimes called with 16 bits, though pixel is 15,
			   but we must never return more than 15. */
			return sourcePixel & 0x7FFF;
		case 8:
			/* Sometimes called with 32 bits, though pixel is 24,
			   but we must never return more than 24. */
			return sourcePixel & 0xFFFFFF;
		default:
			return sourcePixel;
		}
	}

	if (shift > 0) {
		/* Expand to more bits by zero-fill: handle the three components
		   in turn, lowest first, widening the gap by shift each time */
		fieldMask = (1U << nBitsIn) - 1;

		moved = ((sqInt)((usqInt)(sourcePixel) << shift));
		fieldMask = ((sqInt)((usqInt)(fieldMask) << shift));
		result = moved & fieldMask;

		moved = ((sqInt)((usqInt)(moved) << shift));
		fieldMask = ((sqInt)((usqInt)(fieldMask) << nBitsOut));
		result = result + (moved & fieldMask);

		moved = ((sqInt)((usqInt)(moved) << shift));
		fieldMask = ((sqInt)((usqInt)(fieldMask) << nBitsOut));
		return result + (moved & fieldMask);
	}

	/* Compress to fewer bits by truncation */
	if (sourcePixel == 0) {
		return sourcePixel;
	}
	shift = nBitsIn - nBitsOut;
	fieldMask = (1U << nBitsOut) - 1;

	moved = ((usqInt) sourcePixel) >> shift;
	result = moved & fieldMask;

	moved = ((usqInt) moved) >> shift;
	fieldMask = ((sqInt)((usqInt)(fieldMask) << nBitsOut));
	result = result + (moved & fieldMask);

	moved = ((usqInt) moved) >> shift;
	fieldMask = ((sqInt)((usqInt)(fieldMask) << nBitsOut));
	result = result + (moved & fieldMask);

	return (result == 0) ? 1 : result;
}

	/* BitBltSimulation>>#rgbMax:with: */
static unsigned int
rgbMaxwith(unsigned int sourceWord, unsigned int destinationWord)
{
	if (destDepth < 16) {

		/* Max each pixel separately */
		return partitionedMaxwithnBitsnPartitions(sourceWord, destinationWord, destDepth, destPPW);
	}
	if (destDepth == 16) {

		/* Max RGB components of each pixel separately */
		return (partitionedMaxwithnBitsnPartitions(sourceWord, destinationWord, 5, 3)) + (((usqInt)((partitionedMaxwithnBitsnPartitions(((usqInt) sourceWord) >> 16, ((usqInt) destinationWord) >> 16, 5, 3))) << 16));
	}
	else {

		/* Max RGBA components of the pixel separately */
		return partitionedMaxwithnBitsnPartitions(sourceWord, destinationWord, 8, 4);
	}
}

	/* BitBltSimulation>>#rgbMinInvert:with: */
static unsigned int
rgbMinInvertwith(unsigned int wordToInvert, unsigned int destinationWord)
{
	unsigned int sourceWord;

	sourceWord = (unsigned int)~wordToInvert;
	if (destDepth < 16) {

		/* Min each pixel separately */
		return partitionedMinwithnBitsnPartitions(sourceWord, destinationWord, destDepth, destPPW);
	}
	if (destDepth == 16) {

		/* Min RGB components of each pixel separately */
		return (partitionedMinwithnBitsnPartitions(sourceWord, destinationWord, 5, 3)) + (((usqInt)((partitionedMinwithnBitsnPartitions(((usqInt) sourceWord) >> 16, ((usqInt) destinationWord) >> 16, 5, 3))) << 16));
	}
	else {

		/* Min RGBA components of the pixel separately */
		return partitionedMinwithnBitsnPartitions(sourceWord, destinationWord, 8, 4);
	}
}

	/* BitBltSimulation>>#rgbMin:with: */
static unsigned int
rgbMinwith(unsigned int sourceWord, unsigned int destinationWord)
{
	if (destDepth < 16) {

		/* Min each pixel separately */
		return partitionedMinwithnBitsnPartitions(sourceWord, destinationWord, destDepth, destPPW);
	}
	if (destDepth == 16) {

		/* Min RGB components of each pixel separately */
		return (partitionedMinwithnBitsnPartitions(sourceWord, destinationWord, 5, 3)) + (((usqInt)((partitionedMinwithnBitsnPartitions(((usqInt) sourceWord) >> 16, ((usqInt) destinationWord) >> 16, 5, 3))) << 16));
	}
	else {

		/* Min RGBA components of the pixel separately */
		return partitionedMinwithnBitsnPartitions(sourceWord, destinationWord, 8, 4);
	}
}

	/* BitBltSimulation>>#rgbMul:with: */
static unsigned int
rgbMulwith(unsigned int sourceWord, unsigned int destinationWord)
{
	if (destDepth < 16) {

		/* Mul each pixel separately */
		return partitionedMulwithnBitsnPartitions(sourceWord, destinationWord, destDepth, destPPW);
	}
	if (destDepth == 16) {

		/* Mul RGB components of each pixel separately */
		return (partitionedMulwithnBitsnPartitions(sourceWord, destinationWord, 5, 3)) + (((usqInt)((partitionedMulwithnBitsnPartitions(((usqInt) sourceWord) >> 16, ((usqInt) destinationWord) >> 16, 5, 3))) << 16));
	}
	else {

		/* Mul RGBA components of the pixel separately */
		return partitionedMulwithnBitsnPartitions(sourceWord, destinationWord, 8, 4);
	}
}

	/* BitBltSimulation>>#rgbSub:with: */
static unsigned int
rgbSubwith(unsigned int sourceWord, unsigned int destinationWord)
{
	if (destDepth < 16) {

		/* Sub each pixel separately */
		return partitionedSubfromnBitsnPartitions(sourceWord, destinationWord, destDepth, destPPW);
	}
	if (destDepth == 16) {

		/* Sub RGB components of each pixel separately */
		return (partitionedSubfromnBitsnPartitions(sourceWord, destinationWord, 5, 3)) + (((usqInt)((partitionedSubfromnBitsnPartitions(((usqInt) sourceWord) >> 16, ((usqInt) destinationWord) >> 16, 5, 3))) << 16));
	}
	else {

		/* Sub RGBA components of the pixel separately */
		return partitionedSubfromnBitsnPartitions(sourceWord, destinationWord, 8, 4);
	}
}


/*	Record the interpreter proxy and check that its version is compatible:
	the major version must equal VM_PROXY_MAJOR and the minor version must
	be at least VM_PROXY_MINOR. When the check passes and the plugin is not
	built with SQUEAK_BUILTIN_PLUGIN, the proxy functions used by this
	plugin are copied into file-level variables of the same names.
	Answers non-zero if the versions are compatible, zero otherwise.
	Note: This is coded so that it can be run in Squeak. */

	/* InterpreterPlugin>>#setInterpreter: */
EXPORT(sqInt)
setInterpreter(struct VirtualMachine *anInterpreter)
{
	sqInt ok;

	/* The proxy is stored even if the version check below fails */
	interpreterProxy = anInterpreter;
	ok = ((interpreterProxy->majorVersion()) == (VM_PROXY_MAJOR))
	 && ((interpreterProxy->minorVersion()) >= (VM_PROXY_MINOR));
	if (ok) {
		
#if !defined(SQUEAK_BUILTIN_PLUGIN)
		byteSizeOf = interpreterProxy->byteSizeOf;
		failed = interpreterProxy->failed;
		fetchIntegerofObject = interpreterProxy->fetchIntegerofObject;
		fetchLong32ofObject = interpreterProxy->fetchLong32ofObject;
		fetchPointerofObject = interpreterProxy->fetchPointerofObject;
		firstIndexableField = interpreterProxy->firstIndexableField;
		floatValueOf = interpreterProxy->floatValueOf;
		integerObjectOf = interpreterProxy->integerObjectOf;
		integerValueOf = interpreterProxy->integerValueOf;
		ioLoadFunctionFrom = interpreterProxy->ioLoadFunctionFrom;
		isArray = interpreterProxy->isArray;
		isBytes = interpreterProxy->isBytes;
		isIntegerObject = interpreterProxy->isIntegerObject;
		isPointers = interpreterProxy->isPointers;
		/* Only taken from the proxy when compiled against proxy version
		   1.15 or later; otherwise cleared unless the name is a macro */
#if VM_PROXY_MAJOR > 1 || (VM_PROXY_MAJOR == 1 && VM_PROXY_MINOR >= 15)
		isPositiveMachineIntegerObject = interpreterProxy->isPositiveMachineIntegerObject;
#else
#if !defined(isPositiveMachineIntegerObject)
		isPositiveMachineIntegerObject = 0;
#endif
#endif
		isWords = interpreterProxy->isWords;
		isWordsOrBytes = interpreterProxy->isWordsOrBytes;
		methodArgumentCount = interpreterProxy->methodArgumentCount;
		methodReturnInteger = interpreterProxy->methodReturnInteger;
		methodReturnReceiver = interpreterProxy->methodReturnReceiver;
		nilObject = interpreterProxy->nilObject;
		pop = interpreterProxy->pop;
		popthenPush = interpreterProxy->popthenPush;
		positive32BitIntegerFor = interpreterProxy->positive32BitIntegerFor;
		positive32BitValueOf = interpreterProxy->positive32BitValueOf;
		positive64BitValueOf = interpreterProxy->positive64BitValueOf;
		primitiveFail = interpreterProxy->primitiveFail;
		primitiveFailFor = interpreterProxy->primitiveFailFor;
		showDisplayBitsLeftTopRightBottom = interpreterProxy->showDisplayBitsLeftTopRightBottom;
		slotSizeOf = interpreterProxy->slotSizeOf;
		stackIntegerValue = interpreterProxy->stackIntegerValue;
		stackObjectValue = interpreterProxy->stackObjectValue;
		stackValue = interpreterProxy->stackValue;
		/* Only taken from the proxy when compiled against proxy version
		   1.14 or later; otherwise cleared unless the name is a macro */
#if VM_PROXY_MAJOR > 1 || (VM_PROXY_MAJOR == 1 && VM_PROXY_MINOR >= 14)
		statNumGCs = interpreterProxy->statNumGCs;
#else
#if !defined(statNumGCs)
		statNumGCs = 0;
#endif
#endif
		storeIntegerofObjectwithValue = interpreterProxy->storeIntegerofObjectwithValue;
#endif /* !defined(SQUEAK_BUILTIN_PLUGIN) */
	}
	return ok;
}


/*	WARNING: For WarpBlt w/ smoothing the source depth is wrong here! */

	/* BitBltSimulation>>#setupColorMasks */
/*	Pick the bits-per-channel of the source pixels and of the conversion
	target, then let setupColorMasksFromto() build the RGB mask and shift
	tables for that conversion.
	Answers null without setting anything up when the source is 8 bits
	deep or less, or when cmBitsPerColor is 0 and the destination is 8
	bits deep or less. Otherwise answers 0. */
static sqInt
setupColorMasks(void)
{
	sqInt srcChannelBits;
	sqInt dstChannelBits;

	if (sourceDepth <= 8) {
		return null;
	}

	/* 16 bit pixels use 5 bits per channel, 32 bit pixels use 8;
	   any other depth leaves the width at 0 */
	srcChannelBits = (sourceDepth == 32
		? 8
		: (sourceDepth == 16
			? 5
			: 0));

	if (cmBitsPerColor != 0) {
		dstChannelBits = cmBitsPerColor;
	}
	else {

		/* No color map width given: convert to destDepth */
		if (destDepth <= 8) {
			return null;
		}
		dstChannelBits = (destDepth == 32
			? 8
			: (destDepth == 16
				? 5
				: 0));
	}
	setupColorMasksFromto(srcChannelBits, dstChannelBits);
	return 0;
}


/*	Setup color masks for converting an incoming RGB pixel value from srcBits
	to targetBits.
 */

	/* BitBltSimulation>>#setupColorMasksFrom:to: */
/*	Fill the static mask and shift tables used to convert an RGB pixel
	with srcBits per channel into one with targetBits per channel, and
	publish them through cmMaskTable / cmShiftTable.
	When both widths are equal nothing is changed. Otherwise cmFlags gains
	ColorMapPresent and ColorMapFixedPart. Always answers 0. */
static sqInt
setupColorMasksFromto(sqInt srcBits, sqInt targetBits)
{
	static unsigned int masks[4] = {0, 0, 0, 0};
	static int shifts[4] = {0, 0, 0, 0};
	sqInt deltaBits;
	int channelMask;

	deltaBits = targetBits - srcBits;
	if (deltaBits == 0) {

		/* Same width on both sides: no conversion tables needed */
		return 0;
	}
	if (deltaBits > 0) {

		/* Widening: keep every source bit of each channel.
		   The alpha mask entry is left untouched in this case. */
		channelMask = (1U << srcBits) - 1;
		masks[RedIndex] = (((sqInt)((usqInt)(channelMask) << (srcBits * 2))));
		masks[GreenIndex] = (((sqInt)((usqInt)(channelMask) << srcBits)));
		masks[BlueIndex] = channelMask;
	}
	else {

		/* Narrowing: keep only the top targetBits of each source channel */
		channelMask = (1U << targetBits) - 1;
		masks[RedIndex] = (((sqInt)((usqInt)(channelMask) << ((srcBits * 2) - deltaBits))));
		masks[GreenIndex] = (((sqInt)((usqInt)(channelMask) << (srcBits - deltaBits))));
		masks[BlueIndex] = (((sqInt)((usqInt)(channelMask) << (0 - deltaBits))));
		masks[AlphaIndex] = 0;
	}

	/* Each channel moves by a multiple of the per-channel width difference */
	shifts[AlphaIndex] = 0;
	shifts[BlueIndex] = deltaBits;
	shifts[GreenIndex] = (deltaBits * 2);
	shifts[RedIndex] = (deltaBits * 3);

	cmMaskTable = masks;
	cmShiftTable = shifts;
	cmFlags = cmFlags | (ColorMapPresent | ColorMapFixedPart);
	return 0;
}

	/* BitBltSimulation>>#showDisplayBits */
/*	Hand the affected rectangle of the destination form to
	showDisplayBitsLeftTopRightBottom(). Always answers 0. */
static sqInt
showDisplayBits(void)
{
	/* The GC count differs from the one recorded at invocation,
	   so refresh the cached destination/source form state first */
	if ((statNumGCs()) != numGCsOnInvocation) {
		reloadDestAndSourceForms();
	}
	showDisplayBitsLeftTopRightBottom(destForm, affectedL, affectedT, affectedR, affectedB);
	return 0;
}


/*	This is only used when source and dest are same depth,
	ie, when the barrel-shift copy loop is used. */

	/* BitBltSimulation>>#sourceSkewAndPointerInit */
/*	Compute preload, skew, sourceIndex and sourceDelta for a copy in which
	source and destination share depth, pixel order and pixels per word
	(asserted below). Always answers 0. */
static sqInt
sourceSkewAndPointerInit(void)
{
	sqInt dstPixelInWord;
	unsigned int firstWordMask;
	sqInt firstWordPixels;
	sqInt pixelIndexMask;
	sqInt srcPixelInWord;

	assert((destPPW == sourcePPW)
	 && ((destMSB == sourceMSB)
	 && (destDepth == sourceDepth)));

	/* Pixel-within-word mask; relies on destPPW being a power of two */
	pixelIndexMask = destPPW - 1;
	srcPixelInWord = sx & pixelIndexMask;
	dstPixelInWord = dx & pixelIndexMask;

	/* Number of pixels supplied by the first source word */
	if (hDir > 0) {
		firstWordPixels = sourcePPW - (sx & pixelIndexMask);
	}
	else {
		firstWordPixels = (((sx + bbW) - 1) & pixelIndexMask) + 1;
	}

	/* Mask of the bits that first source word can supply */
	if (destMSB) {
		firstWordMask = ((usqInt) AllOnes) >> (32 - (firstWordPixels * destDepth));
	}
	else {
		firstWordMask = ((usqInt)(AllOnes) << (32 - (firstWordPixels * destDepth)));
	}

	/* If mask1 needs bits the first word lacks, one extra word is preloaded */
	preload = (firstWordMask & mask1) != mask1;

	/* Shift from source to destination position, in bits */
	if (sourceMSB) {
		skew = destDepth * (srcPixelInWord - dstPixelInWord);
	}
	else {
		skew = destDepth * (dstPixelInWord - srcPixelInWord);
	}

	/* Byte address of the first source word and the step from the end of
	   one line to the start of the next */
	sourceIndex = (sourceBits + (sy * sourcePitch)) + ((sx / (32 / sourceDepth)) * 4);
	sourceDelta = (sourcePitch * vDir) - (4 * (nWords * hDir));

	if (preload) {

		/* Wrap the skew to the other side of the word ... */
		if (skew < 0) {
			skew = skew + 32;
		}
		else {
			skew = skew - 32;
		}

		/* ... and compensate for the extra source word fetched */
		sourceDelta -= 4 * hDir;
	}
	assert(!((preload
 && (skew == 0))));
	assert(((skew >= -31) && (skew <= 0x1F)));
	return 0;
}

	/* BitBltSimulation>>#sourceWord:with: */
/*	Combination rule that answers the source word unchanged; the
	destination word is ignored. */
static unsigned int
sourceWordwith(unsigned int sourceWord, unsigned int destinationWord)
{
	(void) destinationWord;
	return sourceWord;
}

	/* SmartSyntaxInterpreterPlugin>>#sqAssert: */
/*	Assertion hook. No debug code is generated here, so the condition is
	simply answered back to the caller. */
static sqInt
sqAssert(sqInt aBool)
{
	return aBool;
}

	/* BitBltSimulation>>#subWord:with: */
/*	Combination rule that answers sourceWord minus destinationWord, using
	unsigned (wrapping) arithmetic. */
static unsigned int
subWordwith(unsigned int sourceWord, unsigned int destinationWord)
{
	unsigned int difference = sourceWord;

	difference -= destinationWord;
	return difference;
}


/*	Tally pixels into the color map. Those tallied are exactly those
	in the destination rectangle. Note that the source should be 
	specified == destination, in order for the proper color map checks 
	to be performed at setup. */

	/* BitBltSimulation>>#tallyIntoMap:with: */
static unsigned int
tallyIntoMapwith(unsigned int sourceWord, unsigned int destinationWord)
{
	sqInt d;
	sqInt d1;
	sqInt destPix;
	sqInt destPix1;
	unsigned int destShifted;
	sqInt i;
	sqInt mapIndex;
	sqInt mask;
	sqInt mask3;
	unsigned int maskShifted;
	unsigned int pixMask;
	unsigned int pixVal;
	sqInt srcPix;
	sqInt srcPix1;
	sqInt value;

	if (!((cmFlags & (ColorMapPresent | ColorMapIndexedPart)) == (ColorMapPresent | ColorMapIndexedPart))) {
		return destinationWord;
	}
	pixMask = maskTable[destDepth];
	destShifted = destinationWord;
	maskShifted = destMask;
	for (i = 1; i <= destPPW; i += 1) {
		if (!((maskShifted & pixMask) == 0)) {

			/* Only tally pixels within the destination rectangle */
			pixVal = destShifted & pixMask;
			if (destDepth < 16) {
				mapIndex = pixVal;
			}
			else {
				if (destDepth == 16) {
					/* begin rgbMap:from:to: */
					if (((d = cmBitsPerColor - 5)) > 0) {

						/* Expand to more bits by zero-fill */

						/* Transfer mask */
						mask = (1U << 5) - 1;
						srcPix = ((usqInt)(pixVal) << d);
						mask = ((sqInt)((usqInt)(mask) << d));
						destPix = srcPix & mask;
						mask = ((sqInt)((usqInt)(mask) << cmBitsPerColor));
						srcPix = ((sqInt)((usqInt)(srcPix) << d));
						mapIndex = (destPix + (srcPix & mask)) + ((((sqInt)((usqInt)(srcPix) << d))) & (((sqInt)((usqInt)(mask) << cmBitsPerColor))));
						goto l1;
					}
					else {

						/* Compress to fewer bits by truncation */
						if (d == 0) {
							if (5 == 5) {

								/* Sometimes called with 16 bits, though pixel is 15,
								   but we must never return more than 15. */
								mapIndex = pixVal & 0x7FFF;
								goto l1;
							}
							if (5 == 8) {

								/* Sometimes called with 32 bits, though pixel is 24,
								   but we must never return more than 24. */
								mapIndex = pixVal & 0xFFFFFF;
								goto l1;
							}
							mapIndex = pixVal;
							goto l1;
						}
						if (pixVal == 0) {
							mapIndex = pixVal;
							goto l1;
						}
						d = 5 - cmBitsPerColor;

						/* Transfer mask */
						mask = (1U << cmBitsPerColor) - 1;
						srcPix = ((usqInt) pixVal) >> d;
						destPix = srcPix & mask;
						mask = ((sqInt)((usqInt)(mask) << cmBitsPerColor));
						srcPix = ((usqInt) srcPix) >> d;
						destPix = (destPix + (srcPix & mask)) + ((((usqInt) srcPix) >> d) & (((sqInt)((usqInt)(mask) << cmBitsPerColor))));
						if (destPix == 0) {
							mapIndex = 1;
							goto l1;
						}
						mapIndex = destPix;
						goto l1;
					}
	l1:	/* end rgbMap:from:to: */;
				}
				else {
					/* begin rgbMap:from:to: */
					if (((d1 = cmBitsPerColor - 8)) > 0) {

						/* Expand to more bits by zero-fill */

						/* Transfer mask */
						mask3 = (1U << 8) - 1;
						srcPix1 = ((usqInt)(pixVal) << d1);
						mask3 = ((sqInt)((usqInt)(mask3) << d1));
						destPix1 = srcPix1 & mask3;
						mask3 = ((sqInt)((usqInt)(mask3) << cmBitsPerColor));
						srcPix1 = ((sqInt)((usqInt)(srcPix1) << d1));
						mapIndex = (destPix1 + (srcPix1 & mask3)) + ((((sqInt)((usqInt)(srcPix1) << d1))) & (((sqInt)((usqInt)(mask3) << cmBitsPerColor))));
						goto l2;
					}
					else {

						/* Compress to fewer bits by truncation */
						if (d1 == 0) {
							if (8 == 5) {

								/* Sometimes called with 16 bits, though pixel is 15,
								   but we must never return more than 15. */
								mapIndex = pixVal & 0x7FFF;
								goto l2;
							}
							if (8 == 8) {

								/* Sometimes called with 32 bits, though pixel is 24,
								   but we must never return more than 24. */
								mapIndex = pixVal & 0xFFFFFF;
								goto l2;
							}
							mapIndex = pixVal;
							goto l2;
						}
						if (pixVal == 0) {
							mapIndex = pixVal;
							goto l2;
						}
						d1 = 8 - cmBitsPerColor;

						/* Transfer mask */
						mask3 = (1U << cmBitsPerColor) - 1;
						srcPix1 = ((usqInt) pixVal) >> d1;
						destPix1 = srcPix1 & mask3;
						mask3 = ((sqInt)((usqInt)(mask3) << cmBitsPerColor));
						srcPix1 = ((usqInt) srcPix1) >> d1;
						destPix1 = (destPix1 + (srcPix1 & mask3)) + ((((usqInt) srcPix1) >> d1) & (((sqInt)((usqInt)(mask3) << cmBitsPerColor))));
						if (destPix1 == 0) {
							mapIndex = 1;
							goto l2;
						}
						mapIndex = destPix1;
						goto l2;
					}
	l2:	/* end rgbMap:from:to: */;
				}
			}
			/* begin tallyMapAt:put: */
			value = (cmLookupTable[mapIndex & cmMask]) + 1;
			cmLookupTable[mapIndex & cmMask] = value;
		}
		maskShifted = ((usqInt) maskShifted) >> destDepth;
		destShifted = ((usqInt) destShifted) >> destDepth;
	}
	return destinationWord;
}


/*	Shortcut for stuff that's being run from the balloon engine.
	Since we do this at each scan line we should avoid the expensive 
	setup for source and destination. */
/*	We need a source. */

	/* BitBltSimulation>>#tryCopyingBitsQuickly */
/*	Fast path for combination rules 34 and 41 with a 32 bit source that is
	not also the destination. When a specialised routine applies it is
	run, the affected rectangle is recorded, and 1 is answered. Answers 0
	when the fast path does not apply. */
static sqInt
tryCopyingBitsQuickly(void)
{
	/* We need a source */
	if (noSource) {
		return 0;
	}
	if ((combinationRule != 34)
	 && (combinationRule != 41)) {
		return 0;
	}
	if (sourceDepth != 32) {
		return 0;
	}
	if (sourceForm == destForm) {
		return 0;
	}

	if (combinationRule == 41) {
		switch (destDepth) {
		case 32:
			rgbComponentAlpha32();
			break;
		case 16:
			rgbComponentAlpha16();
			break;
		case 8:
			rgbComponentAlpha8();
			break;
		default:
			return 0;
		}
	}
	else {
		if (destDepth < 8) {
			return 0;
		}

		/* An 8 bit destination is only handled with a color map */
		if ((destDepth == 8)
		 && ((cmFlags & ColorMapPresent) == 0)) {
			return 0;
		}
		if (destDepth == 32) {
			alphaSourceBlendBits32();
		}
		if (destDepth == 16) {
			alphaSourceBlendBits16();
		}
		if (destDepth == 8) {
			alphaSourceBlendBits8();
		}
	}

	/* Record the rectangle that was drawn into */
	affectedL = dx;
	affectedR = dx + bbW;
	affectedT = dy;
	affectedB = dy + bbH;
	return 1;
}


/*	Unlock the bits of any OS surfaces. */
/*	See the comment in lockSurfaces. Similar rules apply. That is, the area
	provided in ioUnlockSurface
	can be used to determine the dirty region after drawing. If a source is
	unlocked, then the area will
	be (0,0,0,0) to indicate that no portion is dirty. Note that if a GC
	happens during unlockSourceFn
	(if it is effectively a callback) no matter. No bits are touched after
	unlock.  */

	/* BitBltSimulation>>#unlockSurfaces */
/*	Release the surface locks held for the destination form and, if there
	is one, the source form. A form counts as surface-backed when its bits
	slot holds an integer handle. The destination is unlocked with the
	affected rectangle, the source with an empty (0,0,0,0) one. Does
	nothing when no lock is held or the unlock function cannot be loaded.
	Always answers 0. */
static sqInt
unlockSurfaces(void)
{
	sqInt dstHandleOop;
	sqInt dstWasUnlocked;
	sqInt srcHandleOop;

	if (!hasSurfaceLock) {
		return 0;
	}
	if ((unlockSurfaceFn == 0)
	 && (!(loadSurfacePlugin()))) {
		return 0;
	}
	/* begin ensureDestAndSourceFormsAreValid */
	if ((statNumGCs()) != numGCsOnInvocation) {
		reloadDestAndSourceForms();
	}
	dstWasUnlocked = 0;
	dstHandleOop = fetchPointerofObject(FormBitsIndex, destForm);
	if (isIntegerObject(dstHandleOop)) {

		/* The destBits are always assumed to be dirty */
		unlockSurfaceFn(integerValueOf(dstHandleOop), affectedL, affectedT, affectedR - affectedL, affectedB - affectedT);
		destPitch = 0;
		destBits = 0;
		dstWasUnlocked = 1;
	}
	if (!noSource) {
		/* begin ensureDestAndSourceFormsAreValid */
		/* checked again: the GC count may have changed during the unlock call above */
		if ((statNumGCs()) != numGCsOnInvocation) {
			reloadDestAndSourceForms();
		}
		srcHandleOop = fetchPointerofObject(FormBitsIndex, sourceForm);
		if (isIntegerObject(srcHandleOop)) {

			/* Skip the unlock when the same handle was already unlocked as the destination */
			if ((!dstWasUnlocked)
			 || (srcHandleOop != dstHandleOop)) {
				unlockSurfaceFn(integerValueOf(srcHandleOop), 0, 0, 0, 0);
			}
			sourcePitch = 0;
			sourceBits = 0;
		}
	}
	hasSurfaceLock = 0;
	return 0;
}

	/* BitBltSimulation>>#warpBits */
/*	Entry point for a warp: clip the destination, lock the surfaces, set
	up the destination masks and pointers, run warpLoop(), record the
	affected rectangle and unlock the surfaces again.
	Answers null (with an empty affected rectangle) when there is no
	source or nothing is left after clipping, the result of
	primitiveFail() when the surfaces cannot be locked, and 0 otherwise. */
static sqInt
warpBits(void)
{
	sqInt firstWordPixels;
	sqInt lastWordPixels;
	sqInt pixelIndexMask;
	sqInt savedNoSource;

	/* Clip as if there were no source, then restore the real flag */
	savedNoSource = noSource;
	noSource = 1;
	clipRange();
	noSource = savedNoSource;

	if (noSource
	 || ((bbW <= 0)
	 || (bbH <= 0))) {

		/* zero width or height; noop */
		affectedB = 0;
		affectedT = 0;
		affectedR = 0;
		affectedL = 0;
		return null;
	}
	if (!(lockSurfaces())) {
		return primitiveFail();
	}

	/* begin destMaskAndPointerInit */
	/* Pixel-within-word mask; relies on destPPW being a power of two */
	pixelIndexMask = destPPW - 1;

	/* Pixel counts covered in the first and in the last destination word */
	firstWordPixels = destPPW - (dx & pixelIndexMask);
	lastWordPixels = (((dx + bbW) - 1) & pixelIndexMask) + 1;
	if (destMSB) {
		mask1 = ((usqInt) AllOnes) >> (32 - (firstWordPixels * destDepth));
		mask2 = ((usqInt)(AllOnes) << (32 - (lastWordPixels * destDepth)));
	}
	else {
		mask1 = ((usqInt)(AllOnes) << (32 - (firstWordPixels * destDepth)));
		mask2 = ((usqInt) AllOnes) >> (32 - (lastWordPixels * destDepth));
	}
	if (bbW < firstWordPixels) {

		/* Everything fits into a single word: merge both edge masks */
		mask1 = mask1 & mask2;
		mask2 = 0;
		nWords = 1;
	}
	else {
		nWords = (((bbW - firstWordPixels) + pixelIndexMask) / destPPW) + 1;
	}

	/* defaults for no overlap with source */
	vDir = 1;
	hDir = 1;

	/* Byte address of the first word and byte step between rows.
	   Note pitch is bytes and nWords is longs, not bytes */
	destIndex = (destBits + (dy * destPitch)) + ((dx / destPPW) * 4);
	destDelta = (destPitch * vDir) - (4 * (nWords * hDir));

	warpLoop();

	/* Record the destination rectangle that was touched */
	if (hDir > 0) {
		affectedL = dx;
		affectedR = dx + bbW;
	}
	else {
		affectedL = (dx - bbW) + 1;
		affectedR = dx + 1;
	}
	if (vDir > 0) {
		affectedT = dy;
		affectedB = dy + bbH;
	}
	else {
		affectedT = (dy - bbH) + 1;
		affectedB = dy + 1;
	}
	unlockSurfaces();
	return 0;
}


/*	This version of the inner loop traverses an arbitrary quadrilateral
	source, thus producing a general affine transformation. */

	/* BitBltSimulation>>#warpLoop */
/*	Inner loop of a warp. Reads the quadrilateral's corner coordinates
	(SmallIntegers or Floats) from the slots starting at BBWarpBase of
	bitBltOop, derives the per-row increments of its two side edges
	(deltaP12x/y and deltaP43x/y), and then, for every destination row,
	steps sx/sy across the source in fixed point (FixedPt1 is
	1 << BinaryPoint), picking, color-mapping and merging pixels into the
	destination one word at a time.

	With two method arguments, stack slot 1 supplies the smoothing count
	and stack slot 0 the source map (or nil). The smoothing pixel picker
	divides by the smoothing count, so a count that is not an integer or
	is less than 1 fails the primitive here instead of reaching that
	division.

	Answers 0 on completion or when a coordinate fetch failed, and the
	result of primitiveFail() when an argument is rejected. */
static sqInt
warpLoop(void)
{
	sqInt deltaP12x;
	sqInt deltaP12y;
	sqInt deltaP43x;
	sqInt deltaP43y;
	sqInt destPix;
	unsigned int destWord;
	sqInt destWord1;
	int dstMask;
	int dstShiftInc;
	int dstShiftLeft;
	sqInt endBits;
	sqInt fieldOop;
	sqInt fieldOop1;
	sqInt fieldOop2;
	sqInt fieldOop3;
	sqInt fieldOop4;
	sqInt fieldOop5;
	sqInt fieldOop6;
	sqInt fieldOop7;
	double floatValue;
	double floatValue1;
	double floatValue2;
	double floatValue3;
	double floatValue4;
	double floatValue5;
	double floatValue6;
	double floatValue7;
	unsigned int halftoneWord;
	sqInt i;
	sqInt i1;
	sqInt mapperFlags;
	unsigned int (*mergeFnwith)(unsigned int, unsigned int);
	unsigned int mergeWord;
	sqInt nPix;
	sqInt nPix1;
	sqInt nSteps;
	sqInt pAx;
	sqInt pAy;
	sqInt pBx;
	sqInt pBy;
	sqInt pv;
	unsigned int skewWord;
	sqInt smoothingCount;
	sqInt sourceMapOop;
	unsigned int sourcePix;
	sqInt sourcePix1;
	sqInt sourcePix2;
	unsigned int sourceWord;
	unsigned int sourceWord1;
	sqInt srcIndex;
	sqInt srcIndex1;
	sqInt startBits;
	sqInt val;
	sqInt words;
	sqInt words1;
	sqInt x;
	sqInt x1;
	sqInt xDelta;
	sqInt y;
	sqInt y1;
	sqInt yDelta;

	halftoneWord = 0;
	mergeFnwith = ((unsigned int (*)(unsigned int, unsigned int)) (opTable[combinationRule + 1]));

	/* The receiver must have room for the 12 warp coordinate slots */
	if (!((slotSizeOf(bitBltOop)) >= (BBWarpBase + 12))) {
		return primitiveFail();
	}

	/* Vertical edge increments are computed over the full height */
	nSteps = height - 1;
	if (nSteps <= 0) {
		nSteps = 1;
	}
	/* begin fetchIntOrFloat:ofObject: */
	fieldOop = fetchPointerofObject(BBWarpBase, bitBltOop);
	if (isIntegerObject(fieldOop)) {
		pAx = integerValueOf(fieldOop);
		goto l9;
	}
	floatValue = floatValueOf(fieldOop);
	if (!((-2.147483648e9 <= floatValue)
		 && (floatValue <= 2.147483647e9))) {
		primitiveFail();
		pAx = 0;
		goto l9;
	}
	pAx = ((sqInt)floatValue);
	l9:	/* end fetchIntOrFloat:ofObject: */;
	/* begin fetchIntOrFloat:ofObject: */
	fieldOop1 = fetchPointerofObject(BBWarpBase + 3, bitBltOop);
	if (isIntegerObject(fieldOop1)) {
		words = integerValueOf(fieldOop1);
		goto l10;
	}
	floatValue1 = floatValueOf(fieldOop1);
	if (!((-2.147483648e9 <= floatValue1)
		 && (floatValue1 <= 2.147483647e9))) {
		primitiveFail();
		words = 0;
		goto l10;
	}
	words = ((sqInt)floatValue1);
	l10:	/* end fetchIntOrFloat:ofObject: */;
	/* begin deltaFrom:to:nSteps: */
	if (words > pAx) {
		deltaP12x = (((words - pAx) + FixedPt1) / (nSteps + 1)) + 1;
		goto l11;
	}
	else {
		if (words == pAx) {
			deltaP12x = 0;
			goto l11;
		}
		deltaP12x = 0 - ((((pAx - words) + FixedPt1) / (nSteps + 1)) + 1);
		goto l11;
	}
	l11:	/* end deltaFrom:to:nSteps: */;
	if (deltaP12x < 0) {
		pAx = words - (nSteps * deltaP12x);
	}
	/* begin fetchIntOrFloat:ofObject: */
	fieldOop2 = fetchPointerofObject(BBWarpBase + 1, bitBltOop);
	if (isIntegerObject(fieldOop2)) {
		pAy = integerValueOf(fieldOop2);
		goto l12;
	}
	floatValue2 = floatValueOf(fieldOop2);
	if (!((-2.147483648e9 <= floatValue2)
		 && (floatValue2 <= 2.147483647e9))) {
		primitiveFail();
		pAy = 0;
		goto l12;
	}
	pAy = ((sqInt)floatValue2);
	l12:	/* end fetchIntOrFloat:ofObject: */;
	/* begin fetchIntOrFloat:ofObject: */
	fieldOop3 = fetchPointerofObject(BBWarpBase + 4, bitBltOop);
	if (isIntegerObject(fieldOop3)) {
		words = integerValueOf(fieldOop3);
		goto l13;
	}
	floatValue3 = floatValueOf(fieldOop3);
	if (!((-2.147483648e9 <= floatValue3)
		 && (floatValue3 <= 2.147483647e9))) {
		primitiveFail();
		words = 0;
		goto l13;
	}
	words = ((sqInt)floatValue3);
	l13:	/* end fetchIntOrFloat:ofObject: */;
	/* begin deltaFrom:to:nSteps: */
	if (words > pAy) {
		deltaP12y = (((words - pAy) + FixedPt1) / (nSteps + 1)) + 1;
		goto l14;
	}
	else {
		if (words == pAy) {
			deltaP12y = 0;
			goto l14;
		}
		deltaP12y = 0 - ((((pAy - words) + FixedPt1) / (nSteps + 1)) + 1);
		goto l14;
	}
	l14:	/* end deltaFrom:to:nSteps: */;
	if (deltaP12y < 0) {
		pAy = words - (nSteps * deltaP12y);
	}
	/* begin fetchIntOrFloat:ofObject: */
	fieldOop4 = fetchPointerofObject(BBWarpBase + 9, bitBltOop);
	if (isIntegerObject(fieldOop4)) {
		pBx = integerValueOf(fieldOop4);
		goto l15;
	}
	floatValue4 = floatValueOf(fieldOop4);
	if (!((-2.147483648e9 <= floatValue4)
		 && (floatValue4 <= 2.147483647e9))) {
		primitiveFail();
		pBx = 0;
		goto l15;
	}
	pBx = ((sqInt)floatValue4);
	l15:	/* end fetchIntOrFloat:ofObject: */;
	/* begin fetchIntOrFloat:ofObject: */
	fieldOop5 = fetchPointerofObject(BBWarpBase + 6, bitBltOop);
	if (isIntegerObject(fieldOop5)) {
		words = integerValueOf(fieldOop5);
		goto l16;
	}
	floatValue5 = floatValueOf(fieldOop5);
	if (!((-2.147483648e9 <= floatValue5)
		 && (floatValue5 <= 2.147483647e9))) {
		primitiveFail();
		words = 0;
		goto l16;
	}
	words = ((sqInt)floatValue5);
	l16:	/* end fetchIntOrFloat:ofObject: */;
	/* begin deltaFrom:to:nSteps: */
	if (words > pBx) {
		deltaP43x = (((words - pBx) + FixedPt1) / (nSteps + 1)) + 1;
		goto l17;
	}
	else {
		if (words == pBx) {
			deltaP43x = 0;
			goto l17;
		}
		deltaP43x = 0 - ((((pBx - words) + FixedPt1) / (nSteps + 1)) + 1);
		goto l17;
	}
	l17:	/* end deltaFrom:to:nSteps: */;
	if (deltaP43x < 0) {
		pBx = words - (nSteps * deltaP43x);
	}
	/* begin fetchIntOrFloat:ofObject: */
	fieldOop6 = fetchPointerofObject(BBWarpBase + 10, bitBltOop);
	if (isIntegerObject(fieldOop6)) {
		pBy = integerValueOf(fieldOop6);
		goto l18;
	}
	floatValue6 = floatValueOf(fieldOop6);
	if (!((-2.147483648e9 <= floatValue6)
		 && (floatValue6 <= 2.147483647e9))) {
		primitiveFail();
		pBy = 0;
		goto l18;
	}
	pBy = ((sqInt)floatValue6);
	l18:	/* end fetchIntOrFloat:ofObject: */;
	/* begin fetchIntOrFloat:ofObject: */
	fieldOop7 = fetchPointerofObject(BBWarpBase + 7, bitBltOop);
	if (isIntegerObject(fieldOop7)) {
		words = integerValueOf(fieldOop7);
		goto l19;
	}
	floatValue7 = floatValueOf(fieldOop7);
	if (!((-2.147483648e9 <= floatValue7)
		 && (floatValue7 <= 2.147483647e9))) {
		primitiveFail();
		words = 0;
		goto l19;
	}
	words = ((sqInt)floatValue7);
	l19:	/* end fetchIntOrFloat:ofObject: */;
	/* begin deltaFrom:to:nSteps: */
	if (words > pBy) {
		deltaP43y = (((words - pBy) + FixedPt1) / (nSteps + 1)) + 1;
		goto l20;
	}
	else {
		if (words == pBy) {
			deltaP43y = 0;
			goto l20;
		}
		deltaP43y = 0 - ((((pBy - words) + FixedPt1) / (nSteps + 1)) + 1);
		goto l20;
	}
	l20:	/* end deltaFrom:to:nSteps: */;
	if (deltaP43y < 0) {
		pBy = words - (nSteps * deltaP43y);
	}

	/* Any of the coordinate fetches above may have failed */
	if (failed()) {
		return 0;
	}
	if ((methodArgumentCount()) == 2) {
		smoothingCount = stackIntegerValue(1);
		if ((failed())
		 || (smoothingCount < 1)) {

			/* The smoothing count is later used as a divisor and as a
			   loop count; a non-integer argument or a count below 1
			   must not reach the smoothing code */
			return primitiveFail();
		}
		sourceMapOop = stackValue(0);
		if (sourceMapOop == (nilObject())) {
			if (sourceDepth < 16) {

				/* color map is required to smooth non-RGB dest */
				return primitiveFail();
			}
		}
		else {
			/* NOTE(review): 1U << sourceDepth is undefined for a depth of 32 - confirm the intended check */
			if ((slotSizeOf(sourceMapOop)) < (1U << sourceDepth)) {

				/* sourceMap must be long enough for sourceDepth */
				return primitiveFail();
			}
			sourceMapOop = oopForPointer(firstIndexableField(sourceMapOop));
		}
	}
	else {
		smoothingCount = 1;
		sourceMapOop = nilObject();
	}

	/* From here on nSteps counts horizontal steps across one row */
	nSteps = width - 1;
	if (nSteps <= 0) {
		nSteps = 1;
	}
	startBits = destPPW - (dx & (destPPW - 1));
	endBits = (((dx + bbW) - 1) & (destPPW - 1)) + 1;
	if (bbW < startBits) {
		startBits = bbW;
	}
	if (destY < clipY) {

		/* Advance increments if there was clipping in y */
		pAx += (clipY - destY) * deltaP12x;
		pAy += (clipY - destY) * deltaP12y;
		pBx += (clipY - destY) * deltaP43x;
		pBy += (clipY - destY) * deltaP43y;
	}
	/* begin warpLoopSetup */
	warpSrcShift = 0;

	/* recycle temp */
	words1 = sourceDepth;
	while (!(words1 == 1)) {
		warpSrcShift += 1;
		words1 = ((usqInt) words1) >> 1;
	}

	/* warpAlignShift: Shift for aligning x position to word boundary */
	warpSrcMask = maskTable[sourceDepth];

	/* warpAlignMask: Mask for extracting the pixel position from an x position */
	warpAlignShift = 5 - warpSrcShift;

	/* Setup the lookup table for source bit shifts */
	/* warpBitShiftTable: given a sub-word x value what's the bit shift? */
	warpAlignMask = (1U << warpAlignShift) - 1;
	for (i1 = 0; i1 <= warpAlignMask; i1 += 1) {
		if (sourceMSB) {
			warpBitShiftTable[i1] = (32 - (((sqInt)((usqInt)((i1 + 1)) << warpSrcShift))));
		}
		else {
			warpBitShiftTable[i1] = (((sqInt)((usqInt)(i1) << warpSrcShift)));
		}
	}
	if ((smoothingCount > 1)
	 && ((cmFlags & ColorMapNewStyle) == 0)) {
		if (cmLookupTable == null) {
			if (destDepth == 16) {
				setupColorMasksFromto(8, 5);
			}
		}
		else {
			setupColorMasksFromto(8, cmBitsPerColor);
		}
	}
	mapperFlags = cmFlags & ((unsigned int)~ColorMapNewStyle);
	if (destMSB) {
		dstShiftInc = 0 - destDepth;
		dstShiftLeft = 32 - destDepth;
	}
	else {
		dstShiftInc = destDepth;
		dstShiftLeft = 0;
	}
	if (noHalftone) {
		halftoneWord = AllOnes;
	}
	for (i = 1; i <= bbH; i += 1) {

		/* here is the vertical loop... */
		/* begin deltaFrom:to:nSteps: */
		if (pBx > pAx) {
			xDelta = (((pBx - pAx) + FixedPt1) / (nSteps + 1)) + 1;
			goto l6;
		}
		else {
			if (pBx == pAx) {
				xDelta = 0;
				goto l6;
			}
			xDelta = 0 - ((((pAx - pBx) + FixedPt1) / (nSteps + 1)) + 1);
			goto l6;
		}
	l6:	/* end deltaFrom:to:nSteps: */;
		if (xDelta >= 0) {
			sx = pAx;
		}
		else {
			sx = pBx - (nSteps * xDelta);
		}
		/* begin deltaFrom:to:nSteps: */
		if (pBy > pAy) {
			yDelta = (((pBy - pAy) + FixedPt1) / (nSteps + 1)) + 1;
			goto l7;
		}
		else {
			if (pBy == pAy) {
				yDelta = 0;
				goto l7;
			}
			yDelta = 0 - ((((pAy - pBy) + FixedPt1) / (nSteps + 1)) + 1);
			goto l7;
		}
	l7:	/* end deltaFrom:to:nSteps: */;
		if (yDelta >= 0) {
			sy = pAy;
		}
		else {
			sy = pBy - (nSteps * yDelta);
		}
		if (destMSB) {
			dstBitShift = 32 - (((dx & (destPPW - 1)) + 1) * destDepth);
		}
		else {
			dstBitShift = (dx & (destPPW - 1)) * destDepth;
		}
		if (destX < clipX) {

			/* Advance increments if there was clipping in x */
			sx += (clipX - destX) * xDelta;
			sy += (clipX - destX) * yDelta;
		}
		if (!noHalftone) {
			/* begin halftoneAt: */
			halftoneWord = ((sqInt) (long32At(halftoneBase + ((((dy + i) - 1) % halftoneHeight) * 4))));
		}
		destMask = mask1;

		/* Here is the inner loop... */
		nPix = startBits;
		words = nWords;
		do {
			if (smoothingCount == 1) {

				/* Faster if not smoothing */
				/* begin warpPickSourcePixels:xDeltah:yDeltah:xDeltav:yDeltav:dstShiftInc:flags: */
				dstMask = maskTable[destDepth];
				destWord1 = 0;
				nPix1 = nPix;
				if (mapperFlags == (ColorMapPresent | ColorMapIndexedPart)) {

					/* a little optimization for (pretty crucial) blits using indexed lookups only */
					/* grab, colormap and mix in pixel */
					do {
						/* begin pickWarpPixelAtX:y: */
						if ((sx < 0)
						 || ((sy < 0)
						 || ((((x = ((usqInt) sx) >> BinaryPoint)) >= sourceWidth)
						 || (((y = ((usqInt) sy) >> BinaryPoint)) >= sourceHeight)))) {
							sourcePix = 0;
							goto l22;
						}
						srcIndex = (sourceBits + (y * sourcePitch)) + ((((usqInt) x) >> warpAlignShift) * 4);

						/* Extract pixel from word */
						assert((((usqInt)srcIndex)) < endOfSource);
						sourceWord = long32At(srcIndex);
						srcBitShift = warpBitShiftTable[x & warpAlignMask];
						sourcePix1 = (((usqInt) sourceWord) >> srcBitShift) & warpSrcMask;
						sourcePix = sourcePix1;
	l22:	/* end pickWarpPixelAtX:y: */;
						destPix = cmLookupTable[sourcePix & cmMask];
						destWord1 = destWord1 | (((sqInt)((usqInt)((destPix & dstMask)) << dstBitShift)));
						dstBitShift += dstShiftInc;
						sx += xDelta;
						sy += yDelta;
					} while(!(((nPix1 -= 1)) == 0));
				}
				else {

					/* grab, colormap and mix in pixel */
					do {
						/* begin pickWarpPixelAtX:y: */
						if ((sx < 0)
						 || ((sy < 0)
						 || ((((x1 = ((usqInt) sx) >> BinaryPoint)) >= sourceWidth)
						 || (((y1 = ((usqInt) sy) >> BinaryPoint)) >= sourceHeight)))) {
							sourcePix = 0;
							goto l24;
						}
						srcIndex1 = (sourceBits + (y1 * sourcePitch)) + ((((usqInt) x1) >> warpAlignShift) * 4);

						/* Extract pixel from word */
						assert((((usqInt)srcIndex1)) < endOfSource);
						sourceWord1 = long32At(srcIndex1);
						srcBitShift = warpBitShiftTable[x1 & warpAlignMask];
						sourcePix2 = (((usqInt) sourceWord1) >> srcBitShift) & warpSrcMask;
						sourcePix = sourcePix2;
	l24:	/* end pickWarpPixelAtX:y: */;
						/* begin mapPixel:flags: */
						pv = sourcePix;
						if ((mapperFlags & ColorMapPresent) != 0) {
							if ((mapperFlags & ColorMapFixedPart) != 0) {
								/* begin rgbMapPixel:flags: */
								val = (((((int) (cmShiftTable[0]))) < 0) ? ((usqInt) (sourcePix & (cmMaskTable[0])) >> -(((int) (cmShiftTable[0])))) : ((usqInt) (sourcePix & (cmMaskTable[0])) << (((int) (cmShiftTable[0])))));
								val = val | ((((((int) (cmShiftTable[1]))) < 0) ? ((usqInt) (sourcePix & (cmMaskTable[1])) >> -(((int) (cmShiftTable[1])))) : ((usqInt) (sourcePix & (cmMaskTable[1])) << (((int) (cmShiftTable[1]))))));
								val = val | ((((((int) (cmShiftTable[2]))) < 0) ? ((usqInt) (sourcePix & (cmMaskTable[2])) >> -(((int) (cmShiftTable[2])))) : ((usqInt) (sourcePix & (cmMaskTable[2])) << (((int) (cmShiftTable[2]))))));
								pv = val | ((((((int) (cmShiftTable[3]))) < 0) ? ((usqInt) (sourcePix & (cmMaskTable[3])) >> -(((int) (cmShiftTable[3])))) : ((usqInt) (sourcePix & (cmMaskTable[3])) << (((int) (cmShiftTable[3]))))));
								if ((pv == 0)
								 && (sourcePix != 0)) {
									pv = 1;
								}
							}
							if ((mapperFlags & ColorMapIndexedPart) != 0) {
								pv = cmLookupTable[pv & cmMask];
							}
						}
						destPix = pv;
						destWord1 = destWord1 | (((sqInt)((usqInt)((destPix & dstMask)) << dstBitShift)));
						dstBitShift += dstShiftInc;
						sx += xDelta;
						sy += yDelta;
					} while(!(((nPix1 -= 1)) == 0));
				}
				skewWord = destWord1;
			}
			else {

				/* more difficult with smoothing */
				skewWord = warpPickSmoothPixelsxDeltahyDeltahxDeltavyDeltavsourceMapsmoothingdstShiftInc(nPix, xDelta, yDelta, deltaP12x, deltaP12y, sourceMapOop, smoothingCount, dstShiftInc);
			}
			dstBitShift = dstShiftLeft;
			if (destMask == AllOnes) {

				/* avoid read-modify-write */
				mergeWord = mergeFnwith(skewWord & halftoneWord, (assert((((usqInt)destIndex)) < endOfDestination),
				long32At(destIndex)));
				/* begin dstLongAt:put: */
				long32Atput(destIndex, destMask & mergeWord);
			}
			else {

				/* General version using dest masking */
				/* begin dstLongAt: */
				assert((((usqInt)destIndex)) < endOfDestination);
				destWord = ((sqInt) (long32At(destIndex)));
				mergeWord = mergeFnwith(skewWord & halftoneWord, destWord & destMask);
				destWord = (destMask & mergeWord) | (destWord & ((unsigned int)~destMask));
				/* begin dstLongAt:put: */
				long32Atput(destIndex, destWord);
			}
			/* begin incDestIndex: */
			destIndex += 4;
			if (words == 2) {

				/* e.g., is the next word the last word? */
				/* set mask for last word in this row */
				destMask = mask2;
				nPix = endBits;
			}
			else {

				/* use fullword mask for inner loop */
				destMask = AllOnes;
				nPix = destPPW;
			}
		} while(!(((words -= 1)) == 0));

		/* Step both side edges down to the next row */
		pAx += deltaP12x;
		pAy += deltaP12y;
		pBx += deltaP43x;
		pBy += deltaP43y;
		/* begin incDestIndex: */
		destIndex += destDelta;
	}
	return 0;
}


/*	Setup values for faster pixel fetching. */

	/* BitBltSimulation>>#warpLoopSetup */
static sqInt
warpLoopSetup(void)
{
	sqInt i;
	sqInt words;


	/* warpSrcShift = log2(sourceDepth) */
	warpSrcShift = 0;

	/* recycle temp */
	words = sourceDepth;
	while (!(words == 1)) {
		warpSrcShift += 1;
		words = ((usqInt) words) >> 1;
	}

	/* warpAlignShift: Shift for aligning x position to word boundary */
	warpSrcMask = maskTable[sourceDepth];

	/* warpAlignMask: Mask for extracting the pixel position from an x position */
	warpAlignShift = 5 - warpSrcShift;

	/* Setup the lookup table for source bit shifts */
	/* warpBitShiftTable: given an sub-word x value what's the bit shift? */
	warpAlignMask = (1U << warpAlignShift) - 1;
	for (i = 0; i <= warpAlignMask; i += 1) {
		if (sourceMSB) {
			warpBitShiftTable[i] = (32 - (((sqInt)((usqInt)((i + 1)) << warpSrcShift))));
		}
		else {
			warpBitShiftTable[i] = (((sqInt)((usqInt)(i) << warpSrcShift)));
		}
	}
	return 0;
}


/*	Build one destination word containing nPixels smoothed pixels.
	Per the original note this version is only called from WarpBlt with
	smoothingCount > 1.

	For every destination pixel an n-by-n grid of source samples is read,
	starting at the global fixed-point position (sx, sy). Samples advance by
	(xDeltah, yDeltah) / n within a grid row and by (xDeltav, yDeltav) / n
	between grid rows. The tallied samples are averaged per channel
	(a, r, g, b), mapped according to the global cmFlags, masked with dstMask
	and OR'ed into the result at the global dstBitShift.

	Parameters:
	  nPixels      number of destination pixels to produce. The count is
	               tested only after the loop body has run, so 0 is not handled.
	  xDeltah,
	  yDeltah      fixed-point source step per destination pixel; also added
	               to the globals sx / sy after each pixel.
	  xDeltav,
	  yDeltav      fixed-point source step whose 1/n fraction separates the
	               rows of the sample grid.
	  sourceMap    address of a table of 32-bit entries indexed by source
	               pixel value; only read when sourceDepth < 16.
	  n            samples per grid side. Used as a divisor and as a
	               test-after-decrement loop count, so 0 is not handled.
	  dstShiftInc  amount added to the global dstBitShift after each pixel.

	Side effects: advances the globals sx, sy and dstBitShift and overwrites
	the global srcBitShift.

	Answers the assembled destination word. */

	/* BitBltSimulation>>#warpPickSmoothPixels:xDeltah:yDeltah:xDeltav:yDeltav:sourceMap:smoothing:dstShiftInc: */
static sqInt
warpPickSmoothPixelsxDeltahyDeltahxDeltavyDeltavsourceMapsmoothingdstShiftInc(sqInt nPixels, sqInt xDeltah, sqInt yDeltah, sqInt xDeltav, sqInt yDeltav, sqInt sourceMap, sqInt n, sqInt dstShiftInc)
{
	sqInt a;
	sqInt b;
	sqInt destWord;
	int dstMask;
	sqInt g;
	sqInt i;
	sqInt j;
	sqInt k;
	sqInt nPix;
	sqInt pv;
	sqInt r;
	unsigned int rgb;
	sqInt sourcePix;
	unsigned int sourceWord;
	sqInt srcIndex;
	sqInt val;
	int x;
	sqInt x1;
	sqInt xdh;
	sqInt xdv;
	int xx;
	int y;
	sqInt y1;
	sqInt ydh;
	sqInt ydv;
	int yy;


	/* nope - too much stuff in here */
	/* dstMask is applied to each mapped pixel before it is shifted into destWord */
	dstMask = maskTable[destDepth];
	destWord = 0;

	/* Sub-sample steps: one n-th of the per-pixel deltas */
	if (n == 2) {

		/* Try avoiding divides for most common n (divide by 2 is generated as shift) */
		xdh = xDeltah / 2;
		ydh = yDeltah / 2;
		xdv = xDeltav / 2;
		ydv = yDeltav / 2;
	}
	else {
		xdh = xDeltah / n;
		ydh = yDeltah / n;
		xdv = xDeltav / n;
		ydv = yDeltav / n;
	}

	/* Outer loop: one iteration per destination pixel */
	i = nPixels;
	do {
		x = sx;
		y = sy;

		/* Pick and average n*n subpixels */
		a = (r = (g = (b = 0)));

		/* actual number of pixels (not clipped and not transparent) */
		nPix = 0;

		/* j counts grid rows, k counts samples within a row */
		j = n;
		do {
			xx = x;
			yy = y;
			k = n;
			do {
				/* begin pickWarpPixelAtX:y: */
				/* A negative coordinate, or an integer part (value >> BinaryPoint)
				   at or beyond sourceWidth / sourceHeight, yields pixel value 0 */
				if ((xx < 0)
				 || ((yy < 0)
				 || ((((x1 = ((usqInt) xx) >> BinaryPoint)) >= sourceWidth)
				 || (((y1 = ((usqInt) yy) >> BinaryPoint)) >= sourceHeight)))) {
					rgb = 0;
					goto l3;
				}
				/* Address of the 32-bit word holding pixel x1 of row y1 */
				srcIndex = (sourceBits + (y1 * sourcePitch)) + ((((usqInt) x1) >> warpAlignShift) * 4);

				/* Extract pixel from word */
				assert((((usqInt)srcIndex)) < endOfSource);
				sourceWord = long32At(srcIndex);
				/* Shift for this pixel's position within the word, then mask it out */
				srcBitShift = warpBitShiftTable[x1 & warpAlignMask];
				sourcePix = (((usqInt) sourceWord) >> srcBitShift) & warpSrcMask;
				rgb = sourcePix;
	l3:	/* end pickWarpPixelAtX:y: */;
				/* Under combinationRule 25 a zero sample is skipped; under any other
				   rule every sample is tallied, including the 0 produced for
				   out-of-range positions */
				if (!((combinationRule == 25)
					 && (rgb == 0))) {

					/* If not clipped and not transparent, then tally rgb values */
					nPix += 1;
					if (sourceDepth < 16) {

						/* Get RGBA values from sourcemap table */
						rgb = long32At(sourceMap + (((usqInt)(rgb) << 2)));
					}
					else {

						/* Already in RGB format */
						if (sourceDepth == 16) {
							/* begin rgbMap16To32: */
							/* Move the three 5-bit fields up to bits 3-7, 11-15 and 19-23 */
							rgb = ((((usqInt)((rgb & 0x1F)) << 3)) | (((usqInt)((rgb & 0x3E0)) << 6))) | (((usqInt)((rgb & 0x7C00)) << 9));
						}
					}
					/* Accumulate per channel: b = bits 0-7, g = 8-15, r = 16-23, a = 24 and up */
					b += rgb & 0xFF;
					g += (((usqInt) rgb) >> 8) & 0xFF;
					r += (((usqInt) rgb) >> 16) & 0xFF;
					a += ((usqInt) rgb) >> 24;
				}
				xx += xdh;
				yy += ydh;
			} while(!(((k -= 1)) == 0));
			x += xdv;
			y += ydv;
		} while(!(((j -= 1)) == 0));
		if ((nPix == 0)
		 || ((combinationRule == 25)
		 && (nPix < ((n * n) / 2)))) {

			/* All pixels were 0, or most were transparent */
			rgb = 0;
		}
		else {

			/* normalize rgba sums */
			if (nPix == 4) {

				/* Try to avoid divides for most common n */
				r = ((usqInt) r) >> 2;
				g = ((usqInt) g) >> 2;
				b = ((usqInt) b) >> 2;
				a = ((usqInt) a) >> 2;
			}
			else {
				r = r / nPix;
				g = g / nPix;
				b = b / nPix;
				a = a / nPix;
			}

			/* map the pixel */
			/* Repack the averaged channels as a<<24 | r<<16 | g<<8 | b */
			rgb = (((((sqInt)((usqInt)(a) << 24))) + (((sqInt)((usqInt)(r) << 16)))) + (((sqInt)((usqInt)(g) << 8)))) + b;
			if (rgb == 0) {

				/* only generate zero if pixel is really transparent */
				/* NOTE(review): each averaged channel is at most 0xFF, so rgb == 0
				   appears to imply all four are 0 and this test looks unreachable -
				   confirm before relying on it */
				if ((((r + g) + b) + a) > 0) {
					rgb = 1;
				}
			}
			/* begin mapPixel:flags: */
			/* Mapping is driven by the global cmFlags here, not by a parameter */
			pv = rgb;
			if ((cmFlags & ColorMapPresent) != 0) {
				if ((cmFlags & ColorMapFixedPart) != 0) {
					/* begin rgbMapPixel:flags: */
					/* Four mask-and-shift stages OR'ed together; a negative
					   cmShiftTable entry shifts right by its magnitude, any
					   other entry shifts left */
					val = (((((int) (cmShiftTable[0]))) < 0) ? ((usqInt) (rgb & (cmMaskTable[0])) >> -(((int) (cmShiftTable[0])))) : ((usqInt) (rgb & (cmMaskTable[0])) << (((int) (cmShiftTable[0])))));
					val = val | ((((((int) (cmShiftTable[1]))) < 0) ? ((usqInt) (rgb & (cmMaskTable[1])) >> -(((int) (cmShiftTable[1])))) : ((usqInt) (rgb & (cmMaskTable[1])) << (((int) (cmShiftTable[1]))))));
					val = val | ((((((int) (cmShiftTable[2]))) < 0) ? ((usqInt) (rgb & (cmMaskTable[2])) >> -(((int) (cmShiftTable[2])))) : ((usqInt) (rgb & (cmMaskTable[2])) << (((int) (cmShiftTable[2]))))));
					pv = val | ((((((int) (cmShiftTable[3]))) < 0) ? ((usqInt) (rgb & (cmMaskTable[3])) >> -(((int) (cmShiftTable[3])))) : ((usqInt) (rgb & (cmMaskTable[3])) << (((int) (cmShiftTable[3]))))));
					/* A non-zero input must not map to 0 */
					if ((pv == 0)
					 && (rgb != 0)) {
						pv = 1;
					}
				}
				if ((cmFlags & ColorMapIndexedPart) != 0) {
					pv = cmLookupTable[pv & cmMask];
				}
			}
			rgb = pv;
		}
		/* Merge the pixel into the result, then advance the destination bit
		   position and the source position for the next pixel */
		destWord = destWord | (((usqInt)((rgb & dstMask)) << dstBitShift));
		dstBitShift += dstShiftInc;
		sx += xDeltah;
		sy += yDeltah;
	} while(!(((i -= 1)) == 0));
	return destWord;
}


/*	Build one destination word containing nPixels unsmoothed pixels.
	Per the original note this version is only called from WarpBlt with
	smoothingCount = 1.

	For every destination pixel a single source pixel is read at the global
	fixed-point position (sx, sy), colour-mapped according to mapperFlags,
	masked with dstMask and OR'ed into the result at the global dstBitShift.
	A position with a negative coordinate, or whose integer part
	(value >> BinaryPoint) is at or beyond sourceWidth / sourceHeight, reads
	as pixel value 0.

	Parameters:
	  nPixels      number of destination pixels to produce. The count is
	               tested only after the loop body has run, so 0 is not handled.
	  xDeltah,
	  yDeltah      fixed-point step added to the globals sx / sy after each pixel.
	  xDeltav,
	  yDeltav      not referenced in this function.
	  dstShiftInc  amount added to the global dstBitShift after each pixel.
	  mapperFlags  ColorMap* flag bits selecting the mapping stages.

	Side effects: advances the globals sx, sy and dstBitShift and overwrites
	the global srcBitShift.

	Answers the assembled destination word. */

	/* BitBltSimulation>>#warpPickSourcePixels:xDeltah:yDeltah:xDeltav:yDeltav:dstShiftInc:flags: */
static sqInt
warpPickSourcePixelsxDeltahyDeltahxDeltavyDeltavdstShiftIncflags(sqInt nPixels, sqInt xDeltah, sqInt yDeltah, sqInt xDeltav, sqInt yDeltav, sqInt dstShiftInc, sqInt mapperFlags)
{
	sqInt destPix;
	sqInt destWord;
	int dstMask;
	sqInt nPix;
	sqInt pv;
	unsigned int sourcePix;
	sqInt sourcePix1;
	sqInt sourcePix2;
	unsigned int sourceWord;
	unsigned int sourceWord1;
	sqInt srcIndex;
	sqInt srcIndex1;
	sqInt val;
	sqInt x;
	sqInt x1;
	sqInt y;
	sqInt y1;


	/* Yepp - this should go into warpLoop */
	/* dstMask is applied to each mapped pixel before it is shifted into destWord */
	dstMask = maskTable[destDepth];
	destWord = 0;
	nPix = nPixels;
	/* Fast path: exactly the "present" and "indexed" bits are set, so the
	   mapping is a single cmLookupTable access */
	if (mapperFlags == (ColorMapPresent | ColorMapIndexedPart)) {

		/* a little optimization for (pretty crucial) blits using indexed lookups only */
		/* grab, colormap and mix in pixel */
		do {
			/* begin pickWarpPixelAtX:y: */
			/* Out-of-range positions read as pixel value 0 */
			if ((sx < 0)
			 || ((sy < 0)
			 || ((((x = ((usqInt) sx) >> BinaryPoint)) >= sourceWidth)
			 || (((y = ((usqInt) sy) >> BinaryPoint)) >= sourceHeight)))) {
				sourcePix = 0;
				goto l1;
			}
			/* Address of the 32-bit word holding pixel x of row y */
			srcIndex = (sourceBits + (y * sourcePitch)) + ((((usqInt) x) >> warpAlignShift) * 4);

			/* Extract pixel from word */
			assert((((usqInt)srcIndex)) < endOfSource);
			sourceWord = long32At(srcIndex);
			/* Shift for this pixel's position within the word, then mask it out */
			srcBitShift = warpBitShiftTable[x & warpAlignMask];
			sourcePix1 = (((usqInt) sourceWord) >> srcBitShift) & warpSrcMask;
			sourcePix = sourcePix1;
	l1:	/* end pickWarpPixelAtX:y: */;
			destPix = cmLookupTable[sourcePix & cmMask];
			/* Merge the pixel, then advance destination bit position and source position */
			destWord = destWord | (((sqInt)((usqInt)((destPix & dstMask)) << dstBitShift)));
			dstBitShift += dstShiftInc;
			sx += xDeltah;
			sy += yDeltah;
		} while(!(((nPix -= 1)) == 0));
	}
	else {

		/* General path: same pixel fetch, followed by the full mapPixel logic */
		/* grab, colormap and mix in pixel */
		do {
			/* begin pickWarpPixelAtX:y: */
			/* Out-of-range positions read as pixel value 0 */
			if ((sx < 0)
			 || ((sy < 0)
			 || ((((x1 = ((usqInt) sx) >> BinaryPoint)) >= sourceWidth)
			 || (((y1 = ((usqInt) sy) >> BinaryPoint)) >= sourceHeight)))) {
				sourcePix = 0;
				goto l2;
			}
			/* Address of the 32-bit word holding pixel x1 of row y1 */
			srcIndex1 = (sourceBits + (y1 * sourcePitch)) + ((((usqInt) x1) >> warpAlignShift) * 4);

			/* Extract pixel from word */
			assert((((usqInt)srcIndex1)) < endOfSource);
			sourceWord1 = long32At(srcIndex1);
			/* Shift for this pixel's position within the word, then mask it out */
			srcBitShift = warpBitShiftTable[x1 & warpAlignMask];
			sourcePix2 = (((usqInt) sourceWord1) >> srcBitShift) & warpSrcMask;
			sourcePix = sourcePix2;
	l2:	/* end pickWarpPixelAtX:y: */;
			/* begin mapPixel:flags: */
			/* Without ColorMapPresent the source pixel passes through unchanged */
			pv = sourcePix;
			if ((mapperFlags & ColorMapPresent) != 0) {
				if ((mapperFlags & ColorMapFixedPart) != 0) {
					/* begin rgbMapPixel:flags: */
					/* Four mask-and-shift stages OR'ed together; a negative
					   cmShiftTable entry shifts right by its magnitude, any
					   other entry shifts left */
					val = (((((int) (cmShiftTable[0]))) < 0) ? ((usqInt) (sourcePix & (cmMaskTable[0])) >> -(((int) (cmShiftTable[0])))) : ((usqInt) (sourcePix & (cmMaskTable[0])) << (((int) (cmShiftTable[0])))));
					val = val | ((((((int) (cmShiftTable[1]))) < 0) ? ((usqInt) (sourcePix & (cmMaskTable[1])) >> -(((int) (cmShiftTable[1])))) : ((usqInt) (sourcePix & (cmMaskTable[1])) << (((int) (cmShiftTable[1]))))));
					val = val | ((((((int) (cmShiftTable[2]))) < 0) ? ((usqInt) (sourcePix & (cmMaskTable[2])) >> -(((int) (cmShiftTable[2])))) : ((usqInt) (sourcePix & (cmMaskTable[2])) << (((int) (cmShiftTable[2]))))));
					pv = val | ((((((int) (cmShiftTable[3]))) < 0) ? ((usqInt) (sourcePix & (cmMaskTable[3])) >> -(((int) (cmShiftTable[3])))) : ((usqInt) (sourcePix & (cmMaskTable[3])) << (((int) (cmShiftTable[3]))))));
					/* A non-zero input must not map to 0 */
					if ((pv == 0)
					 && (sourcePix != 0)) {
						pv = 1;
					}
				}
				if ((mapperFlags & ColorMapIndexedPart) != 0) {
					pv = cmLookupTable[pv & cmMask];
				}
			}
			destPix = pv;
			/* Merge the pixel, then advance destination bit position and source position */
			destWord = destWord | (((sqInt)((usqInt)((destPix & dstMask)) << dstBitShift)));
			dstBitShift += dstShiftInc;
			sx += xDeltah;
			sy += yDeltah;
		} while(!(((nPix -= 1)) == 0));
	}
	return destWord;
}


#ifdef SQUEAK_BUILTIN_PLUGIN

/* Export table used when SQUEAK_BUILTIN_PLUGIN is defined (in that
   configuration the head of this file redefines EXPORT() to produce static
   functions). Each row is {module name, entry name, function address} and
   the table is terminated by an all-NULL row.
   Primitive entry names carry an embedded NUL followed by one extra byte
   (\001 or \003); that byte has the same value as the corresponding
   ...AccessorDepth constant defined in the #else branch below. */
static char _m[] = "BitBltPlugin";
void* BitBltPlugin_exports[][3] = {
	{(void*)_m, "copyBits\000\003", (void*)copyBits},
	{(void*)_m, "copyBitsFromtoat", (void*)copyBitsFromtoat},
	{(void*)_m, "getModuleName", (void*)getModuleName},
	{(void*)_m, "initialiseModule", (void*)initialiseModule},
	{(void*)_m, "loadBitBltFrom", (void*)loadBitBltFrom},
	{(void*)_m, "moduleUnloaded", (void*)moduleUnloaded},
	{(void*)_m, "primitiveCompareColors\000\003", (void*)primitiveCompareColors},
	{(void*)_m, "primitiveCopyBits\000\003", (void*)primitiveCopyBits},
	{(void*)_m, "primitiveDisplayString\000\003", (void*)primitiveDisplayString},
	{(void*)_m, "primitiveDrawLoop\000\003", (void*)primitiveDrawLoop},
	{(void*)_m, "primitivePixelValueAt\000\001", (void*)primitivePixelValueAt},
	{(void*)_m, "primitiveWarpBits\000\003", (void*)primitiveWarpBits},
	{(void*)_m, "setInterpreter", (void*)setInterpreter},
	{NULL, NULL, NULL}
};

#else /* ifdef SQUEAK_BUILTIN_PLUGIN */

/* Without SQUEAK_BUILTIN_PLUGIN there is no export table; instead each
   primitive's accessor depth is published as its own EXPORTed
   <primitiveName>AccessorDepth variable. */
EXPORT(signed char) copyBitsAccessorDepth = 3;
EXPORT(signed char) primitiveCompareColorsAccessorDepth = 3;
EXPORT(signed char) primitiveCopyBitsAccessorDepth = 3;
EXPORT(signed char) primitiveDisplayStringAccessorDepth = 3;
EXPORT(signed char) primitiveDrawLoopAccessorDepth = 3;
EXPORT(signed char) primitivePixelValueAtAccessorDepth = 1;
EXPORT(signed char) primitiveWarpBitsAccessorDepth = 3;

#endif /* ifdef SQUEAK_BUILTIN_PLUGIN */
