Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

CGCompiler fails (mem alloc problem)

1 view
Skip to first unread message

Skybuck Flying

unread,
Dec 27, 2009, 3:34:08 PM12/27/09
to
Hello,

This is my test shader, to test the maximum amount of local variables
inside a shader (via an array).

The Cg compiler kind of freaks out, uses 2 gigabytes of memory, and runs out
of memory...

I could try enabling 4 GB of RAM, but maybe that's not going to cut it...

I wonder if you guys know what's going on and if there is maybe a
solution...

Also is it a "hardware-limitation-problem" ? or is it simply a compiler
problem ?

// *** Begin of TestShader.cgfx ***:

// Texture parameter sampled by SpaceShipSampler2D below.
// The <...> section holds annotations: an (empty) resource name, a UI name
// and the resource type "2D".
texture SpaceShipTexture
<
string ResourceName = ""; // must be set in fx composer gui/properties.
string UIName = "SpaceShipTexture";
string ResourceType = "2D";
>;

// 2D sampler over SpaceShipTexture: linear min/mip/mag filtering, with both
// the U and V address modes set to Clamp.
sampler2D SpaceShipSampler2D = sampler_state
{
Texture = <SpaceShipTexture>;
MinFilter = Linear;
MipFilter = Linear;
MagFilter = Linear;
AddressU = Clamp;
AddressV = Clamp;
};

// 4x4 matrix bound through the WorldViewProjection semantic; the vertex
// shader multiplies the incoming position by it.
float4x4 WorldViewProj : WorldViewProjection;

// Per-vertex input of TVertexShader_Main.
struct TVertexShaderIn
{
float3 mPosition : POSITION; // vertex position (xyz)
float2 mTextureCoordinate0 : TEXCOORD0; // first texture coordinate set
};

// Output of TVertexShader_Main.
struct TVertexShaderOut
{
float4 mPosition : POSITION; // position after the WorldViewProj multiply
float2 mTextureCoordinate0 : TEXCOORD0; // texture coordinate, passed through unchanged
};

// void routines not supported by nvidia shader debugger
/*
void TVertexShader_Main( in TVertexShaderIn ParaIn , out TVertexShaderOut
ParaOut )
{
ParaOut.mPosition = mul(WorldViewProj, float4(ParaIn.mPosition.xyz, 1.0));
ParaOut.mTextureCoordinate0 = ParaIn.mTextureCoordinate0;
}
*/

// Vertex entry point.
// Written as a value-returning function (rather than void with an "out"
// parameter) because return routines/functions do work in nvidia shader debugger.
//
// ParaIn:  vertex position and texture coordinate.
// Returns: the position extended to (x, y, z, 1.0) and multiplied by
//          WorldViewProj, plus the texture coordinate copied through.
TVertexShaderOut TVertexShader_Main( in TVertexShaderIn ParaIn )
{
TVertexShaderOut ParaOut;

// Extend the position to a float4 with w = 1.0 before the matrix multiply.
ParaOut.mPosition = mul(WorldViewProj, float4(ParaIn.mPosition.xyz, 1.0));
ParaOut.mTextureCoordinate0 = ParaIn.mTextureCoordinate0;

return ParaOut;
}

// Input of TPixelShader_Main.
struct TPixelShaderIn
{
// Despite its name this field carries the TEXCOORD0 semantic; the pixel
// shader uses it as the texture coordinate for tex2D.
float2 mPosition : TEXCOORD0;
};

// Output of TPixelShader_Main.
struct TPixelShaderOut
{
float4 mColor : COLOR; // resulting fragment color
};

// void not supported by nvidia shader debugger
/*
void TPixelShader_Main( in TPixelShaderIn ParaIn, out TPixelShaderOut
ParaOut )
{
ParaOut.mColor = tex2D( SpaceShipSampler2D, ParaIn.mPosition );
}
*/

// Fragment entry point: stress test for a large local array.
// Written as a value-returning function because return routines/functions
// do work in nvidia shader debugger.
//
// ParaIn:  interpolated TEXCOORD0 in ParaIn.mPosition.
// Returns: for fragments outside the window [0.49, 0.51] x [0.49, 0.51] the
//          color has x, y, z set to 0. For fragments inside the window the
//          shader fills a 4096-element float4 array from 64x64 texture reads,
//          runs two 4000-iteration passes over it and accumulates the result
//          into x, y, z.
//
// NOTE(review): ParaOut.mColor.w is never written on the path that skips the
// "if" body, so it is left unassigned there.
TPixelShaderOut TPixelShader_Main( in TPixelShaderIn ParaIn )
{
TPixelShaderOut ParaOut;

ParaOut.mColor.x = 0;
ParaOut.mColor.y = 0;
ParaOut.mColor.z = 0;

// Only do the expensive work for fragments whose coordinate lies in a small
// window around (0.5, 0.5).
if
(
(ParaIn.mPosition.x >= 0.49) && (ParaIn.mPosition.x <= 0.51) &&
(ParaIn.mPosition.y >= 0.49) && (ParaIn.mPosition.y <= 0.51)
)
{


// Full four-component sample; x, y, z are overwritten further down, w keeps
// the value sampled here.
ParaOut.mColor = tex2D( SpaceShipSampler2D, ParaIn.mPosition );

// The large local array this test is about: 64 * 64 = 4096 float4 entries.
float4 vArray[4096];
int vX, vY;
int vLocation;

// NOTE(review): declared without initializers; see the "manipulate bitmap"
// loop below, which reads them before anything is assigned.
int vOffset1;
int vOffset2;
int vOffset3;

// load bitmap into array
vY = 0;

while (vY < 64)
{
vX = 0;

while (vX < 64)
{
// Row-major index into the 64x64 grid.
vLocation = (vY * 64) + vX;
// NOTE(review): the integer loop counters are passed directly as texture
// coordinates. If this sampler expects normalized [0, 1] coordinates, then
// with the Clamp address modes every read past 0 would hit the edge texel --
// confirm the intended coordinate space.
vArray[vLocation] = tex2D( SpaceShipSampler2D, float2( vX, vY ) );
// Scale all four components by 256.
vArray[vLocation] = vArray[vLocation] * 256;
vX = vX + 1;
}
vY = vY + 1;
}

// manipulate bitmap
vLocation = 0;

// Runs 4000 times (not 4096); vLocation is only a counter here because the
// body never indexes with it.
while (vLocation < 4000)
{
// vOffset1 = vArray[vLocation].x * 256;
// vOffset2 = vArray[vLocation].y * 256;
// vOffset3 = vArray[vLocation].z * 256;

// NOTE(review): with the three assignments above commented out, vOffset1,
// vOffset2 and vOffset3 have not been assigned any value at this point, so
// the array indices below are uninitialized.
vArray[vOffset1].x = vArray[vOffset1].x + 1;
vArray[vOffset2].y = vArray[vOffset2].y - 1;
vArray[vOffset3].z = vArray[vOffset3].z + 3;

vLocation = vLocation + 1;
}

// determine final output color
ParaOut.mColor.x = 0;
ParaOut.mColor.y = 0;
ParaOut.mColor.z = 0;

vLocation = 0;
// Again 4000 iterations, so entries 4000..4095 are never used as a source
// of offsets.
while (vLocation < 4000)
{
// Float components are converted to int and used as array indices.
// NOTE(review): nothing here checks that they fall inside 0..4095.
vOffset1 = vArray[vLocation].x;
vOffset2 = vArray[vLocation].y;
vOffset3 = vArray[vLocation].z;

// Accumulate the indirectly addressed components, dividing by 256.0 to
// undo the scaling applied while loading.
ParaOut.mColor.x = ParaOut.mColor.x + vArray[vOffset1].x/256.0;
ParaOut.mColor.y = ParaOut.mColor.y + vArray[vOffset2].y/256.0;
ParaOut.mColor.z = ParaOut.mColor.z + vArray[vOffset3].z/256.0;

vLocation = vLocation + 1;
}

}


return ParaOut;
}

// Single-pass technique: CullFaceEnable is set to false, TVertexShader_Main is
// compiled for the vp40 profile and TPixelShader_Main for the fp40 profile.
technique technique0 {
pass p0 {
CullFaceEnable = false;
VertexProgram = compile vp40 TVertexShader_Main();
FragmentProgram = compile fp40 TPixelShader_Main();
}
}

// *** End of TestShader.cgfx ***

I tried to compile with:

"
cgc -profile fp40 -entry TPixelShader_Main -o TestShader.asm TestShader.cgfx
"

Bye,
Skybuck.


Skybuck Flying

unread,
Dec 27, 2009, 5:05:15 PM12/27/09
to
To me it seems at least like a compiler failure... and maybe a hardware
limitation as well...

The fragment specification says something about "maximum" value retrieval...
but using some opengl api call...

However, in reality those constants have different names in the OpenGL headers...
so this is a question of finding and using the correct constants and API call
to try and find limitations like...

"max temporaries" and things like that.

So for now I could assume a compiler failure and hope for the best... and I
could try and give fragments/arb/assemblies... so coding directly in gpu asm
to circumvent the cgc compiler which would be kinda cool/interesting to do
anyway to see how the instruction set more or less works...

Since I can now also load assembly binaries into my "gpu" engine ;) :D

However first things first... it might be better to first try and find these
constants for limitation information retrieval... then maybe later try asm
anyway to see how far it can go...

Bye,
Skybuck.


Skybuck Flying

unread,
Dec 27, 2009, 5:08:28 PM12/27/09
to
Hmm I was reading this little document:

http://oss.sgi.com/projects/ogl-sample/registry/ARB/fragment_program.txt

But maybe that's not for nvidia cards ;) :)

So that's kinda a danger :)

NVIDIA has a similar document, which is nevertheless totally different, called
GL_NV_fragment_program.txt

Hmm...

(I was kinda liking the first one... )

Bye,
Skybuck.


Skybuck Flying

unread,
Dec 27, 2009, 5:41:23 PM12/27/09
to
Reading what other people have programmed on other hardware which is a bit
less powerful but similar, it seems that it's all about "temporaries" and
temporary registers...

And only a few are available... So I am guessing arrays are implemented as
"temporaries"...

Which means there is no way that 2006 hardware can do thousands of
temporaries ?! ;)

So this means two things:

1. The best-case scenario for the corewars executor is not possible with 2006
hardware.
(Other scenarios might be possible though, but they would definitely be
slower... maybe I will investigate those possibilities...)

2. Shaders for 2006 hardware is all about "few instructions", "few
registers", "few temporaries", and "woops you ran out of resources" ! :) =D

It also kinda sucks that I have no clue how many resources the shaders are
using...

It would be nice if the compiler could at least somehow give some indication
while it's compiling... or simply stop compiling if it ran out of resources
!? ;)

Bye,
Skybuck.


keith

unread,
Dec 28, 2009, 10:33:13 AM12/28/09
to
Hey, by now, you should have realized you know all the answers, just
wait a little before posting :)

Chris Dodd

unread,
Dec 29, 2009, 4:33:12 PM12/29/09
to
"Skybuck Flying" <IntoTh...@hotmail.com> wrote in
news:64de3$4b37c4ba$d53371df$10...@cache5.tilbu1.nb.home.nl:
> This is my test shader, to test the maximum ammount of local variables
> inside a shader. (via array).
> The CG Compiler kinda freaks out and uses 2 gigabyte of memory and runs
> out of memory...
[large shader deleted]

> I tried to compile with:
> cgc -profile fp40 -entry TPixelShader_Main -o TestShader.asm
TestShader.cgfx

This is almost certainly caused by the compiler being overly aggressive about
unrolling the loops. Try adding "-unroll none" to the compile line to
disable unrolling. I would guess that your shader won't run to completion
due to being too long for fp40 -- it only allows 64K instructions to be
executed in a single shader, at which point it kills the shader (to prevent
runaway infinite loops).

-chris

fungus

unread,
Dec 30, 2009, 9:24:27 AM12/30/09
to
Rule of Upper Limits: "If you have to ask what the upper limit is,
you're probably doing it wrong."


--
<\___/>
/ O O \
\_____/ FTB.

http://www.topaz3d.com/ - New 3D editor for real time simulation

Skybuck Flying

unread,
Dec 30, 2009, 1:15:50 PM12/30/09
to

"Chris Dodd" <cd...@acm.org> wrote in message
news:Xns9CF089DDA3...@194.177.98.144...

> "Skybuck Flying" <IntoTh...@hotmail.com> wrote in
> news:64de3$4b37c4ba$d53371df$10...@cache5.tilbu1.nb.home.nl:
>> This is my test shader, to test the maximum ammount of local variables
>> inside a shader. (via array).
>> The CG Compiler kinda freaks out and uses 2 gigabyte of memory and runs
>> out of memory...
> [large shader deleted]
>> I tried to compile with:
>> cgc -profile fp40 -entry TPixelShader_Main -o TestShader.asm
> TestShader.cgfx
>
> This is almost certainly caused by the compiler being overly agressive
> about
> unrolling the loops. Try adding "-unroll none" to the compile line to

Already tried that ;) Didn't help :)

> disable unrolling. I would guess that your shader won't run to completion
> due to being too long for fp40 -- it only allows 64K instructions to be
> executed in a single shader, at which point it kills the shader (to
> prevent
> runaway infinite loops).

The shader doesn't seem to be that big...

I also tried another test program... which has two loops like so:

loop 0 to 400
loop 0 to 500
read texel at 400, 500

To me it seems if loops together are over 4096 it won't work ?

So these are very serious limitations... sigh.

Bye,
Skybuck.


0 new messages