# Transformer by Hand

136 views

### antonio....@gmail.com

Jul 10, 2024, 7:01:53 AM
to Harbour Users
https://x.com/ProfTomYeh/status/1809939766907228334

// Transformer by Hand

// Runs the "Transformer by Hand" worked example: one matrix product of the
// input with aAWM, followed by two weight-matrix products that are each
// passed through ReLU(), and prints the final matrix.
function Main()

   // 4x5 input matrix
   local aInput := { { 1, 1, 1, 1, 1 },;
                     { 1, 0, 0, 1, -1 },;
                     { 1, 1, 1, 1, 2 },;
                     { 0, 1, 0, 0, 1 } }

   // 5x5 matrix that the input is right-multiplied by
   local aAWM := { { 1, 1, 0, 1, 0 },;
                   { 1, 0, 1, 1, 0 },;
                   { 0, 0, 1, 1, 0 },;
                   { 0, 1, 0, 0, 1 },;
                   { 0, 1, 1, 0, 1 } }

   // 4x5 product of the two matrices above
   local aAWF := MatMul( aInput, aAWM )

   // 3x5 weights: the last column multiplies the row of ones appended to
   // aAWF below, so it contributes a constant term to every output row
   local aWB := { { 1, 0, 0, 1, 3 },;
                  { 0, 1, 1, 1, 4 },;
                  { 1, 1, -1, 1, -1 } }

   // 4x4 weights: same layout, the last column multiplies the row of ones
   // appended to aReLU below
   local aWB2 := { { 1, 0, 1, -1 },;
                   { -1, 0, 1, 7 },;
                   { 1, -1, 0, 2 },;
                   { 0, 1, -1, -1 } }

   local aWBxaAWF, aReLU, aWB2xaReLU

   // Append a row of ones (4x5 -> 5x5) so aWB's last column is applied
   AAdd( aAWF, { 1, 1, 1, 1, 1 } )
   aWBxaAWF := MatMul( aWB, aAWF )

   // Clamp negatives to zero, then append the row of ones again (3x5 -> 4x5)
   aReLU := ReLU( aWBxaAWF )
   AAdd( aReLU, { 1, 1, 1, 1, 1 } )

   aWB2xaReLU := MatMul( aWB2, aReLU )

   // An array handed directly to ? is not printed element by element;
   // hb_ValToExp() serializes it into a readable literal first
   ? hb_ValToExp( ReLU( aWB2xaReLU ) )

return nil

// Returns the matrix product aMatrix1 * aMatrix2 as a new array of arrays.
// The result has Len( aMatrix1 ) rows and Len( aMatrix2[ 1 ] ) columns; the
// summation length is taken from the first row of aMatrix1.
function MatMul( aMatrix1, aMatrix2 )

   local nRows    := Len( aMatrix1 )
   local nInner   := Len( aMatrix1[ 1 ] )
   local nCols    := Len( aMatrix2[ 1 ] )
   local aProduct := Array( nRows, nCols )
   local nRow, nCol, nTerm, nSum

   for nRow := 1 to nRows
      for nCol := 1 to nCols
         // Dot product of row nRow of aMatrix1 with column nCol of aMatrix2
         nSum := 0
         for nTerm := 1 to nInner
            nSum += aMatrix1[ nRow ][ nTerm ] * aMatrix2[ nTerm ][ nCol ]
         next
         aProduct[ nRow ][ nCol ] := nSum
      next
   next

return aProduct

// Returns a deep copy of aMatrix in which every element has been replaced
// by Max( 0, element ). The array passed in is left untouched.
function ReLU( aMatrix )

   local aResult := AClone( aMatrix )
   local nRow, nCol, aRow

   for nRow := 1 to Len( aResult )
      // aRow refers to the same row array held by aResult, so writes land there
      aRow := aResult[ nRow ]
      for nCol := 1 to Len( aRow )
         aRow[ nCol ] := Max( 0, aRow[ nCol ] )
      next
   next

return aResult

Final result: {{6, 6, 7, 7, 5}, {2, 0, 0, 1, 1}, {0, 0, 1, 0, 0}, {6, 9, 8, 7, 7}}

regards

Transformer by Hand ✍️ - v1.0.xlsx

### antonio....@gmail.com

Jul 10, 2024, 8:24:46 AM
to Harbour Users
The functions MatMul() and ReLU() have been ported to C code, thanks to Kwon Oh Chul (Charles):

// Same worked example as the pure-Harbour version, except that MatMul() and
// ReLU() are supplied by the C code in the BEGINDUMP section of this file.
function Main()

   local aInput, aAWM, aAWF, aWB, aWB2
   local aStage1, aActivated, aStage2

   // 4x5 input and the 5x5 matrix it is multiplied by
   aInput := { { 1, 1, 1, 1, 1 },;
               { 1, 0, 0, 1, -1 },;
               { 1, 1, 1, 1, 2 },;
               { 0, 1, 0, 0, 1 } }

   aAWM := { { 1, 1, 0, 1, 0 },;
             { 1, 0, 1, 1, 0 },;
             { 0, 0, 1, 1, 0 },;
             { 0, 1, 0, 0, 1 },;
             { 0, 1, 1, 0, 1 } }

   aAWF := MatMul( aInput, aAWM )

   // 3x5 and 4x4 weight matrices; in each, the last column multiplies the
   // row of ones appended to the right-hand operand before the product
   aWB := { { 1, 0, 0, 1, 3 },;
            { 0, 1, 1, 1, 4 },;
            { 1, 1, -1, 1, -1 } }

   aWB2 := { { 1, 0, 1, -1 },;
             { -1, 0, 1, 7 },;
             { 1, -1, 0, 2 },;
             { 0, 1, -1, -1 } }

   // First weighted product: extend aAWF with a row of ones, then multiply
   AAdd( aAWF, { 1, 1, 1, 1, 1 } )
   aStage1 := MatMul( aWB, aAWF )

   // Clamp, extend with a row of ones, and apply the second weight matrix
   aActivated := ReLU( aStage1 )
   AAdd( aActivated, { 1, 1, 1, 1, 1 } )
   aStage2 := MatMul( aWB2, aActivated )

   // Serialize the clamped final matrix so its elements are visible
   ? hb_ValToExp( ReLU( aStage2 ) )

return nil

#pragma BEGINDUMP

#include <hbapi.h>
#include <hbapiitm.h>
#include <hbapierr.h>

#define UNROLL_FACTOR 4

// Returns the length of the first row of pMatrix, or 0 when the matrix has
// no rows or its first element is not an array.
static HB_SIZE s_matmulFirstRowLen( PHB_ITEM pMatrix )
{
   PHB_ITEM pRow = hb_arrayLen( pMatrix ) > 0 ? hb_arrayGetItemPtr( pMatrix, 1 ) : NULL;

   return ( pRow && HB_IS_ARRAY( pRow ) ) ? hb_arrayLen( pRow ) : 0;
}

// Copies a Harbour array of arrays into a newly allocated row-major buffer
// of doubles. Returns NULL (allocating nothing) unless pMatrix has exactly
// nRows rows, every row is an array of exactly nCols elements, and both
// dimensions are non-zero. The caller releases the buffer with hb_xfree().
static double * s_matmulFlatten( PHB_ITEM pMatrix, HB_SIZE nRows, HB_SIZE nCols )
{
   double * pData;
   HB_SIZE i, j;

   if( nRows == 0 || nCols == 0 || hb_arrayLen( pMatrix ) != nRows )
      return NULL;

   // Validate every row before allocating so a failure needs no cleanup
   for( i = 0; i < nRows; i++ )
   {
      PHB_ITEM pRow = hb_arrayGetItemPtr( pMatrix, i + 1 );

      if( ! pRow || ! HB_IS_ARRAY( pRow ) || hb_arrayLen( pRow ) != nCols )
         return NULL;
   }

   pData = ( double * ) hb_xgrab( nRows * nCols * sizeof( double ) );

   for( i = 0; i < nRows; i++ )
   {
      PHB_ITEM pRow = hb_arrayGetItemPtr( pMatrix, i + 1 );

      for( j = 0; j < nCols; j++ )
         pData[ i * nCols + j ] = hb_arrayGetND( pRow, j + 1 );
   }

   return pData;
}

// MatMul( aMatrix1, aMatrix2 ) -> aResult
//
// Multiplies two matrices given as Harbour arrays of arrays and returns the
// product as a new array of arrays holding double values.
//
// Both arguments must be non-empty rectangular arrays of arrays, and the
// number of rows in aMatrix2 must equal the number of columns in aMatrix1.
// Anything else raises the EG_ARG runtime error (subcode 3012).
HB_FUNC( MATMUL )
{
   PHB_ITEM pMatrix1 = hb_param( 1, HB_IT_ARRAY );
   PHB_ITEM pMatrix2 = hb_param( 2, HB_IT_ARRAY );
   HB_SIZE nRowsA = 0, nColsA = 0, nColsB = 0;
   double * pDataA = NULL;
   double * pDataB = NULL;

   if( pMatrix1 && pMatrix2 )
   {
      nRowsA = hb_arrayLen( pMatrix1 );
      nColsA = s_matmulFirstRowLen( pMatrix1 );
      nColsB = s_matmulFirstRowLen( pMatrix2 );

      pDataA = s_matmulFlatten( pMatrix1, nRowsA, nColsA );
      if( pDataA )
         pDataB = s_matmulFlatten( pMatrix2, nColsA, nColsB );
   }

   if( pDataA && pDataB )
   {
      PHB_ITEM pMatrixResult = hb_itemArrayNew( nRowsA );
      HB_SIZE i, j, k;

      for( i = 0; i < nRowsA; i++ )
      {
         const double * pRowA = pDataA + i * nColsA;
         PHB_ITEM pRow = hb_itemArrayNew( nColsB );

         for( j = 0; j < nColsB; j++ )
         {
            double sum = 0.0;

            // The bound is written as k + UNROLL_FACTOR <= nColsA because
            // HB_SIZE is unsigned: subtracting from nColsA would wrap
            // around for nColsA < UNROLL_FACTOR - 1. The body is unrolled
            // by hand for UNROLL_FACTOR == 4.
            for( k = 0; k + UNROLL_FACTOR <= nColsA; k += UNROLL_FACTOR )
            {
               sum += pRowA[ k ]     * pDataB[ k * nColsB + j ]
                    + pRowA[ k + 1 ] * pDataB[ ( k + 1 ) * nColsB + j ]
                    + pRowA[ k + 2 ] * pDataB[ ( k + 2 ) * nColsB + j ]
                    + pRowA[ k + 3 ] * pDataB[ ( k + 3 ) * nColsB + j ];
            }
            // Handle the remaining nColsA % UNROLL_FACTOR terms
            for( ; k < nColsA; k++ )
               sum += pRowA[ k ] * pDataB[ k * nColsB + j ];

            hb_arraySetND( pRow, j + 1, sum );
         }

         hb_arraySet( pMatrixResult, i + 1, pRow );
         hb_itemRelease( pRow );
      }

      hb_xfree( pDataA );
      hb_xfree( pDataB );

      hb_itemReturnRelease( pMatrixResult );
   }
   else
   {
      // pDataB is only ever allocated after pDataA, so pDataA is the only
      // buffer that can be live here; release it before raising the error
      if( pDataA )
         hb_xfree( pDataA );

      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

HB_FUNC( RELU )
{
PHB_ITEM pMatrix = hb_param( 1, HB_IT_ARRAY );

if( pMatrix )
{
HB_SIZE nRows = hb_arrayLen( pMatrix );
HB_SIZE nCols = hb_arrayLen( hb_arrayGetItemPtr( pMatrix, 1 ) );

PHB_ITEM pOutput = hb_itemArrayNew( nRows );
HB_SIZE i, j;

double* pData = (double*)hb_xgrab(nRows * nCols * sizeof(double));

// Copy data to contiguous memory
for( i = 0; i < nRows; i++ )
{
PHB_ITEM pRow = hb_arrayGetItemPtr( pMatrix, i+1 );
for( j = 0; j < nCols; j++ )
{
pData[i*nCols + j] = hb_arrayGetND( pRow, j+1 );
}
}

// Perform ReLU operation
for( i = 0; i < nRows * nCols; i += UNROLL_FACTOR )
{
pData[i] = (pData[i] > 0.0) ? pData[i] : 0.0;
pData[i+1] = (pData[i+1] > 0.0) ? pData[i+1] : 0.0;
pData[i+2] = (pData[i+2] > 0.0) ? pData[i+2] : 0.0;
pData[i+3] = (pData[i+3] > 0.0) ? pData[i+3] : 0.0;
}
// Handle remaining elements
for( ; i < nRows * nCols; i++ )
{
pData[i] = (pData[i] > 0.0) ? pData[i] : 0.0;
}

// Copy result back to Harbour array
for( i = 0; i < nRows; i++ )
{
PHB_ITEM pRow = hb_itemArrayNew( nCols );
for( j = 0; j < nCols; j++ )
{
hb_arraySetND( pRow, j+1, pData[i*nCols + j] );
}
hb_arraySet( pOutput, i+1, pRow );
hb_itemRelease( pRow );
}

hb_xfree(pData);

hb_itemReturnRelease( pOutput );
}
else
{
hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
}
}

#pragma ENDDUMP

regards