[AOLSERVER] oci_error_p - nsoracle.c - talking about memory allocation...

21 views
Skip to first unread message

Maurizio Martignano

unread,
May 4, 2008, 4:09:58 AM5/4/08
to AOLS...@listserv.aol.com

Dear all,

                I’d like to report a memory allocation problem in the function oci_error_p, belonging to the module nsoracle.c.

 

When the system is used on Windows and Oracle (via PL/SQL) executes a raise_application_error, AOLserver (nsd) stops working.

 

Let’s analyse the code:

 

/*
 * oci_error_p --
 *
 *   Examine the status returned by an OCI call and report any failure.
 *
 *   file, line, fn  source location of the caller; used as the prefix of
 *                   the logged message
 *   dbh             database handle; when non-NULL, its connection is used
 *                   to fetch the Oracle error text and parse-error offset
 *   ocifn           name of the OCI function that produced oci_status
 *   query           SQL text being processed (normalised through nilp())
 *   oci_status      status code to examine
 *
 *   Returns 0 when oci_status is OCI_SUCCESS or OCI_SUCCESS_WITH_INFO.
 *   Otherwise it builds a message, logs it with Ns_Log(Error, ...),
 *   records it on the handle with Ns_DbSetException() and returns 1.
 *
 *   The "<-" comments are the annotations made by the author of the
 *   surrounding message.
 */
static int
oci_error_p(const char *file, int line, const char *fn,
            Ns_DbHandle * dbh, char *ocifn, char *query,
            oci_status_t oci_status)
{
    ora_connection_t *connection = 0;
    ub2               offset = 0;
    sb4               errorcode = 0;

    char             *msgbuf;    /* <- dynamic memory allocation */
    char             *buf;       /* <- dynamic memory allocation */
    char              exceptbuf[EXCEPTION_CODE_SIZE + 1];  /* <- allocation on the stack */

    if (dbh) {
        connection = dbh->connection;
    }

    if (oci_status == OCI_SUCCESS)
        return 0;

    /* Until we get the logging situation worked out, return
     * OCI_SUCCESS_WITH_INFO as a pure success.
     */
    if (oci_status == OCI_SUCCESS_WITH_INFO)
        return 0;

    /* If the query is long, nilp will return "[too long]";
     * if null (we're not doing a query yet, e.g., could
     * be opening db), then "[nil]"
     */
    query = nilp(query);

    msgbuf = (char *) Ns_Malloc(STACK_BUFFER_SIZE * sizeof(char));  /* <- dynamic creation of msgbuf */
    buf = (char *) Ns_Malloc(STACK_BUFFER_SIZE * sizeof(char));     /* <- dynamic creation of buf */
    *msgbuf = 0;

    switch (oci_status) {

        case OCI_NEED_DATA:
            snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_NEED_DATA");
            break;
        case OCI_NO_DATA:
            snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_NO_DATA");
            break;
        case OCI_ERROR:

            if (connection == 0)
                snprintf(msgbuf, STACK_BUFFER_SIZE, "NULL connection");
            else {
                oci_status_t oci_status1;
                char         errorbuf[1024];

                /* Fetch the Oracle error code and its message text. */
                oci_status1 = OCIErrorGet(connection->err,
                                          1,
                                          NULL,
                                          &errorcode,
                                          errorbuf,
                                          sizeof errorbuf, OCI_HTYPE_ERROR);
                if (oci_status1) {
                    snprintf(msgbuf, STACK_BUFFER_SIZE,
                             "`OCIErrorGet ()' error");
                } else {
                    snprintf(msgbuf, STACK_BUFFER_SIZE, "%s", errorbuf);
                }

                /* Ask for the parse-error offset; the returned status is
                 * not examined, so offset stays 0 if the call fails to
                 * set it.
                 */
                oci_status1 = OCIAttrGet(connection->stmt,
                                         OCI_HTYPE_STMT,
                                         &offset,
                                         NULL,
                                         OCI_ATTR_PARSE_ERROR_OFFSET,
                                         connection->err);

                if (errorcode == 1041 ||
                    errorcode == 3113 ||
                    errorcode == 12571 ||
                    errorcode == 28 ||
                    errorcode == 1012 ||
                    errorcode == 24324) {

                    /* 3113 is 'end-of-file on communications channel', which
                     *      happens if the oracle process dies
                     * 12571 is TNS:packet writer failure, which also happens if
                     *      the oracle process dies
                     * 1041 is the dreaded "hostdef extension doesn't exist" error,
                     *      which means the db handle is screwed and can't be used
                     *      for anything else.
                     *
                     * In either case, close and re-open the handle to clear the
                     * error condition
                     */
                    Ns_OracleFlush(dbh);
                    Ns_OracleCloseDb(dbh);
                }

                if (errorcode == 20 || errorcode == 1034) {
                    /* ora-00020 means 'maximum number of processes exceeded'.
                     * ora-01034 means 'oracle not available'.
                     *           we want to make sure the oracleSID process
                     *           goes away so we don't make the problem worse
                     */
                    Ns_OracleCloseDb(dbh);
                }
            }
            break;
        case OCI_INVALID_HANDLE:
            snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_INVALID_HANDLE");
            break;
        case OCI_STILL_EXECUTING:
            snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_STILL_EXECUTING");
            break;
        case OCI_CONTINUE:
            snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_CONTINUE");
            break;
    }

    if (((errorcode == 900) || (offset > 0)) && (strlen(query) >= offset)) {
        /* ora-00900 is invalid sql statement
         *           it seems to be the msg most likely to be a parse
         *           error that sets offset to 0
         */
        int len;

        /* NOTE(review): len is used below without being range-checked.
         * C99 snprintf returns the length the complete output would have
         * had, and some older runtimes return a negative value on
         * truncation -- confirm the message can never exceed
         * STACK_BUFFER_SIZE.
         */
        len = snprintf(buf, STACK_BUFFER_SIZE,
                       "%s:%d:%s: error in `%s ()': %s\nSQL: ",
                       file, line, fn, ocifn, msgbuf);
        if (offset > 0)
            len +=
                snprintf(buf + len, STACK_BUFFER_SIZE - len, "%.*s",
                         offset - 1, query);

        /* Mark the reported error position inside the SQL text. */
        snprintf(buf + len, STACK_BUFFER_SIZE - len, " !>>>!%s",
                 query + offset);
    } else {
        snprintf(buf, STACK_BUFFER_SIZE,
                 "%s:%d:%s: error in `%s ()': %s\nSQL: %s",
                 file, line, fn, ocifn, msgbuf, query);
    }

    Ns_Log(Error, "%s", buf);

    /* We need to call this so that AOLserver will print out the relevant
     * error on pages served to browsers where ClientDebug is set.
     */
    snprintf(exceptbuf, EXCEPTION_CODE_SIZE, "%d", (int) errorcode);
    Ns_DbSetException(dbh, exceptbuf, buf);  /* <- here exceptbuf (allocated on the stack) and buf
                                              *    (dynamically allocated) are passed to
                                              *    Ns_DbSetException */

    Ns_Free(msgbuf);  /* <- destruction of msgbuf */
    Ns_Free(buf);     /* <- destruction of buf */

    return 1;  /* <- after this return, neither msgbuf and buf (already destroyed)
                *    nor exceptbuf (on the stack) formally exist any longer */
}

 

This code DOES work on Linux. On Windows, when raise_application_error is called, the code BREAKS the server.

 

To make it work I had to change the code as follows:

 

/*
 * oci_error_p --
 *
 *   Variant of the error-reporting routine in which the message buffers
 *   are function-static arrays instead of Ns_Malloc()'d blocks.
 *
 *   file, line, fn  source location of the caller; used as the prefix of
 *                   the logged message
 *   dbh             database handle; when non-NULL, its connection is used
 *                   to fetch the Oracle error text and parse-error offset
 *   ocifn           name of the OCI function that produced oci_status
 *   query           SQL text being processed (normalised through nilp())
 *   oci_status      status code to examine
 *
 *   Returns 0 when oci_status is OCI_SUCCESS or OCI_SUCCESS_WITH_INFO.
 *   Otherwise it builds a message, logs it with Ns_Log(Error, ...),
 *   records it on the handle with Ns_DbSetException() and returns 1.
 *
 *   Because msgbuf, buf and exceptbuf have static storage duration there
 *   is a single copy of each, shared by every call to this function.
 *
 *   The "<-" comments are the annotations made by the author of the
 *   surrounding message.
 */
static int
oci_error_p(const char *file, int line, const char *fn,
            Ns_DbHandle * dbh, char *ocifn, char *query,
            oci_status_t oci_status)
{
    ora_connection_t *connection = 0;
    ub2               offset = 0;
    sb4               errorcode = 0;

    static char       msgbuf[STACK_BUFFER_SIZE + 1];     /* <- static memory allocation of msgbuf */
    static char       buf[STACK_BUFFER_SIZE + 1];        /* <- static memory allocation of buf */
    static char       exceptbuf[EXCEPTION_CODE_SIZE + 1]; /* <- static memory allocation of exceptbuf */

    if (dbh) {
        connection = dbh->connection;
    }

    if (oci_status == OCI_SUCCESS)
        return 0;

    /* Until we get the logging situation worked out, return
     * OCI_SUCCESS_WITH_INFO as a pure success.
     */
    if (oci_status == OCI_SUCCESS_WITH_INFO)
        return 0;

    /* If the query is long, nilp will return "[too long]";
     * if null (we're not doing a query yet, e.g., could
     * be opening db), then "[nil]"
     */
    query = nilp(query);

    /* The heap allocations of the original are disabled in this variant: */
    // msgbuf = (char *) Ns_Malloc(STACK_BUFFER_SIZE * sizeof(char));
    // buf = (char *) Ns_Malloc(STACK_BUFFER_SIZE * sizeof(char));
    *msgbuf = 0;

    switch (oci_status) {

        case OCI_NEED_DATA:
            snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_NEED_DATA");
            break;
        case OCI_NO_DATA:
            snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_NO_DATA");
            break;
        case OCI_ERROR:

            if (connection == 0)
                snprintf(msgbuf, STACK_BUFFER_SIZE, "NULL connection");
            else {
                oci_status_t oci_status1;
                char         errorbuf[1024];

                /* Fetch the Oracle error code and its message text. */
                oci_status1 = OCIErrorGet(connection->err,
                                          1,
                                          NULL,
                                          &errorcode,
                                          errorbuf,
                                          sizeof errorbuf, OCI_HTYPE_ERROR);
                if (oci_status1) {
                    snprintf(msgbuf, STACK_BUFFER_SIZE,
                             "`OCIErrorGet ()' error");
                } else {
                    snprintf(msgbuf, STACK_BUFFER_SIZE, "%s", errorbuf);
                }

                /* Ask for the parse-error offset; the returned status is
                 * not examined, so offset stays 0 if the call fails to
                 * set it.
                 */
                oci_status1 = OCIAttrGet(connection->stmt,
                                         OCI_HTYPE_STMT,
                                         &offset,
                                         NULL,
                                         OCI_ATTR_PARSE_ERROR_OFFSET,
                                         connection->err);

                if (errorcode == 1041 ||
                    errorcode == 3113 ||
                    errorcode == 12571 ||
                    errorcode == 28 ||
                    errorcode == 1012 ||
                    errorcode == 24324) {

                    /* 3113 is 'end-of-file on communications channel', which
                     *      happens if the oracle process dies
                     * 12571 is TNS:packet writer failure, which also happens if
                     *      the oracle process dies
                     * 1041 is the dreaded "hostdef extension doesn't exist" error,
                     *      which means the db handle is screwed and can't be used
                     *      for anything else.
                     *
                     * In either case, close and re-open the handle to clear the
                     * error condition
                     */
                    Ns_OracleFlush(dbh);
                    Ns_OracleCloseDb(dbh);
                }

                if (errorcode == 20 || errorcode == 1034) {
                    /* ora-00020 means 'maximum number of processes exceeded'.
                     * ora-01034 means 'oracle not available'.
                     *           we want to make sure the oracleSID process
                     *           goes away so we don't make the problem worse
                     */
                    Ns_OracleCloseDb(dbh);
                }
            }
            break;
        case OCI_INVALID_HANDLE:
            snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_INVALID_HANDLE");
            break;
        case OCI_STILL_EXECUTING:
            snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_STILL_EXECUTING");
            break;
        case OCI_CONTINUE:
            snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_CONTINUE");
            break;
    }

    if (((errorcode == 900) || (offset > 0)) && (strlen(query) >= offset)) {
        /* ora-00900 is invalid sql statement
         *           it seems to be the msg most likely to be a parse
         *           error that sets offset to 0
         */
        int len;

        /* NOTE(review): len is used below without being range-checked.
         * C99 snprintf returns the length the complete output would have
         * had, and some older runtimes return a negative value on
         * truncation -- confirm the message can never exceed
         * STACK_BUFFER_SIZE.
         */
        len = snprintf(buf, STACK_BUFFER_SIZE,
                       "%s:%d:%s: error in `%s ()': %s\nSQL: ",
                       file, line, fn, ocifn, msgbuf);
        if (offset > 0)
            len +=
                snprintf(buf + len, STACK_BUFFER_SIZE - len, "%.*s",
                         offset - 1, query);

        /* Mark the reported error position inside the SQL text. */
        snprintf(buf + len, STACK_BUFFER_SIZE - len, " !>>>!%s",
                 query + offset);
    } else {
        snprintf(buf, STACK_BUFFER_SIZE,
                 "%s:%d:%s: error in `%s ()': %s\nSQL: %s",
                 file, line, fn, ocifn, msgbuf, query);
    }

    Ns_Log(Error, "%s", buf);

    /* We need to call this so that AOLserver will print out the relevant
     * error on pages served to browsers where ClientDebug is set.
     */
    snprintf(exceptbuf, EXCEPTION_CODE_SIZE, "%d", (int) errorcode);
    Ns_DbSetException(dbh, exceptbuf, buf);

    /* Nothing to release: the buffers are static in this variant. */
    // Ns_Free(msgbuf);
    // Ns_Free(buf);

    return 1;
}

 

Any suggestion on why this is happening? How about incorporating these changes in the standard nsoracle.c module?

 

Thanks in advance,

Maurizio

 

 

-- AOLserver - http://www.aolserver.com/

To Remove yourself from this list, simply send an email to <list...@listserv.aol.com> with the body of "SIGNOFF AOLSERVER" in the email message. You can leave the Subject: field of your email blank.

Maurizio Martignano

unread,
May 4, 2008, 4:11:00 AM5/4/08
to AOLS...@listserv.aol.com

Maurizio Martignano

unread,
May 4, 2008, 4:26:23 AM5/4/08
to AOLS...@listserv.aol.com

Andrew Steets

unread,
May 14, 2008, 3:43:57 PM5/14/08
to AOLS...@listserv.aol.com
Hello Maurizio,

Sorry I did not see this message when it originally was posted.

Ns_DbSetException makes copies of its second and third arguments.  It should not matter whether the arguments are cleaned up after the call to Ns_DbSetException.  Also, the driver needs to be reentrant.  By making these changes you are asking for trouble if oci_error_p is called simultaneously from two separate threads.

Do you have a stack trace or something similar from the error under Windows?  Maybe we can help get to the bottom of it.

-Andrew


...

[Message clipped]  

Maurizio Martignano

unread,
May 20, 2008, 1:37:17 PM5/20/08
to AOLS...@listserv.aol.com

Dear Andrew,

                Thanks a lot for your e-mail message and sorry for my late reply.

 

When I use the standard “oci_error_p” function (without the static memory declarations) and try to execute the following piece of Tcl code:

 

db_exec_plsql fred {

            begin

                  raise_application_error (-20000,

                                           'this is the user error message this is the user error message this is the user error message this is the user error message this is the user error message this is the user error message this is the user error message this is the user error message this is the user error message');

            end;       

      }

 

The NSD.EXE program stops running. I traced its execution with Dependency Walker and this is what I get:

 

GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIStmtExecute") called from "OCI.DLL" at address 0x019A2E25 and returned 0x61C268A4 by thread 8.

GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIErrorGet") called from "OCI.DLL" at address 0x019A329D and returned 0x61C26B1E by thread 8.

GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIAttrGet") called from "OCI.DLL" at address 0x019A39A1 and returned 0x61C26D5E by thread 8.

GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIHandleFree") called from "OCI.DLL" at address 0x019A281D and returned 0x61C26012 by thread 8.

Second chance exception 0xC0000005 (Access Violation) occurred in "NSDB.DLL" at address 0x00C212B2 by thread 8.

Thread 5 exited with code 255 (0xFF).

Thread 16 exited with code 255 (0xFF).

Thread 15 exited with code 255 (0xFF).

 

By contrast, if I run the same piece of Tcl code with my changes to “oci_error_p”, this is what I get:

 

GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIStmtExecute") called from "OCI.DLL" at address 0x019F2E25 and returned 0x61C268A4 by thread 6.

GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIErrorGet") called from "OCI.DLL" at address 0x019F329D and returned 0x61C26B1E by thread 6.

GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIAttrGet") called from "OCI.DLL" at address 0x019F39A1 and returned 0x61C26D5E by thread 6.

GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIHandleFree") called from "OCI.DLL" at address 0x019F281D and returned 0x61C26012 by thread 6.

GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIHandleAlloc") called from "OCI.DLL" at address 0x019F27D9 and returned 0x61C25FCA by thread 6.

GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIStmtPrepare") called from "OCI.DLL" at address 0x019F2BA5 and returned 0x61C2651C by thread 6.

GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIAttrGet") called from "OCI.DLL" at address 0x019F39A1 and returned 0x61C26D5E by thread 6.

 

Etc….

 

Until we solve this problem, I’ll stick to my version, with the memory statically allocated.

 

Hope it helps,

 

Maurizio

Reply all
Reply to author
Forward
0 new messages