Dear all,
I’d like to report a memory allocation problem in the function oci_error_p, belonging to the module nsoracle.c.
When the system is used on Windows and Oracle (via plsql) execute a raise_application_error, Aolserver (nsd) stops working.
Let’a analyse the code
static int
oci_error_p(const char *file, int line, const char *fn,
Ns_DbHandle * dbh, char *ocifn, char *query,
oci_status_t oci_status)
{
ora_connection_t *connection = 0;
ub2 offset = 0;
sb4 errorcode = 0;
char *msgbuf; ß dynamic memory allocation
char *buf; ß dynamic memory allocation
char exceptbuf[EXCEPTION_CODE_SIZE + 1]; ß allocation on the stack
if (dbh) {
connection = dbh->connection;
}
if (oci_status == OCI_SUCCESS)
return 0;
/* Until we get the logging situation worked out, return
* OCI_SUCCESS_WITH_INFO as a pure success.
*/
if (oci_status == OCI_SUCCESS_WITH_INFO)
return 0;
/* If the query is long, nilp will return "[too long]";
* if null (we're not doing a query yet, e.g., could
* be opening db), then "[nil]"
*/
query = nilp(query);
msgbuf = (char *) Ns_Malloc(STACK_BUFFER_SIZE * sizeof(char)); ß dynamic creation of msgbuf
buf = (char *) Ns_Malloc(STACK_BUFFER_SIZE * sizeof(char)); ß dynamic creation of buf
*msgbuf = 0;
switch (oci_status) {
case OCI_NEED_DATA:
snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_NEED_DATA");
break;
case OCI_NO_DATA:
snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_NO_DATA");
break;
case OCI_ERROR:
if (connection == 0)
snprintf(msgbuf, STACK_BUFFER_SIZE, "NULL connection");
else {
oci_status_t oci_status1;
char errorbuf[1024];
oci_status1 = OCIErrorGet(connection->err,
1,
NULL,
&errorcode,
errorbuf,
sizeof errorbuf, OCI_HTYPE_ERROR);
if (oci_status1) {
snprintf(msgbuf, STACK_BUFFER_SIZE,
"`OCIErrorGet ()' error");
} else {
snprintf(msgbuf, STACK_BUFFER_SIZE, "%s", errorbuf);
}
oci_status1 = OCIAttrGet(connection->stmt,
OCI_HTYPE_STMT,
&offset,
NULL,
OCI_ATTR_PARSE_ERROR_OFFSET,
connection->err);
if (errorcode == 1041 ||
errorcode == 3113 ||
errorcode == 12571 ||
errorcode == 28 ||
errorcode == 1012 ||
errorcode == 24324) {
/* 3113 is 'end-of-file on communications channel', which
* happens if the oracle process dies
* 12571 is TNS:packet writer failure, which also happens if
* the oracle process dies
* 1041 is the dreaded "hostdef extension doesn't exist error,
* which means the db handle is screwed and can't be used
* for anything else.
*
* In either case, close and re-open the handle to clear the
* error condition
*/
Ns_OracleFlush(dbh);
Ns_OracleCloseDb(dbh);
}
if (errorcode == 20 || errorcode == 1034) {
/* ora-00020 means 'maximum number of processes exceeded.
* ora-01034 means 'oracle not available'.
* we want to make sure the oracleSID process
* goes away so we don't make the problem worse
*/
Ns_OracleCloseDb(dbh);
}
}
break;
case OCI_INVALID_HANDLE:
snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_INVALID_HANDLE");
break;
case OCI_STILL_EXECUTING:
snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_STILL_EXECUTING");
break;
case OCI_CONTINUE:
snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_CONTINUE");
break;
}
if (((errorcode == 900) || (offset > 0)) && (strlen(query) >= offset)) {
/* ora-00900 is invalid sql statment
* it seems to be the msg most likely to be a parse
* error that sets offset to 0
*/
int len;
len = snprintf(buf, STACK_BUFFER_SIZE,
"%s:%d:%s: error in `%s ()': %s\nSQL: ",
file, line, fn, ocifn, msgbuf);
if (offset > 0)
len +=
snprintf(buf + len, STACK_BUFFER_SIZE - len, "%.*s",
offset - 1, query);
snprintf(buf + len, STACK_BUFFER_SIZE - len, " !>>>!%s",
query + offset);
} else {
snprintf(buf, STACK_BUFFER_SIZE,
"%s:%d:%s: error in `%s ()': %s\nSQL: %s",
file, line, fn, ocifn, msgbuf, query);
}
Ns_Log(Error, "%s", buf);
/* We need to call this so that AOLserver will print out the relevant
* error on pages served to browsers where ClientDebug is set.
*/
snprintf(exceptbuf, EXCEPTION_CODE_SIZE, "%d", (int) errorcode);
Ns_DbSetException(dbh, exceptbuf, buf); ß here exceptbuf (allocated on the stack) and buf (dimanically allocated) are
passed to Ns_DbSetException
Ns_Free(msgbuf); ß destruction of msgbuf
Ns_Free(buf); ß destruction of buf
return 1; ß after this return neither msgbuf and buf (already destroyed) nor exceptbuf (on the stack) do formally exists
}
This code DOES work on Linux. In Windows, when raise_application_error is called the code BREAKS the server.
To make it work I had to change the code as follows:
static int
oci_error_p(const char *file, int line, const char *fn,
Ns_DbHandle * dbh, char *ocifn, char *query,
oci_status_t oci_status)
{
ora_connection_t *connection = 0;
ub2 offset = 0;
sb4 errorcode = 0;
static char msgbuf[STACK_BUFFER_SIZE +1]; ß static memory allocation of msgbuf
static buf[STACK_BUFFER_SIZE +1]; ß static memory allocation of buf
static char exceptbuf[EXCEPTION_CODE_SIZE + 1]; ß static memory allocatin of exceptbuf
if (dbh) {
connection = dbh->connection;
}
if (oci_status == OCI_SUCCESS)
return 0;
/* Until we get the logging situation worked out, return
* OCI_SUCCESS_WITH_INFO as a pure success.
*/
if (oci_status == OCI_SUCCESS_WITH_INFO)
return 0;
/* If the query is long, nilp will return "[too long]";
* if null (we're not doing a query yet, e.g., could
* be opening db), then "[nil]"
*/
query = nilp(query);
// msgbuf = (char *) Ns_Malloc(STACK_BUFFER_SIZE * sizeof(char));
// buf = (char *) Ns_Malloc(STACK_BUFFER_SIZE * sizeof(char));
*msgbuf = 0;
switch (oci_status) {
case OCI_NEED_DATA:
snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_NEED_DATA");
break;
case OCI_NO_DATA:
snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_NO_DATA");
break;
case OCI_ERROR:
if (connection == 0)
snprintf(msgbuf, STACK_BUFFER_SIZE, "NULL connection");
else {
oci_status_t oci_status1;
char errorbuf[1024];
oci_status1 = OCIErrorGet(connection->err,
1,
NULL,
&errorcode,
errorbuf,
sizeof errorbuf, OCI_HTYPE_ERROR);
if (oci_status1) {
snprintf(msgbuf, STACK_BUFFER_SIZE,
"`OCIErrorGet ()' error");
} else {
snprintf(msgbuf, STACK_BUFFER_SIZE, "%s", errorbuf);
}
oci_status1 = OCIAttrGet(connection->stmt,
OCI_HTYPE_STMT,
&offset,
NULL,
OCI_ATTR_PARSE_ERROR_OFFSET,
connection->err);
if (errorcode == 1041 ||
errorcode == 3113 ||
errorcode == 12571 ||
errorcode == 28 ||
errorcode == 1012 ||
errorcode == 24324) {
/* 3113 is 'end-of-file on communications channel', which
* happens if the oracle process dies
* 12571 is TNS:packet writer failure, which also happens if
* the oracle process dies
* 1041 is the dreaded "hostdef extension doesn't exist error,
* which means the db handle is screwed and can't be used
* for anything else.
*
* In either case, close and re-open the handle to clear the
* error condition
*/
Ns_OracleFlush(dbh);
Ns_OracleCloseDb(dbh);
}
if (errorcode == 20 || errorcode == 1034) {
/* ora-00020 means 'maximum number of processes exceeded.
* ora-01034 means 'oracle not available'.
* we want to make sure the oracleSID process
* goes away so we don't make the problem worse
*/
Ns_OracleCloseDb(dbh);
}
}
break;
case OCI_INVALID_HANDLE:
snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_INVALID_HANDLE");
break;
case OCI_STILL_EXECUTING:
snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_STILL_EXECUTING");
break;
case OCI_CONTINUE:
snprintf(msgbuf, STACK_BUFFER_SIZE, "Error - OCI_CONTINUE");
break;
}
if (((errorcode == 900) || (offset > 0)) && (strlen(query) >= offset)) {
/* ora-00900 is invalid sql statment
* it seems to be the msg most likely to be a parse
* error that sets offset to 0
*/
int len;
len = snprintf(buf, STACK_BUFFER_SIZE,
"%s:%d:%s: error in `%s ()': %s\nSQL: ",
file, line, fn, ocifn, msgbuf);
if (offset > 0)
len +=
snprintf(buf + len, STACK_BUFFER_SIZE - len, "%.*s",
offset - 1, query);
snprintf(buf + len, STACK_BUFFER_SIZE - len, " !>>>!%s",
query + offset);
} else {
snprintf(buf, STACK_BUFFER_SIZE,
"%s:%d:%s: error in `%s ()': %s\nSQL: %s",
file, line, fn, ocifn, msgbuf, query);
}
Ns_Log(Error, "%s", buf);
/* We need to call this so that AOLserver will print out the relevant
* error on pages served to browsers where ClientDebug is set.
*/
snprintf(exceptbuf, EXCEPTION_CODE_SIZE, "%d", (int) errorcode);
Ns_DbSetException(dbh, exceptbuf, buf);
// Ns_Free(msgbuf);
// Ns_Free(buf);
return 1;
}
Any suggestion on why this is happening? How about incorporating these changes in the standard nsoracle.c module?
Thanks in advance,
Maurizio
-- AOLserver - http://www.aolserver.com/
To Remove yourself from this list, simply send an email to <list...@listserv.aol.com> with the body of "SIGNOFF AOLSERVER" in the email message. You can leave the Subject: field of your email blank.
...
[Message clipped]
Dear Andrews,
Thanks a lot for your e-mail message and sorry for my late reply.
When I use the standard "oci_error_p” function (without the static memory declaration) and I try to execute the following piece of TCL code:
db_exec_plsql fred {
begin
raise_application_error (-20000,
'this is the user error message this is the user error message this is the user error message this is the user error message this is the user error message this is the user error message this is the user error message this is the user error message this is the user error message');
end;
}
The NSD.EXE program stops running. I traced its execution with Dependency Walker and this is what I get:
GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIStmtExecute") called from "OCI.DLL" at address 0x019A2E25 and returned 0x61C268A4 by thread 8.
GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIErrorGet") called from "OCI.DLL" at address 0x019A329D and returned 0x61C26B1E by thread 8.
GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIAttrGet") called from "OCI.DLL" at address 0x019A39A1 and returned 0x61C26D5E by thread 8.
GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIHandleFree") called from "OCI.DLL" at address 0x019A281D and returned 0x61C26012 by thread 8.
Second chance exception 0xC0000005 (Access Violation) occurred in "NSDB.DLL" at address 0x00C212B2 by thread 8.
Thread 5 exited with code 255 (0xFF).
Thread 16 exited with code 255 (0xFF).
Thread 15 exited with code 255 (0xFF).
On the contrary if I try the same piece of TCL with my changes in “oci_error_p”, this is what I get:
GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIStmtExecute") called from "OCI.DLL" at address 0x019F2E25 and returned 0x61C268A4 by thread 6.
GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIErrorGet") called from "OCI.DLL" at address 0x019F329D and returned 0x61C26B1E by thread 6.
GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIAttrGet") called from "OCI.DLL" at address 0x019F39A1 and returned 0x61C26D5E by thread 6.
GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIHandleFree") called from "OCI.DLL" at address 0x019F281D and returned 0x61C26012 by thread 6.
GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIHandleAlloc") called from "OCI.DLL" at address 0x019F27D9 and returned 0x61C25FCA by thread 6.
GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIStmtPrepare") called from "OCI.DLL" at address 0x019F2BA5 and returned 0x61C2651C by thread 6.
GetProcAddress(0x61C20000 [ORACLIENT10.DLL], "OCIAttrGet") called from "OCI.DLL" at address 0x019F39A1 and returned 0x61C26D5E by thread 6.
Etc….
Till we don’t solve this problem, I’ll stick to my version, with the memory statically allocated.
Hope it helps,
Maurizio