View | Details | Raw Unified | Return to bug 597293
Collapse All | Expand All

(-)a/ChangeLog (-6 / +13 lines)
Lines 1-11 Link Here
1
---------------------------------------------------------------------------
1
---------------------------------------------------------------------------
2
Version 5.4.2  [v5-stable] (rgerhards), 2010-03-??
2
Version 5.4.2  [v5-stable] (rgerhards), 2010-03-??
3
- bugfix(minor): status variable was uninitialized
3
- bugfix(kind of): output plugin retry behaviour could cause engine to loop
4
  However, this would have caused harm only if NO parser modules at
4
  The rsyslog engine did not guard itself against output modules that do
5
  all were loaded, which would lead to a defunctional configuration
5
  not properly convey back the tryResume() behaviour. This then leads to
6
  at all. And, even more important, this is impossible as two parser
6
  what looks like an endless loop. I consider this to be a bug of the 
7
  modules are built-in and thus can not be "not loaded", so we always
7
  engine not only because it should be hardened against plugin misbehaviour,
8
  have a minimum of two.
8
  but also because plugins may not be totally able to avoid this situation
9
  (depending on the type of and processing done by the plugin).
9
- bugfix: testbench failed when not executed in UTC+1 timezone
10
- bugfix: testbench failed when not executed in UTC+1 timezone
10
  accidently, the time zone information was kept inside some
11
  accidently, the time zone information was kept inside some
11
  to-be-checked-for responses
12
  to-be-checked-for responses
Lines 13-18 Version 5.4.2 [v5-stable] (rgerhards), 2010-03-?? Link Here
13
  message-induced off-by-one error (potential segfault) (see 4.6.2)
14
  message-induced off-by-one error (potential segfault) (see 4.6.2)
14
  The analysis has been completed and a better fix been crafted and 
15
  The analysis has been completed and a better fix been crafted and 
15
  integrated.
16
  integrated.
17
- bugfix(minor): status variable was uninitialized
18
  However, this would have caused harm only if NO parser modules at
19
  all were loaded, which would lead to a defunctional configuration
20
  at all. And, even more important, this is impossible as two parser
21
  modules are built-in and thus can not be "not loaded", so we always
22
  have a minimum of two.
16
---------------------------------------------------------------------------
23
---------------------------------------------------------------------------
17
Version 5.4.1  [v5-stable] (rgerhards), 2010-03-??
24
Version 5.4.1  [v5-stable] (rgerhards), 2010-03-??
18
- added new property replacer option "date-rfc3164-buggyday" primarily
25
- added new property replacer option "date-rfc3164-buggyday" primarily
(-)a/action.c (-4 / +24 lines)
Lines 445-450 static void actionCommitted(action_t *pThis) Link Here
445
static void actionRetry(action_t *pThis)
445
static void actionRetry(action_t *pThis)
446
{
446
{
447
	actionSetState(pThis, ACT_STATE_RTRY);
447
	actionSetState(pThis, ACT_STATE_RTRY);
448
	pThis->iResumeOKinRow++;
448
}
449
}
449
450
450
451
Lines 480-502 static inline void actionSuspend(action_t *pThis, time_t ttNow) Link Here
480
/* actually do retry processing. Note that the function receives a timestamp so
481
/* actually do retry processing. Note that the function receives a timestamp so
481
 * that we do not need to call the (expensive) time() API.
482
 * that we do not need to call the (expensive) time() API.
482
 * Note that we do the full retry processing here, doing the configured number of
483
 * Note that we do the full retry processing here, doing the configured number of
483
 * iterations.
484
 * iterations. -- rgerhards, 2009-05-07
484
 * rgerhards, 2009-05-07
485
 * We need to guard against module which always return RS_RET_OK from their tryResume()
486
 * entry point. This is invalid, but has harsh consequences: it will cause the rsyslog
487
 * engine to go into a tight loop. That obviously is not acceptable. As such, we track the
488
 * count of iterations that a tryResume returning RS_RET_OK is immediately followed by
489
 * an unsuccessful call to doAction(). If that happens more than 1,000 times, we assume 
490
 * the return acutally is a RS_RET_SUSPENDED. In order to go through the various 
491
 * resumption stages, we do this for every 1000 requests. This magic number 1000 may
492
 * not be the most appropriate, but it should be thought of a "if nothing else helps"
493
 * kind of facility: in the first place, the module should return a proper indication
494
 * of its inability to recover. -- rgerhards, 2010-04-26.
485
 */
495
 */
486
static rsRetVal actionDoRetry(action_t *pThis, time_t ttNow)
496
static rsRetVal actionDoRetry(action_t *pThis, time_t ttNow)
487
{
497
{
488
	int iRetries;
498
	int iRetries;
489
	int iSleepPeriod;
499
	int iSleepPeriod;
500
	int bTreatOKasSusp;
490
	DEFiRet;
501
	DEFiRet;
491
502
492
	ASSERT(pThis != NULL);
503
	ASSERT(pThis != NULL);
493
504
494
	iRetries = 0;
505
	iRetries = 0;
495
	while(pThis->eState == ACT_STATE_RTRY) {
506
	while(pThis->eState == ACT_STATE_RTRY) {
507
dbgprintf("YYY: resume in row %d\n", pThis->iResumeOKinRow);
496
		iRet = pThis->pMod->tryResume(pThis->pModData);
508
		iRet = pThis->pMod->tryResume(pThis->pModData);
497
		if(iRet == RS_RET_OK) {
509
		if((pThis->iResumeOKinRow > 999) && (pThis->iResumeOKinRow % 1000 == 0)) {
510
			bTreatOKasSusp = 1;
511
		} else {
512
			bTreatOKasSusp = 0;
513
		}
514
		if((iRet == RS_RET_OK) && (!bTreatOKasSusp)) {
498
			actionSetState(pThis, ACT_STATE_RDY);
515
			actionSetState(pThis, ACT_STATE_RDY);
499
		} else if(iRet == RS_RET_SUSPENDED) {
516
		} else if(iRet == RS_RET_SUSPENDED || bTreatOKasSusp) {
500
			/* max retries reached? */
517
			/* max retries reached? */
501
			if((pThis->iResumeRetryCount != -1 && iRetries >= pThis->iResumeRetryCount)) {
518
			if((pThis->iResumeRetryCount != -1 && iRetries >= pThis->iResumeRetryCount)) {
502
				actionSuspend(pThis, ttNow);
519
				actionSuspend(pThis, ttNow);
Lines 715-727 actionCallDoAction(action_t *pThis, msg_t *pMsg) Link Here
715
	switch(iRet) {
732
	switch(iRet) {
716
		case RS_RET_OK:
733
		case RS_RET_OK:
717
			actionCommitted(pThis);
734
			actionCommitted(pThis);
735
			pThis->iResumeOKinRow = 0; /* we had a successful call! */
718
			break;
736
			break;
719
		case RS_RET_DEFER_COMMIT:
737
		case RS_RET_DEFER_COMMIT:
738
			pThis->iResumeOKinRow = 0; /* we had a successful call! */
720
			/* we are done, action state remains the same */
739
			/* we are done, action state remains the same */
721
			break;
740
			break;
722
		case RS_RET_PREVIOUS_COMMITTED:
741
		case RS_RET_PREVIOUS_COMMITTED:
723
			/* action state remains the same, but we had a commit. */
742
			/* action state remains the same, but we had a commit. */
724
			pThis->bHadAutoCommit = 1;
743
			pThis->bHadAutoCommit = 1;
744
			pThis->iResumeOKinRow = 0; /* we had a successful call! */
725
			break;
745
			break;
726
		case RS_RET_SUSPENDED:
746
		case RS_RET_SUSPENDED:
727
			actionRetry(pThis);
747
			actionRetry(pThis);
(-)a/action.h (-1 / +2 lines)
Lines 56-63 struct action_s { Link Here
56
	bool	bWriteAllMarkMsgs;/* should all mark msgs be written (not matter how recent the action was executed)? */
56
	bool	bWriteAllMarkMsgs;/* should all mark msgs be written (not matter how recent the action was executed)? */
57
	int	iSecsExecOnceInterval; /* if non-zero, minimum seconds to wait until action is executed again */
57
	int	iSecsExecOnceInterval; /* if non-zero, minimum seconds to wait until action is executed again */
58
	action_state_t eState;	/* current state of action */
58
	action_state_t eState;	/* current state of action */
59
	int	bHadAutoCommit;	/* did an auto-commit happen during doAction()? */
59
	bool	bHadAutoCommit;	/* did an auto-commit happen during doAction()? */
60
	time_t	ttResumeRtry;	/* when is it time to retry the resume? */
60
	time_t	ttResumeRtry;	/* when is it time to retry the resume? */
61
	int	iResumeOKinRow;	/* number of times in a row that resume said OK with an immediate failure following */
61
	int	iResumeInterval;/* resume interval for this action */
62
	int	iResumeInterval;/* resume interval for this action */
62
	int	iResumeRetryCount;/* how often shall we retry a suspended action? (-1 --> eternal) */
63
	int	iResumeRetryCount;/* how often shall we retry a suspended action? (-1 --> eternal) */
63
	int	iNbrResRtry;	/* number of retries since last suspend */
64
	int	iNbrResRtry;	/* number of retries since last suspend */
(-)a/tests/pipe_noreader.sh (-1 / +8 lines)
Lines 4-9 Link Here
4
# as a permanent testcase. For some details, please see bug tracker
4
# as a permanent testcase. For some details, please see bug tracker
5
# http://bugzilla.adiscon.com/show_bug.cgi?id=186
5
# http://bugzilla.adiscon.com/show_bug.cgi?id=186
6
#
6
#
7
# IMPORTANT: we do NOT check any result message set. The whole point in
8
# this test is to verify that we do NOT run into an eternal loop. As such,
9
# the test is "PASS", if rsyslogd terminates. If it does not terminate, we
10
# obviously do not cause "FAIL", but processing will hang, which should be
11
# a good-enough indication of failure.
12
#
7
# added 2010-04-26 by Rgerhards
13
# added 2010-04-26 by Rgerhards
8
# This file is part of the rsyslog project, released  under GPLv3
14
# This file is part of the rsyslog project, released  under GPLv3
9
echo ===============================================================================
15
echo ===============================================================================
Lines 18-22 source $srcdir/diag.sh startup pipe_noreader.conf Link Here
18
source $srcdir/diag.sh tcpflood -m1000 -d500
24
source $srcdir/diag.sh tcpflood -m1000 -d500
19
source $srcdir/diag.sh shutdown-when-empty # shut down rsyslogd when done processing messages
25
source $srcdir/diag.sh shutdown-when-empty # shut down rsyslogd when done processing messages
20
source $srcdir/diag.sh wait-shutdown       # and wait for it to terminate
26
source $srcdir/diag.sh wait-shutdown       # and wait for it to terminate
21
source $srcdir/diag.sh seq-check 0 999
27
# NO need to check seqno -- see header comment
28
echo we did not loop, so the test is sucessfull
22
source $srcdir/diag.sh exit
29
source $srcdir/diag.sh exit

Return to bug 597293