diff --git a/postfix/HISTORY b/postfix/HISTORY index 59019d5b2..9cb1b12b5 100644 --- a/postfix/HISTORY +++ b/postfix/HISTORY @@ -13813,3 +13813,29 @@ Apologies for any names omitted. Bugfix (introduced: 20071004) missing exception handling in smtp-sink per-command delay feature. Victor Duchovni. File: smtpstone/smtp-sink.c. + +20071117-20 + + Revised queue manager with separate mechanisms for + per-destination concurrency control and dead destination + detection. The concurrency control supports non-integer + feedback for more gradual concurrency adjustments, and uses + hysteresis to avoid rapid oscillations. A destination is + declared "dead" after a configurable number of pseudo-cohorts + (number of deliveries equal to a destination's concurrency) + report connection or handshake failure. This work began + with a discussion that Wietse started with Patrik Rak and + Victor Duchovni late January 2004, and that Victor revived + late October 2007. To establish a baseline for further + improvement, Wietse implemented a few simple mechanisms. + + Configuration parameters: qmgr_concurrency_feedback_debug, + qmgr_negative_concurrency_feedback_hysteresis, + qmgr_negative_concurrency_feedback_style, + qmgr_positive_concurrency_feedback_hysteresis, + qmgr_positive_concurrency_feedback_style, qmgr_sacrificial_cohorts. + See postconf(5) for detailed information. Right now, the + defaults are compatible with older Postfix versions. After + further review the number of parameters will be consolidated + and the defaults will select the better algorithms. Files: + qmgr/qmgr_queue.c, qmgr/qmgr_deliver.c. diff --git a/postfix/RELEASE_NOTES b/postfix/RELEASE_NOTES index 62f58225d..c9dff4ce1 100644 --- a/postfix/RELEASE_NOTES +++ b/postfix/RELEASE_NOTES @@ -17,7 +17,46 @@ Incompatibility with Postfix 2.3 and earlier If you upgrade from Postfix 2.3 or earlier, read RELEASE_NOTES-2.4 before proceeding. 
-Major changes with Postfix snapshot 20071110 +Major changes with Postfix snapshot 20071121 +============================================ + +Revised queue manager with separate mechanisms for per-destination +concurrency control and for dead destination detection. The +concurrency control supports non-integer feedback to allow for more +gradual concurrency adjustments, and uses hysteresis to avoid rapid +oscillations. A destination is declared "dead" after a configurable +number of pseudo-cohorts(*) report connection or handshake failure. + +(*) A pseudo-cohort is a number of delivery requests equal to a + destination's delivery concurrency. + +The drawbacks of the old +/-1 feedback scheduler are a) overshoot +due to exponential delivery concurrency growth with each pseudo-cohort(*) +(5-10-20...); b) throttling down to zero concurrency after a single +pseudo-cohort(*) failure. The second problem was especially an issue +with low-concurrency channels where a single failure could be +sufficient to mark a destination as "dead", and suspend further +deliveries. + +The new code is a laboratory model with a multitude of configuration +parameters, so that developers can experiment with different feedback +functions and hysteresis values. This is a baseline against which +further improvements will be measured: a) is the additional improvement +worth the additional complexity; b) is the design sound, i.e. free +from arbitrary constants and other tweaks that optimize for a narrow +range of application. + +New main.cf parameters: qmgr_concurrency_feedback_debug, +qmgr_negative_concurrency_feedback_hysteresis, +qmgr_negative_concurrency_feedback_style, +qmgr_positive_concurrency_feedback_hysteresis, +qmgr_positive_concurrency_feedback_style, +qmgr_sacrificial_cohorts. See postconf(5) for extensive descriptions. + +The default parameter settings are backwards compatible with older +Postfix versions. 
However, after a testing period, the number of +parameters will be consolidated, and the default settings will be +changed to take advantage of the "better" algorithm. + +Major changes with Postfix snapshot 20071111 ============================================ Header/body checks are now available in the SMTP client, after the diff --git a/postfix/html/postconf.5.html b/postfix/html/postconf.5.html index 5cef8f2da..2d9054f51 100644 --- a/postfix/html/postconf.5.html +++ b/postfix/html/postconf.5.html @@ -5915,6 +5915,18 @@ This feature is available in Postfix 2.0 and later.
+ + +Make the queue manager's feedback algorithm verbose for performance +analysis purposes.
+ +This feature is temporarily available in Postfix 2.5; its final +form is likely to change.
+ +The per-destination integer amount of negative concurrency +feedback that must accumulate between negative adjustments of a +destination's delivery concurrency. The concurrency adjustment is +equal in size to the negative hysteresis value, and is applied at +the beginning of a cycle of (hysteresis / feedback) steps. +At that same time, the destination's positive feedback hysteresis +cycle is reset to its beginning.
+ +This feature is temporarily available in Postfix 2.5; its final +form is likely to change. The default setting is compatible with +earlier Postfix versions.
+ + +The per-destination amount of negative delivery concurrency +feedback, after a delivery completes with a connection or handshake +failure.
+ +A pseudo-cohort is a number of deliveries equal to the destination's +delivery concurrency.
+ +This feature is temporarily available in Postfix 2.5; its final +form is likely to change. The default setting is compatible with +earlier Postfix versions.
+ + +The per-destination integer amount of positive concurrency +feedback that must accumulate before positive adjustments of a +destination's delivery concurrency. The concurrency adjustment is +equal in size to the positive hysteresis value, and is applied at +the end of a cycle of (hysteresis / feedback) steps. At that +same time, the destination's negative feedback hysteresis cycle is +reset to its beginning.
+ +This feature is temporarily available in Postfix 2.5; its final +form is likely to change. The default setting is compatible with +earlier Postfix versions.
+ + +The per-destination amount of positive delivery concurrency +feedback, after a delivery completes without connection or handshake +failure.
+ +A pseudo-cohort is a number of deliveries equal to the destination's +delivery concurrency.
+ +This feature is temporarily available in Postfix 2.5; its final +form is likely to change. The default setting is compatible with +earlier Postfix versions.
+ + +How many pseudo-cohorts must suffer connection or handshake +failure before a specific destination is considered unavailable +(and further delivery is suspended). A pseudo-cohort is a number +of deliveries equal to a destination's concurrency. The pseudo-cohort +failure count is reset each time a delivery completes without +connection or handshake failure for that specific destination.
+ +This feature is temporarily available in Postfix 2.5; its final +form is likely to change. The default setting is compatible with +earlier Postfix versions.
+ +This feature is available in Postfix 2.5 and later.
+ +%PARAM qmgr_concurrency_feedback_debug no + +Make the queue manager's feedback algorithm verbose for performance +analysis purposes.
+ +This feature is temporarily available in Postfix 2.5; its final +form is likely to change.
+ +%PARAM qmgr_sacrificial_cohorts 1 + +How many pseudo-cohorts must suffer connection or handshake +failure before a specific destination is considered unavailable +(and further delivery is suspended). A pseudo-cohort is a number +of deliveries equal to a destination's concurrency. The pseudo-cohort +failure count is reset each time a delivery completes without +connection or handshake failure for that specific destination.
+ +This feature is temporarily available in Postfix 2.5; its final +form is likely to change. The default setting is compatible with +earlier Postfix versions.
+ +%PARAM qmgr_negative_concurrency_feedback_hysteresis 1 + +The per-destination integer amount of negative concurrency +feedback that must accumulate between negative adjustments of a +destination's delivery concurrency. The concurrency adjustment is +equal in size to the negative hysteresis value, and is applied at +the beginning of a cycle of (hysteresis / feedback) steps. +At that same time, the destination's positive feedback hysteresis +cycle is reset to its beginning.
+ +This feature is temporarily available in Postfix 2.5; its final +form is likely to change. The default setting is compatible with +earlier Postfix versions.
+ +%PARAM qmgr_positive_concurrency_feedback_hysteresis 1 + +The per-destination integer amount of positive concurrency +feedback that must accumulate before positive adjustments of a +destination's delivery concurrency. The concurrency adjustment is +equal in size to the positive hysteresis value, and is applied at +the end of a cycle of (hysteresis / feedback) steps. At that +same time, the destination's negative feedback hysteresis cycle is +reset to its beginning.
+ +This feature is temporarily available in Postfix 2.5; its final +form is likely to change. The default setting is compatible with +earlier Postfix versions.
+ +%PARAM qmgr_negative_concurrency_feedback_style fixed_1 + +The per-destination amount of negative delivery concurrency +feedback, after a delivery completes with a connection or handshake +failure.
+ +A pseudo-cohort is a number of deliveries equal to the destination's +delivery concurrency.
+ +This feature is temporarily available in Postfix 2.5; its final +form is likely to change. The default setting is compatible with +earlier Postfix versions.
+ +%PARAM qmgr_positive_concurrency_feedback_style fixed_1 + +The per-destination amount of positive delivery concurrency +feedback, after a delivery completes without connection or handshake +failure.
+ +A pseudo-cohort is a number of deliveries equal to the destination's +delivery concurrency.
+ +This feature is temporarily available in Postfix 2.5; its final +form is likely to change. The default setting is compatible with +earlier Postfix versions.
diff --git a/postfix/src/global/mail_params.h b/postfix/src/global/mail_params.h index 7b9110107..35b4a5558 100644 --- a/postfix/src/global/mail_params.h +++ b/postfix/src/global/mail_params.h @@ -2830,6 +2830,39 @@ extern char *var_smtp_body_chks; #define VAR_LMTP_BODY_CHKS "lmtp_body_checks" #define DEF_LMTP_BODY_CHKS "" + /* + * Scheduler concurrency feedback algorithms. + */ +#define VAR_QMGR_POS_FDBACK "qmgr_positive_concurrency_feedback_style" +#define DEF_QMGR_POS_FDBACK QMGR_FDBACK_NAME_FIXED_1 +extern char *var_qmgr_pos_feedback; + +#define VAR_QMGR_NEG_FDBACK "qmgr_negative_concurrency_feedback_style" +#define DEF_QMGR_NEG_FDBACK QMGR_FDBACK_NAME_FIXED_1 +extern char *var_qmgr_neg_feedback; + +#define QMGR_FDBACK_NAME_FIXED_1 "fixed_1" +#define QMGR_FDBACK_NAME_INVERSE_1 "inverse_1" /* deprecated */ +#define QMGR_FDBACK_NAME_INVERSE_WIN "inverse_concurrency" +#define QMGR_FDBACK_NAME_INV_SQRT "inverse_sqrt" /* deprecated */ +#define QMGR_FDBACK_NAME_INV_SQRT_WIN "inverse_sqrt_concurrency" + +#define VAR_QMGR_POS_HYST "qmgr_positive_concurrency_feedback_hysteresis" +#define DEF_QMGR_POS_HYST 1 +extern int var_qmgr_pos_hysteresis; + +#define VAR_QMGR_NEG_HYST "qmgr_negative_concurrency_feedback_hysteresis" +#define DEF_QMGR_NEG_HYST 1 +extern int var_qmgr_neg_hysteresis; + +#define VAR_QMGR_SAC_COHORTS "qmgr_sacrificial_cohorts" +#define DEF_QMGR_SAC_COHORTS 1 +extern int var_qmgr_sac_cohorts; + +#define VAR_QMGR_FDBACK_DEBUG "qmgr_concurrency_feedback_debug" +#define DEF_QMGR_FDBACK_DEBUG 0 +extern bool var_qmgr_feedback_debug; + /* LICENSE /* .ad /* .fi diff --git a/postfix/src/global/mail_version.h b/postfix/src/global/mail_version.h index b4d627ed7..51f2f6868 100644 --- a/postfix/src/global/mail_version.h +++ b/postfix/src/global/mail_version.h @@ -20,7 +20,7 @@ * Patches change both the patchlevel and the release date. Snapshots have no * patchlevel; they change the release date only. 
*/ -#define MAIL_RELEASE_DATE "20071111" +#define MAIL_RELEASE_DATE "20071121" #define MAIL_VERSION_NUMBER "2.5" #ifdef SNAPSHOT diff --git a/postfix/src/qmgr/Makefile.in b/postfix/src/qmgr/Makefile.in index d7db1a073..ea62bdecc 100644 --- a/postfix/src/qmgr/Makefile.in +++ b/postfix/src/qmgr/Makefile.in @@ -14,7 +14,7 @@ CFLAGS = $(DEBUG) $(OPT) $(DEFS) TESTPROG= PROG = qmgr INC_DIR = ../../include -LIBS = ../../lib/libmaster.a ../../lib/libglobal.a ../../lib/libutil.a +LIBS = ../../lib/libmaster.a ../../lib/libglobal.a ../../lib/libutil.a -lm .c.o:; $(CC) $(CFLAGS) -c $*.c @@ -290,6 +290,7 @@ qmgr_queue.o: ../../include/htable.h qmgr_queue.o: ../../include/mail_params.h qmgr_queue.o: ../../include/msg.h qmgr_queue.o: ../../include/mymalloc.h +qmgr_queue.o: ../../include/name_code.h qmgr_queue.o: ../../include/recipient_list.h qmgr_queue.o: ../../include/scan_dir.h qmgr_queue.o: ../../include/sys_defs.h diff --git a/postfix/src/qmgr/qmgr.c b/postfix/src/qmgr/qmgr.c index 4cc999671..c932d1c2a 100644 --- a/postfix/src/qmgr/qmgr.c +++ b/postfix/src/qmgr/qmgr.c @@ -205,6 +205,29 @@ /* destination. /* .IP "\fItransport\fB_destination_concurrency_limit ($default_destination_concurrency_limit)\fR" /* Idem, for delivery via the named message \fItransport\fR. +/* .IP "\fBqmgr_concurrency_feedback_debug (no)\fR" +/* Make the queue manager's feedback algorithm verbose for performance +/* analysis purposes. +/* .IP "\fBqmgr_negative_concurrency_feedback_hysteresis (1)\fR" +/* The per-destination integer amount of negative concurrency +/* feedback that must accumulate between negative adjustments of a +/* destination's delivery concurrency. +/* .IP "\fBqmgr_negative_concurrency_feedback_style (fixed_1)\fR" +/* The per-destination amount of negative delivery concurrency +/* feedback, after a delivery completes with a connection or handshake +/* failure. 
+/* .IP "\fBqmgr_positive_concurrency_feedback_hysteresis (1)\fR" +/* The per-destination integer amount of positive concurrency +/* feedback that must accumulate before positive adjustments of a +/* destination's delivery concurrency. +/* .IP "\fBqmgr_positive_concurrency_feedback_style (fixed_1)\fR" +/* The per-destination amount of positive delivery concurrency +/* feedback, after a delivery completes without connection or handshake +/* failure. +/* .IP "\fBqmgr_sacrificial_cohorts (1)\fR" +/* How many pseudo-cohorts must suffer connection or handshake +/* failure before a specific destination is considered unavailable +/* (and further delivery is suspended). /* RECIPIENT SCHEDULING CONTROLS /* .ad /* .fi @@ -238,15 +261,17 @@ /* OTHER RESOURCE AND RATE CONTROLS /* .ad /* .fi -/* .IP "\fBminimal_backoff_time (version dependent)\fR" -/* The minimal time between attempts to deliver a deferred message. +/* .IP "\fBminimal_backoff_time (300s)\fR" +/* The minimal time between attempts to deliver a deferred message; +/* prior to Postfix 2.4 the default value was 1000s. /* .IP "\fBmaximal_backoff_time (4000s)\fR" /* The maximal time between attempts to deliver a deferred message. /* .IP "\fBmaximal_queue_lifetime (5d)\fR" /* The maximal time a message is queued before it is sent back as /* undeliverable. -/* .IP "\fBqueue_run_delay (version dependent)\fR" -/* The time between deferred queue scans by the queue manager. +/* .IP "\fBqueue_run_delay (300s)\fR" +/* The time between deferred queue scans by the queue manager; +/* prior to Postfix 2.4 the default value was 1000s. /* .IP "\fBtransport_retry_time (60s)\fR" /* The time between attempts by the Postfix queue manager to contact /* a malfunctioning message delivery transport. 
@@ -390,6 +415,12 @@ int var_local_rcpt_lim; int var_proc_limit; bool var_verp_bounce_off; int var_qmgr_clog_warn_time; +char *var_qmgr_pos_feedback; +char *var_qmgr_neg_feedback; +int var_qmgr_pos_hysteresis; +int var_qmgr_neg_hysteresis; +int var_qmgr_sac_cohorts; +int var_qmgr_feedback_debug; static QMGR_SCAN *qmgr_scans[2]; @@ -614,6 +645,11 @@ static void qmgr_post_init(char *name, char **unused_argv) qmgr_scans[QMGR_SCAN_IDX_DEFERRED] = qmgr_scan_create(MAIL_QUEUE_DEFERRED); qmgr_scan_request(qmgr_scans[QMGR_SCAN_IDX_INCOMING], QMGR_SCAN_START); qmgr_deferred_run_event(0, (char *) 0); + + /* + * Scheduler initialization. + */ + qmgr_queue_feedback_init(); } MAIL_VERSION_STAMP_DECLARE; @@ -624,6 +660,8 @@ int main(int argc, char **argv) { static CONFIG_STR_TABLE str_table[] = { VAR_DEFER_XPORTS, DEF_DEFER_XPORTS, &var_defer_xports, 0, 0, + VAR_QMGR_POS_FDBACK, DEF_QMGR_POS_FDBACK, &var_qmgr_pos_feedback, 1, 0, + VAR_QMGR_NEG_FDBACK, DEF_QMGR_NEG_FDBACK, &var_qmgr_neg_feedback, 1, 0, 0, }; static CONFIG_TIME_TABLE time_table[] = { @@ -654,11 +692,15 @@ int main(int argc, char **argv) VAR_LOCAL_RCPT_LIMIT, DEF_LOCAL_RCPT_LIMIT, &var_local_rcpt_lim, 0, 0, VAR_LOCAL_CON_LIMIT, DEF_LOCAL_CON_LIMIT, &var_local_con_lim, 0, 0, VAR_PROC_LIMIT, DEF_PROC_LIMIT, &var_proc_limit, 1, 0, + VAR_QMGR_POS_HYST, DEF_QMGR_POS_HYST, &var_qmgr_pos_hysteresis, 1, 0, + VAR_QMGR_NEG_HYST, DEF_QMGR_NEG_HYST, &var_qmgr_neg_hysteresis, 1, 0, + VAR_QMGR_SAC_COHORTS, DEF_QMGR_SAC_COHORTS, &var_qmgr_sac_cohorts, 1, 0, 0, }; static CONFIG_BOOL_TABLE bool_table[] = { VAR_ALLOW_MIN_USER, DEF_ALLOW_MIN_USER, &var_allow_min_user, VAR_VERP_BOUNCE_OFF, DEF_VERP_BOUNCE_OFF, &var_verp_bounce_off, + VAR_QMGR_FDBACK_DEBUG, DEF_QMGR_FDBACK_DEBUG, &var_qmgr_feedback_debug, 0, }; diff --git a/postfix/src/qmgr/qmgr.h b/postfix/src/qmgr/qmgr.h index 6392e098f..7490e842e 100644 --- a/postfix/src/qmgr/qmgr.h +++ b/postfix/src/qmgr/qmgr.h @@ -198,6 +198,9 @@ struct QMGR_QUEUE { int todo_refcount; /* queue 
entries (todo list) */ int busy_refcount; /* queue entries (busy list) */ int window; /* slow open algorithm */ + double success; /* cumulative positive feedback */ + double failure; /* cumulative negative feedback */ + double fail_cohorts; /* pseudo-cohort failure count */ QMGR_TRANSPORT *transport; /* transport linkage */ QMGR_ENTRY_LIST todo; /* todo queue entries */ QMGR_ENTRY_LIST busy; /* messages on the wire */ @@ -217,6 +220,7 @@ extern void qmgr_queue_done(QMGR_QUEUE *); extern void qmgr_queue_throttle(QMGR_QUEUE *, DSN *); extern void qmgr_queue_unthrottle(QMGR_QUEUE *); extern QMGR_QUEUE *qmgr_queue_find(QMGR_TRANSPORT *, const char *); +extern void qmgr_queue_feedback_init(void); #define QMGR_QUEUE_THROTTLED(q) ((q)->window <= 0) diff --git a/postfix/src/qmgr/qmgr_deliver.c b/postfix/src/qmgr/qmgr_deliver.c index 346562897..36c2a7b6b 100644 --- a/postfix/src/qmgr/qmgr_deliver.c +++ b/postfix/src/qmgr/qmgr_deliver.c @@ -317,9 +317,11 @@ static void qmgr_deliver_update(int unused_event, char *context) if (VSTRING_LEN(dsb->reason) == 0) vstring_strcpy(dsb->reason, "unknown error"); vstring_prepend(dsb->reason, SUSPENDED, sizeof(SUSPENDED) - 1); - qmgr_queue_throttle(queue, DSN_FROM_DSN_BUF(dsb)); - if (queue->window == 0) - qmgr_defer_todo(queue, &dsb->dsn); + if (queue->window > 0) { + qmgr_queue_throttle(queue, DSN_FROM_DSN_BUF(dsb)); + if (queue->window == 0) + qmgr_defer_todo(queue, &dsb->dsn); + } } } diff --git a/postfix/src/qmgr/qmgr_queue.c b/postfix/src/qmgr/qmgr_queue.c index 414de5836..4d86aea2f 100644 --- a/postfix/src/qmgr/qmgr_queue.c +++ b/postfix/src/qmgr/qmgr_queue.c @@ -50,8 +50,9 @@ /* transport. A null result means that the queue was not found. /* /* qmgr_queue_throttle() handles a delivery error, and decrements the -/* concurrency limit for the destination. When the concurrency limit -/* for a destination becomes zero, qmgr_queue_throttle() starts a timer +/* concurrency limit for the destination, with a lower bound of 1. 
+/* When the cohort failure bound is reached, qmgr_queue_throttle() +/* sets the concurrency limit to zero and starts a timer /* to re-enable delivery to the destination after a configurable delay. /* /* qmgr_queue_unthrottle() undoes qmgr_queue_throttle()'s effects. @@ -71,7 +72,7 @@ /* P.O. Box 704 /* Yorktown Heights, NY 10598, USA /* -/* Scheduler enhancements: +/* Pre-emptive scheduler enhancements: /* Patrik Rak /* Modra 6 /* 155 00, Prague, Czech Republic @@ -81,6 +82,7 @@ #include