diff --git a/bin/tools/named-journalprint.c b/bin/tools/named-journalprint.c index 20b8740021..0fcfa2d0de 100644 --- a/bin/tools/named-journalprint.c +++ b/bin/tools/named-journalprint.c @@ -13,6 +13,7 @@ #include +#include #include #include #include @@ -23,6 +24,14 @@ #include #include +const char *progname = NULL; + +static void +usage(void) { + fprintf(stderr, "Usage: %s [-x] journal\n", progname); + exit(1); +} + /* * Setup logging to use stderr. */ @@ -57,18 +66,32 @@ main(int argc, char **argv) { isc_mem_t *mctx = NULL; isc_result_t result; isc_log_t *lctx = NULL; + uint32_t flags = 0U; + int ch; - if (argc != 2) { - printf("usage: %s journal\n", argv[0]); - return (1); + progname = argv[0]; + while ((ch = isc_commandline_parse(argc, argv, "x")) != -1) { + switch (ch) { + case 'x': + flags |= DNS_JOURNAL_PRINTXHDR; + break; + default: + usage(); + } } - file = argv[1]; + argc -= isc_commandline_index; + argv += isc_commandline_index; + + if (argc != 1) { + usage(); + } + file = argv[0]; isc_mem_create(&mctx); RUNTIME_CHECK(setup_logging(mctx, stderr, &lctx) == ISC_R_SUCCESS); - result = dns_journal_print(mctx, file, stdout); + result = dns_journal_print(mctx, flags, file, stdout); if (result == DNS_R_NOJOURNAL) { fprintf(stderr, "%s\n", dns_result_totext(result)); } diff --git a/bin/tools/named-journalprint.rst b/bin/tools/named-journalprint.rst index ffa9ac9577..2ce6a05bb4 100644 --- a/bin/tools/named-journalprint.rst +++ b/bin/tools/named-journalprint.rst @@ -29,7 +29,7 @@ named-journalprint - print zone journal in human-readable form Synopsis ~~~~~~~~ -:program:`named-journalprint` {journal} +:program:`named-journalprint` [**-x**] {journal} Description ~~~~~~~~~~~ @@ -50,6 +50,9 @@ into a human-readable text format. Each line begins with ``add`` or ``del``, to indicate whether the record was added or deleted, and continues with the resource record in master-file format. 
+The ``-x`` option causes additional information about the transaction +header to be printed before each group of changes. + See Also ~~~~~~~~ diff --git a/doc/man/named-journalprint.1in b/doc/man/named-journalprint.1in index 2c86eec8df..84684cb8d6 100644 --- a/doc/man/named-journalprint.1in +++ b/doc/man/named-journalprint.1in @@ -32,7 +32,7 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .. .SH SYNOPSIS .sp -\fBnamed\-journalprint\fP {journal} +\fBnamed\-journalprint\fP [\fB\-x\fP] {journal} .SH DESCRIPTION .sp \fBnamed\-journalprint\fP prints the contents of a zone journal file in a @@ -50,6 +50,9 @@ file. into a human\-readable text format. Each line begins with \fBadd\fP or \fBdel\fP, to indicate whether the record was added or deleted, and continues with the resource record in master\-file format. +.sp +The \fB\-x\fP option causes additional information about the transaction +header to be printed before each group of changes. .SH SEE ALSO .sp \fBnamed(8)\fP, \fBnsupdate(1)\fP, BIND 9 Administrator Reference Manual. 
diff --git a/lib/dns/include/dns/events.h b/lib/dns/include/dns/events.h index 50a8d01ac6..894c4dd211 100644 --- a/lib/dns/include/dns/events.h +++ b/lib/dns/include/dns/events.h @@ -79,6 +79,7 @@ #define DNS_EVENT_RPZUPDATED (ISC_EVENTCLASS_DNS + 57) #define DNS_EVENT_STARTUPDATE (ISC_EVENTCLASS_DNS + 58) #define DNS_EVENT_TRYSTALE (ISC_EVENTCLASS_DNS + 59) +#define DNS_EVENT_ZONEFLUSH (ISC_EVENTCLASS_DNS + 60) #define DNS_EVENT_FIRSTEVENT (ISC_EVENTCLASS_DNS + 0) #define DNS_EVENT_LASTEVENT (ISC_EVENTCLASS_DNS + 65535) diff --git a/lib/dns/include/dns/journal.h b/lib/dns/include/dns/journal.h index 8acf9ab44e..4d3b73efb5 100644 --- a/lib/dns/include/dns/journal.h +++ b/lib/dns/include/dns/journal.h @@ -48,6 +48,12 @@ #define DNS_JOURNAL_SIZE_MAX INT32_MAX #define DNS_JOURNAL_SIZE_MIN 4096 +/*% Print transaction header data */ +#define DNS_JOURNAL_PRINTXHDR 0x0001 + +/*% Rewrite whole journal file instead of compacting */ +#define DNS_JOURNAL_COMPACTALL 0x0001 + /*** *** Types ***/ @@ -258,12 +264,18 @@ dns_journal_rollforward(isc_mem_t *mctx, dns_db_t *db, unsigned int options, *\li DNS_R_NOJOURNAL when journal does not exist. *\li ISC_R_NOTFOUND when current serial in not in journal. *\li ISC_R_RANGE when current serial in not in journals range. - *\li ISC_R_SUCCESS journal has been applied successfully to database. + *\li DNS_R_UPTODATE when the database was already up to date. + *\li ISC_R_SUCCESS journal has been applied successfully to the + * database without any issues. + *\li DNS_R_RECOVERABLE if successful or up to date, but the journal + * was found to contain at least one outdated transaction header. 
+ * * others */ isc_result_t -dns_journal_print(isc_mem_t *mctx, const char *filename, FILE *file); +dns_journal_print(isc_mem_t *mctx, uint32_t flags, const char *filename, + FILE *file); /* For debugging not general use */ isc_result_t @@ -286,7 +298,7 @@ dns_db_diffx(dns_diff_t *diff, dns_db_t *dba, dns_dbversion_t *dbvera, isc_result_t dns_journal_compact(isc_mem_t *mctx, char *filename, uint32_t serial, - uint32_t target_size); + uint32_t flags, uint32_t target_size); /*%< * Attempt to compact the journal if it is greater that 'target_size'. * Changes from 'serial' onwards will be preserved. If the journal diff --git a/lib/dns/journal.c b/lib/dns/journal.c index bc1ad96314..5007f0c9ef 100644 --- a/lib/dns/journal.c +++ b/lib/dns/journal.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -193,6 +194,11 @@ typedef struct { */ #define JOURNAL_HEADER_SIZE 64 /* Bytes. */ +typedef enum { + XHDR_VERSION1 = 1, + XHDR_VERSION2 = 2, +} xhdr_version_t; + /*% * The on-disk representation of the journal header. * All numbers are stored in big-endian order. @@ -216,7 +222,7 @@ typedef union { } journal_rawheader_t; /*% - * The on-disk representation of the transaction header. + * The on-disk representation of the transaction header, version 2. * There is one of these at the beginning of each transaction. */ typedef struct { @@ -226,6 +232,16 @@ typedef struct { unsigned char serial1[4]; /*%< SOA serial after update. */ } journal_rawxhdr_t; +/*% + * Old-style raw transaction header, version 1, used for backward + * compatibility mode. + */ +typedef struct { + unsigned char size[4]; + unsigned char serial0[4]; + unsigned char serial1[4]; +} journal_rawxhdr_ver1_t; + /*% * The on-disk representation of the RR header. * There is one of these at the beginning of each RR. @@ -275,16 +291,19 @@ typedef struct { * Initial contents to store in the header of a newly created * journal file. 
* - * The header starts with the magic string ";BIND LOG V9\n" + * The header starts with the magic string ";BIND LOG V9.2\n" * to identify the file as a BIND 9 journal file. An ASCII * identification string is used rather than a binary magic * number to be consistent with BIND 8 (BIND 8 journal files * are ASCII text files). */ -static journal_header_t initial_journal_header = { +static journal_header_t journal_header_ver1 = { ";BIND LOG V9\n", { 0, 0 }, { 0, 0 }, 0, 0, 0 }; +static journal_header_t initial_journal_header = { + ";BIND LOG V9.2\n", { 0, 0 }, { 0, 0 }, 0, 0, 0 +}; #define JOURNAL_EMPTY(h) ((h)->begin.offset == (h)->end.offset) @@ -300,13 +319,19 @@ struct dns_journal { unsigned int magic; /*%< JOUR */ isc_mem_t *mctx; /*%< Memory context */ journal_state_t state; - char *filename; /*%< Journal file name */ - FILE *fp; /*%< File handle */ - isc_offset_t offset; /*%< Current file offset */ - journal_header_t header; /*%< In-core journal header */ - unsigned char *rawindex; /*%< In-core buffer for journal index - * in on-disk format */ - journal_pos_t *index; /*%< In-core journal index */ + xhdr_version_t xhdr_version; /*%< Expected transaction header version */ + bool header_ver1; /*%< Transaction header compatibility + * mode is allowed */ + bool recovered; /*%< A recoverable error was found + * while reading the journal */ + char *filename; /*%< Journal file name */ + FILE *fp; /*%< File handle */ + isc_offset_t offset; /*%< Current file offset */ + journal_xhdr_t curxhdr; /*%< Current transaction header */ + journal_header_t header; /*%< In-core journal header */ + unsigned char *rawindex; /*%< In-core buffer for journal index + * in on-disk format */ + journal_pos_t *index; /*%< In-core journal index */ /*% Current transaction state (when writing). */ struct { @@ -321,8 +346,7 @@ struct dns_journal { journal_pos_t bpos; /*%< Position before first, */ journal_pos_t epos; /*%< and after last transaction */ /* The rest is iterator state. 
*/ - uint32_t current_serial; /*%< Current SOA serial - * */ + uint32_t current_serial; /*%< Current SOA serial */ isc_buffer_t source; /*%< Data from disk */ isc_buffer_t target; /*%< Data from _fromwire check */ dns_decompress_t dctx; /*%< Dummy decompression ctx */ @@ -353,6 +377,7 @@ journal_pos_encode(journal_rawpos_t *raw, journal_pos_t *cooked) { static void journal_header_decode(journal_rawheader_t *raw, journal_header_t *cooked) { INSIST(sizeof(cooked->format) == sizeof(raw->h.format)); + memmove(cooked->format, raw->h.format, sizeof(cooked->format)); journal_pos_decode(&raw->h.begin, &cooked->begin); journal_pos_decode(&raw->h.end, &cooked->end); @@ -366,6 +391,7 @@ journal_header_encode(journal_header_t *cooked, journal_rawheader_t *raw) { unsigned char flags = 0; INSIST(sizeof(cooked->format) == sizeof(raw->h.format)); + memset(raw->pad, 0, sizeof(raw->pad)); memmove(raw->h.format, cooked->format, sizeof(raw->h.format)); journal_pos_encode(&raw->h.begin, &cooked->begin); @@ -432,6 +458,7 @@ journal_write(dns_journal_t *j, void *mem, size_t nbytes) { static isc_result_t journal_fsync(dns_journal_t *j) { isc_result_t result; + result = isc_stdio_flush(j->fp); if (result != ISC_R_SUCCESS) { isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, @@ -452,26 +479,49 @@ journal_fsync(dns_journal_t *j) { /* * Read/write a transaction header at the current file position. 
*/ - static isc_result_t journal_read_xhdr(dns_journal_t *j, journal_xhdr_t *xhdr) { - journal_rawxhdr_t raw; isc_result_t result; - result = journal_read(j, &raw, sizeof(raw)); - if (result != ISC_R_SUCCESS) { - return (result); + + switch (j->xhdr_version) { + case XHDR_VERSION1: { + journal_rawxhdr_ver1_t raw; + result = journal_read(j, &raw, sizeof(raw)); + if (result != ISC_R_SUCCESS) { + return (result); + } + xhdr->size = decode_uint32(raw.size); + xhdr->count = 0; + xhdr->serial0 = decode_uint32(raw.serial0); + xhdr->serial1 = decode_uint32(raw.serial1); + j->curxhdr = *xhdr; + return (ISC_R_SUCCESS); + } + + case XHDR_VERSION2: { + journal_rawxhdr_t raw; + result = journal_read(j, &raw, sizeof(raw)); + if (result != ISC_R_SUCCESS) { + return (result); + } + xhdr->size = decode_uint32(raw.size); + xhdr->count = decode_uint32(raw.count); + xhdr->serial0 = decode_uint32(raw.serial0); + xhdr->serial1 = decode_uint32(raw.serial1); + j->curxhdr = *xhdr; + return (ISC_R_SUCCESS); + } + + default: + return (ISC_R_NOTIMPLEMENTED); } - xhdr->size = decode_uint32(raw.size); - xhdr->count = decode_uint32(raw.count); - xhdr->serial0 = decode_uint32(raw.serial0); - xhdr->serial1 = decode_uint32(raw.serial1); - return (ISC_R_SUCCESS); } static isc_result_t journal_write_xhdr(dns_journal_t *j, uint32_t size, uint32_t count, uint32_t serial0, uint32_t serial1) { journal_rawxhdr_t raw; + encode_uint32(size, raw.size); encode_uint32(count, raw.count); encode_uint32(serial0, raw.serial0); @@ -487,6 +537,7 @@ static isc_result_t journal_read_rrhdr(dns_journal_t *j, journal_rrhdr_t *rrhdr) { journal_rawrrhdr_t raw; isc_result_t result; + result = journal_read(j, &raw, sizeof(raw)); if (result != ISC_R_SUCCESS) { return (result); @@ -503,7 +554,7 @@ journal_file_create(isc_mem_t *mctx, const char *filename) { journal_rawheader_t rawheader; int index_size = 56; /* XXX configurable */ int size; - void *mem; /* Memory for temporary index image. 
*/ + void *mem = NULL; /* Memory for temporary index image. */ INSIST(sizeof(journal_rawheader_t) == JOURNAL_HEADER_SIZE); @@ -558,23 +609,15 @@ journal_open(isc_mem_t *mctx, const char *filename, bool writable, bool create, journal_rawheader_t rawheader; dns_journal_t *j; - INSIST(journalp != NULL && *journalp == NULL); + REQUIRE(journalp != NULL && *journalp == NULL); + j = isc_mem_get(mctx, sizeof(*j)); - - j->mctx = NULL; + *j = (dns_journal_t){ .state = JOURNAL_STATE_INVALID, + .filename = isc_mem_strdup(mctx, filename), + .xhdr_version = XHDR_VERSION2 }; isc_mem_attach(mctx, &j->mctx); - j->state = JOURNAL_STATE_INVALID; - j->fp = NULL; - j->filename = isc_mem_strdup(mctx, filename); - j->index = NULL; - j->rawindex = NULL; - - if (j->filename == NULL) { - FAIL(ISC_R_NOMEMORY); - } result = isc_stdio_open(j->filename, writable ? "rb+" : "rb", &fp); - if (result == ISC_R_FILENOTFOUND) { if (create) { isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(1), @@ -607,9 +650,29 @@ journal_open(isc_mem_t *mctx, const char *filename, bool writable, bool create, CHECK(journal_seek(j, 0)); CHECK(journal_read(j, &rawheader, sizeof(rawheader))); - if (memcmp(rawheader.h.format, initial_journal_header.format, - sizeof(initial_journal_header.format)) != 0) + if (memcmp(rawheader.h.format, journal_header_ver1.format, + sizeof(journal_header_ver1.format)) == 0) { + /* + * The file header says it's the old format, but it + * still might have the new xhdr format because we + * forgot to change the format string when we introduced + * the new xhdr. When we first try to read it, we assume + * it uses the new xhdr format. If a transaction header + * fails its serial check, journal_next() and read_one_rr() + * retry once with the other xhdr version; header_ver1 + * is what permits that fallback.
+ */ + j->header_ver1 = true; + } else if (memcmp(rawheader.h.format, initial_journal_header.format, + sizeof(initial_journal_header.format)) == 0) + { + /* + * File header says this is format version 2; all + * transactions have to match. + */ + j->header_ver1 = false; + } else { isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, "%s: journal format not recognized", j->filename); FAIL(ISC_R_UNEXPECTED); @@ -795,9 +858,11 @@ ixfr_order(const void *av, const void *bv) { * Other results due to file errors are possible. */ static isc_result_t -journal_next(dns_journal_t *j, journal_pos_t *pos) { +journal_next(dns_journal_t *j, journal_pos_t *pos, bool retry) { isc_result_t result; journal_xhdr_t xhdr; + size_t hdrsize; + REQUIRE(DNS_JOURNAL_VALID(j)); result = journal_seek(j, pos->offset); @@ -808,6 +873,7 @@ journal_next(dns_journal_t *j, journal_pos_t *pos) { if (pos->serial == j->header.end.serial) { return (ISC_R_NOMORE); } + /* * Read the header of the current transaction. * This will return ISC_R_NOMORE if we are at EOF. @@ -820,26 +886,59 @@ journal_next(dns_journal_t *j, journal_pos_t *pos) { /* * Check serial number consistency. 
*/ - if (xhdr.serial0 != pos->serial) { - isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, - "%s: journal file corrupt: " - "expected serial %u, got %u", - j->filename, pos->serial, xhdr.serial0); - return (ISC_R_UNEXPECTED); + if (xhdr.serial0 != pos->serial || + isc_serial_le(xhdr.serial1, xhdr.serial0)) { + if (j->header_ver1 && j->xhdr_version == XHDR_VERSION1 && + xhdr.serial1 == pos->serial && !retry) + { + /* XHDR_VERSION1 -> XHDR_VERSION2 */ + isc_log_write( + JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(3), + "%s: XHDR_VERSION1 -> XHDR_VERSION2 at %u\n", + j->filename, pos->serial); + j->xhdr_version = XHDR_VERSION2; + result = journal_next(j, pos, true); + if (result == ISC_R_SUCCESS) { + j->recovered = true; + } + return (result); + } else if (j->header_ver1 && j->xhdr_version == XHDR_VERSION2 && + xhdr.count == pos->serial && !retry) + { + /* XHDR_VERSION2 -> XHDR_VERSION1 */ + isc_log_write( + JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(3), + "%s: XHDR_VERSION2 -> XHDR_VERSION1 at %u\n", + j->filename, pos->serial); + j->xhdr_version = XHDR_VERSION1; + result = journal_next(j, pos, true); + if (result == ISC_R_SUCCESS) { + j->recovered = true; + } + return (result); + } else { + isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, + "%s: journal file corrupt: " + "expected serial %u, got %u", + j->filename, pos->serial, xhdr.serial0); + return (ISC_R_UNEXPECTED); + } } /* * Check for offset wraparound. */ - if ((isc_offset_t)(pos->offset + sizeof(journal_rawxhdr_t) + - xhdr.size) < pos->offset) - { + hdrsize = (j->xhdr_version == XHDR_VERSION2) + ? 
sizeof(journal_rawxhdr_t) + : sizeof(journal_rawxhdr_ver1_t); + + if ((isc_offset_t)(pos->offset + hdrsize + xhdr.size) < pos->offset) { isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, "%s: offset too large", j->filename); return (ISC_R_UNEXPECTED); } - pos->offset += sizeof(journal_rawxhdr_t) + xhdr.size; + pos->offset += hdrsize + xhdr.size; pos->serial = xhdr.serial1; return (ISC_R_SUCCESS); } @@ -879,9 +978,11 @@ index_find(dns_journal_t *j, uint32_t serial, journal_pos_t *best_guess) { static void index_add(dns_journal_t *j, journal_pos_t *pos) { unsigned int i; + if (j->index == NULL) { return; } + /* * Search for a vacant position. */ @@ -953,6 +1054,7 @@ static isc_result_t journal_find(dns_journal_t *j, uint32_t serial, journal_pos_t *pos) { isc_result_t result; journal_pos_t current_pos; + REQUIRE(DNS_JOURNAL_VALID(j)); if (DNS_SERIAL_GT(j->header.begin.serial, serial)) { @@ -973,7 +1075,7 @@ journal_find(dns_journal_t *j, uint32_t serial, journal_pos_t *pos) { if (DNS_SERIAL_GT(current_pos.serial, serial)) { return (ISC_R_NOTFOUND); } - result = journal_next(j, &current_pos); + result = journal_next(j, &current_pos, false); if (result != ISC_R_SUCCESS) { return (result); } @@ -1187,7 +1289,7 @@ dns_journal_commit(dns_journal_t *j) { if (!JOURNAL_EMPTY(&j->header)) { while (!DNS_SERIAL_GT(j->x.pos[1].serial, j->header.begin.serial)) { - CHECK(journal_next(j, &j->header.begin)); + CHECK(journal_next(j, &j->header.begin, false)); } index_invalidate(j, j->x.pos[1].serial); } @@ -1256,6 +1358,7 @@ failure: isc_result_t dns_journal_write_transaction(dns_journal_t *j, dns_diff_t *diff) { isc_result_t result; + CHECK(dns_diff_sort(diff, ixfr_order)); CHECK(dns_journal_begin_transaction(j)); CHECK(dns_journal_writediff(j, diff)); @@ -1267,9 +1370,13 @@ failure: void dns_journal_destroy(dns_journal_t **journalp) { - dns_journal_t *j = *journalp; + dns_journal_t *j = NULL; + + REQUIRE(journalp != NULL); + REQUIRE(DNS_JOURNAL_VALID(*journalp)); + + j = *journalp; *journalp 
= NULL; - REQUIRE(DNS_JOURNAL_VALID(j)); j->it.result = ISC_R_FAILURE; dns_name_invalidate(&j->it.name); @@ -1346,33 +1453,38 @@ roll_forward(dns_journal_t *j, dns_db_t *db, unsigned int options) { * Locate a journal entry for the current database serial. */ CHECK(journal_find(j, db_serial, &pos)); - /* - * XXX do more drastic things, like marking zone stale, - * if this fails? - */ - /* - * XXXRTH The zone code should probably mark the zone as bad and - * scream loudly into the log if this is a dynamic update - * log reply that failed. - */ end_serial = dns_journal_last_serial(j); + + /* + * If we're reading a version 1 file, scan all the transactions + * to see if the journal needs rewriting: if any outdated + * transaction headers are found, j->recovered will be set. + */ + if (j->header_ver1) { + uint32_t start_serial = dns_journal_first_serial(j); + + CHECK(dns_journal_iter_init(j, start_serial, db_serial, NULL)); + for (result = dns_journal_first_rr(j); result == ISC_R_SUCCESS; + result = dns_journal_next_rr(j)) + { + continue; + } + } + if (db_serial == end_serial) { CHECK(DNS_R_UPTODATE); } CHECK(dns_journal_iter_init(j, db_serial, end_serial, NULL)); - for (result = dns_journal_first_rr(j); result == ISC_R_SUCCESS; result = dns_journal_next_rr(j)) { - dns_name_t *name; - uint32_t ttl; - dns_rdata_t *rdata; + dns_name_t *name = NULL; + dns_rdata_t *rdata = NULL; dns_difftuple_t *tuple = NULL; + uint32_t ttl; - name = NULL; - rdata = NULL; dns_journal_current_rr(j, &name, &ttl, &rdata); if (rdata->type == dns_rdatatype_soa) { @@ -1450,13 +1562,12 @@ failure: isc_result_t dns_journal_rollforward(isc_mem_t *mctx, dns_db_t *db, unsigned int options, const char *filename) { - dns_journal_t *j; + dns_journal_t *j = NULL; isc_result_t result; REQUIRE(DNS_DB_VALID(db)); REQUIRE(filename != NULL); - j = NULL; result = dns_journal_open(mctx, filename, DNS_JOURNAL_READ, &j); if (result == ISC_R_NOTFOUND) { isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no journal file, but 
" @@ -1466,20 +1577,26 @@ dns_journal_rollforward(isc_mem_t *mctx, dns_db_t *db, unsigned int options, if (result != ISC_R_SUCCESS) { return (result); } + if (JOURNAL_EMPTY(&j->header)) { - result = DNS_R_UPTODATE; - } else { - result = roll_forward(j, db, options); + CHECK(DNS_R_UPTODATE); } - dns_journal_destroy(&j); + result = roll_forward(j, db, options); + if ((result == ISC_R_SUCCESS || result == DNS_R_UPTODATE) && + j->recovered) { + result = DNS_R_RECOVERABLE; + } +failure: + dns_journal_destroy(&j); return (result); } isc_result_t -dns_journal_print(isc_mem_t *mctx, const char *filename, FILE *file) { - dns_journal_t *j; +dns_journal_print(isc_mem_t *mctx, uint32_t flags, const char *filename, + FILE *file) { + dns_journal_t *j = NULL; isc_buffer_t source; /* Transaction data from disk */ isc_buffer_t target; /* Ditto after _fromwire check */ uint32_t start_serial; /* Database SOA serial */ @@ -1488,17 +1605,15 @@ dns_journal_print(isc_mem_t *mctx, const char *filename, FILE *file) { dns_diff_t diff; unsigned int n_soa = 0; unsigned int n_put = 0; + bool printxhdr = ((flags & DNS_JOURNAL_PRINTXHDR) != 0); REQUIRE(filename != NULL); - j = NULL; result = dns_journal_open(mctx, filename, DNS_JOURNAL_READ, &j); if (result == ISC_R_NOTFOUND) { isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no journal file"); return (DNS_R_NOJOURNAL); - } - - if (result != ISC_R_SUCCESS) { + } else if (result != ISC_R_SUCCESS) { isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, "journal open failure: %s: %s", isc_result_totext(result), filename); @@ -1526,13 +1641,11 @@ dns_journal_print(isc_mem_t *mctx, const char *filename, FILE *file) { for (result = dns_journal_first_rr(j); result == ISC_R_SUCCESS; result = dns_journal_next_rr(j)) { - dns_name_t *name; - uint32_t ttl; - dns_rdata_t *rdata; + dns_name_t *name = NULL; + dns_rdata_t *rdata = NULL; dns_difftuple_t *tuple = NULL; + uint32_t ttl; - name = NULL; - rdata = NULL; dns_journal_current_rr(j, &name, &ttl, &rdata); if 
(rdata->type == dns_rdatatype_soa) { @@ -1549,12 +1662,21 @@ dns_journal_print(isc_mem_t *mctx, const char *filename, FILE *file) { j->filename); FAIL(ISC_R_UNEXPECTED); } + + if (printxhdr && n_soa == 1) { + fprintf(file, + "Transaction: version %d size %u rrcount %u " + "startserial %u endserial %u\n", + j->xhdr_version, j->curxhdr.size, + j->curxhdr.count, j->curxhdr.serial0, + j->curxhdr.serial1); + } CHECK(dns_difftuple_create( diff.mctx, n_soa == 1 ? DNS_DIFFOP_DEL : DNS_DIFFOP_ADD, name, ttl, rdata, &tuple)); dns_diff_append(&diff, &tuple); - if (++n_put > 100) { + if (++n_put != 0 || printxhdr) { result = dns_diff_print(&diff, file); dns_diff_clear(&diff); n_put = 0; @@ -1568,7 +1690,7 @@ dns_journal_print(isc_mem_t *mctx, const char *filename, FILE *file) { } CHECK(result); - if (n_put != 0) { + if (!printxhdr && n_put != 0) { result = dns_diff_print(&diff, file); dns_diff_clear(&diff); } @@ -1648,7 +1770,7 @@ dns_journal_get_sourceserial(dns_journal_t *j, uint32_t *sourceserial) { */ static isc_result_t -read_one_rr(dns_journal_t *j); +read_one_rr(dns_journal_t *j, bool retry); /* * Make sure the buffer 'b' is has at least 'size' bytes @@ -1706,7 +1828,7 @@ dns_journal_iter_init(dns_journal_t *j, uint32_t begin_serial, size += xhdr.size; count += xhdr.count; - result = journal_next(j, &pos); + result = journal_next(j, &pos, false); if (result == ISC_R_NOMORE) { result = ISC_R_SUCCESS; } @@ -1742,22 +1864,22 @@ dns_journal_first_rr(dns_journal_t *j) { j->it.xsize = 0; /* We have no transaction data yet... */ j->it.xpos = 0; /* ...and haven't used any of it. 
*/ - return (read_one_rr(j)); + return (read_one_rr(j, false)); failure: return (result); } static isc_result_t -read_one_rr(dns_journal_t *j) { +read_one_rr(dns_journal_t *j, bool retry) { isc_result_t result; - dns_rdatatype_t rdtype; dns_rdataclass_t rdclass; unsigned int rdlen; uint32_t ttl; journal_xhdr_t xhdr; journal_rrhdr_t rrhdr; + dns_journal_t save = *j; if (j->offset > j->it.epos.offset) { isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, @@ -1780,7 +1902,34 @@ read_one_rr(dns_journal_t *j) { j->filename); FAIL(ISC_R_UNEXPECTED); } - if (xhdr.serial0 != j->it.current_serial) { + if (xhdr.serial0 != j->it.current_serial || + isc_serial_le(xhdr.serial1, xhdr.serial0)) + { + if (!retry && j->header_ver1 && + j->xhdr_version == XHDR_VERSION2 && + xhdr.count == j->it.current_serial) + { + /* XHDR_VERSION2 -> XHDR_VERSION1 */ + j->xhdr_version = XHDR_VERSION1; + CHECK(journal_seek(j, save.offset)); + result = read_one_rr(j, true); + if (result == ISC_R_SUCCESS) { + j->recovered = true; + } + return (result); + } else if (!retry && j->header_ver1 && + j->xhdr_version == XHDR_VERSION1 && + xhdr.serial1 == j->it.current_serial) + { + /* XHDR_VERSION1 -> XHDR_VERSION2 */ + j->xhdr_version = XHDR_VERSION2; + CHECK(journal_seek(j, save.offset)); + result = read_one_rr(j, true); + if (result == ISC_R_SUCCESS) { + j->recovered = true; + } + return (result); + } isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, "%s: journal file corrupt: " "expected serial %u, got %u", @@ -1871,7 +2020,7 @@ failure: isc_result_t dns_journal_next_rr(dns_journal_t *j) { - j->it.result = read_one_rr(j); + j->it.result = read_one_rr(j, false); return (j->it.result); } @@ -2200,16 +2349,33 @@ failure: return (result); } +static uint32_t +rrcount(char *buf, unsigned int size) { + isc_buffer_t b; + uint32_t rrsize, count = 0; + + isc_buffer_init(&b, buf, size); + isc_buffer_add(&b, size); + while (isc_buffer_remaininglength(&b) > 0) { + rrsize = isc_buffer_getuint32(&b); + 
INSIST(isc_buffer_remaininglength(&b) >= rrsize); + isc_buffer_forward(&b, rrsize); + count++; + } + + return (count); +} + isc_result_t dns_journal_compact(isc_mem_t *mctx, char *filename, uint32_t serial, - uint32_t target_size) { + uint32_t flags, uint32_t target_size) { unsigned int i; journal_pos_t best_guess; journal_pos_t current_pos; dns_journal_t *j1 = NULL; dns_journal_t *j2 = NULL; journal_rawheader_t rawheader; - unsigned int copy_length; + unsigned int len; size_t namelen; char *buf = NULL; unsigned int size = 0; @@ -2218,6 +2384,7 @@ dns_journal_compact(isc_mem_t *mctx, char *filename, uint32_t serial, char newname[PATH_MAX]; char backup[PATH_MAX]; bool is_backup = false; + bool rewrite = false; REQUIRE(filename != NULL); @@ -2243,7 +2410,14 @@ dns_journal_compact(isc_mem_t *mctx, char *filename, uint32_t serial, return (result); } - if (JOURNAL_EMPTY(&j1->header)) { + /* + * Check whether we need to rewrite the whole journal + * file (for example, to upversion it). + */ + if ((flags & DNS_JOURNAL_COMPACTALL) != 0) { + rewrite = true; + serial = dns_journal_first_serial(j1); + } else if (JOURNAL_EMPTY(&j1->header)) { dns_journal_destroy(&j1); return (ISC_R_SUCCESS); } @@ -2270,12 +2444,13 @@ dns_journal_compact(isc_mem_t *mctx, char *filename, uint32_t serial, /* * See if there is any work to do. */ - if ((uint32_t)j1->header.end.offset < target_size) { + if (!rewrite && (uint32_t)j1->header.end.offset < target_size) { dns_journal_destroy(&j1); return (ISC_R_SUCCESS); } CHECK(journal_open(mctx, newname, true, true, &j2)); + CHECK(journal_seek(j2, indexend)); /* * Remove overhead so space test below can succeed. 
@@ -2301,7 +2476,7 @@ dns_journal_compact(isc_mem_t *mctx, char *filename, uint32_t serial, current_pos = best_guess; while (current_pos.serial != serial) { - CHECK(journal_next(j1, &current_pos)); + CHECK(journal_next(j1, &current_pos, false)); if (current_pos.serial == j1->header.end.serial) { break; } @@ -2319,7 +2494,7 @@ dns_journal_compact(isc_mem_t *mctx, char *filename, uint32_t serial, INSIST(best_guess.serial != j1->header.end.serial); if (best_guess.serial != serial) { - CHECK(journal_next(j1, &best_guess)); + CHECK(journal_next(j1, &best_guess, false)); } /* @@ -2327,40 +2502,98 @@ dns_journal_compact(isc_mem_t *mctx, char *filename, uint32_t serial, * we did not reach 'serial'. If not we will just copy * all uncommitted deltas regardless of the size. */ - copy_length = j1->header.end.offset - best_guess.offset; - - if (copy_length != 0) { - /* - * Copy best_guess to end into space just freed. - */ - size = 64 * 1024; - if (copy_length < size) { - size = copy_length; - } - buf = isc_mem_get(mctx, size); - + len = j1->header.end.offset - best_guess.offset; + if (len != 0) { CHECK(journal_seek(j1, best_guess.offset)); - CHECK(journal_seek(j2, indexend)); - for (i = 0; i < copy_length; i += size) { - unsigned int len = (copy_length - i) > size - ? size - : (copy_length - i); - CHECK(journal_read(j1, buf, len)); - CHECK(journal_write(j2, buf, len)); + + /* Prepare new header */ + j2->header.begin.serial = best_guess.serial; + j2->header.begin.offset = indexend; + j2->header.sourceserial = j1->header.sourceserial; + j2->header.serialset = j1->header.serialset; + j2->header.end.serial = j1->header.end.serial; + + /* + * Only use this method if we're rewriting the + * journal to fix outdated transaction headers; + * otherwise we'll copy the whole journal without + * parsing individual deltas below. 
+ */ + while (rewrite && len > 0) { + journal_xhdr_t xhdr; + isc_offset_t offset = j1->offset; + uint32_t count; + + result = journal_read_xhdr(j1, &xhdr); + if (rewrite && result == ISC_R_NOMORE) { + break; + } + CHECK(result); + + /* + * If we're repairing an outdated journal, the + * xhdr format may be wrong. + */ + if (rewrite && + (xhdr.serial0 != serial || + isc_serial_le(xhdr.serial1, xhdr.serial0))) + { + if (j1->xhdr_version == XHDR_VERSION2 && + xhdr.count == serial) { + /* XHDR_VERSION2 -> XHDR_VERSION1 */ + j1->xhdr_version = XHDR_VERSION1; + CHECK(journal_seek(j1, offset)); + CHECK(journal_read_xhdr(j1, &xhdr)); + } else if (j1->xhdr_version == XHDR_VERSION1 && + xhdr.serial1 == serial) { + /* XHDR_VERSION1 -> XHDR_VERSION2 */ + j1->xhdr_version = XHDR_VERSION2; + CHECK(journal_seek(j1, offset)); + CHECK(journal_read_xhdr(j1, &xhdr)); + } + + /* Check again */ + if (xhdr.serial0 != serial || + isc_serial_le(xhdr.serial1, xhdr.serial0)) { + CHECK(ISC_R_UNEXPECTED); + } + } + + size = xhdr.size; + buf = isc_mem_get(mctx, size); + CHECK(journal_read(j1, buf, size)); + + count = rrcount(buf, size); + CHECK(journal_write_xhdr(j2, xhdr.size, count, + xhdr.serial0, xhdr.serial1)); + CHECK(journal_write(j2, buf, size)); + + j2->header.end.offset = j2->offset; + + serial = xhdr.serial1; + + len = j1->header.end.offset - j1->offset; + isc_mem_put(mctx, buf, size); + } + + /* + * If we're not rewriting transaction headers, we can use + * this faster method instead. + */ + if (!rewrite) { + size = ISC_MIN(64 * 1024, len); + buf = isc_mem_get(mctx, size); + for (i = 0; i < len; i += size) { + unsigned int blob = ISC_MIN(size, len - i); + CHECK(journal_read(j1, buf, blob)); + CHECK(journal_write(j2, buf, blob)); + } + + j2->header.end.offset = indexend + len; } CHECK(journal_fsync(j2)); - /* - * Compute new header. 
- */ - j2->header.begin.serial = best_guess.serial; - j2->header.begin.offset = indexend; - j2->header.end.serial = j1->header.end.serial; - j2->header.end.offset = indexend + copy_length; - j2->header.sourceserial = j1->header.sourceserial; - j2->header.serialset = j1->header.serialset; - /* * Update the journal header. */ @@ -2375,7 +2608,7 @@ dns_journal_compact(isc_mem_t *mctx, char *filename, uint32_t serial, current_pos = j2->header.begin; while (current_pos.serial != j2->header.end.serial) { index_add(j2, &current_pos); - CHECK(journal_next(j2, &current_pos)); + CHECK(journal_next(j2, &current_pos, false)); } /* diff --git a/lib/dns/zone.c b/lib/dns/zone.c index 5b8b1f1ddc..ae21050106 100644 --- a/lib/dns/zone.c +++ b/lib/dns/zone.c @@ -503,7 +503,10 @@ typedef enum { DNS_ZONEFLG_NEEDSTARTUPNOTIFY = 0x80000000U, /*%< need to send out * notify due to the zone * just being loaded for - * the first time. */ + * the first time. */ + DNS_ZONEFLG_FIXJOURNAL = 0x100000000U, /*%< journal file had + * recoverable error, + * needs rewriting */ DNS_ZONEFLG___MAX = UINT64_MAX, /* trick to make the ENUM 64-bit wide */ } dns_zoneflg_t; @@ -890,6 +893,8 @@ static isc_result_t zone_send_securedb(dns_zone_t *zone, dns_db_t *db); static void setrl(isc_ratelimiter_t *rl, unsigned int *rate, unsigned int value); +static void +zone_journal_compact(dns_zone_t *zone, dns_db_t *db, uint32_t serial); #define ENTER zone_debuglog(zone, me, 1, "enter") @@ -4743,6 +4748,7 @@ zone_postload(dns_zone_t *zone, dns_db_t *db, isc_time_t loadtime, uint32_t serial, oldserial, refresh, retry, expire, minimum; isc_time_t now; bool needdump = false; + bool fixjournal = false; bool hasinclude = DNS_ZONE_FLAG(zone, DNS_ZONEFLG_HASINCLUDE); bool nomaster = false; bool had_db = false; @@ -4844,9 +4850,9 @@ zone_postload(dns_zone_t *zone, dns_db_t *db, isc_time_t loadtime, } result = dns_journal_rollforward(zone->mctx, db, options, zone->journal); - if (result != ISC_R_SUCCESS && result != ISC_R_NOTFOUND && - result != 
DNS_R_UPTODATE && result != DNS_R_NOJOURNAL && - result != ISC_R_RANGE) + if (result != ISC_R_SUCCESS && result != DNS_R_RECOVERABLE && + result != ISC_R_NOTFOUND && result != DNS_R_UPTODATE && + result != DNS_R_NOJOURNAL && result != ISC_R_RANGE) { dns_zone_logc(zone, DNS_LOGCATEGORY_ZONELOAD, ISC_LOG_ERROR, @@ -4867,6 +4873,12 @@ zone_postload(dns_zone_t *zone, dns_db_t *db, isc_time_t loadtime, dns_result_totext(result)); if (result == ISC_R_SUCCESS) { needdump = true; + } else if (result == DNS_R_RECOVERABLE) { + dns_zone_logc(zone, DNS_LOGCATEGORY_ZONELOAD, + ISC_LOG_ERROR, + "retried using old journal format"); + needdump = true; + fixjournal = true; } } @@ -5192,7 +5204,13 @@ zone_postload(dns_zone_t *zone, dns_db_t *db, isc_time_t loadtime, if (zone->type == dns_zone_key) { zone_needdump(zone, 30); } else { - zone_needdump(zone, DNS_DUMP_DELAY); + if (fixjournal) { + DNS_ZONE_SETFLAG(zone, DNS_ZONEFLG_FIXJOURNAL); + zone_journal_compact(zone, zone->db, 0); + zone_needdump(zone, 0); + } else { + zone_needdump(zone, DNS_DUMP_DELAY); + } } } @@ -11435,6 +11453,7 @@ zone_journal_compact(dns_zone_t *zone, dns_db_t *db, uint32_t serial) { int32_t journalsize; dns_dbversion_t *ver = NULL; uint64_t dbsize; + uint32_t options = 0; INSIST(LOCKED_ZONE(zone)); if (inline_raw(zone)) { @@ -11456,9 +11475,16 @@ zone_journal_compact(dns_zone_t *zone, dns_db_t *db, uint32_t serial) { journalsize = (int32_t)dbsize * 2; } } - zone_debuglog(zone, "zone_journal_compact", 1, "target journal size %d", - journalsize); - result = dns_journal_compact(zone->mctx, zone->journal, serial, + if (DNS_ZONE_FLAG(zone, DNS_ZONEFLG_FIXJOURNAL)) { + options |= DNS_JOURNAL_COMPACTALL; + DNS_ZONE_CLRFLAG(zone, DNS_ZONEFLG_FIXJOURNAL); + zone_debuglog(zone, "zone_journal_compact", 1, + "repair full journal"); + } else { + zone_debuglog(zone, "zone_journal_compact", 1, + "target journal size %d", journalsize); + } + result = dns_journal_compact(zone->mctx, zone->journal, serial, options, 
journalsize); switch (result) { case ISC_R_SUCCESS: @@ -11486,6 +11512,7 @@ dns_zone_flush(dns_zone_t *zone) { DNS_ZONE_SETFLAG(zone, DNS_ZONEFLG_FLUSH); if (DNS_ZONE_FLAG(zone, DNS_ZONEFLG_NEEDDUMP) && zone->masterfile != NULL) { + DNS_ZONE_SETFLAG(zone, DNS_ZONEFLG_NEEDCOMPACT); result = ISC_R_ALREADYRUNNING; dumping = was_dumping(zone); } else {