2017-12-24 11:43:59 -08:00
|
|
|
|
/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2017 Nicira, Inc.
|
2009-11-04 15:11:44 -08:00
|
|
|
|
*
|
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
|
* You may obtain a copy of the License at:
|
|
|
|
|
*
|
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
*
|
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
|
* limitations under the License.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <config.h>
|
|
|
|
|
|
2009-11-13 13:23:35 -08:00
|
|
|
|
#include "log.h"
|
2009-11-04 15:11:44 -08:00
|
|
|
|
|
|
|
|
|
#include <errno.h>
|
|
|
|
|
#include <fcntl.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <string.h>
|
2010-05-26 10:37:39 -07:00
|
|
|
|
#include <sys/stat.h>
|
2009-11-04 15:11:44 -08:00
|
|
|
|
#include <unistd.h>
|
|
|
|
|
|
2017-12-07 16:01:01 -08:00
|
|
|
|
#include "lockfile.h"
|
2017-12-14 13:44:15 -08:00
|
|
|
|
#include "openvswitch/dynamic-string.h"
|
2016-07-12 16:37:34 -05:00
|
|
|
|
#include "openvswitch/json.h"
|
2017-10-06 11:14:21 -07:00
|
|
|
|
#include "openvswitch/vlog.h"
|
2017-12-07 16:01:01 -08:00
|
|
|
|
#include "ovs-atomic.h"
|
|
|
|
|
#include "ovs-rcu.h"
|
|
|
|
|
#include "ovs-thread.h"
|
2009-11-04 15:11:44 -08:00
|
|
|
|
#include "ovsdb-error.h"
|
2017-12-07 16:01:01 -08:00
|
|
|
|
#include "ovsdb.h"
|
|
|
|
|
#include "openvswitch/poll-loop.h"
|
|
|
|
|
#include "seq.h"
|
2009-11-04 15:11:44 -08:00
|
|
|
|
#include "sha1.h"
|
2010-02-11 15:35:46 -08:00
|
|
|
|
#include "socket-util.h"
|
2009-11-13 13:23:35 -08:00
|
|
|
|
#include "transaction.h"
|
2009-11-04 15:11:44 -08:00
|
|
|
|
#include "util.h"
|
2010-07-16 11:02:49 -07:00
|
|
|
|
|
2017-10-06 11:14:21 -07:00
|
|
|
|
VLOG_DEFINE_THIS_MODULE(ovsdb_log);
|
|
|
|
|
|
2017-12-07 14:33:28 -08:00
|
|
|
|
/* The states that a log moves through over its lifetime.
 *
 *   - OVSDB_LOG_READ: where every newly opened log starts.  Only reads of log
 *     records are possible here.  Hitting end of file leaves the log in this
 *     state; a failed read moves it to OVSDB_LOG_READ_ERROR.
 *
 *   - OVSDB_LOG_READ_ERROR: every further read fails with the same error that
 *     was reported the first time.  Writing to the log leaves this state for
 *     OVSDB_LOG_WRITE or OVSDB_LOG_WRITE_ERROR.
 *
 *   - OVSDB_LOG_WRITE: entered after ovsdb_log_write() when everything goes
 *     well.  It is reachable from every state except OVSDB_LOG_BROKEN.  A
 *     failed write moves the log to OVSDB_LOG_WRITE_ERROR.
 *
 *   - OVSDB_LOG_WRITE_ERROR: entered after a failed write.  Later writes are
 *     retried and may bring the log back to OVSDB_LOG_WRITE.
 *
 *   - OVSDB_LOG_BROKEN: entered when ovsdb_log_replace() or
 *     ovsdb_log_replace_commit() fails so badly that neither reads nor writes
 *     can succeed any more.  There is no way out of this state.
 */
enum ovsdb_log_state {
    OVSDB_LOG_READ,             /* Records may be read. */
    OVSDB_LOG_READ_ERROR,       /* A read failed; 'error' says why. */
    OVSDB_LOG_WRITE,            /* Records may be written. */
    OVSDB_LOG_WRITE_ERROR,      /* A write failed; 'error' says why. */
    OVSDB_LOG_BROKEN,           /* Unrecoverable; 'error' says why. */
};
|
|
|
|
|
|
2009-11-13 13:23:35 -08:00
|
|
|
|
struct ovsdb_log {
|
2017-12-07 14:33:28 -08:00
|
|
|
|
enum ovsdb_log_state state;
|
|
|
|
|
struct ovsdb_error *error;
|
|
|
|
|
|
2011-03-28 13:05:40 -07:00
|
|
|
|
off_t prev_offset;
|
2009-11-04 15:11:44 -08:00
|
|
|
|
off_t offset;
|
2017-10-06 15:15:31 -07:00
|
|
|
|
char *name; /* Absolute name of file. */
|
|
|
|
|
char *display_name; /* For use in log messages, etc. */
|
2017-12-24 11:43:59 -08:00
|
|
|
|
char *magic;
|
2009-11-04 15:11:44 -08:00
|
|
|
|
struct lockfile *lockfile;
|
|
|
|
|
FILE *stream;
|
2017-12-24 17:52:34 -08:00
|
|
|
|
off_t base;
|
2017-12-07 16:01:01 -08:00
|
|
|
|
struct afsync *afsync;
|
2009-11-04 15:11:44 -08:00
|
|
|
|
};
|
|
|
|
|
|
2017-12-07 14:33:28 -08:00
|
|
|
|
/* True if the operating system lets a file be renamed while it is open.
 *
 * This is a run-time variable rather than a compile-time constant so that
 * both code paths can be exercised by tests on Unix-like systems. */
#ifndef _WIN32
static bool rename_open_files = true;
#else
static bool rename_open_files = false;
#endif
|
|
|
|
|
|
2017-12-24 16:19:24 -08:00
|
|
|
|
static bool parse_header(char *header, const char **magicp,
|
|
|
|
|
unsigned long int *length,
|
|
|
|
|
uint8_t sha1[SHA1_DIGEST_SIZE]);
|
|
|
|
|
static bool is_magic_ok(const char *needle, const char *haystack);
|
|
|
|
|
|
2017-12-07 16:01:01 -08:00
|
|
|
|
static struct afsync *afsync_create(int fd, uint64_t initial_ticket);
|
|
|
|
|
static uint64_t afsync_destroy(struct afsync *);
|
|
|
|
|
|
2010-02-15 11:31:32 -08:00
|
|
|
|
/* Attempts to open 'name' with the specified 'open_mode'. On success, stores
|
|
|
|
|
* the new log into '*filep' and returns NULL; otherwise returns an error and
|
|
|
|
|
* stores NULL into '*filep'.
|
|
|
|
|
*
|
2017-12-24 11:43:59 -08:00
|
|
|
|
* 'magic' is a short text string put at the beginning of every record and used
|
|
|
|
|
* to distinguish one kind of log file from another. For a conventional OVSDB
|
2017-12-24 16:19:24 -08:00
|
|
|
|
* log file, use the OVSDB_MAGIC macro. To accept more than one magic string,
|
|
|
|
|
* separate them with "|", e.g. "MAGIC 1|MAGIC 2".
|
2017-12-24 11:43:59 -08:00
|
|
|
|
*
|
2010-02-15 11:31:32 -08:00
|
|
|
|
* Whether the file will be locked using lockfile_lock() depends on 'locking':
|
|
|
|
|
* use true to lock it, false not to lock it, or -1 to lock it only if
|
|
|
|
|
* 'open_mode' is a mode that allows writing.
|
2017-12-24 17:52:34 -08:00
|
|
|
|
*
|
|
|
|
|
* A log consists of a series of records. After opening or creating a log with
|
|
|
|
|
* this function, the client may use ovsdb_log_read() to read any existing
|
|
|
|
|
* records, one by one. The client may also use ovsdb_log_write() to write new
|
|
|
|
|
* records (if some records have not yet been read at this point, then the
|
|
|
|
|
* first write truncates them).
|
2010-02-15 11:31:32 -08:00
|
|
|
|
*/
|
2009-11-04 15:11:44 -08:00
|
|
|
|
struct ovsdb_error *
|
2017-12-24 11:43:59 -08:00
|
|
|
|
ovsdb_log_open(const char *name, const char *magic,
|
|
|
|
|
enum ovsdb_log_open_mode open_mode,
|
2010-02-15 11:31:32 -08:00
|
|
|
|
int locking, struct ovsdb_log **filep)
|
2009-11-04 15:11:44 -08:00
|
|
|
|
{
|
|
|
|
|
struct lockfile *lockfile;
|
|
|
|
|
struct ovsdb_error *error;
|
|
|
|
|
struct stat s;
|
|
|
|
|
FILE *stream;
|
2010-02-15 11:31:32 -08:00
|
|
|
|
int flags;
|
2009-11-04 15:11:44 -08:00
|
|
|
|
int fd;
|
|
|
|
|
|
2017-12-24 16:19:24 -08:00
|
|
|
|
/* If we can create a new file, we need to know what kind of magic to
|
|
|
|
|
* use, so there must be only one kind. */
|
|
|
|
|
if (open_mode == OVSDB_LOG_CREATE_EXCL || open_mode == OVSDB_LOG_CREATE) {
|
|
|
|
|
ovs_assert(!strchr(magic, '|'));
|
|
|
|
|
}
|
2017-10-06 15:15:31 -07:00
|
|
|
|
|
2009-11-04 15:11:44 -08:00
|
|
|
|
*filep = NULL;
|
|
|
|
|
|
2017-10-06 15:15:31 -07:00
|
|
|
|
/* Get the absolute name of the file because we might need to access it by
|
|
|
|
|
* name again later after the process has changed directory (e.g. because
|
|
|
|
|
* daemonize() chdirs to "/").
|
|
|
|
|
*
|
|
|
|
|
* We save the user-provided name of the file for use in log messages, to
|
|
|
|
|
* reduce user confusion. */
|
|
|
|
|
char *abs_name = abs_file_name(NULL, name);
|
|
|
|
|
if (!abs_name) {
|
|
|
|
|
error = ovsdb_io_error(0, "could not determine current "
|
|
|
|
|
"working directory");
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
2012-11-06 13:14:55 -08:00
|
|
|
|
ovs_assert(locking == -1 || locking == false || locking == true);
|
2010-02-15 11:31:32 -08:00
|
|
|
|
if (locking < 0) {
|
|
|
|
|
locking = open_mode != OVSDB_LOG_READ_ONLY;
|
|
|
|
|
}
|
|
|
|
|
if (locking) {
|
2012-08-08 17:40:43 -07:00
|
|
|
|
int retval = lockfile_lock(name, &lockfile);
|
2009-11-04 15:11:44 -08:00
|
|
|
|
if (retval) {
|
|
|
|
|
error = ovsdb_io_error(retval, "%s: failed to lock lockfile",
|
|
|
|
|
name);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
lockfile = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-05 09:30:26 -08:00
|
|
|
|
switch (open_mode) {
|
|
|
|
|
case OVSDB_LOG_READ_ONLY:
|
2010-02-15 11:31:32 -08:00
|
|
|
|
flags = O_RDONLY;
|
2017-12-05 09:30:26 -08:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OVSDB_LOG_READ_WRITE:
|
2010-02-15 11:31:32 -08:00
|
|
|
|
flags = O_RDWR;
|
2017-12-05 09:30:26 -08:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OVSDB_LOG_CREATE_EXCL:
|
2014-03-10 15:47:30 -07:00
|
|
|
|
#ifndef _WIN32
|
2012-07-26 14:42:58 -07:00
|
|
|
|
if (stat(name, &s) == -1 && errno == ENOENT
|
|
|
|
|
&& lstat(name, &s) == 0 && S_ISLNK(s.st_mode)) {
|
|
|
|
|
/* 'name' is a dangling symlink. We want to create the file that
|
|
|
|
|
* the symlink points to, but POSIX says that open() with O_EXCL
|
|
|
|
|
* must fail with EEXIST if the named file is a symlink. So, we
|
|
|
|
|
* have to leave off O_EXCL and accept the race. */
|
|
|
|
|
flags = O_RDWR | O_CREAT;
|
|
|
|
|
} else {
|
|
|
|
|
flags = O_RDWR | O_CREAT | O_EXCL;
|
|
|
|
|
}
|
2014-03-10 15:47:30 -07:00
|
|
|
|
#else
|
|
|
|
|
flags = O_RDWR | O_CREAT | O_EXCL;
|
|
|
|
|
#endif
|
2017-12-05 09:30:26 -08:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OVSDB_LOG_CREATE:
|
|
|
|
|
flags = O_RDWR | O_CREAT;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
2013-12-17 10:32:12 -08:00
|
|
|
|
OVS_NOT_REACHED();
|
2010-02-15 11:31:32 -08:00
|
|
|
|
}
|
2014-05-08 13:31:18 -07:00
|
|
|
|
#ifdef _WIN32
|
|
|
|
|
flags = flags | O_BINARY;
|
|
|
|
|
#endif
|
2017-10-06 11:40:00 -07:00
|
|
|
|
/* Special case for /dev/stdin to make it work even if the operating system
|
|
|
|
|
* doesn't support it under that name. */
|
|
|
|
|
if (!strcmp(name, "/dev/stdin") && open_mode == OVSDB_LOG_READ_ONLY) {
|
|
|
|
|
fd = dup(STDIN_FILENO);
|
|
|
|
|
} else {
|
2020-09-23 13:48:15 -07:00
|
|
|
|
fd = open(name, flags, 0660);
|
2017-10-06 11:40:00 -07:00
|
|
|
|
}
|
2009-11-04 15:11:44 -08:00
|
|
|
|
if (fd < 0) {
|
2017-12-05 09:30:26 -08:00
|
|
|
|
const char *op = (open_mode == OVSDB_LOG_CREATE_EXCL ? "create"
|
|
|
|
|
: open_mode == OVSDB_LOG_CREATE ? "create or open"
|
|
|
|
|
: "open");
|
2016-03-30 20:11:44 -07:00
|
|
|
|
error = ovsdb_io_error(errno, "%s: %s failed", name, op);
|
2009-11-04 15:11:44 -08:00
|
|
|
|
goto error_unlock;
|
|
|
|
|
}
|
|
|
|
|
|
2010-02-15 11:31:32 -08:00
|
|
|
|
stream = fdopen(fd, open_mode == OVSDB_LOG_READ_ONLY ? "rb" : "w+b");
|
2009-11-04 15:11:44 -08:00
|
|
|
|
if (!stream) {
|
|
|
|
|
error = ovsdb_io_error(errno, "%s: fdopen failed", name);
|
2017-12-24 16:19:24 -08:00
|
|
|
|
close(fd);
|
|
|
|
|
goto error_unlock;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Read the magic from the first log record. */
|
|
|
|
|
char header[128];
|
|
|
|
|
const char *actual_magic;
|
|
|
|
|
if (!fgets(header, sizeof header, stream)) {
|
|
|
|
|
if (ferror(stream)) {
|
|
|
|
|
error = ovsdb_io_error(errno, "%s: read error", name);
|
|
|
|
|
goto error_fclose;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* We need to be able to report what kind of file this is but we can't
|
|
|
|
|
* if it's empty and we accept more than one. */
|
|
|
|
|
if (strchr(magic, '|')) {
|
|
|
|
|
error = ovsdb_error(NULL, "%s: cannot identify file type", name);
|
|
|
|
|
goto error_fclose;
|
|
|
|
|
}
|
|
|
|
|
actual_magic = magic;
|
|
|
|
|
|
|
|
|
|
/* It's an empty file and therefore probably a new file, so fsync()
|
|
|
|
|
* its parent directory to ensure that its directory entry is
|
|
|
|
|
* committed to disk. */
|
|
|
|
|
fsync_parent_dir(name);
|
|
|
|
|
} else {
|
|
|
|
|
unsigned long int length;
|
|
|
|
|
uint8_t sha1[SHA1_DIGEST_SIZE];
|
|
|
|
|
if (!parse_header(header, &actual_magic, &length, sha1)) {
|
|
|
|
|
error = ovsdb_error(NULL, "%s: unexpected file format", name);
|
|
|
|
|
goto error_fclose;
|
|
|
|
|
} else if (!is_magic_ok(actual_magic, magic)) {
|
|
|
|
|
error = ovsdb_error(NULL, "%s: cannot identify file type", name);
|
|
|
|
|
goto error_fclose;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (fseek(stream, 0, SEEK_SET)) {
|
|
|
|
|
error = ovsdb_io_error(errno, "%s: seek failed", name);
|
|
|
|
|
goto error_fclose;
|
2009-11-04 15:11:44 -08:00
|
|
|
|
}
|
|
|
|
|
|
2017-12-24 16:19:24 -08:00
|
|
|
|
struct ovsdb_log *file = xmalloc(sizeof *file);
|
2017-12-07 14:33:28 -08:00
|
|
|
|
file->state = OVSDB_LOG_READ;
|
|
|
|
|
file->error = NULL;
|
2017-10-06 15:15:31 -07:00
|
|
|
|
file->name = abs_name;
|
|
|
|
|
file->display_name = xstrdup(name);
|
2017-12-24 16:19:24 -08:00
|
|
|
|
file->magic = xstrdup(actual_magic);
|
2009-11-04 15:11:44 -08:00
|
|
|
|
file->lockfile = lockfile;
|
|
|
|
|
file->stream = stream;
|
2011-03-28 13:05:40 -07:00
|
|
|
|
file->prev_offset = 0;
|
2009-11-04 15:11:44 -08:00
|
|
|
|
file->offset = 0;
|
2017-12-24 17:52:34 -08:00
|
|
|
|
file->base = 0;
|
2017-12-07 16:01:01 -08:00
|
|
|
|
file->afsync = NULL;
|
2009-11-04 15:11:44 -08:00
|
|
|
|
*filep = file;
|
|
|
|
|
return NULL;
|
|
|
|
|
|
2017-12-24 16:19:24 -08:00
|
|
|
|
error_fclose:
|
|
|
|
|
fclose(stream);
|
2009-11-04 15:11:44 -08:00
|
|
|
|
error_unlock:
|
|
|
|
|
lockfile_unlock(lockfile);
|
|
|
|
|
error:
|
2017-10-06 15:15:31 -07:00
|
|
|
|
free(abs_name);
|
2009-11-04 15:11:44 -08:00
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-24 16:19:24 -08:00
|
|
|
|
/* Checks whether 'needle' exactly equals one of the words in 'haystack',
 * where the words in 'haystack' are separated by "|".
 *
 * A 'needle' that itself contains "|" never matches.  Returns true on a
 * match, false otherwise. */
static bool
is_magic_ok(const char *needle, const char *haystack)
{
    /* 'needle' must be a single word. */
    if (strchr(needle, '|')) {
        return false;
    }

    size_t needle_len = strlen(needle);
    const char *word = haystack;
    while (word) {
        /* A word matches if it starts with 'needle' and ends right after it,
         * either at a separator or at the end of 'haystack'. */
        if (!strncmp(needle, word, needle_len)
            && (word[needle_len] == '|' || word[needle_len] == '\0')) {
            return true;
        }

        /* Advance to the word after the next separator, if any. */
        const char *separator = strchr(word, '|');
        word = separator ? separator + 1 : NULL;
    }
    return false;
}
|
|
|
|
|
|
2009-11-04 15:11:44 -08:00
|
|
|
|
void
|
2009-11-13 13:23:35 -08:00
|
|
|
|
ovsdb_log_close(struct ovsdb_log *file)
|
2009-11-04 15:11:44 -08:00
|
|
|
|
{
|
|
|
|
|
if (file) {
|
2017-12-07 14:33:28 -08:00
|
|
|
|
ovsdb_error_destroy(file->error);
|
2017-12-07 16:01:01 -08:00
|
|
|
|
afsync_destroy(file->afsync);
|
2009-11-04 15:11:44 -08:00
|
|
|
|
free(file->name);
|
2017-10-06 15:15:31 -07:00
|
|
|
|
free(file->display_name);
|
2017-12-24 11:43:59 -08:00
|
|
|
|
free(file->magic);
|
2017-12-07 14:33:28 -08:00
|
|
|
|
if (file->stream) {
|
|
|
|
|
fclose(file->stream);
|
|
|
|
|
}
|
2009-11-04 15:11:44 -08:00
|
|
|
|
lockfile_unlock(file->lockfile);
|
|
|
|
|
free(file);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-24 16:19:24 -08:00
|
|
|
|
const char *
|
|
|
|
|
ovsdb_log_get_magic(const struct ovsdb_log *log)
|
|
|
|
|
{
|
|
|
|
|
return log->magic;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Attempts to parse 'header' as a header line for an OVSDB log record (as
|
|
|
|
|
* described in ovsdb(5)). Stores a pointer to the magic string in '*magicp',
|
|
|
|
|
* the length in *length, and the parsed sha1 value in sha1[].
|
|
|
|
|
*
|
|
|
|
|
* Modifies 'header' and points '*magicp' inside it.
|
|
|
|
|
*
|
|
|
|
|
* Returns true if successful, false on failure. */
|
2009-11-04 15:11:44 -08:00
|
|
|
|
static bool
|
2017-12-24 16:19:24 -08:00
|
|
|
|
parse_header(char *header, const char **magicp,
|
|
|
|
|
unsigned long int *length, uint8_t sha1[SHA1_DIGEST_SIZE])
|
2009-11-04 15:11:44 -08:00
|
|
|
|
{
|
2017-12-24 16:19:24 -08:00
|
|
|
|
/* 'header' must consist of "OVSDB "... */
|
|
|
|
|
const char lead[] = "OVSDB ";
|
|
|
|
|
if (strncmp(lead, header, strlen(lead))) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2009-11-04 15:11:44 -08:00
|
|
|
|
|
2017-12-24 16:19:24 -08:00
|
|
|
|
/* ...followed by a magic string... */
|
|
|
|
|
char *magic = header + strlen(lead);
|
|
|
|
|
size_t magic_len = strcspn(magic, " ");
|
|
|
|
|
if (magic[magic_len] != ' ') {
|
2009-11-04 15:11:44 -08:00
|
|
|
|
return false;
|
|
|
|
|
}
|
2017-12-24 16:19:24 -08:00
|
|
|
|
magic[magic_len] = '\0';
|
|
|
|
|
*magicp = magic;
|
2009-11-04 15:11:44 -08:00
|
|
|
|
|
|
|
|
|
/* ...followed by a length in bytes... */
|
2017-12-24 16:19:24 -08:00
|
|
|
|
char *p;
|
|
|
|
|
*length = strtoul(magic + magic_len + 1, &p, 10);
|
2009-11-04 15:11:44 -08:00
|
|
|
|
if (!*length || *length == ULONG_MAX || *p != ' ') {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
p++;
|
|
|
|
|
|
|
|
|
|
/* ...followed by a SHA-1 hash... */
|
|
|
|
|
if (!sha1_from_hex(sha1, p)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
p += SHA1_HEX_DIGEST_LEN;
|
|
|
|
|
|
|
|
|
|
/* ...and ended by a new-line. */
|
|
|
|
|
if (*p != '\n') {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct ovsdb_error *
|
2009-11-13 13:23:35 -08:00
|
|
|
|
parse_body(struct ovsdb_log *file, off_t offset, unsigned long int length,
|
2009-11-04 15:11:44 -08:00
|
|
|
|
uint8_t sha1[SHA1_DIGEST_SIZE], struct json **jsonp)
|
|
|
|
|
{
|
|
|
|
|
struct json_parser *parser;
|
|
|
|
|
struct sha1_ctx ctx;
|
|
|
|
|
|
|
|
|
|
sha1_init(&ctx);
|
|
|
|
|
parser = json_parser_create(JSPF_TRAILER);
|
|
|
|
|
|
|
|
|
|
while (length > 0) {
|
|
|
|
|
char input[BUFSIZ];
|
|
|
|
|
int chunk;
|
|
|
|
|
|
|
|
|
|
chunk = MIN(length, sizeof input);
|
|
|
|
|
if (fread(input, 1, chunk, file->stream) != chunk) {
|
2022-05-07 00:55:21 +02:00
|
|
|
|
sha1_final(&ctx, sha1);
|
2009-11-04 15:11:44 -08:00
|
|
|
|
json_parser_abort(parser);
|
|
|
|
|
return ovsdb_io_error(ferror(file->stream) ? errno : EOF,
|
|
|
|
|
"%s: error reading %lu bytes "
|
2017-10-06 15:15:31 -07:00
|
|
|
|
"starting at offset %lld",
|
|
|
|
|
file->display_name, length,
|
|
|
|
|
(long long int) offset);
|
2009-11-04 15:11:44 -08:00
|
|
|
|
}
|
|
|
|
|
sha1_update(&ctx, input, chunk);
|
|
|
|
|
json_parser_feed(parser, input, chunk);
|
|
|
|
|
length -= chunk;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sha1_final(&ctx, sha1);
|
|
|
|
|
*jsonp = json_parser_finish(parser);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-06 10:33:21 -08:00
|
|
|
|
/* Attempts to read a log record from 'file'.
|
|
|
|
|
*
|
|
|
|
|
* If successful, returns NULL and stores in '*jsonp' the JSON object that the
|
|
|
|
|
* record contains. The caller owns the data and must eventually free it (with
|
|
|
|
|
* json_destroy()).
|
|
|
|
|
*
|
|
|
|
|
* If a read error occurs, returns the error and stores NULL in '*jsonp'.
|
|
|
|
|
*
|
|
|
|
|
* If the read reaches end of file, returns NULL and stores NULL in
|
|
|
|
|
* '*jsonp'. */
|
2009-11-04 15:11:44 -08:00
|
|
|
|
struct ovsdb_error *
|
2009-11-13 13:23:35 -08:00
|
|
|
|
ovsdb_log_read(struct ovsdb_log *file, struct json **jsonp)
|
2009-11-04 15:11:44 -08:00
|
|
|
|
{
|
2017-12-07 14:33:28 -08:00
|
|
|
|
*jsonp = NULL;
|
|
|
|
|
switch (file->state) {
|
|
|
|
|
case OVSDB_LOG_READ:
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OVSDB_LOG_READ_ERROR:
|
|
|
|
|
case OVSDB_LOG_WRITE_ERROR:
|
|
|
|
|
case OVSDB_LOG_BROKEN:
|
|
|
|
|
return ovsdb_error_clone(file->error);
|
|
|
|
|
|
|
|
|
|
case OVSDB_LOG_WRITE:
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2009-11-04 15:11:44 -08:00
|
|
|
|
uint8_t expected_sha1[SHA1_DIGEST_SIZE];
|
|
|
|
|
uint8_t actual_sha1[SHA1_DIGEST_SIZE];
|
|
|
|
|
struct ovsdb_error *error;
|
|
|
|
|
unsigned long data_length;
|
|
|
|
|
struct json *json;
|
|
|
|
|
char header[128];
|
|
|
|
|
|
2017-12-07 14:33:28 -08:00
|
|
|
|
json = NULL;
|
2009-11-04 15:11:44 -08:00
|
|
|
|
|
|
|
|
|
if (!fgets(header, sizeof header, file->stream)) {
|
|
|
|
|
if (feof(file->stream)) {
|
2017-12-07 14:33:28 -08:00
|
|
|
|
return NULL;
|
2009-11-04 15:11:44 -08:00
|
|
|
|
}
|
2017-10-06 15:15:31 -07:00
|
|
|
|
error = ovsdb_io_error(errno, "%s: read failed", file->display_name);
|
2009-11-04 15:11:44 -08:00
|
|
|
|
goto error;
|
|
|
|
|
}
|
2017-12-24 16:19:24 -08:00
|
|
|
|
off_t data_offset = file->offset + strlen(header);
|
2009-11-04 15:11:44 -08:00
|
|
|
|
|
2017-12-24 16:19:24 -08:00
|
|
|
|
const char *magic;
|
|
|
|
|
if (!parse_header(header, &magic, &data_length, expected_sha1)
|
|
|
|
|
|| strcmp(magic, file->magic)) {
|
2009-11-04 15:11:44 -08:00
|
|
|
|
error = ovsdb_syntax_error(NULL, NULL, "%s: parse error at offset "
|
|
|
|
|
"%lld in header line \"%.*s\"",
|
2017-10-06 15:15:31 -07:00
|
|
|
|
file->display_name,
|
|
|
|
|
(long long int) file->offset,
|
2009-11-04 15:11:44 -08:00
|
|
|
|
(int) strcspn(header, "\n"), header);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
error = parse_body(file, data_offset, data_length, actual_sha1, &json);
|
|
|
|
|
if (error) {
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (memcmp(expected_sha1, actual_sha1, SHA1_DIGEST_SIZE)) {
|
|
|
|
|
error = ovsdb_syntax_error(NULL, NULL, "%s: %lu bytes starting at "
|
|
|
|
|
"offset %lld have SHA-1 hash "SHA1_FMT" "
|
|
|
|
|
"but should have hash "SHA1_FMT,
|
2017-10-06 15:15:31 -07:00
|
|
|
|
file->display_name, data_length,
|
2009-11-04 15:11:44 -08:00
|
|
|
|
(long long int) data_offset,
|
|
|
|
|
SHA1_ARGS(actual_sha1),
|
|
|
|
|
SHA1_ARGS(expected_sha1));
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (json->type == JSON_STRING) {
|
|
|
|
|
error = ovsdb_syntax_error(NULL, NULL, "%s: %lu bytes starting at "
|
|
|
|
|
"offset %lld are not valid JSON (%s)",
|
2017-10-06 15:15:31 -07:00
|
|
|
|
file->display_name, data_length,
|
2009-11-04 15:11:44 -08:00
|
|
|
|
(long long int) data_offset,
|
2025-06-24 21:54:31 +02:00
|
|
|
|
json_string(json));
|
2009-11-04 15:11:44 -08:00
|
|
|
|
goto error;
|
|
|
|
|
}
|
2017-12-06 10:33:21 -08:00
|
|
|
|
if (json->type != JSON_OBJECT) {
|
|
|
|
|
error = ovsdb_syntax_error(NULL, NULL, "%s: %lu bytes starting at "
|
|
|
|
|
"offset %lld are not a JSON object",
|
2017-10-06 15:15:31 -07:00
|
|
|
|
file->display_name, data_length,
|
2017-12-06 10:33:21 -08:00
|
|
|
|
(long long int) data_offset);
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
2009-11-04 15:11:44 -08:00
|
|
|
|
|
2011-03-28 13:05:40 -07:00
|
|
|
|
file->prev_offset = file->offset;
|
2009-11-04 15:11:44 -08:00
|
|
|
|
file->offset = data_offset + data_length;
|
|
|
|
|
*jsonp = json;
|
2011-05-04 13:49:42 -07:00
|
|
|
|
return NULL;
|
2009-11-04 15:11:44 -08:00
|
|
|
|
|
|
|
|
|
error:
|
2017-12-07 14:33:28 -08:00
|
|
|
|
file->state = OVSDB_LOG_READ_ERROR;
|
|
|
|
|
file->error = ovsdb_error_clone(error);
|
2009-11-04 15:11:44 -08:00
|
|
|
|
json_destroy(json);
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
2011-03-28 13:05:40 -07:00
|
|
|
|
/* Causes the log record read by the previous call to ovsdb_log_read() to be
|
|
|
|
|
* effectively discarded. The next call to ovsdb_log_write() will overwrite
|
|
|
|
|
* that previously read record.
|
|
|
|
|
*
|
|
|
|
|
* Calling this function more than once has no additional effect.
|
|
|
|
|
*
|
|
|
|
|
* This function is useful when ovsdb_log_read() successfully reads a record
|
|
|
|
|
* but that record does not make sense at a higher level (e.g. it specifies an
|
|
|
|
|
* invalid transaction). */
|
|
|
|
|
void
|
|
|
|
|
ovsdb_log_unread(struct ovsdb_log *file)
|
|
|
|
|
{
|
2017-12-07 14:33:28 -08:00
|
|
|
|
ovs_assert(file->state == OVSDB_LOG_READ);
|
2011-03-28 13:05:40 -07:00
|
|
|
|
file->offset = file->prev_offset;
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-06 15:15:31 -07:00
|
|
|
|
static struct ovsdb_error *
|
|
|
|
|
ovsdb_log_truncate(struct ovsdb_log *file)
|
|
|
|
|
{
|
|
|
|
|
file->state = OVSDB_LOG_WRITE;
|
|
|
|
|
|
|
|
|
|
struct ovsdb_error *error = NULL;
|
|
|
|
|
if (fseeko(file->stream, file->offset, SEEK_SET)) {
|
|
|
|
|
error = ovsdb_io_error(errno, "%s: cannot seek to offset %lld",
|
|
|
|
|
file->display_name,
|
|
|
|
|
(long long int) file->offset);
|
|
|
|
|
} else if (ftruncate(fileno(file->stream), file->offset)) {
|
|
|
|
|
error = ovsdb_io_error(errno, "%s: cannot truncate to length %lld",
|
|
|
|
|
file->display_name,
|
|
|
|
|
(long long int) file->offset);
|
|
|
|
|
}
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-27 21:42:56 +02:00
|
|
|
|
/* Removes all the data from the log by moving current offset to zero and
|
|
|
|
|
* truncating the file to zero bytes. After this operation the file is empty
|
|
|
|
|
* and in a write state. */
|
|
|
|
|
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
|
|
|
|
|
ovsdb_log_reset(struct ovsdb_log *file)
|
|
|
|
|
{
|
|
|
|
|
ovsdb_error_destroy(file->error);
|
|
|
|
|
file->offset = file->prev_offset = 0;
|
|
|
|
|
file->error = ovsdb_log_truncate(file);
|
|
|
|
|
if (file->error) {
|
|
|
|
|
file->state = OVSDB_LOG_WRITE_ERROR;
|
|
|
|
|
return ovsdb_error_clone(file->error);
|
|
|
|
|
}
|
|
|
|
|
file->state = OVSDB_LOG_WRITE;
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-14 13:44:15 -08:00
|
|
|
|
/* Composes a log record for 'json' by filling 'header' with a header line and
|
|
|
|
|
* 'data' with a data line (each ending with a new-line). To write the record
|
|
|
|
|
* to a file, write 'header' followed by 'data'.
|
|
|
|
|
*
|
2017-10-06 15:15:31 -07:00
|
|
|
|
* 'magic' is the magic to use in the header record, e.g. OVSDB_MAGIC.
|
|
|
|
|
*
|
2017-12-14 13:44:15 -08:00
|
|
|
|
* The caller must initialize 'header' and 'data' to empty strings. */
|
|
|
|
|
void
|
|
|
|
|
ovsdb_log_compose_record(const struct json *json,
|
2017-12-24 11:43:59 -08:00
|
|
|
|
const char *magic, struct ds *header, struct ds *data)
|
2017-12-14 13:44:15 -08:00
|
|
|
|
{
|
|
|
|
|
ovs_assert(json->type == JSON_OBJECT || json->type == JSON_ARRAY);
|
|
|
|
|
ovs_assert(!header->length);
|
|
|
|
|
ovs_assert(!data->length);
|
|
|
|
|
|
|
|
|
|
/* Compose content. */
|
|
|
|
|
json_to_ds(json, 0, data);
|
|
|
|
|
ds_put_char(data, '\n');
|
|
|
|
|
|
|
|
|
|
/* Compose header. */
|
|
|
|
|
uint8_t sha1[SHA1_DIGEST_SIZE];
|
|
|
|
|
sha1_bytes(data->string, data->length, sha1);
|
2017-12-24 16:19:24 -08:00
|
|
|
|
ds_put_format(header, "OVSDB %s %"PRIuSIZE" "SHA1_FMT"\n",
|
2017-12-14 13:44:15 -08:00
|
|
|
|
magic, data->length, SHA1_ARGS(sha1));
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-05 08:33:50 -08:00
|
|
|
|
/* Writes log record 'json' to 'file'. Returns NULL if successful or an error
|
2017-12-24 17:52:34 -08:00
|
|
|
|
* (which the caller must eventually destroy) on failure.
|
|
|
|
|
*
|
|
|
|
|
* If the log contains some records that have not yet been read, then calling
|
|
|
|
|
* this function truncates them.
|
|
|
|
|
*
|
|
|
|
|
* Log writes are atomic. A client may use ovsdb_log_commit() to ensure that
|
|
|
|
|
* they are durable.
|
|
|
|
|
*/
|
2009-11-04 15:11:44 -08:00
|
|
|
|
struct ovsdb_error *
|
2017-12-05 08:33:50 -08:00
|
|
|
|
ovsdb_log_write(struct ovsdb_log *file, const struct json *json)
|
2009-11-04 15:11:44 -08:00
|
|
|
|
{
|
2017-12-07 14:33:28 -08:00
|
|
|
|
switch (file->state) {
|
|
|
|
|
case OVSDB_LOG_WRITE:
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OVSDB_LOG_READ:
|
|
|
|
|
case OVSDB_LOG_READ_ERROR:
|
|
|
|
|
case OVSDB_LOG_WRITE_ERROR:
|
|
|
|
|
ovsdb_error_destroy(file->error);
|
2017-10-06 15:15:31 -07:00
|
|
|
|
file->error = ovsdb_log_truncate(file);
|
|
|
|
|
if (file->error) {
|
|
|
|
|
file->state = OVSDB_LOG_WRITE_ERROR;
|
|
|
|
|
return ovsdb_error_clone(file->error);
|
2009-11-04 15:11:44 -08:00
|
|
|
|
}
|
2017-10-06 15:15:31 -07:00
|
|
|
|
file->state = OVSDB_LOG_WRITE;
|
2017-12-07 14:33:28 -08:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case OVSDB_LOG_BROKEN:
|
|
|
|
|
return ovsdb_error_clone(file->error);
|
2009-11-04 15:11:44 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (json->type != JSON_OBJECT && json->type != JSON_ARRAY) {
|
2017-10-06 15:15:31 -07:00
|
|
|
|
return OVSDB_BUG("bad JSON type");
|
2009-11-04 15:11:44 -08:00
|
|
|
|
}
|
|
|
|
|
|
2017-12-14 13:44:15 -08:00
|
|
|
|
struct ds header = DS_EMPTY_INITIALIZER;
|
|
|
|
|
struct ds data = DS_EMPTY_INITIALIZER;
|
2017-12-24 11:43:59 -08:00
|
|
|
|
ovsdb_log_compose_record(json, file->magic, &header, &data);
|
2017-12-14 13:44:15 -08:00
|
|
|
|
size_t total_length = header.length + data.length;
|
2009-11-04 15:11:44 -08:00
|
|
|
|
|
|
|
|
|
/* Write. */
|
2017-12-14 13:44:15 -08:00
|
|
|
|
bool ok = (fwrite(header.string, header.length, 1, file->stream) == 1
|
|
|
|
|
&& fwrite(data.string, data.length, 1, file->stream) == 1
|
|
|
|
|
&& fflush(file->stream) == 0);
|
|
|
|
|
ds_destroy(&header);
|
|
|
|
|
ds_destroy(&data);
|
|
|
|
|
if (!ok) {
|
2017-10-06 15:15:31 -07:00
|
|
|
|
int error = errno;
|
2009-11-04 15:11:44 -08:00
|
|
|
|
|
2017-10-06 11:14:21 -07:00
|
|
|
|
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
|
|
|
|
|
VLOG_WARN_RL(&rl, "%s: write failed (%s)",
|
2017-10-06 15:15:31 -07:00
|
|
|
|
file->name, ovs_strerror(error));
|
2017-10-06 11:14:21 -07:00
|
|
|
|
|
2009-11-04 15:11:44 -08:00
|
|
|
|
/* Remove any partially written data, ignoring errors since there is
|
|
|
|
|
* nothing further we can do. */
|
2009-12-14 23:08:10 -08:00
|
|
|
|
ignore(ftruncate(fileno(file->stream), file->offset));
|
2009-11-04 15:11:44 -08:00
|
|
|
|
|
2017-10-06 15:15:31 -07:00
|
|
|
|
file->error = ovsdb_io_error(error, "%s: write failed",
|
|
|
|
|
file->display_name);
|
|
|
|
|
file->state = OVSDB_LOG_WRITE_ERROR;
|
|
|
|
|
return ovsdb_error_clone(file->error);
|
2009-11-04 15:11:44 -08:00
|
|
|
|
}
|
|
|
|
|
|
2017-12-14 13:44:15 -08:00
|
|
|
|
file->offset += total_length;
|
2011-05-04 13:49:42 -07:00
|
|
|
|
return NULL;
|
2009-11-04 15:11:44 -08:00
|
|
|
|
}
|
|
|
|
|
|
2017-12-31 21:15:58 -08:00
|
|
|
|
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
|
|
|
|
|
ovsdb_log_write_and_free(struct ovsdb_log *log, struct json *json)
|
|
|
|
|
{
|
|
|
|
|
struct ovsdb_error *error = ovsdb_log_write(log, json);
|
|
|
|
|
json_destroy(json);
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-07 16:01:01 -08:00
|
|
|
|
/* Attempts to commit 'file' to disk. Waits for the commit to succeed or fail.
|
|
|
|
|
* Returns NULL if successful, otherwise the error that occurred. */
|
2009-11-04 15:11:44 -08:00
|
|
|
|
struct ovsdb_error *
|
2017-12-07 16:01:01 -08:00
|
|
|
|
ovsdb_log_commit_block(struct ovsdb_log *file)
|
2009-11-04 15:11:44 -08:00
|
|
|
|
{
|
2020-04-21 09:23:57 +01:00
|
|
|
|
#if (_POSIX_C_SOURCE >= 199309L || _XOPEN_SOURCE >= 500)
|
|
|
|
|
/* we do not check metadata - mtime, atime, anywhere, so we
|
|
|
|
|
* do not need to update it every time we sync the log.
|
|
|
|
|
* if the system supports it, the log update should be
|
|
|
|
|
* data only
|
|
|
|
|
*/
|
|
|
|
|
if (file->stream && fdatasync(fileno(file->stream))) {
|
|
|
|
|
#else
|
2017-12-07 14:33:28 -08:00
|
|
|
|
if (file->stream && fsync(fileno(file->stream))) {
|
2020-04-21 09:23:57 +01:00
|
|
|
|
#endif
|
2017-10-06 15:15:31 -07:00
|
|
|
|
return ovsdb_io_error(errno, "%s: fsync failed", file->display_name);
|
2009-11-04 15:11:44 -08:00
|
|
|
|
}
|
2011-05-04 13:49:42 -07:00
|
|
|
|
return NULL;
|
2009-11-04 15:11:44 -08:00
|
|
|
|
}
|
2009-11-13 13:23:35 -08:00
|
|
|
|
|
2017-12-24 17:52:34 -08:00
|
|
|
|
/* Sets the current position in 'log' as the "base", that is, the initial size
|
|
|
|
|
* of the log that ovsdb_log_grew_lots() uses to determine whether the log has
|
|
|
|
|
* grown enough to make compacting worthwhile. */
|
|
|
|
|
void
|
|
|
|
|
ovsdb_log_mark_base(struct ovsdb_log *log)
|
2010-03-18 11:24:55 -07:00
|
|
|
|
{
|
2017-12-24 17:52:34 -08:00
|
|
|
|
log->base = log->offset;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Returns true if 'log' has grown enough above the base that it's worthwhile
|
|
|
|
|
* to compact it, false otherwise. */
|
|
|
|
|
bool
|
|
|
|
|
ovsdb_log_grew_lots(const struct ovsdb_log *log)
|
|
|
|
|
{
|
2018-03-08 23:20:56 +01:00
|
|
|
|
return log->offset > 10 * 1024 * 1024 && log->offset / 2 > log->base;
|
2010-03-18 11:24:55 -07:00
|
|
|
|
}
|
2017-12-07 14:33:28 -08:00
|
|
|
|
|
|
|
|
|
/* Attempts to atomically replace the contents of 'log', on disk, by the 'n'
|
|
|
|
|
* entries in 'entries'. If successful, returns NULL, otherwise returns an
|
|
|
|
|
* error (which the caller must eventually free).
|
|
|
|
|
*
|
|
|
|
|
* If successful, 'log' will be in write mode at the end of the log. */
|
|
|
|
|
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
|
|
|
|
|
ovsdb_log_replace(struct ovsdb_log *log, struct json **entries, size_t n)
|
|
|
|
|
{
|
|
|
|
|
struct ovsdb_error *error;
|
|
|
|
|
struct ovsdb_log *new;
|
|
|
|
|
|
|
|
|
|
error = ovsdb_log_replace_start(log, &new);
|
|
|
|
|
if (error) {
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < n; i++) {
|
|
|
|
|
error = ovsdb_log_write(new, entries[i]);
|
|
|
|
|
if (error) {
|
|
|
|
|
ovsdb_log_replace_abort(new);
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-12-24 17:52:34 -08:00
|
|
|
|
ovsdb_log_mark_base(new);
|
2017-12-07 14:33:28 -08:00
|
|
|
|
|
|
|
|
|
return ovsdb_log_replace_commit(log, new);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
|
|
|
|
|
ovsdb_log_replace_start(struct ovsdb_log *old,
|
|
|
|
|
struct ovsdb_log **newp)
|
|
|
|
|
{
|
|
|
|
|
/* If old->name is a symlink, then we want the new file to be in the same
|
|
|
|
|
* directory as the symlink's referent. */
|
|
|
|
|
char *deref_name = follow_symlinks(old->name);
|
|
|
|
|
char *tmp_name = xasprintf("%s.tmp", deref_name);
|
|
|
|
|
free(deref_name);
|
|
|
|
|
|
|
|
|
|
struct ovsdb_error *error;
|
|
|
|
|
|
|
|
|
|
ovs_assert(old->lockfile);
|
|
|
|
|
|
|
|
|
|
/* Remove temporary file. (It might not exist.) */
|
|
|
|
|
if (unlink(tmp_name) < 0 && errno != ENOENT) {
|
|
|
|
|
error = ovsdb_io_error(errno, "failed to remove %s", tmp_name);
|
|
|
|
|
free(tmp_name);
|
|
|
|
|
*newp = NULL;
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Create temporary file. */
|
|
|
|
|
error = ovsdb_log_open(tmp_name, old->magic, OVSDB_LOG_CREATE_EXCL,
|
|
|
|
|
false, newp);
|
|
|
|
|
free(tmp_name);
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Rename 'old' to 'new', replacing 'new' if it exists. Returns NULL if
|
|
|
|
|
* successful, otherwise an ovsdb_error that the caller must destroy. */
|
|
|
|
|
static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
|
|
|
|
|
ovsdb_rename(const char *old, const char *new)
|
|
|
|
|
{
|
|
|
|
|
#ifdef _WIN32
|
|
|
|
|
/* Avoid rename() because it fails if the destination exists. */
|
|
|
|
|
int error = (MoveFileEx(old, new, MOVEFILE_REPLACE_EXISTING
|
|
|
|
|
| MOVEFILE_WRITE_THROUGH | MOVEFILE_COPY_ALLOWED)
|
|
|
|
|
? 0 : EACCES);
|
|
|
|
|
#else
|
|
|
|
|
int error = rename(old, new) ? errno : 0;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
return (error
|
|
|
|
|
? ovsdb_io_error(error, "failed to rename \"%s\" to \"%s\"",
|
|
|
|
|
old, new)
|
|
|
|
|
: NULL);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
|
|
|
|
|
ovsdb_log_replace_commit(struct ovsdb_log *old, struct ovsdb_log *new)
|
|
|
|
|
{
|
2017-12-07 16:01:01 -08:00
|
|
|
|
struct ovsdb_error *error = ovsdb_log_commit_block(new);
|
2017-12-07 14:33:28 -08:00
|
|
|
|
if (error) {
|
|
|
|
|
ovsdb_log_replace_abort(new);
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Replace original file by the temporary file.
|
|
|
|
|
*
|
|
|
|
|
* We support two strategies:
|
|
|
|
|
*
|
|
|
|
|
* - The preferred strategy is to rename the temporary file over the
|
|
|
|
|
* original one in-place, then close the original one. This works on
|
|
|
|
|
* Unix-like systems. It does not work on Windows, which does not
|
|
|
|
|
* allow open files to be renamed. The approach has the advantage
|
|
|
|
|
* that, at any point, we can drop back to something that already
|
|
|
|
|
* works.
|
|
|
|
|
*
|
|
|
|
|
* - Alternatively, we can close both files, rename, then open the new
|
|
|
|
|
* file (which now has the original name). This works on all
|
|
|
|
|
* systems, but if reopening the file fails then 'old' is broken.
|
|
|
|
|
*
|
|
|
|
|
* We make the strategy a variable instead of an #ifdef to make it easier
|
|
|
|
|
* to test both strategies on Unix-like systems, and to make the code
|
|
|
|
|
* easier to read. */
|
|
|
|
|
if (!rename_open_files) {
|
|
|
|
|
fclose(old->stream);
|
|
|
|
|
old->stream = NULL;
|
|
|
|
|
|
|
|
|
|
fclose(new->stream);
|
|
|
|
|
new->stream = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Rename 'old' to 'new'. We dereference the old name because, if it is a
|
|
|
|
|
* symlink, we want to replace the referent of the symlink instead of the
|
|
|
|
|
* symlink itself. */
|
|
|
|
|
char *deref_name = follow_symlinks(old->name);
|
|
|
|
|
error = ovsdb_rename(new->name, deref_name);
|
|
|
|
|
free(deref_name);
|
|
|
|
|
|
|
|
|
|
if (error) {
|
|
|
|
|
ovsdb_log_replace_abort(new);
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
if (rename_open_files) {
|
|
|
|
|
fsync_parent_dir(old->name);
|
|
|
|
|
fclose(old->stream);
|
|
|
|
|
old->stream = new->stream;
|
|
|
|
|
new->stream = NULL;
|
|
|
|
|
} else {
|
|
|
|
|
old->stream = fopen(old->name, "r+b");
|
|
|
|
|
if (!old->stream) {
|
|
|
|
|
old->error = ovsdb_io_error(errno, "%s: could not reopen log",
|
|
|
|
|
old->name);
|
|
|
|
|
old->state = OVSDB_LOG_BROKEN;
|
|
|
|
|
return ovsdb_error_clone(old->error);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (fseek(old->stream, new->offset, SEEK_SET)) {
|
|
|
|
|
old->error = ovsdb_io_error(errno, "%s: seek failed", old->name);
|
|
|
|
|
old->state = OVSDB_LOG_BROKEN;
|
|
|
|
|
return ovsdb_error_clone(old->error);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Replace 'old' by 'new' in memory.
|
|
|
|
|
*
|
|
|
|
|
* 'old' transitions to OVSDB_LOG_WRITE (it was probably in that mode
|
|
|
|
|
* anyway). */
|
|
|
|
|
old->state = OVSDB_LOG_WRITE;
|
|
|
|
|
ovsdb_error_destroy(old->error);
|
|
|
|
|
old->error = NULL;
|
|
|
|
|
/* prev_offset only matters for OVSDB_LOG_READ. */
|
2017-12-07 16:01:01 -08:00
|
|
|
|
if (old->afsync) {
|
|
|
|
|
uint64_t ticket = afsync_destroy(old->afsync);
|
|
|
|
|
old->afsync = afsync_create(fileno(old->stream), ticket + 1);
|
|
|
|
|
}
|
2017-12-07 14:33:28 -08:00
|
|
|
|
old->offset = new->offset;
|
|
|
|
|
/* Keep old->name. */
|
|
|
|
|
free(old->magic);
|
|
|
|
|
old->magic = new->magic;
|
|
|
|
|
new->magic = NULL;
|
|
|
|
|
/* Keep old->lockfile. */
|
2017-12-24 17:52:34 -08:00
|
|
|
|
old->base = new->base;
|
|
|
|
|
|
2017-12-07 14:33:28 -08:00
|
|
|
|
/* Free 'new'. */
|
|
|
|
|
ovsdb_log_close(new);
|
|
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
ovsdb_log_replace_abort(struct ovsdb_log *new)
|
|
|
|
|
{
|
|
|
|
|
if (new) {
|
|
|
|
|
/* Unlink the new file, but only after we close it (because Windows
|
|
|
|
|
* does not allow removing an open file). */
|
|
|
|
|
char *name = xstrdup(new->name);
|
|
|
|
|
ovsdb_log_close(new);
|
|
|
|
|
unlink(name);
|
|
|
|
|
free(name);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
ovsdb_log_disable_renaming_open_files(void)
|
|
|
|
|
{
|
|
|
|
|
rename_open_files = false;
|
|
|
|
|
}
|
2017-12-07 16:01:01 -08:00
|
|
|
|
|
|
|
|
|
struct afsync {
|
|
|
|
|
pthread_t thread;
|
|
|
|
|
atomic_uint64_t cur, next;
|
|
|
|
|
struct seq *request, *complete;
|
|
|
|
|
int fd;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static void *
|
|
|
|
|
afsync_thread(void *afsync_)
|
|
|
|
|
{
|
|
|
|
|
struct afsync *afsync = afsync_;
|
|
|
|
|
uint64_t cur = 0;
|
|
|
|
|
for (;;) {
|
|
|
|
|
ovsrcu_quiesce_start();
|
|
|
|
|
|
|
|
|
|
uint64_t request_seq = seq_read(afsync->request);
|
|
|
|
|
|
|
|
|
|
uint64_t next;
|
|
|
|
|
atomic_read_explicit(&afsync->next, &next, memory_order_acquire);
|
|
|
|
|
if (next == UINT64_MAX) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (cur != next && afsync->fd != -1) {
|
|
|
|
|
int error = fsync(afsync->fd) ? errno : 0;
|
|
|
|
|
if (!error) {
|
|
|
|
|
cur = next;
|
|
|
|
|
atomic_store_explicit(&afsync->cur, cur, memory_order_release);
|
|
|
|
|
seq_change(afsync->complete);
|
|
|
|
|
} else {
|
|
|
|
|
VLOG_WARN("fsync failed (%s)", ovs_strerror(error));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
seq_wait(afsync->request, request_seq);
|
|
|
|
|
poll_block();
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct afsync *
|
|
|
|
|
afsync_create(int fd, uint64_t initial_ticket)
|
|
|
|
|
{
|
|
|
|
|
struct afsync *afsync = xzalloc(sizeof *afsync);
|
|
|
|
|
atomic_init(&afsync->cur, initial_ticket);
|
|
|
|
|
atomic_init(&afsync->next, initial_ticket);
|
|
|
|
|
afsync->request = seq_create();
|
|
|
|
|
afsync->complete = seq_create();
|
|
|
|
|
afsync->thread = ovs_thread_create("log_fsync", afsync_thread, afsync);
|
|
|
|
|
afsync->fd = fd;
|
|
|
|
|
return afsync;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static uint64_t
|
|
|
|
|
afsync_destroy(struct afsync *afsync)
|
|
|
|
|
{
|
|
|
|
|
if (!afsync) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint64_t next;
|
|
|
|
|
atomic_read(&afsync->next, &next);
|
|
|
|
|
atomic_store(&afsync->next, UINT64_MAX);
|
|
|
|
|
seq_change(afsync->request);
|
|
|
|
|
xpthread_join(afsync->thread, NULL);
|
|
|
|
|
|
|
|
|
|
seq_destroy(afsync->request);
|
|
|
|
|
seq_destroy(afsync->complete);
|
|
|
|
|
|
|
|
|
|
free(afsync);
|
|
|
|
|
|
|
|
|
|
return next;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct afsync *
|
|
|
|
|
ovsdb_log_get_afsync(struct ovsdb_log *log)
|
|
|
|
|
{
|
|
|
|
|
if (!log->afsync) {
|
|
|
|
|
log->afsync = afsync_create(log->stream ? fileno(log->stream) : -1, 0);
|
|
|
|
|
}
|
|
|
|
|
return log->afsync;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Starts committing 'log' to disk. Returns a ticket that can be passed to
|
|
|
|
|
* ovsdb_log_commit_wait() or compared against the return value of
|
|
|
|
|
* ovsdb_log_commit_progress() later. */
|
|
|
|
|
uint64_t
|
|
|
|
|
ovsdb_log_commit_start(struct ovsdb_log *log)
|
|
|
|
|
{
|
|
|
|
|
struct afsync *afsync = ovsdb_log_get_afsync(log);
|
|
|
|
|
|
|
|
|
|
uint64_t orig;
|
|
|
|
|
atomic_add_explicit(&afsync->next, 1, &orig, memory_order_acq_rel);
|
|
|
|
|
|
|
|
|
|
seq_change(afsync->request);
|
|
|
|
|
|
|
|
|
|
return orig + 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Returns a ticket value that represents the current progress of commits to
|
|
|
|
|
* 'log'. Suppose that some call to ovsdb_log_commit_start() returns X and any
|
|
|
|
|
* call ovsdb_log_commit_progress() returns Y, for the same 'log'. Then commit
|
|
|
|
|
* X is complete if and only if X <= Y. */
|
|
|
|
|
uint64_t
|
|
|
|
|
ovsdb_log_commit_progress(struct ovsdb_log *log)
|
|
|
|
|
{
|
|
|
|
|
struct afsync *afsync = ovsdb_log_get_afsync(log);
|
|
|
|
|
uint64_t cur;
|
|
|
|
|
atomic_read_explicit(&afsync->cur, &cur, memory_order_acquire);
|
|
|
|
|
return cur;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Causes poll_block() to wake up if and when ovsdb_log_commit_progress(log)
|
|
|
|
|
* would return at least 'goal'. */
|
|
|
|
|
void
|
|
|
|
|
ovsdb_log_commit_wait(struct ovsdb_log *log, uint64_t goal)
|
|
|
|
|
{
|
|
|
|
|
struct afsync *afsync = ovsdb_log_get_afsync(log);
|
|
|
|
|
uint64_t complete = seq_read(afsync->complete);
|
|
|
|
|
uint64_t cur = ovsdb_log_commit_progress(log);
|
|
|
|
|
if (cur < goal) {
|
|
|
|
|
seq_wait(afsync->complete, complete);
|
|
|
|
|
} else {
|
|
|
|
|
poll_immediate_wake();
|
|
|
|
|
}
|
|
|
|
|
}
|