1 /* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2016 Nicira, Inc.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
30 #include "ovsdb-error.h"
32 #include "socket-util.h"
35 #include "transaction.h"
38 #include "openvswitch/vlog.h"
40 VLOG_DEFINE_THIS_MODULE(ovsdb_file);
42 /* Minimum number of milliseconds between database compactions. */
/* Added to the 'last_compact' timestamp to compute 'next_compact'. */
43 #define COMPACT_MIN_MSEC (10 * 60 * 1000) /* 10 minutes. */
45 /* Minimum number of milliseconds between trying to compact the database if
46 * compacting fails. */
/* Added to the current time_msec() to reschedule after a failed attempt. */
47 #define COMPACT_RETRY_MSEC (60 * 1000) /* 1 minute. */
49 /* A transaction being converted to JSON for writing to a file. */
/* ovsdb_file_txn_add_row() creates 'json' and 'table_json' with
 * json_object_create() and attaches 'table_json' to 'json' under the table's
 * schema name; 'table' is what it compares against to detect a change of
 * table. */
50 struct ovsdb_file_txn {
51 struct json *json; /* JSON for the whole transaction. */
52 struct json *table_json; /* JSON for 'table''s transaction. */
53 struct ovsdb_table *table; /* Table described in 'table_json'. */
/* Forward declarations of file-local helpers that are defined later in this
 * file. */
56 static void ovsdb_file_txn_init(struct ovsdb_file_txn *);
57 static void ovsdb_file_txn_add_row(struct ovsdb_file_txn *,
58 const struct ovsdb_row *old,
59 const struct ovsdb_row *new,
60 const unsigned long int *changed);
61 static struct ovsdb_error *ovsdb_file_txn_commit(struct json *,
66 static struct ovsdb_error *ovsdb_file_open__(const char *file_name,
67 const struct ovsdb_schema *,
68 bool read_only, struct ovsdb **,
69 struct ovsdb_file **);
70 static struct ovsdb_error *ovsdb_file_txn_from_json(
71 struct ovsdb *, const struct json *, bool converting, struct ovsdb_txn **);
72 static struct ovsdb_error *ovsdb_file_create(struct ovsdb *,
74 const char *file_name,
75 unsigned int n_transactions,
77 struct ovsdb_file **filep);
79 /* Opens database 'file_name' and stores a pointer to the new database in
80 * '*dbp'. If 'read_only' is false, then the database will be locked and
81 * changes to the database will be written to disk. If 'read_only' is true,
82 * the database will not be locked and changes to the database will persist
83 * only as long as the "struct ovsdb".
85 * If 'filep' is nonnull and 'read_only' is false, then on success sets
86 * '*filep' to an ovsdb_file that represents the open file. This ovsdb_file
87 * persists until '*dbp' is destroyed.
89 * On success, returns NULL. On failure, returns an ovsdb_error (which the
90 * caller must destroy) and sets '*dbp' and '*filep' to NULL. */
92 ovsdb_file_open(const char *file_name, bool read_only,
93 struct ovsdb **dbp, struct ovsdb_file **filep)
/* A null alternate schema makes ovsdb_file_open__() ask the log for the
 * schema stored in the file itself. */
95 return ovsdb_file_open__(file_name, NULL, read_only, dbp, filep);
98 /* Opens database 'file_name' with an alternate schema. The specified 'schema'
99 * is used to interpret the data in 'file_name', ignoring the schema actually
100 * stored in the file. Data in the file for tables or columns that do not
101 * exist in 'schema' are ignored, but the ovsdb file format must otherwise be
102 * observed, including column constraints.
104 * This function can be useful for upgrading or downgrading databases to
105 * "almost-compatible" formats.
107 * The database will not be locked. Changes to the database will persist only
108 * as long as the "struct ovsdb".
110 * On success, stores a pointer to the new database in '*dbp' and returns a
111 * null pointer. On failure, returns an ovsdb_error (which the caller must
112 * destroy) and sets '*dbp' to NULL. */
114 ovsdb_file_open_as_schema(const char *file_name,
115 const struct ovsdb_schema *schema,
/* Always read-only ('true') and without an ovsdb_file (null 'filep'), which
 * ovsdb_file_open__() requires of read-only opens. */
118 return ovsdb_file_open__(file_name, schema, true, dbp, NULL);
/* Opens 'file_name' as an OVSDB log in 'open_mode', reads its first record,
 * and parses that record as the database schema.  A file with no first record
 * is reported as "database file contains no schema", and a record that does
 * not parse is reported as a failure to parse the file as an ovsdb schema.
 *
 * At least one of 'logp' and 'schemap' must be nonnull.
 * NOTE(review): presumably the open log and parsed schema are handed back
 * through whichever of those pointers is nonnull -- confirm. */
121 static struct ovsdb_error *
122 ovsdb_file_open_log(const char *file_name, enum ovsdb_log_open_mode open_mode,
123 struct ovsdb_log **logp, struct ovsdb_schema **schemap)
125 struct ovsdb_schema *schema = NULL;
126 struct ovsdb_log *log = NULL;
127 struct ovsdb_error *error;
128 struct json *json = NULL;
130 ovs_assert(logp || schemap);
132 error = ovsdb_log_open(file_name, open_mode, -1, &log);
/* The first record in the log is expected to be the schema. */
137 error = ovsdb_log_read(log, &json);
141 error = ovsdb_io_error(EOF, "%s: database file contains no schema",
147 error = ovsdb_schema_from_json(json, &schema);
149 error = ovsdb_wrap_error(error,
150 "failed to parse \"%s\" as ovsdb schema",
160 ovsdb_log_close(log);
168 ovsdb_log_close(log);
/* Common implementation of ovsdb_file_open() and ovsdb_file_open_as_schema().
 *
 * Opens the log for 'file_name' (read-only or read-write according to
 * 'read_only'), creates the database from the schema read from the file or,
 * when 'alternate_schema' is nonnull, from a clone of 'alternate_schema', and
 * then reads each remaining log record, converts it to a transaction with
 * ovsdb_file_txn_from_json(), and commits it to the new database.  Unknown
 * tables and columns are tolerated only when an alternate schema is in use.
 *
 * A record that cannot be converted or committed is pushed back with
 * ovsdb_log_unread().  'filep' must be null when 'read_only' is true. */
179 static struct ovsdb_error *
180 ovsdb_file_open__(const char *file_name,
181 const struct ovsdb_schema *alternate_schema,
182 bool read_only, struct ovsdb **dbp,
183 struct ovsdb_file **filep)
185 enum ovsdb_log_open_mode open_mode;
186 struct ovsdb_schema *schema = NULL;
187 struct ovsdb_error *error;
188 struct ovsdb_log *log;
190 struct ovsdb *db = NULL;
192 /* In read-only mode there is no ovsdb_file so 'filep' must be null. */
193 ovs_assert(!(read_only && filep));
195 open_mode = read_only ? OVSDB_LOG_READ_ONLY : OVSDB_LOG_READ_WRITE;
/* Request the file's own schema only when no alternate one was supplied. */
196 error = ovsdb_file_open_log(file_name, open_mode, &log,
197 alternate_schema ? NULL : &schema);
202 db = ovsdb_create(schema ? schema : ovsdb_schema_clone(alternate_schema));
204 /* When a log gets big, we compact it into a new log that initially has
205 * only a single transaction that represents the entire state of the
206 * database. Thus, we consider the first transaction in the database to be
207 * the snapshot. We measure its size to later influence the minimum log
208 * size before compacting again.
210 * The schema precedes the snapshot in the log; we could compensate for its
211 * size, but it's just not that important. */
212 off_t snapshot_size = 0;
213 unsigned int n_transactions = 0;
214 while ((error = ovsdb_log_read(log, &json)) == NULL && json) {
215 struct ovsdb_txn *txn;
217 error = ovsdb_file_txn_from_json(db, json, alternate_schema != NULL,
221 ovsdb_log_unread(log);
226 error = ovsdb_txn_commit(txn, false);
228 ovsdb_log_unread(log);
232 if (n_transactions == 1) {
233 snapshot_size = ovsdb_log_get_offset(log);
237 /* Log error but otherwise ignore it. Probably the database just got
238 * truncated due to power failure etc. and we should use its current
240 char *msg = ovsdb_error_to_string(error);
244 ovsdb_error_destroy(error);
248 struct ovsdb_file *file;
250 error = ovsdb_file_create(db, log, file_name, n_transactions,
251 snapshot_size, &file);
259 ovsdb_log_close(log);
271 ovsdb_log_close(log);
/* Applies the column values in JSON object 'json' to 'row'.
 *
 * Each member name of 'json' is looked up as a column in the schema of
 * 'row''s table.  The member's value is parsed as a datum of that column's
 * type and swapped into the row; the datum that was displaced is then
 * destroyed.
 *
 * Returns a syntax error if 'json' is not an object, and an "unknown column"
 * syntax error for a name that is not a column.
 * NOTE(review): 'converting' presumably suppresses the unknown-column error,
 * per the contract on ovsdb_file_txn_from_json() -- confirm. */
275 static struct ovsdb_error *
276 ovsdb_file_update_row_from_json(struct ovsdb_row *row, bool converting,
277 const struct json *json)
279 struct ovsdb_table_schema *schema = row->table->schema;
280 struct ovsdb_error *error;
281 struct shash_node *node;
283 if (json->type != JSON_OBJECT) {
284 return ovsdb_syntax_error(json, NULL, "row must be JSON object");
287 SHASH_FOR_EACH (node, json_object(json)) {
288 const char *column_name = node->name;
289 const struct ovsdb_column *column;
290 struct ovsdb_datum datum;
292 column = ovsdb_table_schema_get_column(schema, column_name);
297 return ovsdb_syntax_error(json, "unknown column",
298 "No column %s in table %s.",
299 column_name, schema->name);
302 error = ovsdb_datum_from_json(&datum, &column->type, node->data, NULL);
/* After the swap 'datum' holds the row's previous value, which is what gets
 * destroyed. */
306 ovsdb_datum_swap(&row->fields[column->index], &datum);
307 ovsdb_datum_destroy(&datum, &column->type);
/* Applies to 'txn' the logged change 'json' for the row of 'table' identified
 * by 'row_uuid'.
 *
 * A JSON null value deletes the row and is a syntax error when the row does
 * not exist.  Otherwise 'json' holds column values: they are applied either to
 * a modifiable copy of the existing row obtained from ovsdb_txn_row_modify(),
 * or to a newly created row that is given 'row_uuid' and inserted into 'txn'.
 * A new row that fails to parse is destroyed instead of inserted. */
313 static struct ovsdb_error *
314 ovsdb_file_txn_row_from_json(struct ovsdb_txn *txn, struct ovsdb_table *table,
316 const struct uuid *row_uuid, struct json *json)
318 const struct ovsdb_row *row = ovsdb_table_get_row(table, row_uuid);
319 if (json->type == JSON_NULL) {
321 return ovsdb_syntax_error(NULL, NULL, "transaction deletes "
322 "row "UUID_FMT" that does not exist",
323 UUID_ARGS(row_uuid));
325 ovsdb_txn_row_delete(txn, row);
328 return ovsdb_file_update_row_from_json(ovsdb_txn_row_modify(txn, row),
331 struct ovsdb_error *error;
332 struct ovsdb_row *new;
334 new = ovsdb_row_create(table);
335 *ovsdb_row_get_uuid_rw(new) = *row_uuid;
336 error = ovsdb_file_update_row_from_json(new, converting, json);
338 ovsdb_row_destroy(new);
340 ovsdb_txn_row_insert(txn, new);
/* Applies to 'txn' the logged changes for 'table' held in JSON object 'json'.
 *
 * Each member of 'json' is named by a row UUID string and its value is passed
 * to ovsdb_file_txn_row_from_json().  Returns a syntax error if 'json' is not
 * an object or a member name is not a valid UUID. */
346 static struct ovsdb_error *
347 ovsdb_file_txn_table_from_json(struct ovsdb_txn *txn,
348 struct ovsdb_table *table,
349 bool converting, struct json *json)
351 struct shash_node *node;
353 if (json->type != JSON_OBJECT) {
354 return ovsdb_syntax_error(json, NULL, "object expected");
357 SHASH_FOR_EACH (node, json->u.object) {
358 const char *uuid_string = node->name;
359 struct json *txn_row_json = node->data;
360 struct ovsdb_error *error;
361 struct uuid row_uuid;
363 if (!uuid_from_string(&row_uuid, uuid_string)) {
364 return ovsdb_syntax_error(json, NULL, "\"%s\" is not a valid UUID",
368 error = ovsdb_file_txn_row_from_json(txn, table, converting,
369 &row_uuid, txn_row_json);
378 /* Converts 'json' to an ovsdb_txn for 'db', storing the new transaction in
379 * '*txnp'. Returns NULL if successful, otherwise an error.
381 * If 'converting' is true, then unknown table and column names are ignored
382 * (which can ease upgrading and downgrading schemas); otherwise, they are
383 * treated as errors. */
384 static struct ovsdb_error *
385 ovsdb_file_txn_from_json(struct ovsdb *db, const struct json *json,
386 bool converting, struct ovsdb_txn **txnp)
388 struct ovsdb_error *error;
389 struct shash_node *node;
390 struct ovsdb_txn *txn;
394 if (json->type != JSON_OBJECT) {
395 return ovsdb_syntax_error(json, NULL, "object expected");
398 txn = ovsdb_txn_create(db);
/* Each member of the transaction object is keyed by a table name. */
399 SHASH_FOR_EACH (node, json->u.object) {
400 const char *table_name = node->name;
401 struct json *node_json = node->data;
402 struct ovsdb_table *table;
404 table = shash_find_data(&db->tables, table_name);
/* "_date" and "_comment" are the metadata members that
 * ovsdb_file_txn_commit() adds to every record, not table names. */
406 if (!strcmp(table_name, "_date")
407 && node_json->type == JSON_INTEGER) {
409 } else if (!strcmp(table_name, "_comment") || converting) {
413 error = ovsdb_syntax_error(json, "unknown table",
414 "No table named %s.", table_name);
418 error = ovsdb_file_txn_table_from_json(txn, table, converting,
428 ovsdb_txn_abort(txn);
/* Writes a snapshot of 'db' to a newly created log named 'file_name'.
 *
 * The log is opened with OVSDB_LOG_CREATE and the given 'locking' mode.  The
 * schema of 'db' is written as the first record, followed by a single
 * transaction that adds every row of every table and carries 'comment'.
 *
 * NOTE(review): 'logp' presumably receives the still-open log when nonnull
 * (ovsdb_file_compact() passes one, ovsdb_file_save_copy() passes NULL) --
 * confirm. */
432 static struct ovsdb_error *
433 ovsdb_file_save_copy__(const char *file_name, int locking,
434 const char *comment, const struct ovsdb *db,
435 struct ovsdb_log **logp)
437 const struct shash_node *node;
438 struct ovsdb_file_txn ftxn;
439 struct ovsdb_error *error;
440 struct ovsdb_log *log;
443 error = ovsdb_log_open(file_name, OVSDB_LOG_CREATE, locking, &log);
/* The schema is the first record of the new log. */
449 json = ovsdb_schema_to_json(db->schema);
450 error = ovsdb_log_write(log, json);
/* Build one transaction containing every row; with a null 'old' row each
 * one is recorded as a newly added row. */
457 ovsdb_file_txn_init(&ftxn);
458 SHASH_FOR_EACH (node, &db->tables) {
459 const struct ovsdb_table *table = node->data;
460 const struct ovsdb_row *row;
462 HMAP_FOR_EACH (row, hmap_node, &table->rows) {
463 ovsdb_file_txn_add_row(&ftxn, NULL, row, NULL);
466 error = ovsdb_file_txn_commit(ftxn.json, comment, true, log);
477 ovsdb_log_close(log);
484 /* Saves a snapshot of 'db''s current contents as 'file_name'. If 'comment' is
485 * nonnull, then it is added along with the data contents and can be viewed
486 * with "ovsdb-tool show-log".
488 * 'locking' is passed along to ovsdb_log_open() untouched. */
490 ovsdb_file_save_copy(const char *file_name, int locking,
491 const char *comment, const struct ovsdb *db)
/* The final NULL is the 'logp' argument: no log handle is requested. */
493 return ovsdb_file_save_copy__(file_name, locking, comment, db, NULL);
496 /* Opens database 'file_name', reads its schema, and closes it. On success,
497 * stores the schema into '*schemap' and returns NULL; the caller then owns the
498 * schema. On failure, returns an ovsdb_error (which the caller must destroy)
499 * and sets '*schemap' to NULL. */
501 ovsdb_file_read_schema(const char *file_name, struct ovsdb_schema **schemap)
503 ovs_assert(schemap != NULL);
/* The NULL is the 'logp' argument: only the schema is requested. */
504 return ovsdb_file_open_log(file_name, OVSDB_LOG_READ_ONLY, NULL, schemap);
507 /* Replica implementation. */
/* Embedded replica; ovsdb_file_cast() recovers the ovsdb_file from it. */
510 struct ovsdb_replica replica;
/* Log backing the database; closed by ovsdb_file_destroy(). */
512 struct ovsdb_log *log;
/* time_msec() value recorded at creation and at each compaction. */
514 long long int last_compact;
/* time_msec() value before which ovsdb_file_commit() will not compact. */
515 long long int next_compact;
/* Incremented by ovsdb_file_commit(); set to 1 by ovsdb_file_compact(). */
516 unsigned int n_transactions;
/* Defined later in this file. */
520 static const struct ovsdb_replica_class ovsdb_file_class;
/* Creates an ovsdb_file replica for 'db' and adds it to 'db' with
 * ovsdb_add_replica().
 *
 * The replica records the absolute, symlink-resolved form of 'file_name',
 * along with 'n_transactions' and 'snapshot_size'.  The first compaction is
 * scheduled COMPACT_MIN_MSEC after the current time_msec().
 *
 * Returns an I/O error if the current working directory cannot be
 * determined. */
522 static struct ovsdb_error *
523 ovsdb_file_create(struct ovsdb *db, struct ovsdb_log *log,
524 const char *file_name,
525 unsigned int n_transactions, off_t snapshot_size,
526 struct ovsdb_file **filep)
528 struct ovsdb_file *file;
532 /* Use the absolute name of the file because ovsdb-server opens its
533 * database before daemonize() chdirs to "/". */
534 deref_name = follow_symlinks(file_name);
535 abs_name = abs_file_name(NULL, deref_name);
539 return ovsdb_io_error(0, "could not determine current "
540 "working directory");
543 file = xmalloc(sizeof *file);
544 ovsdb_replica_init(&file->replica, &ovsdb_file_class);
547 file->file_name = abs_name;
548 file->last_compact = time_msec();
549 file->next_compact = file->last_compact + COMPACT_MIN_MSEC;
550 file->snapshot_size = snapshot_size;
551 file->n_transactions = n_transactions;
552 ovsdb_add_replica(db, &file->replica);
/* Returns the ovsdb_file that embeds 'replica', asserting first that 'replica'
 * belongs to ovsdb_file_class. */
558 static struct ovsdb_file *
559 ovsdb_file_cast(struct ovsdb_replica *replica)
561 ovs_assert(replica->class == &ovsdb_file_class);
562 return CONTAINER_OF(replica, struct ovsdb_file, replica);
/* Callback that ovsdb_file_commit() passes to ovsdb_txn_for_each_change(): it
 * forwards one row change to ovsdb_file_txn_add_row(), with 'ftxn_' being the
 * struct ovsdb_file_txn under construction. */
566 ovsdb_file_change_cb(const struct ovsdb_row *old,
567 const struct ovsdb_row *new,
568 const unsigned long int *changed,
571 struct ovsdb_file_txn *ftxn = ftxn_;
572 ovsdb_file_txn_add_row(ftxn, old, new, changed);
/* Writes the changes made by 'txn' to the log file behind 'replica' and then
 * compacts the log if the conditions described below are met.
 *
 * The changes are gathered into a JSON transaction through
 * ovsdb_file_change_cb() and written with ovsdb_file_txn_commit() together
 * with the transaction's comment.  A failed compaction is only logged as a
 * warning, and the next attempt is scheduled COMPACT_RETRY_MSEC later. */
576 static struct ovsdb_error *
577 ovsdb_file_commit(struct ovsdb_replica *replica,
578 const struct ovsdb_txn *txn, bool durable)
580 struct ovsdb_file *file = ovsdb_file_cast(replica);
581 struct ovsdb_file_txn ftxn;
582 struct ovsdb_error *error;
584 ovsdb_file_txn_init(&ftxn);
585 ovsdb_txn_for_each_change(txn, ovsdb_file_change_cb, &ftxn);
587 /* Nothing to commit. */
591 error = ovsdb_file_txn_commit(ftxn.json, ovsdb_txn_get_comment(txn),
596 file->n_transactions++;
598 /* If it has been at least COMPACT_MIN_MSEC ms since the last time we
599 * compacted (or at least COMPACT_RETRY_MSEC ms since the last time we
600 * tried), and if there are at least 100 transactions in the database, and
601 * if the database is at least 10 MB, and the database is at least 4x the
602 * size of the previous snapshot, then compact the database. */
603 off_t log_size = ovsdb_log_get_offset(file->log);
604 if (time_msec() >= file->next_compact
605 && file->n_transactions >= 100
606 && log_size >= 10 * 1024 * 1024
607 && log_size / 4 >= file->snapshot_size)
609 error = ovsdb_file_compact(file);
611 char *s = ovsdb_error_to_string(error);
612 ovsdb_error_destroy(error);
613 VLOG_WARN("%s: compacting database failed (%s), retrying in "
615 file->file_name, s, COMPACT_RETRY_MSEC / 1000);
618 file->next_compact = time_msec() + COMPACT_RETRY_MSEC;
/* Compacts 'file' online by replacing its log with a fresh snapshot.
 *
 * The current log is committed first.  Then a lock is taken on
 * "<file_name>.tmp", any stale file of that name is removed, a snapshot of the
 * database is written there with ovsdb_file_save_copy__(), and the snapshot
 * is renamed over the original file.  Afterwards the old log is closed, the
 * compaction timestamps are reset, and the transaction count becomes 1.
 *
 * The snapshot's comment, which is also logged at INFO level, records the
 * time since the previous compaction, the number of transactions, and the
 * size of the log.  Any step that fails is reported as an ovsdb_error. */
626 ovsdb_file_compact(struct ovsdb_file *file)
628 struct ovsdb_log *new_log = NULL;
629 struct lockfile *tmp_lock = NULL;
630 struct ovsdb_error *error;
631 char *tmp_name = NULL;
632 char *comment = NULL;
/* 'last_compact' is always recorded with time_msec(), so the age has to be
 * computed from time_msec() as well.  Subtracting it from time_wall_msec()
 * would mix two different time bases and report a meaningless age. */
635 comment = xasprintf("compacting database online "
636 "(%.3f seconds old, %u transactions, %llu bytes)",
637 (time_msec() - file->last_compact) / 1000.0,
638 file->n_transactions,
639 (unsigned long long) ovsdb_log_get_offset(file->log));
640 VLOG_INFO("%s: %s", file->file_name, comment);
642 /* Commit the old version, so that we can be assured that we'll eventually
643 * have either the old or the new version. */
644 error = ovsdb_log_commit(file->log);
649 /* Lock temporary file. */
650 tmp_name = xasprintf("%s.tmp", file->file_name);
651 retval = lockfile_lock(tmp_name, &tmp_lock);
653 error = ovsdb_io_error(retval, "could not get lock on %s", tmp_name);
657 /* Remove temporary file. (It might not exist.) */
658 if (unlink(tmp_name) < 0 && errno != ENOENT) {
659 error = ovsdb_io_error(errno, "failed to remove %s", tmp_name);
664 error = ovsdb_file_save_copy__(tmp_name, false, comment, file->db,
670 /* Replace original by temporary. */
671 if (rename(tmp_name, file->file_name)) {
672 error = ovsdb_io_error(errno, "failed to rename \"%s\" to \"%s\"",
673 tmp_name, file->file_name);
676 fsync_parent_dir(file->file_name);
680 ovsdb_log_close(file->log);
682 file->last_compact = time_msec();
683 file->next_compact = file->last_compact + COMPACT_MIN_MSEC;
684 file->n_transactions = 1;
686 ovsdb_log_close(new_log);
692 lockfile_unlock(tmp_lock);
/* Releases the resources held by the ovsdb_file that embeds 'replica': its
 * log is closed and its file name is freed. */
700 ovsdb_file_destroy(struct ovsdb_replica *replica)
702 struct ovsdb_file *file = ovsdb_file_cast(replica);
704 ovsdb_log_close(file->log);
705 free(file->file_name);
/* Replica class for file-backed replicas.  ovsdb_file_create() installs it
 * with ovsdb_replica_init() and ovsdb_file_cast() checks for it. */
709 static const struct ovsdb_replica_class ovsdb_file_class = {
/* Initializes 'ftxn' before any rows are added; in particular it starts with
 * no per-table JSON. */
715 ovsdb_file_txn_init(struct ovsdb_file_txn *ftxn)
718 ftxn->table_json = NULL;
/* Adds to 'ftxn' the JSON for one row change, where 'old' and 'new' are the
 * row before and after the change.
 *
 * The row is written either as a JSON null or as a JSON object of column
 * values.  Only persistent columns other than the UUID column are written; a
 * column is selected either by its bit in 'changed' or by holding a
 * non-default value.  When 'old' is nonnull the object is created only once
 * some column is actually written.
 *
 * The row's JSON is stored under its UUID string in the JSON object for the
 * row's table, and that object is stored in the overall transaction object
 * under the table's schema name.  A new per-table object is started whenever
 * the row's table differs from 'ftxn->table'. */
723 ovsdb_file_txn_add_row(struct ovsdb_file_txn *ftxn,
724 const struct ovsdb_row *old,
725 const struct ovsdb_row *new,
726 const unsigned long int *changed)
731 row = json_null_create();
733 struct shash_node *node;
735 row = old ? NULL : json_object_create();
736 SHASH_FOR_EACH (node, &new->table->schema->columns) {
737 const struct ovsdb_column *column = node->data;
738 const struct ovsdb_type *type = &column->type;
739 unsigned int idx = column->index;
741 if (idx != OVSDB_COL_UUID && column->persistent
743 ? bitmap_is_set(changed, idx)
744 : !ovsdb_datum_is_default(&new->fields[idx], type)))
747 row = json_object_create();
749 json_object_put(row, column->name,
750 ovsdb_datum_to_json(&new->fields[idx], type));
756 struct ovsdb_table *table = new ? new->table : old->table;
757 char uuid[UUID_LEN + 1];
759 if (table != ftxn->table) {
760 /* Create JSON object for transaction overall. */
762 ftxn->json = json_object_create();
765 /* Create JSON object for transaction on this table. */
766 ftxn->table_json = json_object_create();
768 json_object_put(ftxn->json, table->schema->name, ftxn->table_json);
771 /* Add row to transaction for this table. */
772 snprintf(uuid, sizeof uuid,
773 UUID_FMT, UUID_ARGS(ovsdb_row_get_uuid(new ? new : old)));
774 json_object_put(ftxn->table_json, uuid, row);
/* Writes the transaction 'json' to 'log' as one record.
 *
 * Before writing, the record gets a "_comment" member taken from 'comment'
 * and a "_date" member holding the current wall-clock time in milliseconds.
 * A write failure is wrapped as "writing transaction failed" and a commit
 * failure as "committing transaction failed".
 *
 * NOTE(review): presumably the log commit is performed only when 'durable' is
 * true, and "_comment" only when 'comment' is nonnull -- confirm. */
778 static struct ovsdb_error *
779 ovsdb_file_txn_commit(struct json *json, const char *comment,
780 bool durable, struct ovsdb_log *log)
782 struct ovsdb_error *error;
785 json = json_object_create();
788 json_object_put_string(json, "_comment", comment);
/* "_date" is a calendar timestamp, hence wall-clock rather than monotonic
 * time. */
790 json_object_put(json, "_date", json_integer_create(time_wall_msec()));
792 error = ovsdb_log_write(log, json);
795 return ovsdb_wrap_error(error, "writing transaction failed");
799 error = ovsdb_log_commit(log);
801 return ovsdb_wrap_error(error, "committing transaction failed");