/*
* Copyright (c) 2009-2012, 2014-2017 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <config.h>
#include "openvswitch/json.h"
#include <ctype.h>
#include <errno.h>
#include <float.h>
#include <limits.h>
#include <string.h>
#include "cooperative-multitasking.h"
#include "openvswitch/dynamic-string.h"
#include "hash.h"
#include "json.h"
#include "openvswitch/shash.h"
#include "unicode.h"
#include "util.h"
#include "uuid.h"
/* Non-public JSSF_* flags. Must not overlap with public ones defined
* in include/openvswitch/json.h. */
enum {
JSSF_YIELD = 1 << 7,
};
/* The type of a JSON token. */
enum json_token_type {
T_EOF = 0,
T_BEGIN_ARRAY = '[',
T_END_ARRAY = ']',
T_BEGIN_OBJECT = '{',
T_END_OBJECT = '}',
T_NAME_SEPARATOR = ':',
T_VALUE_SEPARATOR = ',',
T_FALSE = UCHAR_MAX + 1,
T_NULL,
T_TRUE,
T_INTEGER,
T_REAL,
T_STRING
};
/* A JSON token.
*
* RFC 4627 doesn't define a lexical structure for JSON but I believe this to
* be compliant with the standard.
*/
struct json_token {
enum json_token_type type;
union {
double real;
long long int integer;
const char *string;
};
};
enum json_lex_state {
JSON_LEX_START, /* Not inside a token. */
JSON_LEX_NUMBER, /* Reading a number. */
JSON_LEX_KEYWORD, /* Reading a keyword. */
JSON_LEX_STRING, /* Reading a quoted string. */
JSON_LEX_ESCAPE /* In a quoted string just after a "\". */
};
enum json_parse_state {
JSON_PARSE_START, /* Beginning of input. */
JSON_PARSE_END, /* End of input. */
/* Objects. */
JSON_PARSE_OBJECT_INIT, /* Expecting '}' or an object name. */
JSON_PARSE_OBJECT_NAME, /* Expecting an object name. */
JSON_PARSE_OBJECT_COLON, /* Expecting ':'. */
JSON_PARSE_OBJECT_VALUE, /* Expecting an object value. */
JSON_PARSE_OBJECT_NEXT, /* Expecting ',' or '}'. */
/* Arrays. */
JSON_PARSE_ARRAY_INIT, /* Expecting ']' or a value. */
JSON_PARSE_ARRAY_VALUE, /* Expecting a value. */
JSON_PARSE_ARRAY_NEXT /* Expecting ',' or ']'. */
};
struct json_parser_node {
struct json *json;
};
/* A JSON parser. */
struct json_parser {
int flags;
/* Lexical analysis. */
enum json_lex_state lex_state;
struct ds buffer; /* Buffer for accumulating token text. */
int line_number;
int column_number;
int byte_number;
/* Parsing. */
enum json_parse_state parse_state;
#define JSON_MAX_HEIGHT 1000
struct json_parser_node *stack;
size_t height, allocated_height;
char *member_name;
/* Parse status. */
bool done;
char *error; /* Error message, if any, null if none yet. */
};
static struct json *json_create(enum json_type type);
static void json_parser_input(struct json_parser *, struct json_token *);
static void json_error(struct json_parser *p, const char *format, ...)
OVS_PRINTF_FORMAT(2, 3);
const char *
json_type_to_string(enum json_type type)
{
switch (type) {
case JSON_NULL:
return "null";
case JSON_FALSE:
return "false";
case JSON_TRUE:
return "true";
case JSON_OBJECT:
return "object";
case JSON_ARRAY:
return "array";
case JSON_INTEGER:
case JSON_REAL:
return "number";
case JSON_STRING:
return "string";
case JSON_SERIALIZED_OBJECT:
case JSON_N_TYPES:
default:
return "<invalid>";
}
}
/* Functions for manipulating struct json. */
struct json *
json_null_create(void)
{
return json_create(JSON_NULL);
}
struct json *
json_boolean_create(bool b)
{
return json_create(b ? JSON_TRUE : JSON_FALSE);
}
struct json *
json_string_create_nocopy(char *s)
{
struct json *json = json_create(JSON_STRING);
json->storage_type = JSON_STRING_DYNAMIC;
json->str_ptr = s;
return json;
}
struct json *
json_string_create(const char *s)
{
struct json *json = json_create(JSON_STRING);
size_t length = strlen(s);
if (length <= JSON_STRING_INLINE_LEN) {
json->storage_type = JSON_STRING_INLINE;
memcpy(json->str, s, length);
json->str[length] = '\0';
} else {
json->storage_type = JSON_STRING_DYNAMIC;
json->str_ptr = xmemdup0(s, length);
}
return json;
}
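/* A usage sketch: strings no longer than JSON_STRING_INLINE_LEN bytes
 * (plus the terminating '\0') are stored directly inside 'struct json';
 * longer ones are copied to the heap:
 *
 *     struct json *a = json_string_create("br0");    (inline storage)
 *     struct json *b = json_string_create(
 *         "a string too long for the inline buffer");  (dynamic storage)
 *     json_destroy(a);
 *     json_destroy(b);
 */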
struct json *
json_string_create_uuid(const struct uuid *uuid)
{
return json_string_create_nocopy(uuid_to_string(uuid));
}
struct json *
json_serialized_object_create(const struct json *src)
{
struct json *json = json_create(JSON_SERIALIZED_OBJECT);
json->str_ptr = json_to_string(src, JSSF_SORT);
return json;
}
struct json *
json_serialized_object_create_with_yield(const struct json *src)
{
struct json *json = json_create(JSON_SERIALIZED_OBJECT);
json->str_ptr = json_to_string(src, JSSF_SORT | JSSF_YIELD);
return json;
}
struct json *
json_array_create_empty(void)
{
struct json *json = json_create(JSON_ARRAY);
json->array.elements = NULL;
json->array.size = 0;
json->array.allocated = 0;
return json;
}
void
json_array_add(struct json *array_, struct json *element)
{
struct json_array *array = &array_->array;
if (array->size >= array->allocated) {
array->elements = x2nrealloc(array->elements, &array->allocated,
sizeof *array->elements);
}
array->elements[array->size++] = element;
}
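/* A usage sketch: building an array one element at a time.  Because
 * x2nrealloc() doubles the allocation whenever it fills up, a sequence of
 * json_array_add() calls runs in amortized constant time per element:
 *
 *     struct json *array = json_array_create_empty();
 *     for (int i = 0; i < 10; i++) {
 *         json_array_add(array, json_integer_create(i));
 *     }
 *     json_array_trim(array);   (optional: shrink allocation to fit)
 *     json_destroy(array);
 */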
void
json_array_set(struct json *json, size_t index, struct json *element)
{
ovs_assert(json_array_size(json) > index);
json->array.elements[index] = element;
}
struct json *
json_array_pop(struct json *json)
{
return json->array.size ? json->array.elements[--json->array.size] : NULL;
}
void
json_array_trim(struct json *array_)
{
struct json_array *array = &array_->array;
if (array->size < array->allocated) {
array->allocated = array->size;
array->elements = xrealloc(array->elements,
array->size * sizeof *array->elements);
}
}
struct json *
json_array_create(struct json **elements, size_t n)
{
struct json *json = json_create(JSON_ARRAY);
json->array.elements = elements;
json->array.size = n;
json->array.allocated = n;
return json;
}
struct json *
json_array_create_1(struct json *elem0)
{
struct json **elements = xmalloc(sizeof *elements);
elements[0] = elem0;
return json_array_create(elements, 1);
}
struct json *
json_array_create_2(struct json *elem0, struct json *elem1)
{
struct json **elements = xmalloc(2 * sizeof *elements);
elements[0] = elem0;
elements[1] = elem1;
return json_array_create(elements, 2);
}
struct json *
json_array_create_3(struct json *elem0, struct json *elem1, struct json *elem2)
{
struct json **elements = xmalloc(3 * sizeof *elements);
elements[0] = elem0;
elements[1] = elem1;
elements[2] = elem2;
return json_array_create(elements, 3);
}
bool
json_array_contains_string(const struct json *json, const char *str)
{
size_t n = json_array_size(json);
for (size_t i = 0; i < n; i++) {
const struct json *elem = json_array_at(json, i);
if (elem->type == JSON_STRING && !strcmp(json_string(elem), str)) {
return true;
}
}
return false;
}
struct json *
json_object_create(void)
{
struct json *json = json_create(JSON_OBJECT);
json->object = xmalloc(sizeof *json->object);
shash_init(json->object);
return json;
}
struct json *
json_integer_create(long long int integer)
{
struct json *json = json_create(JSON_INTEGER);
json->integer = integer;
return json;
}
struct json *
json_real_create(double real)
{
struct json *json = json_create(JSON_REAL);
json->real = real;
return json;
}
void
json_object_put(struct json *json, const char *name, struct json *value)
{
json_destroy(shash_replace(json->object, name, value));
}
void
json_object_put_nocopy(struct json *json, char *name, struct json *value)
{
json_destroy(shash_replace_nocopy(json->object, name, value));
}
void
json_object_put_string(struct json *json, const char *name, const char *value)
{
json_object_put(json, name, json_string_create(value));
}
void OVS_PRINTF_FORMAT(3, 4)
json_object_put_format(struct json *json,
const char *name, const char *format, ...)
{
va_list args;
va_start(args, format);
json_object_put(json, name,
json_string_create_nocopy(xvasprintf(format, args)));
va_end(args);
}
void
json_object_put_uuid(struct json *json,
const char *name, const struct uuid *uuid)
{
json_object_put(json, name, json_string_create_uuid(uuid));
}
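/* A usage sketch: building the object {"name": "br0", "n_ports": 2} and
 * serializing it.  json_object_put() replaces any existing member with the
 * same name, destroying the old value:
 *
 *     struct json *obj = json_object_create();
 *     json_object_put_string(obj, "name", "br0");
 *     json_object_put(obj, "n_ports", json_integer_create(2));
 *     char *s = json_to_string(obj, 0);
 *     ...
 *     free(s);
 *     json_destroy(obj);
 */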
const char *
json_string(const struct json *json)
{
ovs_assert(json->type == JSON_STRING);
return json->storage_type == JSON_STRING_DYNAMIC
? json->str_ptr : json->str;
}
const char *
json_serialized_object(const struct json *json)
{
ovs_assert(json->type == JSON_SERIALIZED_OBJECT);
return json->str_ptr;
}
size_t
json_array_size(const struct json *json)
{
ovs_assert(json->type == JSON_ARRAY);
return json->array.size;
}
const struct json *
json_array_at(const struct json *json, size_t index)
{
ovs_assert(json->type == JSON_ARRAY);
return (json->array.size > index) ? json->array.elements[index] : NULL;
}
struct shash *
json_object(const struct json *json)
{
ovs_assert(json->type == JSON_OBJECT);
return CONST_CAST(struct shash *, json->object);
}
bool
json_boolean(const struct json *json)
{
ovs_assert(json->type == JSON_TRUE || json->type == JSON_FALSE);
return json->type == JSON_TRUE;
}
double
json_real(const struct json *json)
{
ovs_assert(json->type == JSON_REAL || json->type == JSON_INTEGER);
return json->type == JSON_REAL ? json->real : json->integer;
}
int64_t
json_integer(const struct json *json)
{
ovs_assert(json->type == JSON_INTEGER);
return json->integer;
}
static void json_destroy_object(struct shash *object, bool yield);
static void json_destroy_array(struct json *json, bool yield);
/* Frees 'json' and everything it points to, recursively. */
void
json_destroy__(struct json *json, bool yield)
{
switch (json->type) {
case JSON_OBJECT:
json_destroy_object(json->object, yield);
break;
case JSON_ARRAY:
json_destroy_array(json, yield);
break;
case JSON_STRING:
if (json->storage_type == JSON_STRING_DYNAMIC) {
free(json->str_ptr);
}
break;
case JSON_SERIALIZED_OBJECT:
free(json->str_ptr);
break;
case JSON_NULL:
case JSON_FALSE:
case JSON_TRUE:
case JSON_INTEGER:
case JSON_REAL:
break;
case JSON_N_TYPES:
OVS_NOT_REACHED();
}
free(json);
}
static void
json_destroy_object(struct shash *object, bool yield)
{
struct shash_node *node;
if (yield) {
cooperative_multitasking_yield();
}
SHASH_FOR_EACH_SAFE (node, object) {
struct json *value = node->data;
if (yield) {
json_destroy_with_yield(value);
} else {
json_destroy(value);
}
shash_delete(object, node);
}
shash_destroy(object);
free(object);
}
static void
json_destroy_array(struct json *json, bool yield)
{
size_t i, n = json_array_size(json);
if (yield) {
cooperative_multitasking_yield();
}
for (i = 0; i < n; i++) {
if (yield) {
json_destroy_with_yield(
CONST_CAST(struct json *, json_array_at(json, i)));
} else {
json_destroy(CONST_CAST(struct json *, json_array_at(json, i)));
}
}
free(json->array.elements);
}
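/* In the yielding variants above, cooperative_multitasking_yield() runs
 * once per object or array visited, so tearing down a very large JSON tree
 * does not starve other time-sensitive work in the process. */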
static struct json *json_deep_clone_object(const struct shash *object);
static struct json *json_deep_clone_array(const struct json *);
/* Returns a deep copy of 'json'. */
struct json *
json_deep_clone(const struct json *json)
{
switch (json->type) {
case JSON_OBJECT:
return json_deep_clone_object(json->object);
case JSON_ARRAY:
return json_deep_clone_array(json);
case JSON_STRING:
return json_string_create(json_string(json));
case JSON_SERIALIZED_OBJECT:
return json_serialized_object_create(json);
case JSON_NULL:
case JSON_FALSE:
case JSON_TRUE:
return json_create(json->type);
case JSON_INTEGER:
return json_integer_create(json->integer);
case JSON_REAL:
return json_real_create(json->real);
case JSON_N_TYPES:
default:
OVS_NOT_REACHED();
}
}
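/* A comparison sketch: json_deep_clone() copies the whole tree, while
 * json_clone() (defined in json.h) merely takes another reference to the
 * same node, which is far cheaper when the result will not be modified:
 *
 *     struct json *ref  = json_clone(json);       (O(1), shared)
 *     struct json *copy = json_deep_clone(json);  (O(n), independent)
 *     json_destroy(ref);
 *     json_destroy(copy);
 */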
struct json *
json_nullable_clone(const struct json *json)
{
return json ? json_clone(json) : NULL;
}
static struct json *
json_deep_clone_object(const struct shash *object)
{
struct shash_node *node;
struct json *json;
json = json_object_create();
SHASH_FOR_EACH (node, object) {
struct json *value = node->data;
json_object_put(json, node->name, json_deep_clone(value));
}
return json;
}
static struct json *
json_deep_clone_array(const struct json *json)
{
struct json **elements;
size_t i, n;
n = json_array_size(json);
switch (n) {
case 0:
return json_array_create_empty();
case 1:
return json_array_create_1(json_deep_clone(json_array_at(json, 0)));
case 2:
return json_array_create_2(json_deep_clone(json_array_at(json, 0)),
json_deep_clone(json_array_at(json, 1)));
case 3:
return json_array_create_3(json_deep_clone(json_array_at(json, 0)),
json_deep_clone(json_array_at(json, 1)),
json_deep_clone(json_array_at(json, 2)));
default:
break;
}
elements = xmalloc(n * sizeof *elements);
for (i = 0; i < n; i++) {
elements[i] = json_deep_clone(json_array_at(json, i));
}
return json_array_create(elements, n);
}
static size_t
json_hash_object(const struct shash *object, size_t basis)
{
const struct shash_node **nodes;
size_t n, i;
nodes = shash_sort(object);
n = shash_count(object);
for (i = 0; i < n; i++) {
const struct shash_node *node = nodes[i];
basis = hash_string(node->name, basis);
basis = json_hash(node->data, basis);
}
free(nodes);
return basis;
}
static size_t
json_hash_array(const struct json *json, size_t basis)
{
size_t i, n = json_array_size(json);
basis = hash_int(n, basis);
for (i = 0; i < n; i++) {
basis = json_hash(json_array_at(json, i), basis);
}
return basis;
}
size_t
json_hash(const struct json *json, size_t basis)
{
switch (json->type) {
case JSON_OBJECT:
return json_hash_object(json->object, basis);
case JSON_ARRAY:
return json_hash_array(json, basis);
case JSON_STRING:
return hash_string(json_string(json), basis);
case JSON_SERIALIZED_OBJECT:
return hash_string(json_serialized_object(json), basis);
case JSON_NULL:
case JSON_FALSE:
case JSON_TRUE:
return hash_int(json->type << 8, basis);
case JSON_INTEGER:
return hash_int(json->integer, basis);
case JSON_REAL:
return hash_double(json->real, basis);
case JSON_N_TYPES:
default:
OVS_NOT_REACHED();
}
}
static bool
json_equal_object(const struct shash *a, const struct shash *b)
{
struct shash_node *a_node;
if (shash_count(a) != shash_count(b)) {
return false;
}
SHASH_FOR_EACH (a_node, a) {
struct shash_node *b_node = shash_find(b, a_node->name);
if (!b_node || !json_equal(a_node->data, b_node->data)) {
return false;
}
}
return true;
}
static bool
json_equal_array(const struct json *a, const struct json *b)
{
size_t i, n = json_array_size(a);
if (n != json_array_size(b)) {
return false;
}
for (i = 0; i < n; i++) {
if (!json_equal(json_array_at(a, i), json_array_at(b, i))) {
return false;
}
}
return true;
}
bool
json_equal(const struct json *a, const struct json *b)
{
if (a == b) {
return true;
} else if (!a || !b) {
return false;
} else if (a->type != b->type) {
return false;
}
switch (a->type) {
case JSON_OBJECT:
return json_equal_object(a->object, b->object);
case JSON_ARRAY:
return json_equal_array(a, b);
case JSON_STRING:
return !strcmp(json_string(a), json_string(b));
case JSON_SERIALIZED_OBJECT:
return !strcmp(json_serialized_object(a), json_serialized_object(b));
case JSON_NULL:
case JSON_FALSE:
case JSON_TRUE:
return true;
case JSON_INTEGER:
return a->integer == b->integer;
case JSON_REAL:
return a->real == b->real;
case JSON_N_TYPES:
default:
OVS_NOT_REACHED();
}
}
/* Lexical analysis. */
static void
json_lex_keyword(struct json_parser *p)
{
struct json_token token;
const char *s;
s = ds_cstr(&p->buffer);
if (!strcmp(s, "false")) {
token.type = T_FALSE;
} else if (!strcmp(s, "true")) {
token.type = T_TRUE;
} else if (!strcmp(s, "null")) {
token.type = T_NULL;
} else {
json_error(p, "invalid keyword '%s'", s);
return;
}
json_parser_input(p, &token);
}
static void
json_lex_number(struct json_parser *p)
{
const char *cp = ds_cstr(&p->buffer);
unsigned long long int significand = 0;
struct json_token token;
bool imprecise = false;
bool negative = false;
int pow10 = 0;
/* Leading minus sign. */
if (*cp == '-') {
negative = true;
cp++;
}
/* At least one integer digit, but 0 may not be used as a leading digit for
* a longer number. */
significand = 0;
if (*cp == '0') {
cp++;
if (isdigit((unsigned char) *cp)) {
json_error(p, "leading zeros not allowed");
return;
}
} else if (isdigit((unsigned char) *cp)) {
do {
if (significand <= ULLONG_MAX / 10) {
significand = significand * 10 + (*cp - '0');
} else {
pow10++;
if (*cp != '0') {
imprecise = true;
}
}
cp++;
} while (isdigit((unsigned char) *cp));
} else {
json_error(p, "'-' must be followed by digit");
return;
}
/* Optional fraction. */
if (*cp == '.') {
cp++;
if (!isdigit((unsigned char) *cp)) {
json_error(p, "decimal point must be followed by digit");
return;
}
do {
if (significand <= ULLONG_MAX / 10) {
significand = significand * 10 + (*cp - '0');
pow10--;
} else if (*cp != '0') {
imprecise = true;
}
cp++;
} while (isdigit((unsigned char) *cp));
}
/* Optional exponent. */
if (*cp == 'e' || *cp == 'E') {
bool negative_exponent = false;
int exponent;
cp++;
if (*cp == '+') {
cp++;
} else if (*cp == '-') {
negative_exponent = true;
cp++;
}
if (!isdigit((unsigned char) *cp)) {
json_error(p, "exponent must contain at least one digit");
return;
}
exponent = 0;
do {
if (exponent >= INT_MAX / 10) {
goto bad_exponent;
}
exponent = exponent * 10 + (*cp - '0');
cp++;
} while (isdigit((unsigned char) *cp));
if (negative_exponent) {
if (pow10 < INT_MIN + exponent) {
goto bad_exponent;
}
pow10 -= exponent;
} else {
if (pow10 > INT_MAX - exponent) {
goto bad_exponent;
}
pow10 += exponent;
}
}
if (*cp != '\0') {
json_error(p, "syntax error in number");
return;
}
/* Figure out number.
*
* We suppress negative zeros as a matter of policy. */
if (!significand) {
token.type = T_INTEGER;
token.integer = 0;
json_parser_input(p, &token);
return;
}
if (!imprecise) {
while (pow10 > 0 && significand < ULLONG_MAX / 10) {
significand *= 10;
pow10--;
}
while (pow10 < 0 && significand % 10 == 0) {
significand /= 10;
pow10++;
}
if (pow10 == 0
&& significand <= (negative
? (unsigned long long int) LLONG_MAX + 1
: LLONG_MAX)) {
token.type = T_INTEGER;
token.integer = negative ? -significand : significand;
json_parser_input(p, &token);
return;
}
}
token.type = T_REAL;
if (!str_to_double(ds_cstr(&p->buffer), &token.real)) {
json_error(p, "number outside valid range");
return;
}
/* Suppress negative zero. */
if (token.real == 0) {
token.real = 0;
}
json_parser_input(p, &token);
return;
bad_exponent:
json_error(p, "exponent outside valid range");
}
static const char *
json_lex_4hex(const char *cp, const char *end, int *valuep)
{
unsigned int value;
bool ok;
if (cp + 4 > end) {
return "quoted string ends within \\u escape";
}
value = hexits_value(cp, 4, &ok);
if (!ok) {
return "malformed \\u escape";
}
if (!value) {
return "null bytes not supported in quoted strings";
}
*valuep = value;
return NULL;
}
static const char *
json_lex_unicode(const char *cp, const char *end, struct ds *out)
{
const char *error;
int c0, c1;
error = json_lex_4hex(cp, end, &c0);
if (error) {
ds_clear(out);
ds_put_cstr(out, error);
return NULL;
}
cp += 4;
if (!uc_is_leading_surrogate(c0)) {
ds_put_utf8(out, c0);
return cp;
}
if (cp + 2 > end || *cp++ != '\\' || *cp++ != 'u') {
ds_clear(out);
ds_put_cstr(out, "malformed escaped surrogate pair");
return NULL;
}
error = json_lex_4hex(cp, end, &c1);
if (error) {
ds_clear(out);
ds_put_cstr(out, error);
return NULL;
}
cp += 4;
if (!uc_is_trailing_surrogate(c1)) {
ds_clear(out);
ds_put_cstr(out, "second half of escaped surrogate pair is not "
"trailing surrogate");
return NULL;
}
ds_put_utf8(out, utf16_decode_surrogate_pair(c0, c1));
return cp;
}
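/* For example, the escaped surrogate pair "\ud834\udd1e" decodes to
 * U+1D11E (MUSICAL SYMBOL G CLEF), which ds_put_utf8() appends as the
 * 4-byte UTF-8 sequence f0 9d 84 9e. */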
bool
json_string_unescape(const char *in, size_t in_len, char **outp)
{
const char *end = in + in_len;
bool ok = false;
struct ds out;
ds_init(&out);
ds_reserve(&out, in_len);
while (in < end) {
if (*in == '"') {
ds_clear(&out);
ds_put_cstr(&out, "quoted string may not include unescaped \"");
goto exit;
}
if (*in != '\\') {
ds_put_char(&out, *in++);
continue;
}
in++;
if (in >= end) {
/* The JSON parser will never trigger this message, because its
* lexer will never pass in a string that ends in a single
* backslash, but json_string_unescape() has other callers that
* are not as careful.*/
ds_clear(&out);
ds_put_cstr(&out, "quoted string may not end with backslash");
goto exit;
}
switch (*in++) {
case '"': case '\\': case '/':
ds_put_char(&out, in[-1]);
break;
case 'b':
ds_put_char(&out, '\b');
break;
case 'f':
ds_put_char(&out, '\f');
break;
case 'n':
ds_put_char(&out, '\n');
break;
case 'r':
ds_put_char(&out, '\r');
break;
case 't':
ds_put_char(&out, '\t');
break;
case 'u':
in = json_lex_unicode(in, end, &out);
if (!in) {
goto exit;
}
break;
default:
ds_clear(&out);
ds_put_format(&out, "bad escape \\%c", in[-1]);
goto exit;
}
}
ok = true;
exit:
*outp = ds_cstr(&out);
return ok;
}
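/* A usage sketch:
 *
 *     char *out;
 *     if (json_string_unescape("a\\nb", 4, &out)) {
 *         (out is "a", newline, "b")
 *     } else {
 *         (out is an error message)
 *     }
 *     free(out);
 *
 * In both cases the caller owns 'out' and must free it. */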
void
json_string_escape(const char *in, struct ds *out)
{
struct json json = {
.type = JSON_STRING,
.storage_type = JSON_STRING_DYNAMIC,
.str_ptr = CONST_CAST(char *, in),
};
json_to_ds(&json, 0, out);
}
static void
json_parser_input_string(struct json_parser *p, const char *s)
{
struct json_token token;
token.type = T_STRING;
token.string = s;
json_parser_input(p, &token);
}
static void
json_lex_string(struct json_parser *p)
{
const char *raw = ds_cstr(&p->buffer);
if (!strchr(raw, '\\')) {
json_parser_input_string(p, raw);
} else {
char *cooked;
if (json_string_unescape(raw, strlen(raw), &cooked)) {
json_parser_input_string(p, cooked);
} else {
json_error(p, "%s", cooked);
}
free(cooked);
}
}
/* Parsing. */
/* Parses 'string' as a JSON object or array and returns a newly allocated
* 'struct json'. The caller must free the returned structure with
* json_destroy() when it is no longer needed.
*
* 'string' must be encoded in UTF-8.
*
* If 'string' is valid JSON, then the returned 'struct json' will be either an
* object (JSON_OBJECT) or an array (JSON_ARRAY).
*
* If 'string' is not valid JSON, then the returned 'struct json' will be a
* string (JSON_STRING) that describes the particular error encountered during
* parsing. (This is an acceptable means of error reporting because at its top
* level JSON must be either an object or an array; a bare string is not
* valid.) */
struct json *
json_from_string(const char *string)
{
struct json_parser *p = json_parser_create(JSPF_TRAILER);
json_parser_feed(p, string, strlen(string));
return json_parser_finish(p);
}
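/* A usage sketch:
 *
 *     struct json *json = json_from_string("{\"a\": [1, 2]}");
 *     if (json->type == JSON_STRING) {
 *         (json_string(json) describes the parse error)
 *     }
 *     json_destroy(json);
 */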
/* Parses data of JSON_SERIALIZED_OBJECT to the real JSON. */
struct json *
json_from_serialized_object(const struct json *json)
{
return json_from_string(json_serialized_object(json));
}
/* Reads the file named 'file_name', parses its contents as a JSON object or
* array, and returns a newly allocated 'struct json'. The caller must free
* the returned structure with json_destroy() when it is no longer needed.
*
* The file must be encoded in UTF-8.
*
* See json_from_string() for return value semantics.
*/
struct json *
json_from_file(const char *file_name)
{
struct json *json;
FILE *stream;
stream = fopen(file_name, "r");
if (!stream) {
return json_string_create_nocopy(
xasprintf("error opening \"%s\": %s", file_name,
ovs_strerror(errno)));
}
json = json_from_stream(stream);
fclose(stream);
return json;
}
/* Parses the contents of 'stream' as a JSON object or array, and returns a
* newly allocated 'struct json'. The caller must free the returned structure
* with json_destroy() when it is no longer needed.
*
* The file must be encoded in UTF-8.
*
* See json_from_string() for return value semantics.
*/
struct json *
json_from_stream(FILE *stream)
{
struct json_parser *p;
struct json *json;
p = json_parser_create(JSPF_TRAILER);
for (;;) {
char buffer[BUFSIZ];
size_t n;
n = fread(buffer, 1, sizeof buffer, stream);
if (!n || json_parser_feed(p, buffer, n) != n) {
break;
}
}
json = json_parser_finish(p);
if (ferror(stream)) {
json_destroy(json);
json = json_string_create_nocopy(
xasprintf("error reading JSON stream: %s", ovs_strerror(errno)));
}
return json;
}
struct json_parser *
json_parser_create(int flags)
{
struct json_parser *p = xzalloc(sizeof *p);
p->flags = flags;
return p;
}
static inline void ALWAYS_INLINE
json_parser_account_byte(struct json_parser *p, unsigned char c)
{
p->byte_number++;
if (OVS_UNLIKELY(c == '\n')) {
p->column_number = 0;
p->line_number++;
} else {
p->column_number++;
}
}
size_t
json_parser_feed(struct json_parser *p, const char *input, size_t n)
{
size_t token_start = 0;
size_t i;
for (i = 0; !p->done && i < n; ) {
bool consumed = true;
const char *start_p = &input[token_start];
unsigned char c = input[i];
struct json_token token;
switch (p->lex_state) {
case JSON_LEX_START:
switch (c) {
case ' ': case '\t': case '\n': case '\r':
/* Nothing to do. */
token_start = i + 1;
break;
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f': case 'g': case 'h': case 'i': case 'j':
case 'k': case 'l': case 'm': case 'n': case 'o':
case 'p': case 'q': case 'r': case 's': case 't':
case 'u': case 'v': case 'w': case 'x': case 'y':
case 'z':
p->lex_state = JSON_LEX_KEYWORD;
token_start = i;
break;
case '[': case '{': case ']': case '}': case ':': case ',':
token.type = c;
json_parser_input(p, &token);
token_start = i + 1;
break;
case '-':
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
p->lex_state = JSON_LEX_NUMBER;
token_start = i;
break;
case '"':
p->lex_state = JSON_LEX_STRING;
token_start = i + 1;
break;
default:
if (isprint(c)) {
json_error(p, "invalid character '%c'", c);
} else {
json_error(p, "invalid character U+%04x", c);
}
break;
}
break;
case JSON_LEX_KEYWORD:
if (!isalpha((unsigned char) c)) {
ds_put_buffer(&p->buffer, start_p, i - token_start);
json_lex_keyword(p);
consumed = false;
break;
}
break;
case JSON_LEX_NUMBER:
if (!strchr(".0123456789eE-+", c)) {
ds_put_buffer(&p->buffer, start_p, i - token_start);
json_lex_number(p);
consumed = false;
break;
}
break;
case JSON_LEX_STRING:
if (c == '\\') {
p->lex_state = JSON_LEX_ESCAPE;
} else if (c == '"') {
ds_put_buffer(&p->buffer, start_p, i - token_start);
json_lex_string(p);
} else if (c < 0x20) {
json_error(p, "U+%04X must be escaped in quoted string", c);
}
break;
case JSON_LEX_ESCAPE:
p->lex_state = JSON_LEX_STRING;
break;
default:
ovs_abort(0, "unexpected lexer state");
}
if (consumed) {
json_parser_account_byte(p, c);
i++;
}
}
if (!p->done) {
ds_put_buffer(&p->buffer, &input[token_start], i - token_start);
}
return i;
}
bool
json_parser_is_done(const struct json_parser *p)
{
return p->done;
}
struct json *
json_parser_finish(struct json_parser *p)
{
struct json *json;
switch (p->lex_state) {
case JSON_LEX_START:
break;
case JSON_LEX_STRING:
case JSON_LEX_ESCAPE:
json_error(p, "unexpected end of input in quoted string");
break;
case JSON_LEX_NUMBER:
case JSON_LEX_KEYWORD:
json_parser_feed(p, " ", 1);
break;
}
if (p->parse_state == JSON_PARSE_START) {
json_error(p, "empty input stream");
} else if (p->parse_state != JSON_PARSE_END) {
json_error(p, "unexpected end of input");
}
if (!p->error) {
ovs_assert(p->height == 1);
ovs_assert(p->stack[0].json != NULL);
json = p->stack[--p->height].json;
} else {
json = json_string_create_nocopy(p->error);
p->error = NULL;
}
json_parser_abort(p);
return json;
}
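/* Illustrative sketch (not part of this file) of how the incremental
 * parser API above is typically used.  It relies on the public
 * declarations in include/openvswitch/json.h plus a caller-supplied
 * read_more() that fills a buffer with the next chunk of input, so it
 * is an outline under those assumptions, not a drop-in function. */
#if 0
static struct json *
parse_stream_example(void)
{
    struct json_parser *p = json_parser_create(0);
    char chunk[4096];
    int n;

    /* json_parser_feed() consumes input a chunk at a time;
     * json_parser_is_done() reports when a complete top-level value
     * (or an error) has been seen. */
    while (!json_parser_is_done(p)
           && (n = read_more(chunk, sizeof chunk)) > 0) {
        json_parser_feed(p, chunk, n);
    }

    /* json_parser_finish() flushes any trailing number or keyword
     * token and returns either the parsed value or a JSON_STRING
     * carrying the error message; it also frees the parser. */
    return json_parser_finish(p);
}
#endif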
void
json_parser_abort(struct json_parser *p)
{
if (p) {
ds_destroy(&p->buffer);
if (p->height) {
json_destroy(p->stack[0].json);
}
free(p->stack);
free(p->member_name);
free(p->error);
free(p);
}
}
static struct json_parser_node *
json_parser_top(struct json_parser *p)
{
return &p->stack[p->height - 1];
}
static void
json_parser_put_value(struct json_parser *p, struct json *value)
{
struct json_parser_node *node = json_parser_top(p);
if (node->json->type == JSON_OBJECT) {
json_object_put_nocopy(node->json, p->member_name, value);
p->member_name = NULL;
} else if (node->json->type == JSON_ARRAY) {
json_array_add(node->json, value);
} else {
OVS_NOT_REACHED();
}
}
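/* Begins a nested object or array: links 'new_json' into its parent, if
 * there is one, pushes it on the parser stack, and moves to
 * 'new_state'.  Input nested more than JSON_MAX_HEIGHT levels deep is
 * rejected as an error, which keeps the stack bounded. */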
static void
json_parser_push(struct json_parser *p,
struct json *new_json, enum json_parse_state new_state)
{
if (p->height < JSON_MAX_HEIGHT) {
struct json_parser_node *node;
if (p->height >= p->allocated_height) {
p->stack = x2nrealloc(p->stack, &p->allocated_height,
sizeof *p->stack);
}
if (p->height > 0) {
json_parser_put_value(p, new_json);
}
node = &p->stack[p->height++];
node->json = new_json;
p->parse_state = new_state;
} else {
json_destroy(new_json);
json_error(p, "input exceeds maximum nesting depth %d",
JSON_MAX_HEIGHT);
}
}
static void
json_parser_push_object(struct json_parser *p)
{
json_parser_push(p, json_object_create(), JSON_PARSE_OBJECT_INIT);
}
static void
json_parser_push_array(struct json_parser *p)
{
json_parser_push(p, json_array_create_empty(), JSON_PARSE_ARRAY_INIT);
}
static void
json_parse_value(struct json_parser *p, struct json_token *token,
enum json_parse_state next_state)
{
struct json *value;
switch (token->type) {
case T_FALSE:
value = json_boolean_create(false);
break;
case T_NULL:
value = json_null_create();
break;
case T_TRUE:
value = json_boolean_create(true);
break;
case '{':
json_parser_push_object(p);
return;
case '[':
json_parser_push_array(p);
return;
case T_INTEGER:
value = json_integer_create(token->integer);
break;
case T_REAL:
value = json_real_create(token->real);
break;
case T_STRING:
value = json_string_create(token->string);
break;
case T_EOF:
case '}':
case ']':
case ':':
case ',':
default:
json_error(p, "syntax error expecting value");
return;
}
json_parser_put_value(p, value);
p->parse_state = next_state;
}
static void
json_parser_pop(struct json_parser *p)
{
struct json_parser_node *node;
/* Conserve memory. */
node = json_parser_top(p);
if (node->json->type == JSON_ARRAY) {
json_array_trim(node->json);
}
/* Pop off the top-of-stack. */
if (p->height == 1) {
p->parse_state = JSON_PARSE_END;
if (!(p->flags & JSPF_TRAILER)) {
p->done = true;
}
} else {
p->height--;
node = json_parser_top(p);
if (node->json->type == JSON_ARRAY) {
p->parse_state = JSON_PARSE_ARRAY_NEXT;
} else if (node->json->type == JSON_OBJECT) {
p->parse_state = JSON_PARSE_OBJECT_NEXT;
} else {
OVS_NOT_REACHED();
}
}
}
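/* Advances the parse state machine by one token.  Depending on the
 * current state, a token either opens a nested object or array
 * (pushing it onto the stack), supplies a member name, value, or
 * separator, or closes the current container (popping the stack).
 * Afterwards the lexer is reset to start on the next token. */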
static void
json_parser_input(struct json_parser *p, struct json_token *token)
{
switch (p->parse_state) {
case JSON_PARSE_START:
if (token->type == '{') {
json_parser_push_object(p);
} else if (token->type == '[') {
json_parser_push_array(p);
} else {
json_error(p, "syntax error at beginning of input");
}
break;
case JSON_PARSE_END:
json_error(p, "trailing garbage at end of input");
break;
case JSON_PARSE_OBJECT_INIT:
if (token->type == '}') {
json_parser_pop(p);
break;
}
/* Fall through. */
case JSON_PARSE_OBJECT_NAME:
if (token->type == T_STRING) {
p->member_name = xstrdup(token->string);
p->parse_state = JSON_PARSE_OBJECT_COLON;
} else {
json_error(p, "syntax error parsing object expecting string");
}
break;
case JSON_PARSE_OBJECT_COLON:
if (token->type == ':') {
p->parse_state = JSON_PARSE_OBJECT_VALUE;
} else {
json_error(p, "syntax error parsing object expecting ':'");
}
break;
case JSON_PARSE_OBJECT_VALUE:
json_parse_value(p, token, JSON_PARSE_OBJECT_NEXT);
break;
case JSON_PARSE_OBJECT_NEXT:
if (token->type == ',') {
p->parse_state = JSON_PARSE_OBJECT_NAME;
} else if (token->type == '}') {
json_parser_pop(p);
} else {
json_error(p, "syntax error expecting '}' or ','");
}
break;
case JSON_PARSE_ARRAY_INIT:
if (token->type == ']') {
json_parser_pop(p);
break;
}
/* Fall through. */
case JSON_PARSE_ARRAY_VALUE:
json_parse_value(p, token, JSON_PARSE_ARRAY_NEXT);
break;
case JSON_PARSE_ARRAY_NEXT:
if (token->type == ',') {
p->parse_state = JSON_PARSE_ARRAY_VALUE;
} else if (token->type == ']') {
json_parser_pop(p);
} else {
json_error(p, "syntax error expecting ']' or ','");
}
break;
default:
abort();
}
p->lex_state = JSON_LEX_START;
ds_clear(&p->buffer);
}
static struct json *
json_create(enum json_type type)
{
struct json *json = xmalloc(sizeof *json);
json->type = type;
json->count = 1;
return json;
}
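/* Records a parse error, prefixed with the current input position, and
 * marks the parser as done.  Only the first error is kept: once
 * 'p->error' is set, later calls have no effect. */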
static void
json_error(struct json_parser *p, const char *format, ...)
{
if (!p->error) {
struct ds msg;
va_list args;
ds_init(&msg);
ds_put_format(&msg, "line %d, column %d, byte %d: ",
p->line_number, p->column_number, p->byte_number);
va_start(args, format);
ds_put_format_valist(&msg, format, args);
va_end(args);
p->error = ds_steal_cstr(&msg);
p->done = true;
}
}
#define SPACES_PER_LEVEL 2
struct json_serializer {
struct ds *ds;
int depth;
int flags;
};
static void json_serialize(const struct json *, struct json_serializer *);
static void json_serialize_object(const struct shash *object,
struct json_serializer *);
static void json_serialize_array(const struct json *,
struct json_serializer *);
static void json_serialize_string(const char *, struct ds *);
/* Converts 'json' to a string in JSON format, encoded in UTF-8, and returns
* that string. The caller is responsible for freeing the returned string,
* with free(), when it is no longer needed.
*
 * If 'flags' contains JSSF_PRETTY, the output is pretty-printed, with each
 * nesting level introducing an additional level of indentation.  Otherwise,
 * the returned string does not contain any new-line characters.
*
* If 'flags' contains JSSF_SORT, members of objects in the output are sorted
* in bytewise lexicographic order for reproducibility. Otherwise, members of
* objects are output in an indeterminate order.
*
* The returned string is valid JSON only if 'json' represents an array or an
* object, since a bare literal does not satisfy the JSON grammar. */
char *
json_to_string(const struct json *json, int flags)
{
struct ds ds;
ds_init(&ds);
json_to_ds(json, flags, &ds);
return ds_steal_cstr(&ds);
}
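/* Illustrative sketch (not part of this file): building a small object
 * and serializing it with the flags documented above.  Assumes the
 * public constructors from include/openvswitch/json.h. */
#if 0
static char *
serialize_example(void)
{
    struct json *obj = json_object_create();
    char *s;

    json_object_put(obj, "name", json_string_create("br0"));
    json_object_put(obj, "ports", json_integer_create(4));

    /* JSSF_SORT makes member order reproducible; JSSF_PRETTY adds
     * newlines and indentation.  The caller must free() the result. */
    s = json_to_string(obj, JSSF_PRETTY | JSSF_SORT);

    json_destroy(obj);
    return s;
}
#endif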
/* Same as json_to_string(), but the output is appended to 'ds'. */
void
json_to_ds(const struct json *json, int flags, struct ds *ds)
{
struct json_serializer s;
s.ds = ds;
s.depth = 0;
s.flags = flags;
json_serialize(json, &s);
}
static void
json_serialize(const struct json *json, struct json_serializer *s)
{
struct ds *ds = s->ds;
switch (json->type) {
case JSON_NULL:
ds_put_cstr(ds, "null");
break;
case JSON_FALSE:
ds_put_cstr(ds, "false");
break;
case JSON_TRUE:
ds_put_cstr(ds, "true");
break;
case JSON_OBJECT:
json_serialize_object(json->object, s);
break;
case JSON_ARRAY:
json_serialize_array(json, s);
break;
case JSON_INTEGER:
ds_put_format(ds, "%lld", json->integer);
break;
case JSON_REAL:
ds_put_format(ds, "%.*g", DBL_DIG, json->real);
break;
case JSON_STRING:
json_serialize_string(json_string(json), ds);
break;
case JSON_SERIALIZED_OBJECT:
ds_put_cstr(ds, json_serialized_object(json));
break;
case JSON_N_TYPES:
default:
OVS_NOT_REACHED();
}
}
static void
indent_line(struct json_serializer *s)
{
if (s->flags & JSSF_PRETTY) {
ds_put_char(s->ds, '\n');
ds_put_char_multiple(s->ds, ' ', SPACES_PER_LEVEL * s->depth);
}
}
static void
json_serialize_object_member(size_t i, const struct shash_node *node,
struct json_serializer *s)
{
struct ds *ds = s->ds;
if (i) {
ds_put_char(ds, ',');
indent_line(s);
}
json_serialize_string(node->name, ds);
ds_put_char(ds, ':');
if (s->flags & JSSF_PRETTY) {
ds_put_char(ds, ' ');
}
json_serialize(node->data, s);
}
static void
json_serialize_object(const struct shash *object, struct json_serializer *s)
{
struct ds *ds = s->ds;
ds_put_char(ds, '{');
s->depth++;
indent_line(s);
if (s->flags & JSSF_YIELD) {
cooperative_multitasking_yield();
}
if (s->flags & JSSF_SORT) {
const struct shash_node **nodes;
size_t n, i;
nodes = shash_sort(object);
n = shash_count(object);
for (i = 0; i < n; i++) {
json_serialize_object_member(i, nodes[i], s);
}
free(nodes);
} else {
struct shash_node *node;
size_t i;
i = 0;
SHASH_FOR_EACH (node, object) {
json_serialize_object_member(i++, node, s);
}
}
ds_put_char(ds, '}');
s->depth--;
}
static void
json_serialize_array(const struct json *json, struct json_serializer *s)
{
size_t i, n = json_array_size(json);
struct ds *ds = s->ds;
ds_put_char(ds, '[');
s->depth++;
if (s->flags & JSSF_YIELD) {
cooperative_multitasking_yield();
}
if (n > 0) {
indent_line(s);
for (i = 0; i < n; i++) {
if (i) {
ds_put_char(ds, ',');
indent_line(s);
}
json_serialize(json_array_at(json, i), s);
}
}
s->depth--;
ds_put_char(ds, ']');
}
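/* Maps each possible byte value to its form inside a JSON quoted
 * string: control characters become "\uXXXX" or a short escape such as
 * "\n", '"' and '\\' are backslash-escaped, and every other byte maps
 * to itself. */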
static const char *chars_escaping[256] = {
"\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", "\\u0007",
"\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", "\\u000e", "\\u000f",
"\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", "\\u0015", "\\u0016", "\\u0017",
"\\u0018", "\\u0019", "\\u001a", "\\u001b", "\\u001c", "\\u001d", "\\u001e", "\\u001f",
" ", "!", "\\\"", "#", "$", "%", "&", "'",
"(", ")", "*", "+", ",", "-", ".", "/",
"0", "1", "2", "3", "4", "5", "6", "7",
"8", "9", ":", ";", "<", "=", ">", "?",
"@", "A", "B", "C", "D", "E", "F", "G",
"H", "I", "J", "K", "L", "M", "N", "O",
"P", "Q", "R", "S", "T", "U", "V", "W",
"X", "Y", "Z", "[", "\\\\", "]", "^", "_",
"`", "a", "b", "c", "d", "e", "f", "g",
"h", "i", "j", "k", "l", "m", "n", "o",
"p", "q", "r", "s", "t", "u", "v", "w",
"x", "y", "z", "{", "|", "}", "~", "\x7f",
"\x80", "\x81", "\x82", "\x83", "\x84", "\x85", "\x86", "\x87",
"\x88", "\x89", "\x8a", "\x8b", "\x8c", "\x8d", "\x8e", "\x8f",
"\x90", "\x91", "\x92", "\x93", "\x94", "\x95", "\x96", "\x97",
"\x98", "\x99", "\x9a", "\x9b", "\x9c", "\x9d", "\x9e", "\x9f",
"\xa0", "\xa1", "\xa2", "\xa3", "\xa4", "\xa5", "\xa6", "\xa7",
"\xa8", "\xa9", "\xaa", "\xab", "\xac", "\xad", "\xae", "\xaf",
"\xb0", "\xb1", "\xb2", "\xb3", "\xb4", "\xb5", "\xb6", "\xb7",
"\xb8", "\xb9", "\xba", "\xbb", "\xbc", "\xbd", "\xbe", "\xbf",
"\xc0", "\xc1", "\xc2", "\xc3", "\xc4", "\xc5", "\xc6", "\xc7",
"\xc8", "\xc9", "\xca", "\xcb", "\xcc", "\xcd", "\xce", "\xcf",
"\xd0", "\xd1", "\xd2", "\xd3", "\xd4", "\xd5", "\xd6", "\xd7",
"\xd8", "\xd9", "\xda", "\xdb", "\xdc", "\xdd", "\xde", "\xdf",
"\xe0", "\xe1", "\xe2", "\xe3", "\xe4", "\xe5", "\xe6", "\xe7",
"\xe8", "\xe9", "\xea", "\xeb", "\xec", "\xed", "\xee", "\xef",
"\xf0", "\xf1", "\xf2", "\xf3", "\xf4", "\xf5", "\xf6", "\xf7",
"\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff"
};
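/* Appends 'string' to 'ds' as a JSON quoted string.  Runs of characters
 * that need no escaping are counted and appended in one block, which is
 * much faster than one ds_put_char() call per character; only escape
 * sequences are emitted character by character. */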
static void
json_serialize_string(const char *string, struct ds *ds)
{
uint8_t c;
uint8_t c2;
size_t count;
const char *escape;
const char *start;
ds_put_char(ds, '"');
count = 0;
start = string;
while ((c = *string++) != '\0') {
if (c >= ' ' && c != '"' && c != '\\') {
count++;
} else {
if (count) {
ds_put_buffer(ds, start, count);
count = 0;
}
start = string;
escape = chars_escaping[c];
while ((c2 = *escape++) != '\0') {
ds_put_char(ds, c2);
}
}
}
if (count) {
ds_put_buffer(ds, start, count);
}
ds_put_char(ds, '"');
}