mirror of
				https://github.com/openvswitch/ovs
				synced 2025-10-23 14:57:06 +00:00 
			
		
		
		
	Add optional C extension wrapper for Python JSON parsing
The pure Python in-tree JSON parser is *much* slower than the in-tree C JSON parser. A local test parsing a 100Mb JSON file showed the Python version taking 270 seconds. With the C wrapper, it took under 4 seconds. The C extension will be used automatically if it can be built. If the extension fails to build, a warning is displayed and the build is restarted without the extension. The Serializer class is replaced with Python's built-in JSON library since the ability to process chunked data is not needed in that case. The extension should work with both Python 2.7 and Python 3.3+. Signed-off-by: Terry Wilson <twilson@redhat.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
This commit is contained in:
		| @@ -251,7 +251,7 @@ config-h-check: | ||||
| 	@cd $(srcdir); \ | ||||
| 	if test -e .git && (git --version) >/dev/null 2>&1 && \ | ||||
| 	   git --no-pager grep -L '#include <config\.h>' `git ls-files | grep '\.c$$' | \ | ||||
|                grep -vE '^datapath|^lib/sflow|^third-party|^datapath-windows'`; \ | ||||
|                grep -vE '^datapath|^lib/sflow|^third-party|^datapath-windows|^python'`; \ | ||||
| 	then \ | ||||
| 	    echo "See above for list of violations of the rule that"; \ | ||||
| 	    echo "every C source file must #include <config.h>."; \ | ||||
|   | ||||
| @@ -46,6 +46,9 @@ EXTRA_DIST += \ | ||||
| 	python/README.rst \ | ||||
| 	python/setup.py | ||||
|  | ||||
| # C extension support. | ||||
| EXTRA_DIST += python/ovs/_json.c | ||||
|  | ||||
| PYFILES = $(ovs_pyfiles) python/ovs/dirs.py $(ovstest_pyfiles) | ||||
| EXTRA_DIST += $(PYFILES) | ||||
| PYCOV_CLEAN_FILES += $(PYFILES:.py=.py,cover) | ||||
|   | ||||
							
								
								
									
										268
									
								
								python/ovs/_json.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										268
									
								
								python/ovs/_json.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,268 @@ | ||||
| #include "Python.h" | ||||
| #include <openvswitch/lib/json.h> | ||||
| #include "structmember.h" | ||||
|  | ||||
| #if PY_MAJOR_VERSION >= 3 | ||||
| #define IS_PY3K | ||||
| #endif | ||||
|  | ||||
| typedef struct { | ||||
|     PyObject_HEAD | ||||
|     struct json_parser *_parser; | ||||
| } json_ParserObject; | ||||
|  | ||||
| static void | ||||
| Parser_dealloc(json_ParserObject * p) | ||||
| { | ||||
|     json_parser_abort(p->_parser); | ||||
|     Py_TYPE(p)->tp_free(p); | ||||
| } | ||||
|  | ||||
| static PyObject * | ||||
| Parser_new(PyTypeObject * type, PyObject * args, PyObject * kwargs) | ||||
| { | ||||
|     json_ParserObject *self; | ||||
|     static char *kwlist[] = { "check_trailer", NULL }; | ||||
|     PyObject *check_trailer = NULL; | ||||
|     int ct_int = 0; | ||||
|  | ||||
|     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O", kwlist, | ||||
|                                      &check_trailer)) { | ||||
|         return NULL; | ||||
|     } | ||||
|  | ||||
|     if (check_trailer != NULL) { | ||||
|         ct_int = PyObject_IsTrue(check_trailer); | ||||
|         if (ct_int < 0) { | ||||
|             return NULL; | ||||
|         } else if (ct_int) { | ||||
|             ct_int = JSPF_TRAILER; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     self = (json_ParserObject *) type->tp_alloc(type, 0); | ||||
|     if (self != NULL) { | ||||
|         self->_parser = json_parser_create(ct_int); | ||||
|     } | ||||
|  | ||||
|     return (PyObject *) self; | ||||
| } | ||||
|  | ||||
| static PyObject * | ||||
| Parser_feed(json_ParserObject * self, PyObject * args) | ||||
| { | ||||
|     Py_ssize_t input_sz; | ||||
|     PyObject *input; | ||||
|     size_t rd; | ||||
|     char *input_str; | ||||
|  | ||||
|     if (self->_parser == NULL) { | ||||
|         return NULL; | ||||
|     } | ||||
|  | ||||
|     if (!PyArg_UnpackTuple(args, "input", 1, 1, &input)) { | ||||
|         return NULL; | ||||
|     } | ||||
| #ifdef IS_PY3K | ||||
|     if ((input_str = PyUnicode_AsUTF8AndSize(input, &input_sz)) == NULL) { | ||||
| #else | ||||
|     if (PyString_AsStringAndSize(input, &input_str, &input_sz) < 0) { | ||||
| #endif | ||||
|         return NULL; | ||||
|     } | ||||
|  | ||||
|     rd = json_parser_feed(self->_parser, input_str, (size_t) input_sz); | ||||
|  | ||||
| #ifdef IS_PY3K | ||||
|     return PyLong_FromSize_t(rd); | ||||
| #else | ||||
|     return PyInt_FromSize_t(rd); | ||||
| #endif | ||||
| } | ||||
|  | ||||
| static PyObject * | ||||
| Parser_is_done(json_ParserObject * self) | ||||
| { | ||||
|     if (self->_parser == NULL) { | ||||
|         return NULL; | ||||
|     } | ||||
|     return PyBool_FromLong(json_parser_is_done(self->_parser)); | ||||
| } | ||||
|  | ||||
| static PyObject * | ||||
| json_to_python(struct json *json) | ||||
| { | ||||
|     switch (json->type) { | ||||
|     case JSON_NULL: | ||||
|         Py_RETURN_NONE; | ||||
|     case JSON_FALSE: | ||||
|         Py_RETURN_FALSE; | ||||
|     case JSON_TRUE: | ||||
|         Py_RETURN_TRUE; | ||||
|     case JSON_OBJECT:{ | ||||
|             struct shash_node *node; | ||||
|             PyObject *dict = PyDict_New(); | ||||
|  | ||||
|             if (dict == NULL) { | ||||
|                 return PyErr_NoMemory(); | ||||
|             } | ||||
|             SHASH_FOR_EACH(node, json->u.object) { | ||||
|                 PyObject *key = PyUnicode_FromString(node->name); | ||||
|                 PyObject *val = json_to_python(node->data); | ||||
|  | ||||
|                 if (!(key && val) || PyDict_SetItem(dict, key, val)) { | ||||
|                     Py_XDECREF(key); | ||||
|                     Py_XDECREF(val); | ||||
|                     Py_XDECREF(dict); | ||||
|                     return NULL; | ||||
|                 } | ||||
|  | ||||
|                 Py_XDECREF(key); | ||||
|                 Py_XDECREF(val); | ||||
|             } | ||||
|             return dict; | ||||
|         } | ||||
|     case JSON_ARRAY:{ | ||||
|             int i; | ||||
|             PyObject *arr = PyList_New(json->u.array.n); | ||||
|  | ||||
|             if (arr == NULL) { | ||||
|                 return PyErr_NoMemory(); | ||||
|             } | ||||
|             for (i = 0; i < json->u.array.n; i++) { | ||||
|                 PyObject *item = json_to_python(json->u.array.elems[i]); | ||||
|  | ||||
|                 if (!item || PyList_SetItem(arr, i, item)) { | ||||
|                     Py_XDECREF(arr); | ||||
|                     return NULL; | ||||
|                 } | ||||
|             } | ||||
|             return arr; | ||||
|         } | ||||
|     case JSON_REAL: | ||||
|         if (json->u.real != 0) { | ||||
|             return PyFloat_FromDouble(json->u.real); | ||||
|         } /* fall through to treat 0 as int */ | ||||
|     case JSON_INTEGER: | ||||
| #ifdef IS_PY3K | ||||
|         return PyLong_FromLong((long) json->u.integer); | ||||
| #else | ||||
|         return PyInt_FromLong((long) json->u.integer); | ||||
| #endif | ||||
|  | ||||
|     case JSON_STRING: | ||||
|         return PyUnicode_FromString(json->u.string); | ||||
|     default: | ||||
|         return NULL; | ||||
|     } | ||||
| } | ||||
|  | ||||
| static PyObject * | ||||
| Parser_finish(json_ParserObject * self) | ||||
| { | ||||
|     struct json *json; | ||||
|     PyObject *obj; | ||||
|  | ||||
|     if (self->_parser == NULL) { | ||||
|         return NULL; | ||||
|     } | ||||
|  | ||||
|     json = json_parser_finish(self->_parser); | ||||
|     self->_parser = NULL; | ||||
|     obj = json_to_python(json); | ||||
|     return obj; | ||||
| } | ||||
|  | ||||
| static PyMethodDef Parser_methods[] = { | ||||
|     {"feed", (PyCFunction) Parser_feed, METH_VARARGS, | ||||
|      "Feed data to the parser and return the index of the last object."}, | ||||
|     {"is_done", (PyCFunction) Parser_is_done, METH_NOARGS, | ||||
|      "Whether the parser has finished decoding an object."}, | ||||
|     {"finish", (PyCFunction) Parser_finish, METH_NOARGS, | ||||
|      "Finish parsing and return Python object parsed."}, | ||||
|     {NULL}, | ||||
| }; | ||||
|  | ||||
| static PyTypeObject json_ParserType = { | ||||
|     PyVarObject_HEAD_INIT(NULL, 0) | ||||
|         "ovs._json.Parser",     /* tp_name */ | ||||
|     sizeof (json_ParserObject), /* tp_basicsize */ | ||||
|     0,                          /* tp_itemsize */ | ||||
|     (destructor) Parser_dealloc,        /* tp_dealloc */ | ||||
|     0,                          /* tp_print */ | ||||
|     0,                          /* tp_getattr */ | ||||
|     0,                          /* tp_setattr */ | ||||
|     0,                          /* tp_compare */ | ||||
|     0,                          /* tp_repr */ | ||||
|     0,                          /* tp_as_number */ | ||||
|     0,                          /* tp_as_sequence */ | ||||
|     0,                          /* tp_as_mapping */ | ||||
|     0,                          /* tp_hash */ | ||||
|     0,                          /* tp_call */ | ||||
|     0,                          /* tp_str */ | ||||
|     0,                          /* tp_getattro */ | ||||
|     0,                          /* tp_setattro */ | ||||
|     0,                          /* tp_as_buffer */ | ||||
|     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /* tp_flags */ | ||||
|     "Parser objects",           /* tp_doc */ | ||||
|     0,                          /* tp_traverse */ | ||||
|     0,                          /* tp_clear */ | ||||
|     0,                          /* tp_richcompare */ | ||||
|     0,                          /* tp_weaklistoffset */ | ||||
|     0,                          /* tp_iter */ | ||||
|     0,                          /* tp_iternext */ | ||||
|     Parser_methods,             /* tp_methods */ | ||||
|     0,                          /* tp_members */ | ||||
|     0,                          /* tp_getset */ | ||||
|     0,                          /* tp_base */ | ||||
|     0,                          /* tp_dict */ | ||||
|     0,                          /* tp_descr_get */ | ||||
|     0,                          /* tp_descr_set */ | ||||
|     0,                          /* tp_dictoffset */ | ||||
|     0,                          /* tp_init */ | ||||
|     0,                          /* tp_alloc */ | ||||
|     Parser_new,                 /* tp_new */ | ||||
| }; | ||||
|  | ||||
| #ifdef IS_PY3K | ||||
| static struct PyModuleDef moduledef = { | ||||
|     PyModuleDef_HEAD_INIT, | ||||
|     "ovs._json",                /* m_name */ | ||||
|     "OVS JSON Parser module",   /* m_doc */ | ||||
|     0,                          /* m_size */ | ||||
|     0,                          /* m_methods */ | ||||
|     0,                          /* m_slots */ | ||||
|     0,                          /* m_traverse */ | ||||
|     0,                          /* m_clear */ | ||||
|     0,                          /* m_free */ | ||||
| }; | ||||
|  | ||||
| #define INITERROR return NULL | ||||
| #else /* !IS_PY3K */ | ||||
| #define INITERROR return | ||||
| #endif | ||||
|  | ||||
| PyMODINIT_FUNC | ||||
| #ifdef IS_PY3K | ||||
| PyInit__json(void) | ||||
| #else | ||||
| init_json(void) | ||||
| #endif | ||||
| { | ||||
|     PyObject *m; | ||||
|  | ||||
|     if (PyType_Ready(&json_ParserType) < 0) { | ||||
|         INITERROR; | ||||
|     } | ||||
| #ifdef IS_PY3K | ||||
|     m = PyModule_Create(&moduledef); | ||||
| #else | ||||
|     m = Py_InitModule3("ovs._json", NULL, "OVS JSON Parser module"); | ||||
| #endif | ||||
|  | ||||
|     Py_INCREF(&json_ParserType); | ||||
|     PyModule_AddObject(m, "Parser", (PyObject *) & json_ParserType); | ||||
| #ifdef IS_PY3K | ||||
|     return m; | ||||
| #endif | ||||
| } | ||||
| @@ -18,6 +18,11 @@ import sys | ||||
| import six | ||||
| from six.moves import range | ||||
|  | ||||
| try: | ||||
|     import ovs._json | ||||
| except ImportError: | ||||
|     pass | ||||
|  | ||||
| __pychecker__ = 'no-stringiter' | ||||
|  | ||||
| escapes = {ord('"'): u"\\\"", | ||||
| @@ -165,6 +170,12 @@ class Parser(object): | ||||
|     # Maximum height of parsing stack. # | ||||
|     MAX_HEIGHT = 1000 | ||||
|  | ||||
|     def __new__(cls, *args, **kwargs): | ||||
|         try: | ||||
|             return ovs._json.Parser(*args, **kwargs) | ||||
|         except NameError: | ||||
|             return super(Parser, cls).__new__(cls) | ||||
|  | ||||
|     def __init__(self, check_trailer=False): | ||||
|         self.check_trailer = check_trailer | ||||
|  | ||||
|   | ||||
| @@ -13,6 +13,10 @@ | ||||
| from __future__ import print_function | ||||
| import sys | ||||
|  | ||||
| from distutils.command.build_ext import build_ext | ||||
| from distutils.errors import CCompilerError, DistutilsExecError, \ | ||||
|     DistutilsPlatformError | ||||
|  | ||||
| import setuptools | ||||
|  | ||||
| VERSION = "unknown" | ||||
| @@ -25,8 +29,33 @@ except IOError: | ||||
|           file=sys.stderr) | ||||
|     sys.exit(-1) | ||||
|  | ||||
| ext_errors = (CCompilerError, DistutilsExecError, DistutilsPlatformError) | ||||
| if sys.platform == 'win32': | ||||
|     ext_errors += (IOError, ValueError) | ||||
|  | ||||
| setuptools.setup( | ||||
|  | ||||
| class BuildFailed(Exception): | ||||
|     pass | ||||
|  | ||||
|  | ||||
| class try_build_ext(build_ext): | ||||
|     # This class allows C extension building to fail | ||||
|     # NOTE: build_ext is not a new-style class | ||||
|  | ||||
|     def run(self): | ||||
|         try: | ||||
|             build_ext.run(self) | ||||
|         except DistutilsPlatformError: | ||||
|             raise BuildFailed() | ||||
|  | ||||
|     def build_extension(self, ext): | ||||
|         try: | ||||
|             build_ext.build_extension(self, ext) | ||||
|         except ext_errors: | ||||
|             raise BuildFailed() | ||||
|  | ||||
|  | ||||
| setup_args = dict( | ||||
|     name='ovs', | ||||
|     description='Open vSwitch library', | ||||
|     version=VERSION, | ||||
| @@ -46,5 +75,23 @@ setuptools.setup( | ||||
|         'Programming Language :: Python :: 2.7', | ||||
|         'Programming Language :: Python :: 3', | ||||
|         'Programming Language :: Python :: 3.4', | ||||
|     ] | ||||
|     ], | ||||
|     ext_modules=[setuptools.Extension("ovs._json", sources=["ovs/_json.c"], | ||||
|                                       libraries=['openvswitch'])], | ||||
|     cmdclass={'build_ext': try_build_ext}, | ||||
| ) | ||||
|  | ||||
| try: | ||||
|     setuptools.setup(**setup_args) | ||||
| except BuildFailed: | ||||
|     BUILD_EXT_WARNING = ("WARNING: The C extension could not be compiled, " | ||||
|                          "speedups are not enabled.") | ||||
|     print("*" * 75) | ||||
|     print(BUILD_EXT_WARNING) | ||||
|     print("Failure information, if any, is above.") | ||||
|     print("Retrying the build without the C extension.") | ||||
|     print("*" * 75) | ||||
|  | ||||
|     del(setup_args['cmdclass']) | ||||
|     del(setup_args['ext_modules']) | ||||
|     setuptools.setup(**setup_args) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user