2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-24 11:08:45 +00:00
bind/lib/isc/utf8.c
Ondřej Surý 58bd26b6cf Update the copyright information in all files in the repository
This commit converts the license handling to adhere to the REUSE
specification.  It specifically:

1. Adds used licenses to LICENSES/ directory

2. Add "isc" template for adding the copyright boilerplate

3. Changes all source files to include copyright and SPDX license
   header, this includes all the C sources, documentation, zone files,
   configuration files.  There are notes in the doc/dev/copyrights file
   on how to add correct headers to the new files.

4. Handle the rest that can't be modified via .reuse/dep5 file.  The
   binary (or otherwise unmodifiable) files could have license placed
   next to them in <foo>.license file, but this would lead to cluttered
   repository and most of the files handled in the .reuse/dep5 file are
   system test files.
2022-01-11 09:05:02 +01:00

89 lines
2.2 KiB
C

/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
#include <string.h>
#include <isc/utf8.h>
#include <isc/util.h>
/*
* UTF-8 is defined in "The Unicode Standard -- Version 4.0"
* Also see RFC 3629.
*
* Char. number range | UTF-8 octet sequence
* (hexadecimal) | (binary)
* --------------------+---------------------------------------------
* 0000 0000-0000 007F | 0xxxxxxx
* 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
* 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
* 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
*/
/*
 * Return true if the 'len' octets starting at 'buf' form a well-formed
 * UTF-8 sequence, false otherwise.
 *
 * 'buf' must not be NULL (checked with REQUIRE).  An empty input
 * (len == 0) is considered valid.
 *
 * The following are rejected:
 *  - truncated multi-octet sequences and stray continuation octets;
 *  - overlong encodings (a code point encoded in more octets than
 *    necessary);
 *  - UTF-16 surrogate code points U+D800..U+DFFF, which RFC 3629
 *    forbids in UTF-8;
 *  - code points above U+10FFFF.
 */
bool
isc_utf8_valid(const unsigned char *buf, size_t len) {
	REQUIRE(buf != NULL);

	for (size_t i = 0; i < len; i++) {
		/* Single octet: 0xxxxxxx. */
		if (buf[i] <= 0x7f) {
			continue;
		}

		/* Two octets: 110xxxxx 10xxxxxx. */
		if ((i + 1) < len && (buf[i] & 0xe0) == 0xc0 &&
		    (buf[i + 1] & 0xc0) == 0x80)
		{
			unsigned int w;
			w = (buf[i] & 0x1f) << 6;
			w |= (buf[++i] & 0x3f);
			/* Overlong: would have fit in one octet. */
			if (w < 0x80) {
				return (false);
			}
			continue;
		}

		/* Three octets: 1110xxxx 10xxxxxx 10xxxxxx. */
		if ((i + 2) < len && (buf[i] & 0xf0) == 0xe0 &&
		    (buf[i + 1] & 0xc0) == 0x80 && (buf[i + 2] & 0xc0) == 0x80)
		{
			unsigned int w;
			w = (buf[i] & 0x0f) << 12;
			w |= (buf[++i] & 0x3f) << 6;
			w |= (buf[++i] & 0x3f);
			/* Overlong: would have fit in two octets. */
			if (w < 0x0800) {
				return (false);
			}
			/* Surrogates are not valid scalar values. */
			if (w >= 0xd800 && w <= 0xdfff) {
				return (false);
			}
			continue;
		}

		/* Four octets: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. */
		if ((i + 3) < len && (buf[i] & 0xf8) == 0xf0 &&
		    (buf[i + 1] & 0xc0) == 0x80 &&
		    (buf[i + 2] & 0xc0) == 0x80 && (buf[i + 3] & 0xc0) == 0x80)
		{
			unsigned int w;
			w = (buf[i] & 0x07) << 18;
			w |= (buf[++i] & 0x3f) << 12;
			w |= (buf[++i] & 0x3f) << 6;
			w |= (buf[++i] & 0x3f);
			/* Overlong, or beyond the last Unicode code point. */
			if (w < 0x10000 || w > 0x10FFFF) {
				return (false);
			}
			continue;
		}

		/*
		 * Not a recognised lead octet, or the sequence is
		 * truncated or has a malformed continuation octet.
		 */
		return (false);
	}
	return (true);
}
/*
 * Return true if 'buf' (of 'len' octets) starts with the three-octet
 * UTF-8 byte order mark EF BB BF, false otherwise.  Inputs shorter
 * than three octets never match.  'buf' must not be NULL (checked
 * with REQUIRE).
 */
bool
isc_utf8_bom(const unsigned char *buf, size_t len) {
	static const unsigned char bom[] = { 0xef, 0xbb, 0xbf };

	REQUIRE(buf != NULL);

	/* Too short to hold the mark at all. */
	if (len < sizeof(bom)) {
		return (false);
	}

	return (memcmp(buf, bom, sizeof(bom)) == 0);
}