Functions
constexpr auto	isHighSurrogate (char16_t codeUnit) noexcept -> bool
	Determines if a UTF-16 code unit is a high surrogate.

constexpr auto	isLowSurrogate (char16_t codeUnit) noexcept -> bool
	Determines if a UTF-16 code unit is a low surrogate.

constexpr auto	isSurrogate (char16_t codeUnit) noexcept -> bool
	Determines if a UTF-16 code unit is a high or low surrogate.

constexpr auto	combineSurrogates (char16_t high, char16_t low) noexcept -> char32_t
	Combines a high and a low surrogate into a single code point.

constexpr auto	makeSurrogatePair (char32_t codePoint) noexcept -> std::array< char16_t, 2 >
	Splits a code point into high and low surrogates.

constexpr auto	isBmp (char32_t codePoint) noexcept -> bool
	Determines if a code point lies in the basic multilingual plane.

constexpr auto	isOutOfRange (char32_t codePoint) noexcept -> bool
	Determines if a code point is outside the valid range for Unicode scalar values.

constexpr auto	isReserved (char32_t codeUnit) noexcept -> bool
	Determines if a UTF-32 code unit is reserved for a high or a low surrogate.

constexpr auto	isIllegal (char32_t codePoint) noexcept -> bool
	Determines if a code point is not a legal Unicode scalar value.

constexpr auto	replacementCharacter () noexcept -> char32_t
	Returns the replacement character.

constexpr auto	isSingleByteUtf8 (char32_t codePoint) noexcept -> bool
	Determines if a code point encodes to a single UTF-8 code unit.

constexpr auto	isTwoByteUtf8 (char32_t codePoint) noexcept -> bool
	Determines if a code point encodes to a sequence of two UTF-8 code units.

constexpr auto	isThreeByteUtf8 (char32_t codePoint) noexcept -> bool
	Determines if a code point encodes to a sequence of three UTF-8 code units.

constexpr auto	isFourByteUtf8 (char32_t codePoint) noexcept -> bool
	Determines if a code point encodes to a sequence of four UTF-8 code units.

constexpr auto	isTwoByteUtf8OrLess (char32_t codePoint) noexcept -> bool
	Determines if a code point encodes to a sequence of two UTF-8 code units or less.

constexpr auto	isThreeByteUtf8OrLess (char32_t codePoint) noexcept -> bool
	Determines if a code point encodes to a sequence of three UTF-8 code units or less.

constexpr auto	isFourByteUtf8OrLess (char32_t codePoint) noexcept -> bool
	Determines if a code point encodes to a sequence of four UTF-8 code units or less.

constexpr auto	utf8Size (char32_t codePoint) noexcept -> std::size_t
	Determines how many UTF-8 code units are needed to encode a code point.

constexpr auto	utf16Size (char32_t codePoint) noexcept -> std::size_t
	Determines how many UTF-16 code units are needed to encode a code point.

Function Documentation

◆ combineSurrogates()

constexpr auto xentara::utils::string::unicode::combineSurrogates ( char16_t high, char16_t low ) -> char32_t

constexpr noexcept

Combines a high and a low surrogate into a single code point.

Parameters

high	The high surrogate
low	The low surrogate

Returns: the combined code point

◆ isBmp()

constexpr auto xentara::utils::string::unicode::isBmp ( char32_t codePoint ) -> bool

constexpr noexcept

Determines if a code point lies in the basic multilingual plane.

The basic multilingual plane consists of all code points less than or equal to U+FFFF. These characters can be represented by a single UTF-16 code unit. Characters outside the basic multilingual plane must be split up into high and low surrogates.

Parameters

codePoint The code point

Returns: true if the code point is in the basic multilingual plane, and can thus be represented by a single UTF-16 code unit.

◆ isFourByteUtf8()

constexpr auto xentara::utils::string::unicode::isFourByteUtf8 ( char32_t codePoint ) -> bool

constexpr noexcept

Determines if a code point encodes to a sequence of four UTF-8 code units.

◆ isFourByteUtf8OrLess()

constexpr auto xentara::utils::string::unicode::isFourByteUtf8OrLess ( char32_t codePoint ) -> bool

constexpr noexcept

Determines if a code point encodes to a sequence of four UTF-8 code units or less.

This function can be used instead of isFourByteUtf8() if it is already known that the code point is not a three byte sequence or less, e.g. in chained if statements.

Note: No legal Unicode code point encodes to more than four UTF-8 code units, so this function is equivalent to !isOutOfRange().

◆ isHighSurrogate()

constexpr auto xentara::utils::string::unicode::isHighSurrogate ( char16_t codeUnit ) -> bool

constexpr noexcept

Determines if a UTF-16 code unit is a high surrogate.

Parameters

codeUnit The code unit

Returns: true if the code unit is a high surrogate

◆ isIllegal()

constexpr auto xentara::utils::string::unicode::isIllegal ( char32_t codePoint ) -> bool

constexpr noexcept

Determines if a code point is not a legal Unicode scalar value.

Parameters

codePoint The code point

Returns: true if the code point is a high or low surrogate, or if it is out of range

◆ isLowSurrogate()

constexpr auto xentara::utils::string::unicode::isLowSurrogate ( char16_t codeUnit ) -> bool

constexpr noexcept

Determines if a UTF-16 code unit is a low surrogate.

Parameters

codeUnit The code unit

Returns: true if the code unit is a low surrogate

◆ isOutOfRange()

constexpr auto xentara::utils::string::unicode::isOutOfRange ( char32_t codePoint ) -> bool

constexpr noexcept

Determines if a code point is outside the valid range for Unicode scalar values.

Parameters

codePoint The code point

Returns: true if the code point is out of range (i.e. greater than U+10FFFF)

◆ isReserved()

constexpr auto xentara::utils::string::unicode::isReserved ( char32_t codeUnit ) -> bool

constexpr noexcept

Determines if a UTF-32 code unit is reserved for a high or a low surrogate.

Parameters

codeUnit The code unit

Returns: true if the code unit is a high surrogate or a low surrogate

◆ isSingleByteUtf8()

constexpr auto xentara::utils::string::unicode::isSingleByteUtf8 ( char32_t codePoint ) -> bool

constexpr noexcept

Determines if a code point encodes to a single UTF-8 code unit.

◆ isSurrogate()

constexpr auto xentara::utils::string::unicode::isSurrogate ( char16_t codeUnit ) -> bool

constexpr noexcept

Determines if a UTF-16 code unit is a high or low surrogate.

Parameters

codeUnit The code unit

Returns: true if the code unit is a high surrogate or a low surrogate

◆ isThreeByteUtf8()

constexpr auto xentara::utils::string::unicode::isThreeByteUtf8 ( char32_t codePoint ) -> bool

constexpr noexcept

Determines if a code point encodes to a sequence of three UTF-8 code units.

◆ isThreeByteUtf8OrLess()

constexpr auto xentara::utils::string::unicode::isThreeByteUtf8OrLess ( char32_t codePoint ) -> bool

constexpr noexcept

Determines if a code point encodes to a sequence of three UTF-8 code units or less.

This function can be used instead of isThreeByteUtf8() if it is already known that the code point is not a two byte sequence or less, e.g. in chained if statements.

◆ isTwoByteUtf8()

constexpr auto xentara::utils::string::unicode::isTwoByteUtf8 ( char32_t codePoint ) -> bool

constexpr noexcept

Determines if a code point encodes to a sequence of two UTF-8 code units.

◆ isTwoByteUtf8OrLess()

constexpr auto xentara::utils::string::unicode::isTwoByteUtf8OrLess ( char32_t codePoint ) -> bool

constexpr noexcept

Determines if a code point encodes to a sequence of two UTF-8 code units or less.

This function can be used instead of isTwoByteUtf8() if it is already known that the code point is not a single byte sequence, e.g. in chained if statements.

◆ makeSurrogatePair()

constexpr auto xentara::utils::string::unicode::makeSurrogatePair ( char32_t codePoint ) -> std::array<char16_t, 2>

constexpr noexcept

Splits a code point into high and low surrogates.

Parameters

codePoint The code point. Must be between U+10000 and U+10FFFF (supplementary planes).

Returns: The code point's high surrogate followed by its low surrogate

◆ replacementCharacter()

constexpr auto xentara::utils::string::unicode::replacementCharacter ( ) -> char32_t

constexpr noexcept

Returns the replacement character.

Returns: The Unicode replacement character (U+FFFD)

◆ utf16Size()

constexpr auto xentara::utils::string::unicode::utf16Size ( char32_t codePoint ) -> std::size_t

constexpr noexcept

Determines how many UTF-16 code units are needed to encode a code point.

Parameters

codePoint The code point. Must be a legal Unicode scalar value (less than or equal to U+10FFFF, and not a surrogate).

Returns: the number of UTF-16 code units needed to represent the code point. The return value will always be 1 or 2, as all Unicode code points encode to either a single code unit, or a surrogate pair.

◆ utf8Size()

constexpr auto xentara::utils::string::unicode::utf8Size ( char32_t codePoint ) -> std::size_t

constexpr noexcept

Determines how many UTF-8 code units are needed to encode a code point.

Parameters

codePoint The code point. Must be a legal Unicode scalar value (less than or equal to U+10FFFF, and not a surrogate).

Returns: the number of UTF-8 code units needed to represent the code point. The return value will always be between 1 and 4, as all Unicode code points encode to between one and four bytes.

Functions

Function Documentation

◆ combineSurrogates()

◆ isBmp()

◆ isFourByteUtf8()

◆ isFourByteUtf8OrLess()

◆ isHighSurrogate()

◆ isIllegal()

◆ isLowSurrogate()

◆ isOutOfRange()

◆ isReserved()

◆ isSingleByteUtf8()

◆ isSurrogate()

◆ isThreeByteUtf8()

◆ isThreeByteUtf8OrLess()

◆ isTwoByteUtf8()

◆ isTwoByteUtf8OrLess()

◆ makeSurrogatePair()

◆ replacementCharacter()

◆ utf16Size()

◆ utf8Size()