libunibreak 6.1
Header file for the line breaking algorithm.
Go to the source code of this file.
Macros | |
| #define | LINEBREAK_MUSTBREAK 0 |
| Break is mandatory. | |
| #define | LINEBREAK_ALLOWBREAK 1 |
| Break is allowed. | |
| #define | LINEBREAK_NOBREAK 2 |
| No break is possible. | |
| #define | LINEBREAK_INSIDEACHAR 3 |
| A UTF-8/16 sequence is unfinished. | |
| #define | LINEBREAK_INDETERMINATE 4 |
| End of input on a non-EOL char. | |
Functions | |
| void | init_linebreak (void) |
| Does nothing. | |
| void | set_linebreaks_utf8 (const utf8_t *s, size_t len, const char *lang, char *brks) |
| Sets the line breaking information for a UTF-8 input string. | |
| void | set_linebreaks_utf16 (const utf16_t *s, size_t len, const char *lang, char *brks) |
| Sets the line breaking information for a UTF-16 input string. | |
| void | set_linebreaks_utf32 (const utf32_t *s, size_t len, const char *lang, char *brks) |
| Sets the line breaking information for a UTF-32 input string. | |
| size_t | set_linebreaks_utf8_per_code_point (const utf8_t *s, size_t len, const char *lang, char *brks) |
| Sets the line breaking information for a UTF-8 input string, per code point. | |
| size_t | set_linebreaks_utf16_per_code_point (const utf16_t *s, size_t len, const char *lang, char *brks) |
| Sets the line breaking information for a UTF-16 input string, per code point. | |
| int | is_line_breakable (utf32_t char1, utf32_t char2, const char *lang) |
| Tells whether a line break can occur between two Unicode characters. | |
Header file for the line breaking algorithm.
| #define LINEBREAK_ALLOWBREAK 1 |
Break is allowed.
| #define LINEBREAK_INDETERMINATE 4 |
End of input on a non-EOL char.
| #define LINEBREAK_INSIDEACHAR 3 |
A UTF-8/16 sequence is unfinished.
| #define LINEBREAK_MUSTBREAK 0 |
Break is mandatory.
| #define LINEBREAK_NOBREAK 2 |
No break is possible.
| void init_linebreak | ( | void | ) |
Does nothing.
This is kept for binary compatibility.
| int is_line_breakable | ( | utf32_t | char1, |
| utf32_t | char2, | ||
| const char * | lang ) |
Tells whether a line break can occur between two Unicode characters.
This is a wrapper function to expose a simple interface. Generally speaking, it is better to use set_linebreaks_utf32 instead, since this function cannot correctly process complicated cases involving combining marks, spaces, etc.
| char1 | the first Unicode character |
| char2 | the second Unicode character |
| lang | language of the input |
| void set_linebreaks_utf16 | ( | const utf16_t * | s, |
| size_t | len, | ||
| const char * | lang, | ||
| char * | brks ) |
Sets the line breaking information for a UTF-16 input string.
| [in] | s | input UTF-16 string |
| [in] | len | length of the input |
| [in] | lang | language of the input |
| [out] | brks | pointer to the output breaking data, containing LINEBREAK_MUSTBREAK, LINEBREAK_ALLOWBREAK, LINEBREAK_NOBREAK, or LINEBREAK_INSIDEACHAR |
| size_t set_linebreaks_utf16_per_code_point | ( | const utf16_t * | s, |
| size_t | len, | ||
| const char * | lang, | ||
| char * | brks ) |
Sets the line breaking information for a UTF-16 input string, per code point.
| [in] | s | input UTF-16 string |
| [in] | len | length of the input |
| [in] | lang | language of the input |
| [out] | brks | pointer to the output breaking data, containing LINEBREAK_MUSTBREAK, LINEBREAK_ALLOWBREAK, or LINEBREAK_NOBREAK |
| void set_linebreaks_utf32 | ( | const utf32_t * | s, |
| size_t | len, | ||
| const char * | lang, | ||
| char * | brks ) |
Sets the line breaking information for a UTF-32 input string.
| [in] | s | input UTF-32 string |
| [in] | len | length of the input |
| [in] | lang | language of the input |
| [out] | brks | pointer to the output breaking data, containing LINEBREAK_MUSTBREAK, LINEBREAK_ALLOWBREAK, LINEBREAK_NOBREAK, or LINEBREAK_INSIDEACHAR |
| void set_linebreaks_utf8 | ( | const utf8_t * | s, |
| size_t | len, | ||
| const char * | lang, | ||
| char * | brks ) |
Sets the line breaking information for a UTF-8 input string.
| [in] | s | input UTF-8 string |
| [in] | len | length of the input |
| [in] | lang | language of the input |
| [out] | brks | pointer to the output breaking data, containing LINEBREAK_MUSTBREAK, LINEBREAK_ALLOWBREAK, LINEBREAK_NOBREAK, or LINEBREAK_INSIDEACHAR |
| size_t set_linebreaks_utf8_per_code_point | ( | const utf8_t * | s, |
| size_t | len, | ||
| const char * | lang, | ||
| char * | brks ) |
Sets the line breaking information for a UTF-8 input string, per code point.
| [in] | s | input UTF-8 string |
| [in] | len | length of the input |
| [in] | lang | language of the input |
| [out] | brks | pointer to the output breaking data, containing LINEBREAK_MUSTBREAK, LINEBREAK_ALLOWBREAK, or LINEBREAK_NOBREAK |