ICU 66.1 66.1
utext.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 2004-2012, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: utext.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2004oct06
16* created by: Markus W. Scherer
17*/
18
19#ifndef __UTEXT_H__
20#define __UTEXT_H__
21
140#include "unicode/utypes.h"
141#include "unicode/uchar.h"
142#if U_SHOW_CPLUSPLUS_API
143#include "unicode/localpointer.h"
144#include "unicode/rep.h"
145#include "unicode/unistr.h"
146#include "unicode/chariter.h"
147#endif
148
149
151
152struct UText;
153typedef struct UText UText;
156/***************************************************************************************
157 *
158 * C Functions for creating UText wrappers around various kinds of text strings.
159 *
160 ****************************************************************************************/
161
162
183U_STABLE UText * U_EXPORT2
185
186#if U_SHOW_CPLUSPLUS_API
187
188U_NAMESPACE_BEGIN
189
200
201U_NAMESPACE_END
202
203#endif
204
/**
 * Open a read-only UText implementation for UTF-8 strings.
 */
226U_STABLE UText * U_EXPORT2
227utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
228
229
/**
 * Open a read-only UText for a UChar * string.
 */
244U_STABLE UText * U_EXPORT2
245utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
246
247
248#if U_SHOW_CPLUSPLUS_API
261U_STABLE UText * U_EXPORT2
263
264
277U_STABLE UText * U_EXPORT2
279
280
293U_STABLE UText * U_EXPORT2
295
308U_STABLE UText * U_EXPORT2
310
311#endif
312
313
/**
 * Clone a UText.
 */
371U_STABLE UText * U_EXPORT2
372utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
373
374
/**
 * Compare two UText objects for equality.
 */
386U_STABLE UBool U_EXPORT2
387utext_equals(const UText *a, const UText *b);
388
389
390/*****************************************************************************
391 *
392 * Functions to work with the text represented by a UText wrapper
393 *
394 *****************************************************************************/
395
407U_STABLE int64_t U_EXPORT2
409
423U_STABLE UBool U_EXPORT2
425
/**
 * Returns the code point at the requested index,
 * or U_SENTINEL (-1) if it is out of bounds.
 */
451U_STABLE UChar32 U_EXPORT2
452utext_char32At(UText *ut, int64_t nativeIndex);
453
454
465U_STABLE UChar32 U_EXPORT2
467
468
487U_STABLE UChar32 U_EXPORT2
489
490
508U_STABLE UChar32 U_EXPORT2
510
511
/**
 * Set the iteration index and return the code point at that index.
 */
530U_STABLE UChar32 U_EXPORT2
531utext_next32From(UText *ut, int64_t nativeIndex);
532
533
534
/**
 * Set the iteration index, and return the code point preceding the one
 * specified by the initial index.
 */
550U_STABLE UChar32 U_EXPORT2
551utext_previous32From(UText *ut, int64_t nativeIndex);
552
565U_STABLE int64_t U_EXPORT2
567
/**
 * Set the current iteration position to the nearest code point boundary
 * at or preceding the specified index.
 */
591U_STABLE void U_EXPORT2
592utext_setNativeIndex(UText *ut, int64_t nativeIndex);
593
/**
 * Move the iterator position by delta code points.
 */
610U_STABLE UBool U_EXPORT2
611utext_moveIndex32(UText *ut, int32_t delta);
612
635U_STABLE int64_t U_EXPORT2
637
638
673U_STABLE int32_t U_EXPORT2
675 int64_t nativeStart, int64_t nativeLimit,
676 UChar *dest, int32_t destCapacity,
677 UErrorCode *status);
678
679
680
681/************************************************************************************
682 *
683 * #define inline versions of selected performance-critical text access functions
684 * Caution: do not use auto increment++ or decrement-- expressions
685 * as parameters to these macros.
686 *
687 * For most use, where there is no extreme performance constraint, the
688 * normal, non-inline functions are a better choice. The resulting code
689 * will be smaller, and, if the need ever arises, easier to debug.
690 *
691 * These are implemented as #defines rather than real functions
692 * because there is no fully portable way to do inline functions in plain C.
693 *
694 ************************************************************************************/
695
696#ifndef U_HIDE_INTERNAL_API
/**
 * Macro (inline) form of utext_current32().
 *
 * Fast path: when chunkOffset is inside the current chunk and the UTF-16 code
 * unit at that offset is below 0xd800 (i.e. below the surrogate range), the
 * expression yields that code unit straight from chunkContents and leaves
 * chunkOffset unchanged.
 * Otherwise it falls back to calling utext_current32(ut).
 *
 * The argument is evaluated more than once; do not pass an expression with
 * side effects (such as ++ or --).
 */
706#define UTEXT_CURRENT32(ut) \
707 ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
708 ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
709#endif /* U_HIDE_INTERNAL_API */
710
/**
 * Macro (inline) form of utext_next32().
 *
 * Fast path: when chunkOffset is inside the current chunk and the UTF-16 code
 * unit at that offset is below 0xd800 (i.e. below the surrogate range), the
 * expression yields that code unit and post-increments chunkOffset.
 * Otherwise it falls back to calling utext_next32(ut).
 *
 * The argument is evaluated more than once; do not pass an expression with
 * side effects (such as ++ or --).
 */
722#define UTEXT_NEXT32(ut) \
723 ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
724 ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
725
/**
 * Macro (inline) form of utext_previous32().
 *
 * Fast path: when chunkOffset is greater than zero and the UTF-16 code unit
 * just before it (at chunkOffset-1) is below 0xd800 (i.e. below the surrogate
 * range), the expression pre-decrements chunkOffset and yields the code unit
 * at the new offset.
 * Otherwise it falls back to calling utext_previous32(ut).
 *
 * The argument is evaluated more than once; do not pass an expression with
 * side effects (such as ++ or --).
 */
736#define UTEXT_PREVIOUS32(ut) \
737 ((ut)->chunkOffset > 0 && \
738 (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
739 (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut))
740
/**
 * Macro (inline) computation of the native index of the current position.
 *
 * When chunkOffset does not exceed nativeIndexingLimit, the result is simply
 * chunkNativeStart + chunkOffset.
 * Otherwise the provider's mapOffsetToNative function (reached through
 * pFuncs) is called to perform the mapping.
 *
 * The argument is evaluated more than once; do not pass an expression with
 * side effects (such as ++ or --).
 */
753#define UTEXT_GETNATIVEINDEX(ut) \
754 ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \
755 (ut)->chunkNativeStart+(ut)->chunkOffset : \
756 (ut)->pFuncs->mapOffsetToNative(ut))
757
/**
 * Macro (inline) form of utext_setNativeIndex(); expands to a statement block.
 *
 * Computes the offset of ix relative to chunkNativeStart. If that offset is
 * non-negative, is below nativeIndexingLimit, and the UTF-16 code unit at that
 * offset in chunkContents is below 0xdc00 (i.e. below the trail-surrogate
 * range), chunkOffset is set directly to the offset.
 * Otherwise it falls back to calling utext_setNativeIndex(ut, ix).
 *
 * Both arguments may be evaluated more than once; do not pass expressions
 * with side effects (such as ++ or --).
 */
769#define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \
770 int64_t __offset = (ix) - (ut)->chunkNativeStart; \
771 if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \
772 (ut)->chunkOffset=(int32_t)__offset; \
773 } else { \
774 utext_setNativeIndex((ut), (ix)); \
775 } \
776} UPRV_BLOCK_MACRO_END
777
778
779
780/************************************************************************************
781 *
782 * Functions related to writing or modifying the text.
783 * These will work only with modifiable UTexts. Attempting to
784 * modify a read-only UText will return an error status.
785 *
786 ************************************************************************************/
787
788
807U_STABLE UBool U_EXPORT2
809
810
819U_STABLE UBool U_EXPORT2
821
822
850U_STABLE int32_t U_EXPORT2
852 int64_t nativeStart, int64_t nativeLimit,
853 const UChar *replacementText, int32_t replacementLength,
854 UErrorCode *status);
855
856
857
890U_STABLE void U_EXPORT2
892 int64_t nativeStart, int64_t nativeLimit,
893 int64_t destIndex,
894 UBool move,
895 UErrorCode *status);
896
897
919U_STABLE void U_EXPORT2
921
922
929enum {
964
/**
 * Function type declaration for UText.clone().
 */
1002typedef UText * U_CALLCONV
1003UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
1004
1005
1014typedef int64_t U_CALLCONV
1016
/**
 * Function type declaration for UText.access().
 */
1042typedef UBool U_CALLCONV
1043UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
1044
1072typedef int32_t U_CALLCONV
1074 int64_t nativeStart, int64_t nativeLimit,
1075 UChar *dest, int32_t destCapacity,
1076 UErrorCode *status);
1077
1107typedef int32_t U_CALLCONV
1109 int64_t nativeStart, int64_t nativeLimit,
1110 const UChar *replacementText, int32_t replacmentLength,
1111 UErrorCode *status);
1112
1141typedef void U_CALLCONV
1143 int64_t nativeStart, int64_t nativeLimit,
1144 int64_t nativeDest,
1145 UBool move,
1146 UErrorCode *status);
1147
1161typedef int64_t U_CALLCONV
1163
/**
 * Function type declaration for UText.mapNativeIndexToUTF16().
 */
1179typedef int32_t U_CALLCONV
1180UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
1181
1182
1200typedef void U_CALLCONV
1202
1203
1228 int32_t tableSize;
1229
1236
1237
1245
1254
1262
1270
1278
1286
1294
1302
1310
1316
1322
1328
1329};
1334typedef struct UTextFuncs UTextFuncs;
1335
1347struct UText {
1360 uint32_t magic;
1361
1362
1368 int32_t flags;
1369
1370
1377
1385
1386 /* ------ 16 byte alignment boundary ----------- */
1387
1388
1395
1400 int32_t extraSize;
1401
1410
1411 /* ---- 16 byte alignment boundary------ */
1412
1418
1425
1431
1432 /* ---- 16 byte alignment boundary-- */
1433
1434
1442
1448
1454 void *pExtra;
1455
1462 const void *context;
1463
1464 /* --- 16 byte alignment boundary--- */
1465
1471 const void *p;
1477 const void *q;
1483 const void *r;
1484
1490 void *privP;
1491
1492
1493 /* --- 16 byte alignment boundary--- */
1494
1495
1501 int64_t a;
1502
1508 int32_t b;
1509
1515 int32_t c;
1516
1517 /* ---- 16 byte alignment boundary---- */
1518
1519
1525 int64_t privA;
1531 int32_t privB;
1537 int32_t privC;
1538};
1539
1540
/**
 * Common function for use by Text Provider implementations to allocate
 * and/or initialize a new UText struct.
 */
1557U_STABLE UText * U_EXPORT2
1558utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
1559
1560// do not use #ifndef U_HIDE_INTERNAL_API around the following!
/**
 * Constant placed in the magic field of a UText by UTEXT_INITIALIZER.
 */
1566enum {
1567 UTEXT_MAGIC = 0x345ad82c
1568};
1569
/**
 * Aggregate initializer for a UText struct.
 * Sets magic to UTEXT_MAGIC and sizeOfStruct to sizeof(UText); every other
 * field is initialized to 0 or NULL. The values are positional, so their
 * order must match the field order of struct UText.
 */
1577#define UTEXT_INITIALIZER { \
1578 UTEXT_MAGIC, /* magic */ \
1579 0, /* flags */ \
1580 0, /* providerProps */ \
1581 sizeof(UText), /* sizeOfStruct */ \
1582 0, /* chunkNativeLimit */ \
1583 0, /* extraSize */ \
1584 0, /* nativeIndexingLimit */ \
1585 0, /* chunkNativeStart */ \
1586 0, /* chunkOffset */ \
1587 0, /* chunkLength */ \
1588 NULL, /* chunkContents */ \
1589 NULL, /* pFuncs */ \
1590 NULL, /* pExtra */ \
1591 NULL, /* context */ \
1592 NULL, NULL, NULL, /* p, q, r */ \
1593 NULL, /* privP */ \
1594 0, 0, 0, /* a, b, c */ \
1595 0, 0, 0 /* privA,B,C, */ \
1596 }
1597
1598
1600
1601
1602
1603#endif
C++ API: Character Iterator.
"Smart pointer" class, closes a UText via utext_close().
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:361
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:77
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:295
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:562
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:870
C++ API: Replaceable String.
(public) Function dispatch table for UText.
Definition: utext.h:1213
int32_t reserved1
(private) Alignment padding.
Definition: utext.h:1235
int32_t reserved2
Definition: utext.h:1235
UTextMapOffsetToNative * mapOffsetToNative
(public) Function pointer for UTextMapOffsetToNative.
Definition: utext.h:1293
UTextAccess * access
(public) Function pointer for UTextAccess.
Definition: utext.h:1261
UTextExtract * extract
(public) Function pointer for UTextExtract.
Definition: utext.h:1269
int32_t reserved3
Definition: utext.h:1235
UTextClose * spare3
(private) Spare function pointer
Definition: utext.h:1327
int32_t tableSize
(public) Function table size, sizeof(UTextFuncs) Intended for use should the table grow to accommodat...
Definition: utext.h:1228
UTextNativeLength * nativeLength
(public) Function pointer for UTextNativeLength. May be expensive to compute!
Definition: utext.h:1253
UTextReplace * replace
(public) Function pointer for UTextReplace.
Definition: utext.h:1277
UTextClose * spare2
(private) Spare function pointer
Definition: utext.h:1321
UTextClose * spare1
(private) Spare function pointer
Definition: utext.h:1315
UTextCopy * copy
(public) Function pointer for UTextCopy.
Definition: utext.h:1285
UTextMapNativeIndexToUTF16 * mapNativeIndexToUTF16
(public) Function pointer for UTextMapNativeIndexToUTF16.
Definition: utext.h:1301
UTextClose * close
(public) Function pointer for UTextClose.
Definition: utext.h:1309
UTextClone * clone
(public) Function pointer for UTextClone.
Definition: utext.h:1244
UText struct.
Definition: utext.h:1347
int32_t b
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1508
int32_t chunkOffset
(protected) Current iteration position within the text chunk (UTF-16 buffer).
Definition: utext.h:1424
const void * p
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1471
int32_t extraSize
(protected) Size in bytes of the extra space (pExtra).
Definition: utext.h:1400
int64_t privA
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1525
int32_t nativeIndexingLimit
(protected) The highest chunk offset where native indexing and chunk (UTF-16) indexing correspond.
Definition: utext.h:1409
int32_t flags
Definition: utext.h:1368
int32_t sizeOfStruct
(public) sizeOfStruct=sizeof(UText) Allows possible backward compatible extension.
Definition: utext.h:1384
void * pExtra
(protected) Pointer to additional space requested by the text provider during the utext_open operatio...
Definition: utext.h:1454
int64_t chunkNativeLimit
(protected) Native index of the first character position following the current chunk.
Definition: utext.h:1394
const void * r
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1483
int32_t privB
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1531
uint32_t magic
Definition: utext.h:1360
const UTextFuncs * pFuncs
(public) Pointer to Dispatch table for accessing functions for this UText.
Definition: utext.h:1447
int32_t chunkLength
(protected) Length of the text chunk (UTF-16 buffer), in UChars.
Definition: utext.h:1430
int32_t providerProperties
Text provider properties.
Definition: utext.h:1376
const UChar * chunkContents
(protected) pointer to a chunk of text in UTF-16 format.
Definition: utext.h:1441
int32_t privC
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1537
const void * q
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1477
void * privP
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1490
int64_t chunkNativeStart
(protected) Native index of the first character in the text chunk.
Definition: utext.h:1417
const void * context
(protected) Pointer to string or text-containing object or similar.
Definition: utext.h:1462
int32_t c
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1515
int64_t a
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1501
C API: Unicode Properties.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:425
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:85
int8_t UBool
The ICU boolean type.
Definition: umachine.h:261
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:378
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:84
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
C++ API: Unicode String.
int32_t UTextReplace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacmentLength, UErrorCode *status)
Function type declaration for UText.replace().
Definition: utext.h:1108
UText * utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status)
Common function for use by Text Provider implementations to allocate and/or initialize a new UText st...
UBool utext_isLengthExpensive(const UText *ut)
Return TRUE if calculating the length of the text could be expensive.
void utext_freeze(UText *ut)
UText * utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status)
Open a read-only UText implementation for UTF-8 strings.
UText * UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status)
Function type declaration for UText.clone().
Definition: utext.h:1003
int64_t utext_getPreviousNativeIndex(UText *ut)
Get the native index of the character preceding the current position.
UText * utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status)
Clone a UText.
int64_t UTextNativeLength(UText *ut)
Function type declaration for UText.nativeLength().
Definition: utext.h:1015
void utext_setNativeIndex(UText *ut, int64_t nativeIndex)
Set the current iteration position to the nearest code point boundary at or preceding the specified i...
UBool utext_equals(const UText *a, const UText *b)
Compare two UText objects for equality.
UBool utext_hasMetaData(const UText *ut)
Test whether there is meta data associated with the text.
UText * utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status)
Open a UText implementation over an ICU CharacterIterator.
int32_t UTextExtract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Function type declaration for UText.extract().
Definition: utext.h:1073
void UTextClose(UText *ut)
Function type declaration for UText.close().
Definition: utext.h:1201
UText * utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status)
Open a writable UText implementation for an ICU Replaceable object.
UText * utext_close(UText *ut)
Close function for UText instances.
UChar32 utext_previous32From(UText *ut, int64_t nativeIndex)
Set the iteration index, and return the code point preceding the one specified by the initial index.
@ UTEXT_PROVIDER_HAS_META_DATA
There is meta data associated with the text.
Definition: utext.h:954
@ UTEXT_PROVIDER_STABLE_CHUNKS
Text chunks remain valid and usable until the text object is modified or deleted, not just until the ...
Definition: utext.h:941
@ UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE
It is potentially time consuming for the provider to determine the length of the text.
Definition: utext.h:934
@ UTEXT_PROVIDER_OWNS_TEXT
Text provider owns the text storage.
Definition: utext.h:962
@ UTEXT_PROVIDER_WRITABLE
The provider supports modifying the text via the replace() and copy() functions.
Definition: utext.h:948
UChar32 utext_next32From(UText *ut, int64_t nativeIndex)
Set the iteration index and return the code point at that index.
UBool UTextAccess(UText *ut, int64_t nativeIndex, UBool forward)
Function type declaration for UText.access().
Definition: utext.h:1043
UChar32 utext_current32(UText *ut)
Get the code point at the current iteration position, or U_SENTINEL (-1) if the iteration has reached...
int32_t utext_replace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacementLength, UErrorCode *status)
Replace a range of the original text with a replacement text.
int64_t UTextMapOffsetToNative(const UText *ut)
Function type declaration for UText.mapOffsetToNative().
Definition: utext.h:1162
UChar32 utext_char32At(UText *ut, int64_t nativeIndex)
Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds.
int32_t UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex)
Function type declaration for UText.mapNativeIndexToUTF16().
Definition: utext.h:1180
int64_t utext_nativeLength(UText *ut)
Get the length of the text.
UText * utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status)
Open a read-only UText for UChar * string.
void utext_copy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t destIndex, UBool move, UErrorCode *status)
Copy or move a substring from one position to another within the text, while retaining any metadata a...
UText * utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status)
Open a UText for a const UnicodeString.
int64_t utext_getNativeIndex(const UText *ut)
Get the current iterator position, which can range from 0 to the length of the text.
void UTextCopy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t nativeDest, UBool move, UErrorCode *status)
Function type declaration for UText.copy().
Definition: utext.h:1142
UChar32 utext_next32(UText *ut)
Get the code point at the current iteration position of the UText, and advance the position to the fi...
UText * utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status)
Open a writable UText for a non-const UnicodeString.
int32_t utext_extract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Extract text from a UText into a UChar buffer.
UBool utext_isWritable(const UText *ut)
Return TRUE if the text can be written (modified) with utext_replace() or utext_copy().
UChar32 utext_previous32(UText *ut)
Move the iterator position to the character (code point) whose index precedes the current position,...
UBool utext_moveIndex32(UText *ut, int32_t delta)
Move the iterator position by delta code points.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415