ICU 53.1  53.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tblcoll.h
Go to the documentation of this file.
1 /*
2 ******************************************************************************
3 * Copyright (C) 1996-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ******************************************************************************
6 */
7 
60 #ifndef TBLCOLL_H
61 #define TBLCOLL_H
62 
63 #include "unicode/utypes.h"
64 
65 #if !UCONFIG_NO_COLLATION
66 
67 #include "unicode/coll.h"
68 #include "unicode/locid.h"
69 #include "unicode/uiter.h"
70 #include "unicode/ucol.h"
71 
72 U_NAMESPACE_BEGIN
73 
74 struct CollationData;
75 struct CollationSettings;
76 struct CollationTailoring;
80 class StringSearch;
84 class CollationElementIterator;
85 class CollationKey;
86 class SortKeyByteSink;
87 class UnicodeSet;
88 class UnicodeString;
89 class UVector64;
90 
110 class U_I18N_API RuleBasedCollator : public Collator {
111 public:
121  RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
122 
133  RuleBasedCollator(const UnicodeString& rules,
134  ECollationStrength collationStrength,
135  UErrorCode& status);
136 
147  RuleBasedCollator(const UnicodeString& rules,
148  UColAttributeValue decompositionMode,
149  UErrorCode& status);
150 
162  RuleBasedCollator(const UnicodeString& rules,
163  ECollationStrength collationStrength,
164  UColAttributeValue decompositionMode,
165  UErrorCode& status);
166 
167 #ifndef U_HIDE_INTERNAL_API
168 
172  RuleBasedCollator(const UnicodeString &rules,
173  UParseError &parseError, UnicodeString &reason,
174  UErrorCode &errorCode);
175 #endif /* U_HIDE_INTERNAL_API */
176 
183  RuleBasedCollator(const RuleBasedCollator& other);
184 
185 
203  RuleBasedCollator(const uint8_t *bin, int32_t length,
204  const RuleBasedCollator *base,
205  UErrorCode &status);
206 
211  virtual ~RuleBasedCollator();
212 
218  RuleBasedCollator& operator=(const RuleBasedCollator& other);
219 
226  virtual UBool operator==(const Collator& other) const;
227 
233  virtual Collator* clone(void) const;
234 
245  virtual CollationElementIterator* createCollationElementIterator(
246  const UnicodeString& source) const;
247 
257  virtual CollationElementIterator* createCollationElementIterator(
258  const CharacterIterator& source) const;
259 
260  // Make deprecated versions of Collator::compare() visible.
261  using Collator::compare;
262 
275  virtual UCollationResult compare(const UnicodeString& source,
276  const UnicodeString& target,
277  UErrorCode &status) const;
278 
292  virtual UCollationResult compare(const UnicodeString& source,
293  const UnicodeString& target,
294  int32_t length,
295  UErrorCode &status) const;
296 
313  virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
314  const UChar* target, int32_t targetLength,
315  UErrorCode &status) const;
316 
328  virtual UCollationResult compare(UCharIterator &sIter,
329  UCharIterator &tIter,
330  UErrorCode &status) const;
331 
345  virtual UCollationResult compareUTF8(const StringPiece &source,
346  const StringPiece &target,
347  UErrorCode &status) const;
348 
361  virtual CollationKey& getCollationKey(const UnicodeString& source,
362  CollationKey& key,
363  UErrorCode& status) const;
364 
378  virtual CollationKey& getCollationKey(const UChar *source,
379  int32_t sourceLength,
380  CollationKey& key,
381  UErrorCode& status) const;
382 
388  virtual int32_t hashCode() const;
389 
400  virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
401 
407  const UnicodeString& getRules() const;
408 
414  virtual void getVersion(UVersionInfo info) const;
415 
416 #ifndef U_HIDE_DEPRECATED_API
417 
433  int32_t getMaxExpansion(int32_t order) const;
434 #endif /* U_HIDE_DEPRECATED_API */
435 
446  virtual UClassID getDynamicClassID(void) const;
447 
459  static UClassID U_EXPORT2 getStaticClassID(void);
460 
461 #ifndef U_HIDE_DEPRECATED_API
462 
472  uint8_t *cloneRuleData(int32_t &length, UErrorCode &status) const;
473 #endif /* U_HIDE_DEPRECATED_API */
474 
485  int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) const;
486 
498  void getRules(UColRuleOption delta, UnicodeString &buffer) const;
499 
507  virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
508  UErrorCode &status);
509 
517  virtual UColAttributeValue getAttribute(UColAttribute attr,
518  UErrorCode &status) const;
519 
536  virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode);
537 
544  virtual UColReorderCode getMaxVariable() const;
545 
562  virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
563 
579  virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status);
580 
592  virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
593 
601  virtual uint32_t getVariableTop(UErrorCode &status) const;
602 
612  virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
613 
624  virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
625  int32_t resultLength) const;
626 
639  virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
640  uint8_t *result, int32_t resultLength) const;
641 
655  virtual int32_t getReorderCodes(int32_t *dest,
656  int32_t destCapacity,
657  UErrorCode& status) const;
658 
669  virtual void setReorderCodes(const int32_t* reorderCodes,
670  int32_t reorderCodesLength,
671  UErrorCode& status) ;
672 
677  virtual UCollationResult internalCompareUTF8(
678  const char *left, int32_t leftLength,
679  const char *right, int32_t rightLength,
680  UErrorCode &errorCode) const;
681 
705  virtual int32_t internalGetShortDefinitionString(const char *locale,
706  char *buffer,
707  int32_t capacity,
708  UErrorCode &status) const;
709 
714  virtual int32_t internalNextSortKeyPart(
715  UCharIterator *iter, uint32_t state[2],
716  uint8_t *dest, int32_t count, UErrorCode &errorCode) const;
717 
718 #ifndef U_HIDE_INTERNAL_API
719 
724 
731  const char *internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const;
732 
745  void internalGetContractionsAndExpansions(
746  UnicodeSet *contractions, UnicodeSet *expansions,
747  UBool addPrefixes, UErrorCode &errorCode) const;
748 
754  void internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const;
755 
760  void internalBuildTailoring(
761  const UnicodeString &rules,
762  int32_t strength,
763  UColAttributeValue decompositionMode,
764  UParseError *outParseError, UnicodeString *outReason,
765  UErrorCode &errorCode);
766 
768  static inline RuleBasedCollator *rbcFromUCollator(UCollator *uc) {
769  return dynamic_cast<RuleBasedCollator *>(fromUCollator(uc));
770  }
772  static inline const RuleBasedCollator *rbcFromUCollator(const UCollator *uc) {
773  return dynamic_cast<const RuleBasedCollator *>(fromUCollator(uc));
774  }
775 
780  void internalGetCEs(const UnicodeString &str, UVector64 &ces, UErrorCode &errorCode) const;
781 #endif // U_HIDE_INTERNAL_API
782 
783 protected:
791  virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
792 
793 private:
794  friend class CollationElementIterator;
795  friend class Collator;
796 
797  RuleBasedCollator(const CollationTailoring *t, const Locale &vl);
798 
804  enum Attributes {
805  ATTR_VARIABLE_TOP = UCOL_ATTRIBUTE_COUNT,
806  ATTR_LIMIT
807  };
808 
809  void adoptTailoring(CollationTailoring *t);
810 
811  // Both lengths must be <0 or else both must be >=0.
812  UCollationResult doCompare(const UChar *left, int32_t leftLength,
813  const UChar *right, int32_t rightLength,
814  UErrorCode &errorCode) const;
815  UCollationResult doCompare(const uint8_t *left, int32_t leftLength,
816  const uint8_t *right, int32_t rightLength,
817  UErrorCode &errorCode) const;
818 
819  void writeSortKey(const UChar *s, int32_t length,
820  SortKeyByteSink &sink, UErrorCode &errorCode) const;
821 
822  void writeIdenticalLevel(const UChar *s, const UChar *limit,
823  SortKeyByteSink &sink, UErrorCode &errorCode) const;
824 
825  const CollationSettings &getDefaultSettings() const;
826 
827  void setAttributeDefault(int32_t attribute) {
828  explicitlySetAttributes &= ~((uint32_t)1 << attribute);
829  }
830  void setAttributeExplicitly(int32_t attribute) {
831  explicitlySetAttributes |= (uint32_t)1 << attribute;
832  }
833  UBool attributeHasBeenSetExplicitly(int32_t attribute) const {
834  // assert(0 <= attribute < ATTR_LIMIT);
835  return (UBool)((explicitlySetAttributes & ((uint32_t)1 << attribute)) != 0);
836  }
837 
845  UBool isUnsafe(UChar32 c) const;
846 
847  static void computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode);
848  UBool initMaxExpansions(UErrorCode &errorCode) const;
849 
850  void setFastLatinOptions(CollationSettings &ownedSettings) const;
851 
852  const CollationData *data;
853  const CollationSettings *settings; // reference-counted
854  const CollationTailoring *tailoring; // reference-counted
855  Locale validLocale;
856  uint32_t explicitlySetAttributes;
857 
858  UBool actualLocaleIsSameAsValid;
859 };
860 
861 U_NAMESPACE_END
862 
863 #endif // !UCONFIG_NO_COLLATION
864 #endif // TBLCOLL_H
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.
Definition: uversion.h:57
virtual UBool operator==(const Collator &other) const
Returns TRUE if "other" is the same as "this".
virtual void setReorderCodes(const int32_t *reorderCodes, int32_t reorderCodesLength, UErrorCode &status)
Sets the ordering of scripts for this collator.
virtual int32_t hashCode(void) const =0
Generates the hash code for the collation object.
The Collator class performs locale-sensitive string comparison.
Definition: coll.h:171
virtual UCollationResult internalCompareUTF8(const char *left, int32_t leftLength, const char *right, int32_t rightLength, UErrorCode &errorCode) const
Implements ucol_strcollUTF8().
virtual Collator & setMaxVariable(UColReorderCode group, UErrorCode &errorCode)
Sets the variable top to the top of the specified reordering group.
UCollationResult
UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll(...
Definition: ucol.h:71
virtual uint32_t getVariableTop(UErrorCode &status) const =0
Gets the variable top value of a Collator.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:91
C API for code unit iteration.
Definition: uiter.h:339
virtual int32_t internalGetShortDefinitionString(const char *locale, char *buffer, int32_t capacity, UErrorCode &status) const
Get the short definition string for a collator.
ECollationStrength
Base letter represents a primary difference.
Definition: coll.h:201
static const RuleBasedCollator * rbcFromUCollator(const UCollator *uc)
Definition: tblcoll.h:772
UColReorderCode
Enum containing the codes for reordering segments of the collation table that are not script codes...
Definition: ucol.h:139
UColAttribute
Attributes that collation service understands.
Definition: ucol.h:234
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside...
Definition: utypes.h:358
virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status)=0
Sets the variable top to the primary weight of the specified string.
virtual UColAttributeValue getAttribute(UColAttribute attr, UErrorCode &status) const =0
Universal attribute getter.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129
The RuleBasedCollator class provides the implementation of Collator, using data-driven tables...
Definition: tblcoll.h:110
C++ API: Collation Service.
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:356
virtual Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const =0
Gets the locale of the Collator.
virtual Collator * clone(void) const =0
Makes a copy of this object.
virtual UClassID getDynamicClassID(void) const =0
Returns a unique class ID POLYMORPHICALLY.
The CollationElementIterator class is used as an iterator to walk through each character of an intern...
Definition: coleitr.h:116
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:298
virtual void getVersion(UVersionInfo info) const =0
Gets the version information for a Collator.
virtual void setLocales(const Locale &requestedLocale, const Locale &validLocale, const Locale &actualLocale)
Used internally by registration to define the requested and valid locales.
C API: Collator.
Collation keys are generated by the Collator class.
Definition: sortkey.h:97
virtual UColReorderCode getMaxVariable() const
Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
virtual int32_t getSortKey(const UnicodeString &source, uint8_t *result, int32_t resultLength) const =0
Get the sort key as an array of bytes from a UnicodeString.
virtual CollationKey & getCollationKey(const UnicodeString &source, CollationKey &key, UErrorCode &status) const =0
Transforms the string into a series of characters that can be compared with CollationKey::compareTo.
virtual EComparisonResult compare(const UnicodeString &source, const UnicodeString &target) const
The comparison function compares the character data stored in two different strings.
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:276
C API: Unicode Character Iteration.
virtual int32_t getReorderCodes(int32_t *dest, int32_t destCapacity, UErrorCode &status) const
Retrieves the reordering codes for this collator.
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:278
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
virtual UnicodeSet * getTailoredSet(UErrorCode &status) const
Get a UnicodeSet that contains all the characters and sequences tailored in this collator.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:476
ULocDataLocaleType
Constants for *_getLocale(). Allow user to select whether she wants information on requested...
Definition: uloc.h:336
static RuleBasedCollator * rbcFromUCollator(UCollator *uc)
Definition: tblcoll.h:768
C++ API: Locale ID object.
struct UCollator UCollator
structure representing a collator object instance
Definition: ucol.h:56
virtual void setAttribute(UColAttribute attr, UColAttributeValue value, UErrorCode &status)=0
Universal attribute setter.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
Basic definitions for ICU, for both C and C++ APIs.
static Collator * fromUCollator(UCollator *uc)
Definition: coll.h:1143
virtual int32_t internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2], uint8_t *dest, int32_t count, UErrorCode &errorCode) const
Implements ucol_nextSortKeyPart().
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:245
UColAttributeValue
Enum containing attribute values for controlling collation behavior.
Definition: ucol.h:87
A string-like object that points to a sized piece of memory.
Definition: stringpiece.h:52
UColRuleOption
Options for retrieving the rule string.
Definition: ucol.h:346
The number of UColAttribute constants.
Definition: ucol.h:340
int8_t UBool
The ICU boolean type.
Definition: umachine.h:200
virtual UCollationResult compareUTF8(const StringPiece &source, const StringPiece &target, UErrorCode &status) const
Compares two UTF-8 strings using the Collator.
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:185