ucnvsel.h 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. /*
  2. *******************************************************************************
  3. *
  4. * Copyright (C) 2008-2011, International Business Machines
  5. * Corporation, Google and others. All Rights Reserved.
  6. *
  7. *******************************************************************************
  8. */
  9. /*
  10. * Author : eldawy@google.com (Mohamed Eldawy)
  11. * ucnvsel.h
  12. *
  13. * Purpose: To generate a list of encodings capable of handling
  14. * a given Unicode text
  15. *
  16. * Started 09-April-2008
  17. */
  18. #ifndef __ICU_UCNV_SEL_H__
  19. #define __ICU_UCNV_SEL_H__
  20. #include "unicode/utypes.h"
  21. #if !UCONFIG_NO_CONVERSION
  22. #include "unicode/uset.h"
  23. #include "unicode/utf16.h"
  24. #include "unicode/uenum.h"
  25. #include "unicode/ucnv.h"
  26. #include "unicode/localpointer.h"
  27. /**
  28. * \file
  29. *
  30. * A converter selector is built with a set of encoding/charset names
  31. * and given an input string returns the set of names of the
  32. * corresponding converters which can convert the string.
  33. *
  34. * A converter selector can be serialized into a buffer and reopened
  35. * from the serialized form.
  36. */
  37. /**
  38. * @{
  39. * The selector data structure. It is only forward-declared here; the API below passes it by pointer.
  40. */
  41. struct UConverterSelector;
  42. typedef struct UConverterSelector UConverterSelector;
  43. /** @} */
  44. /**
  45. * Open a selector.
  46. * If converterListSize is 0, build for all available converters.
  47. * If excludedCodePoints is NULL, don't exclude any code points.
  48. *
  49. * @param converterList array of the encoding names to consider.
  50. * Can be NULL if converterListSize==0.
  51. * The list and the names will be cloned, and the caller
  52. * retains ownership of the original.
  53. * @param converterListSize number of encodings in the above list.
  54. * If 0, builds a selector for all available converters.
  55. * @param excludedCodePoints a set of code points to be excluded from consideration.
  56. * That is, excluded code points in a string do not change
  57. * the selection result. (They might be handled by a callback.)
  58. * Use NULL to exclude nothing.
  59. * @param whichSet which converter set to use. Use this to determine whether
  60. * to consider only roundtrip mappings or also fallbacks.
  61. * @param status an in/out ICU UErrorCode
  62. * @return the new selector; close it with ucnvsel_close()
  63. *
  64. * @stable ICU 4.2
  65. */
  66. U_STABLE UConverterSelector* U_EXPORT2
  67. ucnvsel_open(const char* const* converterList, int32_t converterListSize,
  68. const USet* excludedCodePoints,
  69. const UConverterUnicodeSet whichSet, UErrorCode* status);
  70. /**
  71. * Closes a selector.
  72. * If any Enumerations were returned by ucnvsel_select*, they become invalid.
  73. * They can be closed before or after calling ucnvsel_close,
  74. * but should never be used after the selector is closed.
  75. *
  76. * @see ucnvsel_selectForString
  77. * @see ucnvsel_selectForUTF8
  78. *
  79. * @param sel selector to close
  80. *
  81. * @stable ICU 4.2
  82. */
  83. U_STABLE void U_EXPORT2
  84. ucnvsel_close(UConverterSelector *sel);
  85. #if U_SHOW_CPLUSPLUS_API
  86. U_NAMESPACE_BEGIN
  87. /**
  88. * \class LocalUConverterSelectorPointer
  89. * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close().
  90. * For most methods see the LocalPointerBase base class.
  91. *
  92. * @see LocalPointerBase
  93. * @see LocalPointer
  94. * @stable ICU 4.4
  95. */
  96. U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);
  97. U_NAMESPACE_END
  98. #endif /* U_SHOW_CPLUSPLUS_API */
  99. /**
  100. * Open a selector from its serialized form (see ucnvsel_serialize()).
  101. * The buffer must remain valid and unchanged for the lifetime of the selector.
  102. * This is much faster than creating a selector from scratch.
  103. * Using a serialized form from a different machine (endianness/charset) is supported.
  104. *
  105. * @param buffer pointer to the serialized form of a converter selector;
  106. * must be 32-bit-aligned
  107. * @param length the capacity of this buffer (can be equal to or larger than
  108. * the actual data length)
  109. * @param status an in/out ICU UErrorCode
  110. * @return the new selector; close it with ucnvsel_close()
  111. *
  112. * @stable ICU 4.2
  113. */
  114. U_STABLE UConverterSelector* U_EXPORT2
  115. ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
  116. /**
  117. * Serialize a selector into a linear buffer, for later use with ucnvsel_openFromSerialized().
  118. * The serialized form is portable to different machines.
  119. *
  120. * @param sel selector to consider
  121. * @param buffer pointer to 32-bit-aligned memory to be filled with the
  122. * serialized form of this converter selector
  123. * @param bufferCapacity the capacity of this buffer
  124. * @param status an in/out ICU UErrorCode
  125. * @return the required buffer capacity to hold the serialized data (even if the call fails
  126. * with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
  127. *
  128. * @stable ICU 4.2
  129. */
  130. U_STABLE int32_t U_EXPORT2
  131. ucnvsel_serialize(const UConverterSelector* sel,
  132. void* buffer, int32_t bufferCapacity, UErrorCode* status);
  133. /**
  134. * Select converters that can map all characters in a UTF-16 string,
  135. * ignoring the excluded code points.
  136. *
  137. * @param sel a selector
  138. * @param s UTF-16 string
  139. * @param length length of the string, or -1 if NUL-terminated
  140. * @param status an in/out ICU UErrorCode
  141. * @return an enumeration containing encoding names.
  142. * The returned encoding names and their order will be the same as
  143. * supplied when building the selector. Do not use the enumeration after ucnvsel_close(sel).
  144. *
  145. * @stable ICU 4.2
  146. */
  147. U_STABLE UEnumeration * U_EXPORT2
  148. ucnvsel_selectForString(const UConverterSelector* sel,
  149. const UChar *s, int32_t length, UErrorCode *status);
  150. /**
  151. * Select converters that can map all characters in a UTF-8 string,
  152. * ignoring the excluded code points.
  153. *
  154. * @param sel a selector
  155. * @param s UTF-8 string
  156. * @param length length of the string, or -1 if NUL-terminated
  157. * @param status an in/out ICU UErrorCode
  158. * @return an enumeration containing encoding names.
  159. * The returned encoding names and their order will be the same as
  160. * supplied when building the selector. Do not use the enumeration after ucnvsel_close(sel).
  161. *
  162. * @stable ICU 4.2
  163. */
  164. U_STABLE UEnumeration * U_EXPORT2
  165. ucnvsel_selectForUTF8(const UConverterSelector* sel,
  166. const char *s, int32_t length, UErrorCode *status);
  167. #endif /* !UCONFIG_NO_CONVERSION */
  168. #endif /* __ICU_UCNV_SEL_H__ */