123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245 |
- #include "cm_codecvt.hxx"
- #if defined(_WIN32)
- #include <assert.h>
- #include <string.h>
- #include <windows.h>
- #undef max
- #include "cmsys/Encoding.hxx"
- #endif
- #if defined(_WIN32)
- extern "C" unsigned char const cm_utf8_ones[256];
- #endif
- codecvt::codecvt(Encoding e)
- #if defined(_WIN32)
- : m_codepage(0)
- #endif
- {
- switch (e) {
- case codecvt::ANSI:
- #if defined(_WIN32)
- m_noconv = false;
- m_codepage = CP_ACP;
- break;
- #endif
-
-
- case codecvt::UTF8:
-
- case codecvt::None:
-
- default:
- m_noconv = true;
- }
- }
- codecvt::~codecvt()
- {
- }
- bool codecvt::do_always_noconv() const throw()
- {
- return m_noconv;
- }
- std::codecvt_base::result codecvt::do_out(mbstate_t& state, const char* from,
- const char* from_end,
- const char*& from_next, char* to,
- char* to_end, char*& to_next) const
- {
- from_next = from;
- to_next = to;
- if (m_noconv) {
- return std::codecvt_base::noconv;
- }
- #if defined(_WIN32)
-
-
-
- State const& lstate = reinterpret_cast<State&>(state);
- while (from_next != from_end) {
-
- unsigned char const ones =
- cm_utf8_ones[static_cast<unsigned char>(*from_next)];
- if (ones != 1 && lstate.buffered != 0) {
-
- return std::codecvt_base::error;
- } else if (ones == 1 && lstate.buffered == 0) {
-
- return std::codecvt_base::error;
- }
-
- int need = 0;
- switch (ones) {
- case 0:
- need = 1;
- break;
- case 1:
- assert(lstate.size != 0);
- need = lstate.size;
- break;
- case 2:
- need = 2;
- break;
- case 3:
- need = 3;
- break;
- case 4:
- need = 4;
- break;
- default:
- return std::codecvt_base::error;
- }
- assert(need > 0);
- if (lstate.buffered + 1 == need) {
-
- std::codecvt_base::result decode_result =
- this->Decode(state, need, from_next, to_next, to_end);
- if (decode_result != std::codecvt_base::ok) {
- return decode_result;
- }
- } else {
-
- this->BufferPartial(state, need, from_next);
- }
- }
- return std::codecvt_base::ok;
- #else
- static_cast<void>(state);
- static_cast<void>(from);
- static_cast<void>(from_end);
- static_cast<void>(from_next);
- static_cast<void>(to);
- static_cast<void>(to_end);
- static_cast<void>(to_next);
- return std::codecvt_base::noconv;
- #endif
- }
- std::codecvt_base::result codecvt::do_unshift(mbstate_t& state, char* to,
- char* to_end,
- char*& to_next) const
- {
- to_next = to;
- if (m_noconv) {
- return std::codecvt_base::noconv;
- }
- #if defined(_WIN32)
- State& lstate = reinterpret_cast<State&>(state);
- if (lstate.buffered != 0) {
- return this->DecodePartial(state, to_next, to_end);
- }
- return std::codecvt_base::ok;
- #else
- static_cast<void>(state);
- static_cast<void>(to_end);
- return std::codecvt_base::ok;
- #endif
- }
- #if defined(_WIN32)
- std::codecvt_base::result codecvt::Decode(mbstate_t& state, int size,
- const char*& from_next,
- char*& to_next, char* to_end) const
- {
- State& lstate = reinterpret_cast<State&>(state);
-
- char buf[4];
- memcpy(buf, lstate.partial, lstate.buffered);
- buf[lstate.buffered] = *from_next;
-
- wchar_t wbuf[2];
- int wlen =
- MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, buf, size, wbuf, 2);
- if (wlen <= 0) {
- return std::codecvt_base::error;
- }
- int tlen = WideCharToMultiByte(m_codepage, 0, wbuf, wlen, to_next,
- to_end - to_next, NULL, NULL);
- if (tlen <= 0) {
- if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
- return std::codecvt_base::partial;
- }
- return std::codecvt_base::error;
- }
-
- ++from_next;
-
- to_next += tlen;
-
- lstate = State();
- return std::codecvt_base::ok;
- }
- std::codecvt_base::result codecvt::DecodePartial(mbstate_t& state,
- char*& to_next,
- char* to_end) const
- {
- State& lstate = reinterpret_cast<State&>(state);
-
- wchar_t wbuf[2];
- int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, lstate.partial,
- lstate.buffered, wbuf, 2);
- if (wlen <= 0) {
- return std::codecvt_base::error;
- }
- int tlen = WideCharToMultiByte(m_codepage, 0, wbuf, wlen, to_next,
- to_end - to_next, NULL, NULL);
- if (tlen <= 0) {
- if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
- return std::codecvt_base::partial;
- }
- return std::codecvt_base::error;
- }
-
- to_next += tlen;
-
- lstate = State();
- return std::codecvt_base::ok;
- }
- void codecvt::BufferPartial(mbstate_t& state, int size,
- const char*& from_next) const
- {
- State& lstate = reinterpret_cast<State&>(state);
-
- lstate.partial[lstate.buffered++] = *from_next;
- lstate.size = size;
-
- ++from_next;
- }
- #endif
- int codecvt::do_max_length() const throw()
- {
- return 4;
- }
- int codecvt::do_encoding() const throw()
- {
- return 0;
- }
|