2

We have the following function:

// Percent-encodes the UTF-8 representation of a wide string.
//
// The input is converted to UTF-8 with the Win32 WideCharToMultiByte API and
// then EVERY byte of the result is emitted as "%XX" (two upper-case hex
// digits); no character is passed through unescaped.
//
// Because the source length is passed as -1, the conversion stops at the
// first NUL wide character in `input`.
//
// @param input  Wide string to encode.
// @return       The encoded string, or an empty string if the conversion
//               fails or produces nothing.
std::string url_encode_wstring(const std::wstring &input)
{
    std::string output;

    // First call only measures: with a source length of -1 the returned byte
    // count includes the terminating NUL.
    const int cbNeeded = WideCharToMultiByte(CP_UTF8, 0, input.c_str(), -1, NULL, 0, NULL, NULL);
    if (cbNeeded <= 0)
        return output;

    // A std::string owns the scratch buffer so it is released even if an
    // append below throws (the previous new[]/delete[] pair leaked then).
    std::string utf8(static_cast<std::string::size_type>(cbNeeded), '\0');
    if (WideCharToMultiByte(CP_UTF8, 0, input.c_str(), -1, &utf8[0], cbNeeded, NULL, NULL) == 0)
        return output;

    static const char kHexDigits[] = "0123456789ABCDEF";
    output.reserve(3 * static_cast<std::string::size_type>(cbNeeded - 1));
    for (const char *p = utf8.c_str(); *p; ++p) {
        const unsigned char byte = static_cast<unsigned char>(*p);
        output += '%';
        output += kHexDigits[byte >> 4];
        output += kHexDigits[byte & 0x0F];
    }
    return output;
}

It's great for Windows, but I wonder how (and whether it is possible) to make it work under Linux?

1
  • 1
    Use wcstombs and mbstowcs. Take a look at this answer for some example code. Commented Aug 26, 2011 at 20:59

1 Answer 1

3

IMHO you should use a portable character codec library. Here's an example of minimal portable code using iconv, which should be more than enough. It's supposed to work on Windows as well (if it does, you can get rid of your Windows-specific code altogether). I follow the GNU guidelines and do not use the wcstombs & co. functions ( https://www.gnu.org/s/hello/manual/libc/iconv-Examples.html ). Depending on the use case, handle errors appropriately; to enhance performance, you can create a class out of it.

#include <iostream>

#include <iconv.h>
#include <cerrno>
#include <cstring>
#include <stdexcept>

// Converts a wide string to a UTF-8 encoded std::string using iconv.
//
// The whole input is converted, including any embedded NUL characters, and
// the result length is the number of bytes iconv actually produced.
//
// @param input  Wide string in the platform's native wchar_t encoding.
// @return       UTF-8 bytes for `input`; empty when `input` is empty.
// @throws std::runtime_error if the iconv descriptor cannot be opened, the
//         conversion fails, or the descriptor cannot be closed.
std::string wstring_to_utf8_string(const std::wstring &input)
{
    // Nothing to convert; also avoids handing iconv a zero-sized buffer.
    if (input.empty())
        return std::string();

    // Closes the descriptor on every exit path, including exceptions.
    struct IconvGuard {
        iconv_t cd;
        explicit IconvGuard(iconv_t d) : cd(d) {}
        ~IconvGuard() { if (cd != iconv_t(-1)) iconv_close(cd); }
    };

    IconvGuard conv(iconv_open("UTF-8", "wchar_t"));
    if (conv.cd == iconv_t(-1)) {
        const int err = errno; // capture before anything else can overwrite it
        throw std::runtime_error(std::string("Could not open iconv: ") + strerror(err));
    }

    // iconv takes a non-const char** even though it only reads the input.
    char *in_buf = const_cast<char *>(reinterpret_cast<const char *>(input.data()));
    size_t in_size = input.length() * sizeof(wchar_t);

    // Pessimistic bound on the UTF-8 size: 6 bytes per wide character.
    std::string out(input.length() * 6, '\0');
    char *out_buf = &out[0];
    size_t out_left = out.size();

    if (iconv(conv.cd, &in_buf, &in_size, &out_buf, &out_left) == static_cast<size_t>(-1)) {
        const int err = errno;
        throw std::runtime_error(std::string("When converting: ") + strerror(err));
    }

    // Close explicitly so a failure can be reported; disarm the guard first
    // so the descriptor is never closed twice.
    const iconv_t cd = conv.cd;
    conv.cd = iconv_t(-1);
    if (iconv_close(cd) != 0) {
        const int err = errno;
        throw std::runtime_error(std::string("Could not close iconv: ") + strerror(err));
    }

    // Keep exactly the bytes written rather than relying on a NUL terminator.
    out.resize(out.size() - out_left);
    return out;
}


// Demo driver: prints a sample wide string on the wide standard output, then
// prints its converted narrow form on the standard error stream.
int main() {
    const std::wstring text = L"hello world";
    std::wcout << L"input: [" << text << L"]" << std::endl;

    const std::string encoded = wstring_to_utf8_string(text);
    std::cerr << "output: [" << encoded << "]" << std::endl;

    return 0;
}
Sign up to request clarification or add additional context in comments.

1 Comment

IMHO many of the objections against wcstombs don't apply to std::locale and co. Using iconv is good advice, though.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.