To give you an idea of what it is like to convert between Unicode code points and their UTF-8 encoding, consider the following program:
#include <stdio.h>
#include <locale.h>
#include <stdlib.h>
#include <stdio.h>
int main(void){
wchar_t ucs2[5] = {0};
if( !setlocale( LC_ALL , "en_AU.UTF-8" ) ){
printf( "Unable to set locale to Australian English in UTF-8\n" );
exit( 1 );
}
// The UTF-8 representation of the string "水调歌头" (U+6C34 U+8C03 U+6B4C U+5934):
// four Chinese characters pronounced shui3 diao4 ge1 tou2.
char utf8[] = "\xE6\xB0\xB4\xE8\xB0\x83\xE6\xAD\x8C\xE5\xA4\xB4" ;
mbstowcs( ucs2 , utf8 , sizeof(ucs2) / sizeof(*ucs2) );
printf( "UTF-8: " );
for( char *p = utf8 ; *p ; p++ )
printf( "%02X ", (unsigned)(unsigned char)*p );
printf( "\n" );
printf( "Unicode: " );
for( wchar_t *p = ucs2 ; *p ; p++ )
printf( "U+%04lX ", (unsigned long) *p ...