25 #define VLC_CHARSET_H 1 62 VLC_USED
static inline const char *
IsUTF8(
const char *str)
67 while ((n =
vlc_towc(str, &cp)) != 0)
68 if (
likely(n != (
size_t)-1))
86 VLC_USED
static inline const char *
IsASCII(
const char *str)
90 for (
const char *
p = str; (c = *
p) !=
'\0';
p++)
120 while ((n =
vlc_towc(str, &cp)) != 0)
121 if (
likely(n != (
size_t)-1))
132 #define VLC_ICONV_ERR ((size_t) -1) 144 VLC_API char *
FromCharset( const
char *charset, const
void *data,
size_t data_size ) VLC_USED;
145 VLC_API void *
ToCharset( const
char *charset, const
char *in,
size_t *outsize ) VLC_USED;
148 # include <CoreFoundation/CoreFoundation.h> 153 VLC_USED
static inline char *FromCFString(
const CFStringRef cfString,
154 const CFStringEncoding cfStringEncoding)
157 const char *tmpBuffer = CFStringGetCStringPtr(cfString, cfStringEncoding);
159 if (tmpBuffer != NULL) {
164 CFIndex length = CFStringGetLength(cfString);
166 CFStringGetMaximumSizeForEncoding(length, cfStringEncoding);
169 if (
unlikely(maxSize == kCFNotFound)) {
176 char *buffer = (
char *)malloc(maxSize);
183 Boolean success = CFStringGetCString(cfString, buffer, maxSize, cfStringEncoding);
193 static inline char *FromWide (
const wchar_t *wide)
195 size_t len = WideCharToMultiByte (CP_UTF8, 0, wide, -1, NULL, 0, NULL, NULL);
199 char *out = (
char *)malloc (len);
202 WideCharToMultiByte (CP_UTF8, 0, wide, -1, out, len, NULL, NULL);
207 static inline wchar_t *ToWide (
const char *utf8)
209 int len = MultiByteToWideChar (CP_UTF8, 0, utf8, -1, NULL, 0);
213 wchar_t *out = (
wchar_t *)malloc (len *
sizeof (
wchar_t));
216 MultiByteToWideChar (CP_UTF8, 0, utf8, -1, out, len);
221 static inline char *ToCodePage (
unsigned cp,
const char *utf8)
223 wchar_t *wide = ToWide (utf8);
227 size_t len = WideCharToMultiByte (cp, 0, wide, -1, NULL, 0, NULL, NULL);
233 char *out = (
char *)malloc (len);
235 WideCharToMultiByte (cp, 0, wide, -1, out, len, NULL, NULL);
241 static inline char *FromCodePage (
unsigned cp,
const char *mb)
243 int len = MultiByteToWideChar (cp, 0, mb, -1, NULL, 0);
247 wchar_t *wide = (
wchar_t *)malloc (len *
sizeof (
wchar_t));
250 MultiByteToWideChar (cp, 0, mb, -1, wide, len);
252 char *utf8 = FromWide (wide);
258 static inline char *FromANSI (
const char *ansi)
260 return FromCodePage (GetACP (), ansi);
264 static inline char *ToANSI (
const char *utf8)
266 return ToCodePage (GetACP (), utf8);
269 # define FromLocale FromANSI 270 # define ToLocale ToANSI 271 # define LocaleFree(s) free((char *)(s)) 272 # define FromLocaleDup FromANSI 273 # define ToLocaleDup ToANSI 275 #elif defined(__OS2__) 277 VLC_USED
static inline char *
FromLocale (
const char *locale)
279 return locale ?
FromCharset ((
char *)
"", locale, strlen(locale)) : NULL;
282 VLC_USED
static inline char *
ToLocale (
const char *utf8)
285 return utf8 ? (
char *)
ToCharset (
"", utf8, &outsize) : NULL;
288 VLC_USED
static inline void LocaleFree (
const char *str)
293 VLC_USED
static inline char *
FromLocaleDup (
const char *locale)
298 VLC_USED
static inline char *
ToLocaleDup (
const char *utf8)
301 return (
char *)
ToCharset (
"", utf8, &outsize);
306 # define FromLocale(l) (l) 307 # define ToLocale(u) (u) 308 # define LocaleFree(s) ((void)(s)) 309 # define FromLocaleDup strdup 310 # define ToLocaleDup strdup 316 static inline char *
FromLatin1 (
const char *latin)
318 char *str = (
char *)malloc (2 * strlen (latin) + 1), *utf8 = str;
324 while ((c = *(latin++)) !=
'\0')
328 *(utf8++) = 0xC0 | (c >> 6);
329 *(utf8++) = 0x80 | (c & 0x3F);
336 utf8 = (
char *)realloc (str, utf8 - str);
337 return utf8 ? utf8 : str;
static char * FromLatin1(const char *latin)
Converts a nul-terminated string from ISO-8859-1 to UTF-8.
Definition: vlc_charset.h:317
int utf8_vfprintf(FILE *stream, const char *fmt, va_list ap)
Formats a UTF-8 string like vfprintf(), then prints it, with appropriate conversion to local encoding...
Definition: unicode.c:52
vlc_iconv_t vlc_iconv_open(const char *, const char *)
char * strdup(const char *)
int utf8_fprintf(FILE *, const char *,...)
Formats a UTF-8 string like fprintf(), then prints it, with appropriate conversion to local encoding...
Definition: unicode.c:102
#define ToLocale(u)
Definition: vlc_charset.h:308
static const char * IsUTF8(const char *str)
Checks UTF-8 validity.
Definition: vlc_charset.h:63
#define ToLocaleDup
Definition: vlc_charset.h:311
#define FromLocale(l)
Definition: vlc_charset.h:307
#define VLC_MALLOC
Heap allocated result function annotation.
Definition: vlc_common.h:167
#define LocaleFree(s)
Definition: vlc_charset.h:309
double us_atof(const char *)
us_atof() has the same prototype as ANSI C atof() but it expects a dot as decimal separator...
Definition: charset.c:88
char * FromCharset(const char *charset, const void *data, size_t data_size)
Converts a string from the given character encoding to utf-8.
Definition: unicode.c:237
static char * EnsureUTF8(char *str)
Removes non-UTF-8 sequences.
Definition: vlc_charset.h:115
#define unlikely(p)
Predicted false condition.
Definition: vlc_common.h:223
#define FREENULL(a)
Definition: vlc_common.h:958
void * vlc_iconv_t
Definition: vlc_charset.h:134
#define likely(p)
Predicted true condition.
Definition: vlc_common.h:214
#define FromLocaleDup
Definition: vlc_charset.h:310
char * vlc_strcasestr(const char *, const char *)
Looks for a UTF-8 string within another one in a case-insensitive fashion.
Definition: unicode.c:198
float us_strtof(const char *, char **)
us_strtof() has the same prototype as ANSI C strtof() but it uses the POSIX/C decimal format...
Definition: charset.c:69
#define VLC_API
Definition: fourcc_gen.c:31
static const char * IsASCII(const char *str)
Checks ASCII validity.
Definition: vlc_charset.h:87
#define VLC_FORMAT(x, y)
String format function annotation.
Definition: vlc_common.h:141
void * ToCharset(const char *charset, const char *in, size_t *outsize)
Converts a nul-terminated UTF-8 string to a given character encoding.
Definition: unicode.c:279
int us_asprintf(char **, const char *,...)
us_asprintf() has the same prototype as asprintf(), but doesn't use the system locale.
Definition: charset.c:119
int us_vasprintf(char **, const char *, va_list)
us_vasprintf() has the same prototype as vasprintf(), but doesn't use the system locale.
Definition: charset.c:98
size_t vlc_iconv(vlc_iconv_t, const char **, size_t *, char **, size_t *)
size_t vlc_towc(const char *str, uint32_t *restrict pwc)
Decodes a code point from UTF-8.
Definition: unicode.c:113
#define VLC_USED
Definition: fourcc_gen.c:32
int vlc_iconv_close(vlc_iconv_t)
double us_strtod(const char *, char **)
us_strtod() has the same prototype as ANSI C strtod() but it uses the POSIX/C decimal format...
Definition: charset.c:50