utf8lib.c

   1 #include "quakedef.h"
   2 #include "utf8lib.h"
   3
   4 /*
   5 ================================================================================
   6 Initialization of UTF-8 support and new cvars.
   7 ================================================================================
   8 */
   9 // for compatibility this defaults to 0
  10 cvar_t    utf8_enable = {CVAR_SAVE, "utf8_enable", "0", "Enable UTF-8 support. For compatibility, this is disabled by default in most games."};
  11
  12 void   u8_Init(void)
  13 {
  14         Cvar_RegisterVariable(&utf8_enable);
  15 }
  16
  17 /*
  18 ================================================================================
  19 UTF-8 encoding and decoding functions follow.
  20 ================================================================================
  21 */
  22
  23 /** Analyze the next character and return various information if requested.
  24  * @param _s      An utf-8 string.
  25  * @param _start  Filled with the start byte-offset of the next valid character
  26  * @param _len    Fileed with the length of the next valid character
  27  * @param _ch     Filled with the unicode value of the next character
  28  * @param _maxlen Maximum number of bytes to read from _s
  29  * @return        Whether or not another valid character is in the string
  30  */
  31 #define U8_ANALYZE_INFINITY 7
  32 static qboolean u8_analyze(const char *_s, size_t *_start, size_t *_len, Uchar *_ch, size_t _maxlen)
  33 {
  34         const unsigned char *s = (const unsigned char*)_s;
  35         unsigned char bt, bc;
  36         size_t i;
  37         size_t bits, j;
  38         Uchar ch;
  39
  40         i = 0;
  41 findchar:
  42
  43         // <0xC2 is always an overlong encoding, they're invalid, thus skipped
  44         while (i < _maxlen && s[i] && s[i] >= 0x80 && s[i] < 0xC2) {
  45                 //fprintf(stderr, "skipping\n");
  46                 ++i;
  47         }
  48
  49         //fprintf(stderr, "checking\n");
  50         // If we hit the end, well, we're out and invalid
  51         if(i >= _maxlen || !s[i]) {
  52                 if (_start) *_start = i;
  53                 if (_len) *_len = 0;
  54                 return false;
  55         }
  56
  57         //fprintf(stderr, "checking ascii\n");
  58         // ascii characters
  59         if (s[i] < 0x80)
  60         {
  61                 if (_start) *_start = i;
  62                 if (_len) *_len = 1;
  63                 if (_ch) *_ch = (Uchar)s[i];
  64                 //fprintf(stderr, "valid ascii\n");
  65                 return true;
  66         }
  67         //fprintf(stderr, "checking length\n");
  68
  69         // Figure out the next char's length
  70         bc = s[i];
  71         bits = 1;
  72         // count the 1 bits, they're the # of bytes
  73         for (bt = 0x40; bt && (bc & bt); bt >>= 1, ++bits);
  74         if (!bt)
  75         {
  76                 //fprintf(stderr, "superlong\n");
  77                 ++i;
  78                 goto findchar;
  79         }
  80         if(i + bits > _maxlen) {
  81                 if (_start) *_start = i;
  82                 if (_len) *_len = 0;
  83                 return false;
  84         }
  85         // turn bt into a mask and give ch a starting value
  86         --bt;
  87         ch = (s[i] & bt);
  88         // check the byte sequence for invalid bytes
  89         for (j = 1; j < bits; ++j)
  90         {
  91                 // valid bit value: 10xx xxxx
  92                 //if (s[i+j] < 0x80 || s[i+j] >= 0xC0)
  93                 if ( (s[i+j] & 0xC0) != 0x80 )
  94                 {
  95                         //fprintf(stderr, "sequence of %i f'd at %i by %x\n", bits, j, (unsigned int)s[i+j]);
  96                         // this byte sequence is invalid, skip it
  97                         i += j;
  98                         // find a character after it
  99                         goto findchar;
 100                 }
 101                 // at the same time, decode the character
 102                 ch = (ch << 6) | (s[i+j] & 0x3F);
 103         }
 104
 105         // Now check the decoded byte for an overlong encoding
 106         if ( (bits >= 2 && ch < 0x80) ||
 107              (bits >= 3 && ch < 0x800) ||
 108              (bits >= 4 && ch < 0x10000) ||
 109              ch >= 0x10FFFF // RFC 3629
 110                 )
 111         {
 112                 i += bits;
 113                 //fprintf(stderr, "overlong: %i bytes for %x\n", bits, ch);
 114                 goto findchar;
 115         }
 116
 117         if (_start)
 118                 *_start = i;
 119         if (_len)
 120                 *_len = bits;
 121         if (_ch)
 122                 *_ch = ch;
 123         //fprintf(stderr, "valid utf8\n");
 124         return true;
 125 }
 126
 127 /** Get the number of characters in an UTF-8 string.
 128  * @param _s    An utf-8 encoded null-terminated string.
 129  * @return      The number of unicode characters in the string.
 130  */
 131 size_t u8_strlen(const char *_s)
 132 {
 133         size_t st, ln;
 134         size_t len = 0;
 135         const unsigned char *s = (const unsigned char*)_s;
 136
 137         if (!utf8_enable.integer)
 138                 return strlen(_s);
 139
 140         while (*s)
 141         {
 142                 // ascii char, skip u8_analyze
 143                 if (*s < 0x80)
 144                 {
 145                         ++len;
 146                         ++s;
 147                         continue;
 148                 }
 149
 150                 // invalid, skip u8_analyze
 151                 if (*s < 0xC2)
 152                 {
 153                         ++s;
 154                         continue;
 155                 }
 156
 157                 if (!u8_analyze((const char*)s, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 158                         break;
 159                 // valid character, skip after it
 160                 s += st + ln;
 161                 ++len;
 162         }
 163         return len;
 164 }
 165
 166 /** Get the number of characters in a part of an UTF-8 string.
 167  * @param _s    An utf-8 encoded null-terminated string.
 168  * @param n     The maximum number of bytes.
 169  * @return      The number of unicode characters in the string.
 170  */
 171 size_t u8_strnlen(const char *_s, size_t n)
 172 {
 173         size_t st, ln;
 174         size_t len = 0;
 175         const unsigned char *s = (const unsigned char*)_s;
 176
 177         if (!utf8_enable.integer)
 178         {
 179                 len = strlen(_s);
 180                 return (len < n) ? len : n;
 181         }
 182
 183         while (*s && n)
 184         {
 185                 // ascii char, skip u8_analyze
 186                 if (*s < 0x80)
 187                 {
 188                         ++len;
 189                         ++s;
 190                         --n;
 191                         continue;
 192                 }
 193
 194                 // invalid, skip u8_analyze
 195                 if (*s < 0xC2)
 196                 {
 197                         ++s;
 198                         --n;
 199                         continue;
 200                 }
 201
 202                 if (!u8_analyze((const char*)s, &st, &ln, NULL, n))
 203                         break;
 204                 // valid character, see if it's still inside the range specified by n:
 205                 if (n < st + ln)
 206                         return len;
 207                 ++len;
 208                 n -= st + ln;
 209                 s += st + ln;
 210         }
 211         return len;
 212 }
 213
 214 /** Get the number of bytes used in a string to represent an amount of characters.
 215  * @param _s    An utf-8 encoded null-terminated string.
 216  * @param n     The number of characters we want to know the byte-size for.
 217  * @return      The number of bytes used to represent n characters.
 218  */
 219 size_t u8_bytelen(const char *_s, size_t n)
 220 {
 221         size_t st, ln;
 222         size_t len = 0;
 223         const unsigned char *s = (const unsigned char*)_s;
 224
 225         if (!utf8_enable.integer)
 226                 return n;
 227
 228         while (*s && n)
 229         {
 230                 // ascii char, skip u8_analyze
 231                 if (*s < 0x80)
 232                 {
 233                         ++len;
 234                         ++s;
 235                         --n;
 236                         continue;
 237                 }
 238
 239                 // invalid, skip u8_analyze
 240                 if (*s < 0xC2)
 241                 {
 242                         ++s;
 243                         ++len;
 244                         continue;
 245                 }
 246
 247                 if (!u8_analyze((const char*)s, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 248                         break;
 249                 --n;
 250                 s += st + ln;
 251                 len += st + ln;
 252         }
 253         return len;
 254 }
 255
 256 /** Get the byte-index for a character-index.
 257  * @param _s      An utf-8 encoded string.
 258  * @param i       The character-index for which you want the byte offset.
 259  * @param len     If not null, character's length will be stored in there.
 260  * @return        The byte-index at which the character begins, or -1 if the string is too short.
 261  */
 262 int u8_byteofs(const char *_s, size_t i, size_t *len)
 263 {
 264         size_t st, ln;
 265         size_t ofs = 0;
 266         const unsigned char *s = (const unsigned char*)_s;
 267
 268         if (!utf8_enable.integer)
 269         {
 270                 if (len) *len = 1;
 271                 return i;
 272         }
 273
 274         st = ln = 0;
 275         do
 276         {
 277                 ofs += ln;
 278                 if (!u8_analyze((const char*)s + ofs, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 279                         return -1;
 280                 ofs += st;
 281         } while(i-- > 0);
 282         if (len)
 283                 *len = ln;
 284         return ofs;
 285 }
 286
 287 /** Get the char-index for a byte-index.
 288  * @param _s      An utf-8 encoded string.
 289  * @param i       The byte offset for which you want the character index.
 290  * @param len     If not null, the offset within the character is stored here.
 291  * @return        The character-index, or -1 if the string is too short.
 292  */
 293 int u8_charidx(const char *_s, size_t i, size_t *len)
 294 {
 295         size_t st, ln;
 296         size_t ofs = 0;
 297         size_t pofs = 0;
 298         int idx = 0;
 299         const unsigned char *s = (const unsigned char*)_s;
 300
 301         if (!utf8_enable.integer)
 302         {
 303                 if (len) *len = 0;
 304                 return i;
 305         }
 306
 307         while (ofs < i && s[ofs])
 308         {
 309                 // ascii character, skip u8_analyze
 310                 if (s[ofs] < 0x80)
 311                 {
 312                         pofs = ofs;
 313                         ++idx;
 314                         ++ofs;
 315                         continue;
 316                 }
 317
 318                 // invalid, skip u8_analyze
 319                 if (s[ofs] < 0xC2)
 320                 {
 321                         ++ofs;
 322                         continue;
 323                 }
 324
 325                 if (!u8_analyze((const char*)s+ofs, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 326                         return -1;
 327                 // see if next char is after the bytemark
 328                 if (ofs + st > i)
 329                 {
 330                         if (len)
 331                                 *len = i - pofs;
 332                         return idx;
 333                 }
 334                 ++idx;
 335                 pofs = ofs + st;
 336                 ofs += st + ln;
 337                 // see if bytemark is within the char
 338                 if (ofs > i)
 339                 {
 340                         if (len)
 341                                 *len = i - pofs;
 342                         return idx;
 343                 }
 344         }
 345         if (len) *len = 0;
 346         return idx;
 347 }
 348
 349 /** Get the byte offset of the previous byte.
 350  * The result equals:
 351  * prevchar_pos = u8_byteofs(text, u8_charidx(text, thischar_pos, NULL) - 1, NULL)
 352  * @param _s      An utf-8 encoded string.
 353  * @param i       The current byte offset.
 354  * @return        The byte offset of the previous character
 355  */
 356 size_t u8_prevbyte(const char *_s, size_t i)
 357 {
 358         size_t st, ln;
 359         const unsigned char *s = (const unsigned char*)_s;
 360         size_t lastofs = 0;
 361         size_t ofs = 0;
 362
 363         if (!utf8_enable.integer)
 364         {
 365                 if (i > 0)
 366                         return i-1;
 367                 return 0;
 368         }
 369
 370         while (ofs < i && s[ofs])
 371         {
 372                 // ascii character, skip u8_analyze
 373                 if (s[ofs] < 0x80)
 374                 {
 375                         lastofs = ofs++;
 376                         continue;
 377                 }
 378
 379                 // invalid, skip u8_analyze
 380                 if (s[ofs] < 0xC2)
 381                 {
 382                         ++ofs;
 383                         continue;
 384                 }
 385
 386                 if (!u8_analyze((const char*)s+ofs, &st, &ln, NULL, U8_ANALYZE_INFINITY))
 387                         return lastofs;
 388                 if (ofs + st > i)
 389                         return lastofs;
 390                 if (ofs + st + ln >= i)
 391                         return ofs + st;
 392
 393                 lastofs = ofs;
 394                 ofs += st + ln;
 395         }
 396         return lastofs;
 397 }
 398
 399 static int char_usefont[256] = {
 400         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // specials
 401         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // specials
 402         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // shift+digit line
 403         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // digits
 404         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // caps
 405         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // caps
 406         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // small
 407         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // small
 408         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // specials
 409         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // faces
 410         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 411         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 412         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 413         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 414         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 415         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 416 };
 417
 418
 419 /** Fetch a character from an utf-8 encoded string.
 420  * @param _s      The start of an utf-8 encoded multi-byte character.
 421  * @param _end    Will point to after the first multi-byte character.
 422  * @return        The 32-bit integer representation of the first multi-byte character or 0 for invalid characters.
 423  */
 424 Uchar u8_getchar(const char *_s, const char **_end)
 425 {
 426         size_t st, ln;
 427         Uchar ch;
 428
 429         if (!utf8_enable.integer)
 430         {
 431                 if (_end)
 432                         *_end = _s + 1;
 433                 /* Careful: if we disable utf8 but not freetype, we wish to see freetype chars
 434                  * for normal letters. So use E000+x for special chars, but leave the freetype stuff for the
 435                  * rest:
 436                  */
 437                 if (!char_usefont[(unsigned int)*(const unsigned char*)_s])
 438                         return 0xE000 + (Uchar)*(const unsigned char*)_s;
 439                 return (Uchar)*(const unsigned char*)_s;
 440         }
 441
 442         if (!u8_analyze(_s, &st, &ln, &ch, U8_ANALYZE_INFINITY))
 443                 ch = 0;
 444         if (_end)
 445                 *_end = _s + st + ln;
 446         return ch;
 447 }
 448
 449 /** Fetch a character from an utf-8 encoded string.
 450  * @param _s      The start of an utf-8 encoded multi-byte character.
 451  * @param _end    Will point to after the first multi-byte character.
 452  * @return        The 32-bit integer representation of the first multi-byte character or 0 for invalid characters.
 453  */
 454 Uchar u8_getnchar(const char *_s, const char **_end, size_t _maxlen)
 455 {
 456         size_t st, ln;
 457         Uchar ch;
 458
 459         if (!utf8_enable.integer)
 460         {
 461                 if (_end)
 462                         *_end = _s + 1;
 463                 /* Careful: if we disable utf8 but not freetype, we wish to see freetype chars
 464                  * for normal letters. So use E000+x for special chars, but leave the freetype stuff for the
 465                  * rest:
 466                  */
 467                 if (!char_usefont[(unsigned int)*(const unsigned char*)_s])
 468                         return 0xE000 + (Uchar)*(const unsigned char*)_s;
 469                 return (Uchar)*(const unsigned char*)_s;
 470         }
 471
 472         if (!u8_analyze(_s, &st, &ln, &ch, _maxlen))
 473                 ch = 0;
 474         if (_end)
 475                 *_end = _s + st + ln;
 476         return ch;
 477 }
 478
 479 /** Encode a wide-character into utf-8.
 480  * @param w        The wide character to encode.
 481  * @param to       The target buffer the utf-8 encoded string is stored to.
 482  * @param maxlen   The maximum number of bytes that fit into the target buffer.
 483  * @return         Number of bytes written to the buffer not including the terminating null.
 484  *                 Less or equal to 0 if the buffer is too small.
 485  */
 486 int u8_fromchar(Uchar w, char *to, size_t maxlen)
 487 {
 488         if (maxlen < 1)
 489                 return 0;
 490
 491         if (!w)
 492                 return 0;
 493
 494         if (w >= 0xE000 && !utf8_enable.integer)
 495                 w -= 0xE000;
 496
 497         if (w < 0x80 || !utf8_enable.integer)
 498         {
 499                 to[0] = (char)w;
 500                 if (maxlen < 2)
 501                         return -1;
 502                 to[1] = 0;
 503                 return 1;
 504         }
 505         // for a little speedup
 506         if (w < 0x800)
 507         {
 508                 if (maxlen < 3)
 509                 {
 510                         to[0] = 0;
 511                         return -1;
 512                 }
 513                 to[2] = 0;
 514                 to[1] = 0x80 | (w & 0x3F); w >>= 6;
 515                 to[0] = 0xC0 | w;
 516                 return 2;
 517         }
 518         if (w < 0x10000)
 519         {
 520                 if (maxlen < 4)
 521                 {
 522                         to[0] = 0;
 523                         return -1;
 524                 }
 525                 to[3] = 0;
 526                 to[2] = 0x80 | (w & 0x3F); w >>= 6;
 527                 to[1] = 0x80 | (w & 0x3F); w >>= 6;
 528                 to[0] = 0xE0 | w;
 529                 return 3;
 530         }
 531
 532         // RFC 3629
 533         if (w <= 0x10FFFF)
 534         {
 535                 if (maxlen < 5)
 536                 {
 537                         to[0] = 0;
 538                         return -1;
 539                 }
 540                 to[4] = 0;
 541                 to[3] = 0x80 | (w & 0x3F); w >>= 6;
 542                 to[2] = 0x80 | (w & 0x3F); w >>= 6;
 543                 to[1] = 0x80 | (w & 0x3F); w >>= 6;
 544                 to[0] = 0xE0 | w;
 545                 return 4;
 546         }
 547         return 0;
 548 }
 549
 550 /** uses u8_fromchar on a static buffer
 551  * @param ch        The unicode character to convert to encode
 552  * @param l         The number of bytes without the terminating null.
 553  * @return          A statically allocated buffer containing the character's utf8 representation, or NULL if it fails.
 554  */
 555 char *u8_encodech(Uchar ch, size_t *l)
 556 {
 557         static char buf[16];
 558         size_t len;
 559         len = u8_fromchar(ch, buf, sizeof(buf));
 560         if (len > 0)
 561         {
 562                 if (l) *l = len;
 563                 return buf;
 564         }
 565         return NULL;
 566 }
 567
 568 /** Convert a utf-8 multibyte string to a wide character string.
 569  * @param wcs       The target wide-character buffer.
 570  * @param mb        The utf-8 encoded multibyte string to convert.
 571  * @param maxlen    The maximum number of wide-characters that fit into the target buffer.
 572  * @return          The number of characters written to the target buffer.
 573  */
 574 size_t u8_mbstowcs(Uchar *wcs, const char *mb, size_t maxlen)
 575 {
 576         size_t i;
 577         Uchar ch;
 578         if (maxlen < 1)
 579                 return 0;
 580         for (i = 0; *mb && i < maxlen-1; ++i)
 581         {
 582                 ch = u8_getchar(mb, &mb);
 583                 if (!ch)
 584                         break;
 585                 wcs[i] = ch;
 586         }
 587         wcs[i] = 0;
 588         return i;
 589 }
 590
 591 /** Convert a wide-character string to a utf-8 multibyte string.
 592  * @param mb      The target buffer the utf-8 string is written to.
 593  * @param wcs     The wide-character string to convert.
 594  * @param maxlen  The number bytes that fit into the multibyte target buffer.
 595  * @return        The number of bytes written, not including the terminating \0
 596  */
 597 size_t u8_wcstombs(char *mb, const Uchar *wcs, size_t maxlen)
 598 {
 599         size_t i;
 600         const char *start = mb;
 601         if (maxlen < 2)
 602                 return 0;
 603         for (i = 0; wcs[i] && i < maxlen-1; ++i)
 604         {
 605                 /*
 606                 int len;
 607                 if ( (len = u8_fromchar(wcs[i], mb, maxlen - i)) < 0)
 608                         return (mb - start);
 609                 mb += len;
 610                 */
 611                 mb += u8_fromchar(wcs[i], mb, maxlen - i);
 612         }
 613         *mb = 0;
 614         return (mb - start);
 615 }
 616
 617 /*
 618 ============
 619 UTF-8 aware COM_StringLengthNoColors
 620
 621 calculates the visible width of a color coded string.
 622
 623 *valid is filled with TRUE if the string is a valid colored string (that is, if
 624 it does not end with an unfinished color code). If it gets filled with FALSE, a
 625 fix would be adding a STRING_COLOR_TAG at the end of the string.
 626
 627 valid can be set to NULL if the caller doesn't care.
 628
 629 For size_s, specify the maximum number of characters from s to use, or 0 to use
 630 all characters until the zero terminator.
 631 ============
 632 */
 633 size_t
 634 COM_StringLengthNoColors(const char *s, size_t size_s, qboolean *valid);
 635 size_t
 636 u8_COM_StringLengthNoColors(const char *_s, size_t size_s, qboolean *valid)
 637 {
 638         const unsigned char *s = (const unsigned char*)_s;
 639         const unsigned char *end;
 640         size_t len = 0;
 641
 642         if (!utf8_enable.integer)
 643                 return COM_StringLengthNoColors(_s, size_s, valid);
 644
 645         end = size_s ? (s + size_s) : NULL;
 646
 647         for(;;)
 648         {
 649                 switch((s == end) ? 0 : *s)
 650                 {
 651                         case 0:
 652                                 if(valid)
 653                                         *valid = TRUE;
 654                                 return len;
 655                         case STRING_COLOR_TAG:
 656                                 ++s;
 657                                 switch((s == end) ? 0 : *s)
 658                                 {
 659                                         case STRING_COLOR_RGB_TAG_CHAR:
 660                                                 if (s+1 != end && isxdigit(s[1]) &&
 661                                                         s+2 != end && isxdigit(s[2]) &&
 662                                                         s+3 != end && isxdigit(s[3]) )
 663                                                 {
 664                                                         s+=3;
 665                                                         break;
 666                                                 }
 667                                                 ++len; // STRING_COLOR_TAG
 668                                                 ++len; // STRING_COLOR_RGB_TAG_CHAR
 669                                                 break;
 670                                         case 0: // ends with unfinished color code!
 671                                                 ++len;
 672                                                 if(valid)
 673                                                         *valid = FALSE;
 674                                                 return len;
 675                                         case STRING_COLOR_TAG: // escaped ^
 676                                                 ++len;
 677                                                 break;
 678                                         case '0': case '1': case '2': case '3': case '4':
 679                                         case '5': case '6': case '7': case '8': case '9': // color code
 680                                                 break;
 681                                         default: // not a color code
 682                                                 ++len; // STRING_COLOR_TAG
 683                                                 ++len; // the character
 684                                                 break;
 685                                 }
 686                                 break;
 687                         default:
 688                                 ++len;
 689                                 break;
 690                 }
 691
 692                 // start of a wide character
 693                 if (*s & 0xC0)
 694                 {
 695                         for (++s; *s >= 0x80 && *s <= 0xC0; ++s);
 696                         continue;
 697                 }
 698                 // part of a wide character, we ignore that one
 699                 if (*s <= 0xBF)
 700                         --len;
 701                 ++s;
 702         }
 703         // never get here
 704 }
 705
 706 /** Pads a utf-8 string
 707  * @param out     The target buffer the utf-8 string is written to.
 708  * @param outsize The size of the target buffer, including the final NUL
 709  * @param in      The input utf-8 buffer
 710  * @param leftalign Left align the output string (by default right alignment is done)
 711  * @param minwidth The minimum output width
 712  * @param maxwidth The maximum output width
 713  * @return        The number of bytes written, not including the terminating \0
 714  */
 715 size_t u8_strpad(char *out, size_t outsize, const char *in, qboolean leftalign, size_t minwidth, size_t maxwidth)
 716 {
 717         if(!utf8_enable.integer)
 718         {
 719                 return dpsnprintf(out, outsize, "%*.*s", leftalign ? -(int) minwidth : (int) minwidth, (int) maxwidth, in);
 720         }
 721         else
 722         {
 723                 size_t l = u8_bytelen(in, maxwidth);
 724                 size_t actual_width = u8_strnlen(in, l);
 725                 int pad = (actual_width >= minwidth) ? 0 : (minwidth - actual_width);
 726                 int prec = l;
 727                 int lpad = leftalign ? 0 : pad;
 728                 int rpad = leftalign ? pad : 0;
 729                 return dpsnprintf(out, outsize, "%*s%.*s%*s", lpad, "", prec, in, rpad, "");
 730         }
 731 }