00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00036 #include <str.h>
00037 #include <stdlib.h>
00038 #include <assert.h>
00039 #include <stdint.h>
00040 #include <ctype.h>
00041 #include <malloc.h>
00042 #include <errno.h>
00043 #include <align.h>
00044 #include <mem.h>
00045 #include <str.h>
00046
/** Byte mask consisting of the lowest @a n bits (valid for n in 0..8). */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/** 32-bit mask consisting of the lowest @a n bits (valid for n in 0..31). */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/** Byte mask consisting of the highest @a n bits (valid for n in 0..8).
 *
 * The result is cast back to uint8_t because the ~ operator promotes its
 * operand to int, which would otherwise yield a negative value.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte. */
#define CONT_BITS  6
00058
00074 wchar_t str_decode(const char *str, size_t *offset, size_t size)
00075 {
00076 if (*offset + 1 > size)
00077 return 0;
00078
00079
00080 uint8_t b0 = (uint8_t) str[(*offset)++];
00081
00082
00083
00084 unsigned int b0_bits;
00085 unsigned int cbytes;
00086
00087 if ((b0 & 0x80) == 0) {
00088
00089 b0_bits = 7;
00090 cbytes = 0;
00091 } else if ((b0 & 0xe0) == 0xc0) {
00092
00093 b0_bits = 5;
00094 cbytes = 1;
00095 } else if ((b0 & 0xf0) == 0xe0) {
00096
00097 b0_bits = 4;
00098 cbytes = 2;
00099 } else if ((b0 & 0xf8) == 0xf0) {
00100
00101 b0_bits = 3;
00102 cbytes = 3;
00103 } else {
00104
00105 return U_SPECIAL;
00106 }
00107
00108 if (*offset + cbytes > size)
00109 return U_SPECIAL;
00110
00111 wchar_t ch = b0 & LO_MASK_8(b0_bits);
00112
00113
00114 while (cbytes > 0) {
00115 uint8_t b = (uint8_t) str[(*offset)++];
00116
00117
00118 if ((b & 0xc0) != 0x80)
00119 return U_SPECIAL;
00120
00121
00122 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
00123 cbytes--;
00124 }
00125
00126 return ch;
00127 }
00128
00144 int chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
00145 {
00146 if (*offset >= size)
00147 return EOVERFLOW;
00148
00149 if (!chr_check(ch))
00150 return EINVAL;
00151
00152
00153
00154 uint32_t cc = (uint32_t) ch;
00155
00156
00157
00158 unsigned int b0_bits;
00159 unsigned int cbytes;
00160
00161 if ((cc & ~LO_MASK_32(7)) == 0) {
00162 b0_bits = 7;
00163 cbytes = 0;
00164 } else if ((cc & ~LO_MASK_32(11)) == 0) {
00165 b0_bits = 5;
00166 cbytes = 1;
00167 } else if ((cc & ~LO_MASK_32(16)) == 0) {
00168 b0_bits = 4;
00169 cbytes = 2;
00170 } else if ((cc & ~LO_MASK_32(21)) == 0) {
00171 b0_bits = 3;
00172 cbytes = 3;
00173 } else {
00174
00175 return EINVAL;
00176 }
00177
00178
00179 if (*offset + cbytes >= size)
00180 return EOVERFLOW;
00181
00182
00183 unsigned int i;
00184 for (i = cbytes; i > 0; i--) {
00185 str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
00186 cc = cc >> CONT_BITS;
00187 }
00188
00189
00190 str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
00191
00192
00193 *offset += cbytes + 1;
00194
00195 return EOK;
00196 }
00197
/** Get the size of a string in bytes.
 *
 * @param str NUL-terminated string.
 *
 * @return Number of bytes preceding the NUL terminator.
 */
size_t str_size(const char *str)
{
	const char *end = str;

	while (*end != '\0')
		end++;

	return (size_t) (end - str);
}
00217
/** Get the size of a wide string in bytes.
 *
 * @param str NUL-terminated wide string.
 *
 * @return Number of bytes occupied by the characters preceding the
 *         terminator.
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
00232
00246 size_t str_lsize(const char *str, size_t max_len)
00247 {
00248 size_t len = 0;
00249 size_t offset = 0;
00250
00251 while (len < max_len) {
00252 if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
00253 break;
00254
00255 len++;
00256 }
00257
00258 return offset;
00259 }
00260
/** Get the size of a wide string prefix, in bytes.
 *
 * @param str     NUL-terminated wide string.
 * @param max_len Maximum number of characters to measure.
 *
 * @return Number of bytes used by at most @a max_len leading characters.
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	size_t byte_limit = max_len * sizeof(wchar_t);
	size_t chars = wstr_nlength(str, byte_limit);

	return chars * sizeof(wchar_t);
}
00278
00286 size_t str_length(const char *str)
00287 {
00288 size_t len = 0;
00289 size_t offset = 0;
00290
00291 while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
00292 len++;
00293
00294 return len;
00295 }
00296
/** Get the number of characters in a wide string.
 *
 * @param wstr NUL-terminated wide string.
 *
 * @return Number of characters preceding the terminator.
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *end = wstr;

	while (*end != 0)
		end++;

	return (size_t) (end - wstr);
}
00313
/** Get the number of characters in a size-limited string.
 *
 * @param str  String (need not be NUL-terminated within @a size bytes).
 * @param size Maximum number of bytes to examine.
 *
 * @return Number of characters decoded before str_decode() returned 0.
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count;

	for (count = 0; str_decode(str, &pos, size) != 0; count++)
		;

	return count;
}
00332
/** Get the number of characters in a size-limited wide string.
 *
 * @param str  Wide string (need not be terminated within @a size bytes).
 * @param size Maximum number of bytes to examine; rounded down to a
 *             multiple of sizeof(wchar_t).
 *
 * @return Number of characters found before the terminator or the limit.
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t count = 0;

	for (size_t bytes = 0; bytes < limit; bytes += sizeof(wchar_t)) {
		if (str[count] == 0)
			break;

		count++;
	}

	return count;
}
00354
/** Check whether a character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0..127, false otherwise.
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
00367
/** Check whether a character value is within the accepted range.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0..1114111 (0x10ffff),
 *         false otherwise.
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x10ffff);
}
00380
00394 int str_cmp(const char *s1, const char *s2)
00395 {
00396 wchar_t c1 = 0;
00397 wchar_t c2 = 0;
00398
00399 size_t off1 = 0;
00400 size_t off2 = 0;
00401
00402 while (true) {
00403 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
00404 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
00405
00406 if (c1 < c2)
00407 return -1;
00408
00409 if (c1 > c2)
00410 return 1;
00411
00412 if (c1 == 0 || c2 == 0)
00413 break;
00414 }
00415
00416 return 0;
00417 }
00418
00433 int str_lcmp(const char *s1, const char *s2, size_t max_len)
00434 {
00435 wchar_t c1 = 0;
00436 wchar_t c2 = 0;
00437
00438 size_t off1 = 0;
00439 size_t off2 = 0;
00440
00441 size_t len = 0;
00442
00443 while (true) {
00444 if (len >= max_len)
00445 break;
00446
00447 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
00448 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
00449
00450 if (c1 < c2)
00451 return -1;
00452
00453 if (c1 > c2)
00454 return 1;
00455
00456 if (c1 == 0 || c2 == 0)
00457 break;
00458
00459 ++len;
00460 }
00461
00462 return 0;
00463
00464 }
00465
00477 void str_cpy(char *dest, size_t size, const char *src)
00478 {
00479
00480 assert(size > 0);
00481
00482 size_t src_off = 0;
00483 size_t dest_off = 0;
00484
00485 wchar_t ch;
00486 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
00487 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
00488 break;
00489 }
00490
00491 dest[dest_off] = '\0';
00492 }
00493
00509 void str_ncpy(char *dest, size_t size, const char *src, size_t n)
00510 {
00511
00512 assert(size > 0);
00513
00514 size_t src_off = 0;
00515 size_t dest_off = 0;
00516
00517 wchar_t ch;
00518 while ((ch = str_decode(src, &src_off, n)) != 0) {
00519 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
00520 break;
00521 }
00522
00523 dest[dest_off] = '\0';
00524 }
00525
/** Append one string to another inside a bounded buffer.
 *
 * Appends as many whole characters of @a src as fit after the current
 * contents of @a dest. If @a dest already occupies the whole buffer (or is
 * not terminated within it), nothing is done.
 *
 * @param dest Destination buffer holding a NUL-terminated string.
 * @param size Total size of the destination buffer in bytes.
 * @param src  NUL-terminated source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size = str_size(dest);

	/*
	 * No room left: bail out rather than letting size - dstr_size
	 * reach zero (assertion in str_cpy) or wrap around (overflow).
	 */
	if (dstr_size >= size)
		return;

	str_cpy(dest + dstr_size, size - dstr_size, src);
}
00544
00555 void wstr_to_str(char *dest, size_t size, const wchar_t *src)
00556 {
00557 wchar_t ch;
00558 size_t src_idx;
00559 size_t dest_off;
00560
00561
00562 assert(size > 0);
00563
00564 src_idx = 0;
00565 dest_off = 0;
00566
00567 while ((ch = src[src_idx++]) != 0) {
00568 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
00569 break;
00570 }
00571
00572 dest[dest_off] = '\0';
00573 }
00574
00583 char *wstr_to_astr(const wchar_t *src)
00584 {
00585 char dbuf[STR_BOUNDS(1)];
00586 char *str;
00587 wchar_t ch;
00588
00589 size_t src_idx;
00590 size_t dest_off;
00591 size_t dest_size;
00592
00593
00594
00595 src_idx = 0;
00596 dest_size = 0;
00597
00598 while ((ch = src[src_idx++]) != 0) {
00599 dest_off = 0;
00600 if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
00601 break;
00602 dest_size += dest_off;
00603 }
00604
00605 str = malloc(dest_size + 1);
00606 if (str == NULL)
00607 return NULL;
00608
00609
00610
00611 src_idx = 0;
00612 dest_off = 0;
00613
00614 while ((ch = src[src_idx++]) != 0) {
00615 if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
00616 break;
00617 }
00618
00619 str[dest_size] = '\0';
00620 return str;
00621 }
00622
00623
00634 void str_to_wstr(wchar_t *dest, size_t dlen, const char *src)
00635 {
00636 size_t offset;
00637 size_t di;
00638 wchar_t c;
00639
00640 assert(dlen > 0);
00641
00642 offset = 0;
00643 di = 0;
00644
00645 do {
00646 if (di >= dlen - 1)
00647 break;
00648
00649 c = str_decode(src, &offset, STR_NO_LIMIT);
00650 dest[di++] = c;
00651 } while (c != '\0');
00652
00653 dest[dlen - 1] = '\0';
00654 }
00655
00663 char *str_chr(const char *str, wchar_t ch)
00664 {
00665 wchar_t acc;
00666 size_t off = 0;
00667 size_t last = 0;
00668
00669 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
00670 if (acc == ch)
00671 return (char *) (str + last);
00672 last = off;
00673 }
00674
00675 return NULL;
00676 }
00677
00685 char *str_rchr(const char *str, wchar_t ch)
00686 {
00687 wchar_t acc;
00688 size_t off = 0;
00689 size_t last = 0;
00690 const char *res = NULL;
00691
00692 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
00693 if (acc == ch)
00694 res = (str + last);
00695 last = off;
00696 }
00697
00698 return (char *) res;
00699 }
00700
/** Insert a character into a wide string.
 *
 * Shifts the characters from @a pos onwards (terminator included) one
 * position up and stores @a ch at @a pos.
 *
 * @param str     NUL-terminated wide string, modified in place.
 * @param ch      Character to insert.
 * @param pos     Index at which to insert.
 * @param max_pos Upper bound checked against @a pos + 1.
 *
 * @return True on success, false if @a pos is beyond the end of the string
 *         or @a pos + 1 exceeds @a max_pos.
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if (pos > len)
		return false;

	if (pos + 1 > max_pos)
		return false;

	/* Move the tail, terminator included, working from the end */
	for (size_t dst = len + 1; dst > pos; dst--)
		str[dst] = str[dst - 1];

	str[pos] = ch;

	return true;
}
00730
/** Remove a character from a wide string.
 *
 * Shifts the characters following @a pos (terminator included) one
 * position down.
 *
 * @param str NUL-terminated wide string, modified in place.
 * @param pos Index of the character to remove.
 *
 * @return True on success, false if @a pos is not inside the string.
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* The last iteration copies the terminator */
	for (size_t dst = pos; dst < len; dst++)
		str[dst] = str[dst + 1];

	return true;
}
00756
/** Compare two strings byte by byte, ignoring case.
 *
 * Bytes are converted to unsigned char before being handed to tolower(),
 * because passing a negative char value to <ctype.h> functions is
 * undefined behaviour.
 *
 * @param a First NUL-terminated string.
 * @param b Second NUL-terminated string.
 *
 * @return Zero if the strings are equal ignoring case; otherwise the
 *         difference of the lower-cased bytes at the first mismatch
 *         (negative if @a a sorts first, positive if @a b does).
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	for (;;) {
		int ca = tolower((unsigned char) a[i]);
		int cb = tolower((unsigned char) b[i]);

		/* Stop on the first mismatch or when both strings end */
		if ((ca != cb) || (ca == 0))
			return ca - cb;

		i++;
	}
}
00766
/** Convert the initial part of a string to an unsigned long.
 *
 * Common back end of strtol() and strtoul(). Leading white space and an
 * optional sign are skipped; the sign is only reported through @a sgn and
 * is not applied to the result.
 *
 * @param nptr   String to convert.
 * @param endptr If not NULL, receives a pointer to the first character that
 *               was not converted (@a nptr itself if nothing was). It is
 *               not written when the conversion overflows.
 * @param base   Numeric base, 2..36, or 0 to detect it from a "0" (octal)
 *               or "0x"/"0X" (hexadecimal) prefix.
 * @param sgn    Set to 1 if a minus sign was seen; otherwise left untouched.
 *
 * @return Converted value, 0 if no conversion was possible or the base is
 *         invalid, ULONG_MAX on overflow.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned char c;
	unsigned long result = 0;
	unsigned long a, b;
	const char *str = nptr;
	const char *tmpptr;

	/* <ctype.h> functions require an unsigned char value */
	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base < 0) || (base == 1) || (base > 36)) {
			/* Invalid base: nothing is converted (errno is not set) */
			if (endptr)
				*endptr = (char *) nptr;
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			/* The leading zero itself is parsed as an octal digit */
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		/* Map the character to its digit value; 0xff marks a non-digit */
		c = *str;
		c = (c >= 'a' ? c - 'a' + 10 : (c >= 'A' ? c - 'A' + 10 :
		    (c <= '9' ? c - '0' : 0xff)));

		/* Valid digits are 0 .. base - 1 */
		if (c >= base) {
			break;
		}

		/*
		 * Multiply-accumulate in two halves so that an overflow
		 * shows up in the upper part instead of wrapping silently.
		 */
		a = (result & 0xff) * base + c;
		b = (result >> 8) * base + (a >> 8);

		if (b > (ULONG_MAX >> 8)) {
			/* Overflow (errno is not set) */
			return ULONG_MAX;
		}

		result = (b << 8) + (a & 0xff);
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No digits were converted: rewind so that endptr reports
		 * the original string, sign and prefix included.
		 */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (nptr == str) {
		/* Nothing was converted (errno is not set) */
		return 0;
	}

	return result;
}
00859
/** Convert the initial part of a string to a long integer.
 *
 * @param nptr   String to convert.
 * @param endptr Passed through to _strtoul(); see there.
 * @param base   Numeric base, passed through to _strtoul().
 *
 * @return Converted value; LONG_MAX or LONG_MIN if the magnitude does not
 *         fit into a long.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char negative = 0;
	unsigned long magnitude = _strtoul(nptr, endptr, base, &negative);

	if (magnitude <= LONG_MAX)
		return (negative ? -magnitude : magnitude);

	/* The magnitude of LONG_MIN is exactly LONG_MAX + 1 */
	if (negative && (magnitude == (unsigned long) (LONG_MAX) + 1))
		return magnitude;

	/* Out of range (errno is not set) */
	return (negative ? LONG_MIN : LONG_MAX);
}
00891
/** Duplicate a string.
 *
 * @param src NUL-terminated source string.
 *
 * @return Copy of @a src in a buffer allocated with malloc(), or NULL if
 *         the allocation failed.
 */
char *str_dup(const char *src)
{
	/* Room for all bytes of the string plus the terminator */
	size_t bytes = str_size(src) + 1;

	char *copy = (char *) malloc(bytes);
	if (copy != NULL)
		str_cpy(copy, bytes, src);

	return copy;
}
00918
/** Duplicate at most @a n bytes of a string.
 *
 * @param src NUL-terminated source string.
 * @param n   Maximum number of bytes to copy.
 *
 * @return NUL-terminated copy in a buffer allocated with malloc(), or NULL
 *         if the allocation failed.
 */
char *str_ndup(const char *src, size_t n)
{
	size_t full = str_size(src);
	size_t bytes = (full > n) ? n : full;

	/* One extra byte for the terminator */
	char *copy = (char *) malloc(bytes + 1);
	if (copy != NULL)
		str_ncpy(copy, bytes + 1, src, bytes);

	return copy;
}
00952
00953
/** Convert the initial part of a string to an unsigned long.
 *
 * @param nptr   String to convert.
 * @param endptr Passed through to _strtoul(); see there.
 * @param base   Numeric base, passed through to _strtoul().
 *
 * @return Converted value; negated in unsigned arithmetic if the input
 *         carried a minus sign.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char negative = 0;
	unsigned long value = _strtoul(nptr, endptr, base, &negative);

	if (negative)
		return -value;

	return value;
}
00976
/** Split a string into tokens, keeping the position in static storage.
 *
 * Thin wrapper around strtok_r() using a function-local static variable to
 * remember the position between calls.
 *
 * @param s     String to tokenize, or NULL to continue with the previous one.
 * @param delim Set of delimiter characters.
 *
 * @return Next token, or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *saved;
	char *token = strtok_r(s, delim, &saved);

	return token;
}
00983
/** Split a string into tokens.
 *
 * The token is terminated in place by overwriting the delimiter that
 * follows it.
 *
 * @param s     String to tokenize, or NULL to continue from @a *next.
 * @param delim Set of delimiter characters.
 * @param next  Position storage; updated to where the next call resumes.
 *
 * @return Next token, or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *cur = (s == NULL) ? *next : s;

	/* Skip over leading delimiters */
	while ((*cur != '\0') && (str_chr(delim, *cur) != NULL))
		cur++;

	char *start = cur;

	/* Skip over the token itself */
	while ((*cur != '\0') && (str_chr(delim, *cur) == NULL))
		cur++;

	char *end = cur;

	/* Resume after the delimiter, or stay on the string terminator */
	*next = (*cur != '\0') ? cur + 1 : cur;

	if (start == end)
		return NULL;

	/* Overwrite the delimiter to terminate the token */
	*end = '\0';
	return start;
}
01008
01020 static int str_uint(const char *nptr, char **endptr, unsigned int base,
01021 bool *neg, uint64_t *result)
01022 {
01023 assert(endptr != NULL);
01024 assert(neg != NULL);
01025 assert(result != NULL);
01026
01027 *neg = false;
01028 const char *str = nptr;
01029
01030
01031 while (isspace(*str))
01032 str++;
01033
01034 if (*str == '-') {
01035 *neg = true;
01036 str++;
01037 } else if (*str == '+')
01038 str++;
01039
01040 if (base == 0) {
01041
01042 base = 10;
01043
01044 if (*str == '0') {
01045 base = 8;
01046 str++;
01047
01048 switch (*str) {
01049 case 'b':
01050 case 'B':
01051 base = 2;
01052 str++;
01053 break;
01054 case 'o':
01055 case 'O':
01056 base = 8;
01057 str++;
01058 break;
01059 case 'd':
01060 case 'D':
01061 case 't':
01062 case 'T':
01063 base = 10;
01064 str++;
01065 break;
01066 case 'x':
01067 case 'X':
01068 base = 16;
01069 str++;
01070 break;
01071 default:
01072 str--;
01073 }
01074 }
01075 } else {
01076
01077 if ((base < 2) || (base > 36)) {
01078 *endptr = (char *) str;
01079 return EINVAL;
01080 }
01081 }
01082
01083 *result = 0;
01084 const char *startstr = str;
01085
01086 while (*str != 0) {
01087 unsigned int digit;
01088
01089 if ((*str >= 'a') && (*str <= 'z'))
01090 digit = *str - 'a' + 10;
01091 else if ((*str >= 'A') && (*str <= 'Z'))
01092 digit = *str - 'A' + 10;
01093 else if ((*str >= '0') && (*str <= '9'))
01094 digit = *str - '0';
01095 else
01096 break;
01097
01098 if (digit >= base)
01099 break;
01100
01101 uint64_t prev = *result;
01102 *result = (*result) * base + digit;
01103
01104 if (*result < prev) {
01105
01106 *endptr = (char *) str;
01107 return EOVERFLOW;
01108 }
01109
01110 str++;
01111 }
01112
01113 if (str == startstr) {
01114
01115
01116
01117
01118 str = nptr;
01119 }
01120
01121 *endptr = (char *) str;
01122
01123 if (str == nptr)
01124 return EINVAL;
01125
01126 return EOK;
01127 }
01128
01141 int str_uint64(const char *nptr, char **endptr, unsigned int base,
01142 bool strict, uint64_t *result)
01143 {
01144 assert(result != NULL);
01145
01146 bool neg;
01147 char *lendptr;
01148 int ret = str_uint(nptr, &lendptr, base, &neg, result);
01149
01150 if (endptr != NULL)
01151 *endptr = (char *) lendptr;
01152
01153 if (ret != EOK)
01154 return ret;
01155
01156
01157 if (neg)
01158 return EINVAL;
01159
01160
01161
01162 if ((strict) && (*lendptr != 0))
01163 return EINVAL;
01164
01165 return EOK;
01166 }
01167
01180 int str_size_t(const char *nptr, char **endptr, unsigned int base,
01181 bool strict, size_t *result)
01182 {
01183 assert(result != NULL);
01184
01185 bool neg;
01186 char *lendptr;
01187 uint64_t res;
01188 int ret = str_uint(nptr, &lendptr, base, &neg, &res);
01189
01190 if (endptr != NULL)
01191 *endptr = (char *) lendptr;
01192
01193 if (ret != EOK)
01194 return ret;
01195
01196
01197 if (neg)
01198 return EINVAL;
01199
01200
01201
01202 if ((strict) && (*lendptr != 0))
01203 return EINVAL;
01204
01205
01206 size_t _res = (size_t) res;
01207 if (_res != res)
01208 return EOVERFLOW;
01209
01210 *result = _res;
01211
01212 return EOK;
01213 }
01214
/** Scale a value for display with a decimal (SI) unit prefix.
 *
 * A prefix is applied only once the value exceeds one thousand units of
 * that prefix, so the scaled value keeps at least four significant digits.
 *
 * @param val    Value to scale.
 * @param rv     Receives the scaled value (integer division).
 * @param suffix Receives the prefix letter: 'k', 'M', 'G', 'T', 'P' or 'E',
 *               or a space if the value is not scaled.
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* 10^18: exa */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* 10^15: peta */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		/* 10^12: tera */
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		/* 10^9: giga */
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		/* 10^6: mega */
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		/* 10^3: kilo */
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
01240
/** Scale a byte count for display with a binary (IEC) unit.
 *
 * A unit is applied only once the value exceeds 1024 of that unit, so the
 * scaled value keeps at least four significant digits.
 *
 * @param val    Value in bytes.
 * @param rv     Receives the scaled value (integer division).
 * @param suffix Receives a pointer to a constant unit string: "KiB", "MiB",
 *               "GiB", "TiB" or "PiB"; for unscaled values "B", or "B  "
 *               padded to three characters when @a fixed is set.
 * @param fixed  If true, the plain byte unit is padded to the width of the
 *               other units.
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* 2^50: pebi */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		/* 2^40: tebi */
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		/* 2^30: gibi */
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		/* 2^20: mebi */
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		/* 2^10: kibi */
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B  ";
		else
			*suffix = "B";
	}
}
01267