This patch addresses two problems. The first one is relatively minor: according to our own manpage, the upper and lower classes must be sorted, but currently they are not. The second one is serious: tr '[:lower:]' '[:upper:]' (and vice versa) currently works only if the upper and lower classes have exactly the same number of elements. When that is not true, as in many ISO8859-x locales which have more lowercase than uppercase letters, tr may do nasty things. The patch is complex because the whole conversion string needs to be processed each time an l->u or u->l conversion occurs, not a single character at a time as in the previous variant. See this page http://www.opengroup.org/onlinepubs/007908799/xcu/tr.html for a detailed description of the desired tr behaviour in such cases. Please test this patch on your system & locale and report any strange things to me. diff -u ./extern.h /usr/src/usr.bin/tr/extern.h --- ./extern.h Fri Jun 14 19:56:52 2002 +++ /usr/src/usr.bin/tr/extern.h Fri Aug 1 04:19:36 2003 _at__at_ -40,7 +40,8 _at__at_ typedef struct { enum { STRING1, STRING2 } which; - enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state; + enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, + SET, SET_UPPER, SET_LOWER } state; int cnt; /* character count */ int lastch; /* last character */ int equiv[NCHARS]; /* equivalence set */ _at__at_ -49,3 +50,5 _at__at_ } STR; int next(STR *); +int charcoll(const void *, const void *); + diff -u ./str.c /usr/src/usr.bin/tr/str.c --- ./str.c Fri Jul 5 13:28:13 2002 +++ /usr/src/usr.bin/tr/str.c Fri Aug 1 04:22:11 2003 _at__at_ -106,6 +106,8 _at__at_ } return (1); case SET: + case SET_UPPER: + case SET_LOWER: if ((s->lastch = s->set[s->cnt++]) == OOBCH) { s->state = NORMAL; return (next(s)); _at__at_ -194,7 +196,7 _at__at_ { int cnt, (*func)(int); CLASS *cp, tmp; - int *p; + int *p, n; tmp.name = s->str; if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) / _at__at_ -208,10 +210,18 _at__at_ if ((func)(cnt)) *p++ = cnt; *p = OOBCH; + n = p - cp->set; s->cnt = 0; - s->state = SET; 
s->set = cp->set; + if (strcmp(s->str, "upper") == 0) + s->state = SET_UPPER; + else if (strcmp(s->str, "lower") == 0) { + s->state = SET_LOWER; + } else + s->state = SET; + if ((s->state == SET_LOWER || s->state == SET_UPPER) && n > 1) + mergesort(s->set, n, sizeof(*(s->set)), charcoll); } static int diff -u ./tr.c /usr/src/usr.bin/tr/tr.c --- ./tr.c Thu Sep 5 03:29:07 2002 +++ /usr/src/usr.bin/tr/tr.c Fri Aug 1 04:32:01 2003 _at__at_ -101,8 +101,9 _at__at_ STR s1 = { STRING1, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL }; STR s2 = { STRING2, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL }; -static int charcoll(const void *, const void *); static void setup(int *, char *, STR *, int, int); +static void process_upper(int); +static void process_lower(int); static void usage(void); int _at__at_ -110,7 +111,7 _at__at_ { static int collorder[NCHARS], tmpmap[NCHARS]; int ch, cnt, lastch, *p; - int Cflag, cflag, dflag, sflag, isstring2; + int Cflag, cflag, dflag, sflag, isstring2, do_upper, do_lower; (void)setlocale(LC_ALL, ""); _at__at_ -224,19 +225,67 _at__at_ if (!next(&s2)) errx(1, "empty string2"); - ch = s2.lastch; + do_upper = do_lower = 0; /* If string2 runs out of characters, use the last one specified. 
*/ - if (sflag) - while (next(&s1)) { - string1[s1.lastch] = ch = s2.lastch; - string2[ch] = 1; - (void)next(&s2); - } - else - while (next(&s1)) { - string1[s1.lastch] = ch = s2.lastch; - (void)next(&s2); + while (next(&s1)) { + if (s1.state == SET_LOWER && + s2.state == SET_UPPER) { + if (do_lower) { + process_lower(sflag); + do_lower = 0; + } + do_upper = 1; + } else if (s1.state == SET_UPPER && + s2.state == SET_LOWER) { + if (do_upper) { + process_upper(sflag); + do_upper = 0; + } + do_lower = 1; + } else { + if (do_lower) { + /* Skip until aligned */ + if (s1.state == SET_UPPER) { + do { + if (!next(&s1)) + goto endloop; + } while (s1.state == SET_UPPER); + } else if (s2.state == SET_LOWER) { + do { + if (!next(&s2)) + break; + } while (s2.state == SET_LOWER); + } + process_lower(sflag); + do_lower = 0; + } else if (do_upper) { + /* Skip until aligned */ + if (s1.state == SET_LOWER) { + do { + if (!next(&s1)) + goto endloop; + } while (s1.state == SET_LOWER); + } else if (s2.state == SET_UPPER) { + do { + if (!next(&s2)) + break; + } while (s2.state == SET_UPPER); + } + process_upper(sflag); + do_upper = 0; + } + string1[s1.lastch] = s2.lastch; + if (sflag) + string2[s2.lastch] = 1; } + (void)next(&s2); + } +endloop: + if (do_lower) + process_lower(sflag); + else if (do_upper) + process_upper(sflag); + /* End of upper & lower special processing */ if (cflag || Cflag) { s2.str = argv[1]; _at__at_ -294,15 +343,55 _at__at_ string[cnt] = !string[cnt] && ISCHAR(cnt); } -static int +int charcoll(const void *a, const void *b) { - char sa[2], sb[2]; + static char sa[2], sb[2]; sa[0] = *(const int *)a; sb[0] = *(const int *)b; - sa[1] = sb[1] = '\0'; return (strcoll(sa, sb)); +} + + +/* + * For -s result will contain only those characters defined + * as the second characters in each of the toupper or tolower + * pairs. 
+ */ + +static void +process_upper(int sflag) +{ + int cnt, ch; + + for (cnt = 0; cnt < NCHARS; cnt++) { + ch = string1[cnt]; + if (ch == OOBCH) /* [Cc]flag */ + ch = cnt; + if (islower(ch)) { + string1[cnt] = ch = toupper(ch); + if (sflag && isupper(ch)) + string2[ch] = 1; + } + } +} + +static void +process_lower(int sflag) +{ + int cnt, ch; + + for (cnt = 0; cnt < NCHARS; cnt++) { + ch = string1[cnt]; + if (ch == OOBCH) /* [Cc]flag */ + ch = cnt; + if (isupper(ch)) { + string1[cnt] = ch = tolower(ch); + if (sflag && islower(ch)) + string2[ch] = 1; + } + } } static void Received on Thu Jul 31 2003 - 15:44:13 UTC
This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:37:17 UTC