Revised version (was Re: Serious 'tr' bug, patch for review included)

From: Andrey Chernov <ache_at_nagual.pp.ru>
Date: Fri, 1 Aug 2003 06:37:03 +0400
On Fri, Aug 01, 2003 at 04:44:08 +0400, Andrey Chernov wrote:
> This patch address two problems.

Revised patch version with accurate skipping. Surprisingly, the code is
reduced.

Only in .: CVS
diff -u ./extern.h /usr/src/usr.bin/tr/extern.h
--- ./extern.h	Fri Jun 14 19:56:52 2002
+++ /usr/src/usr.bin/tr/extern.h	Fri Aug  1 04:19:36 2003
@@ -40,7 +40,8 @@
 
 typedef struct {
 	enum { STRING1, STRING2 } which;
-	enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state;
+	enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE,
+	       SET, SET_UPPER, SET_LOWER } state;
 	int	 cnt;			/* character count */
 	int	 lastch;		/* last character */
 	int	equiv[NCHARS];		/* equivalence set */
@@ -49,3 +50,5 @@
 } STR;
 
 int	 next(STR *);
+int charcoll(const void *, const void *);
+
diff -u ./str.c /usr/src/usr.bin/tr/str.c
--- ./str.c	Fri Jul  5 13:28:13 2002
+++ /usr/src/usr.bin/tr/str.c	Fri Aug  1 04:22:11 2003
@@ -106,6 +106,8 @@
 		}
 		return (1);
 	case SET:
+	case SET_UPPER:
+	case SET_LOWER:
 		if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
 			s->state = NORMAL;
 			return (next(s));
@@ -194,7 +196,7 @@
 {
 	int cnt, (*func)(int);
 	CLASS *cp, tmp;
-	int *p;
+	int *p, n;
 
 	tmp.name = s->str;
 	if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
@@ -208,10 +210,18 @@
 		if ((func)(cnt))
 			*p++ = cnt;
 	*p = OOBCH;
+	n = p - cp->set;
 
 	s->cnt = 0;
-	s->state = SET;
 	s->set = cp->set;
+	if (strcmp(s->str, "upper") == 0)
+		s->state = SET_UPPER;
+	else if (strcmp(s->str, "lower") == 0) {
+		s->state = SET_LOWER;
+	} else
+		s->state = SET;
+	if ((s->state == SET_LOWER || s->state == SET_UPPER) && n > 1)
+		mergesort(s->set, n, sizeof(*(s->set)), charcoll);
 }
 
 static int
diff -u ./tr.c /usr/src/usr.bin/tr/tr.c
--- ./tr.c	Thu Sep  5 03:29:07 2002
+++ /usr/src/usr.bin/tr/tr.c	Fri Aug  1 06:30:24 2003
@@ -101,8 +101,9 @@
 STR s1 = { STRING1, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };
 STR s2 = { STRING2, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };
 
-static int charcoll(const void *, const void *);
 static void setup(int *, char *, STR *, int, int);
+static void process_upper(int);
+static void process_lower(int);
 static void usage(void);
 
 int
@@ -224,20 +225,47 @@
 	if (!next(&s2))
 		errx(1, "empty string2");
 
-	ch = s2.lastch;
 	/* If string2 runs out of characters, use the last one specified. */
-	if (sflag)
-		while (next(&s1)) {
-			string1[s1.lastch] = ch = s2.lastch;
-			string2[ch] = 1;
-			(void)next(&s2);
-		}
-	else
-		while (next(&s1)) {
-			string1[s1.lastch] = ch = s2.lastch;
-			(void)next(&s2);
+	while (next(&s1)) {
+	again:
+		if (s1.state == SET_LOWER &&
+		    s2.state == SET_UPPER &&
+		    s1.cnt == 1 && s2.cnt == 1) {
+			do {
+				if (!next(&s1)) {
+					process_upper(sflag);
+					goto endloop;
+				}
+			} while (s1.state == SET_LOWER && s1.cnt > 1);
+			do {
+				if (!next(&s2))
+					break;
+			} while (s2.state == SET_UPPER && s2.cnt > 1);
+			process_upper(sflag);
+			goto again;
+		} else if (s1.state == SET_UPPER &&
+			   s2.state == SET_LOWER &&
+			   s1.cnt == 1 && s2.cnt == 1) {
+			do {
+				if (!next(&s1)) {
+					process_lower(sflag);
+					goto endloop;
+				}
+			} while (s1.state == SET_UPPER && s1.cnt > 1);
+			do {
+				if (!next(&s2))
+					break;
+			} while (s2.state == SET_LOWER && s2.cnt > 1);
+			process_lower(sflag);
+			goto again;
+		} else {
+			string1[s1.lastch] = s2.lastch;
+			if (sflag)
+				string2[s2.lastch] = 1;
 		}
-
+		(void)next(&s2);
+	}
+endloop:
 	if (cflag || Cflag) {
 		s2.str = argv[1];
 		s2.state = NORMAL;
@@ -294,15 +322,59 @@
 			string[cnt] = !string[cnt] && ISCHAR(cnt);
 }
 
-static int
+int
 charcoll(const void *a, const void *b)
 {
-	char sa[2], sb[2];
+	static char sa[2], sb[2];
+	int r;
 
 	sa[0] = *(const int *)a;
 	sb[0] = *(const int *)b;
-	sa[1] = sb[1] = '\0';
-	return (strcoll(sa, sb));
+	r = strcoll(sa, sb);
+	if (r == 0)
+		r = *(const int *)a - *(const int *)b;
+	return (r);
+}
+
+
+/*
+ * For -s result will contain only those characters defined
+ * as the second characters in each of the toupper or tolower
+ * pairs.
+ */
+
+static void
+process_upper(int sflag)
+{
+	int cnt, ch;
+
+	for (cnt = 0; cnt < NCHARS; cnt++) {
+		ch = string1[cnt];
+		if (ch == OOBCH)        /* [Cc]flag */
+			ch = cnt;
+		if (islower(ch)) {
+			string1[cnt] = ch = toupper(ch);
+			if (sflag && isupper(ch))
+				string2[ch] = 1;
+		}
+	}
+}
+
+static void
+process_lower(int sflag)
+{
+	int cnt, ch;
+
+	for (cnt = 0; cnt < NCHARS; cnt++) {
+		ch = string1[cnt];
+		if (ch == OOBCH)        /* [Cc]flag */
+			ch = cnt;
+		if (isupper(ch)) {
+			string1[cnt] = ch = tolower(ch);
+			if (sflag && islower(ch))
+				string2[ch] = 1;
+		}
+	}
 }
 
 static void
Received on Thu Jul 31 2003 - 17:37:05 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:37:17 UTC