diff --git a/usr.bin/grep/fastgrep.c b/usr.bin/grep/fastgrep.c index 5239a0e..2405ed8 100644 --- a/usr.bin/grep/fastgrep.c +++ b/usr.bin/grep/fastgrep.c @@ -198,7 +198,7 @@ fastcomp(fastgrep_t *fg, const char *pat) } int -grep_search(fastgrep_t *fg, unsigned char *data, size_t len, regmatch_t *pmatch) +grep_search(fastgrep_t *fg, const unsigned char *data, size_t len, regmatch_t *pmatch) { unsigned int j; int ret = REG_NOMATCH; diff --git a/usr.bin/grep/file.c b/usr.bin/grep/file.c index c4de7db..a7e3bf0 100644 --- a/usr.bin/grep/file.c +++ b/usr.bin/grep/file.c @@ -37,7 +37,7 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include +#include #include #include #include @@ -47,133 +47,159 @@ __FBSDID("$FreeBSD$"); #include "grep.h" -static char fname[MAXPATHLEN]; /* file name */ +#define MAXBUFSIZ (32 * 1024) +#define LNBUFBUMP 80 -#define MAXBUFSIZ (16 * 1024) -#define PREREAD_M 0.2 +static gzFile gzbufdesc; +static BZFILE* bzbufdesc; -/* Some global variables for the buffering and reading. */ -static char *lnbuf; -static size_t lnbuflen; -static unsigned char *binbuf; -static int binbufsiz; -unsigned char *binbufptr; -static int bzerr; +static unsigned char buffer[MAXBUFSIZ]; +static unsigned char *bufpos; +static size_t bufrem; -#define iswbinary(ch) (!iswspace((ch)) && iswcntrl((ch)) && \ - (ch != L'\b') && (ch != L'\0')) +static unsigned char *lnbuf; +static size_t lnbuflen; -/* - * Returns a single character according to the file type. - * Returns -1 on failure. - */ -int -grep_fgetc(struct file *f) +static int +grep_refill(struct file *f) { - unsigned char c; - - switch (filebehave) { - case FILE_STDIO: - return (fgetc(f->f)); - case FILE_GZIP: - return (gzgetc(f->gzf)); - case FILE_BZIP: - BZ2_bzRead(&bzerr, f->bzf, &c, 1); - if (bzerr == BZ_STREAM_END) - return (-1); - else if (bzerr != BZ_SEQUENCE_ERROR && bzerr != BZ_OK) - errx(2, "%s", getstr(2)); - return (c); + ssize_t nr; + int bzerr; + + bufpos = buffer; + bufrem = 0; + + if (filebehave == FILE_GZIP) { + nr = gzread(gzbufdesc, buffer, MAXBUFSIZ); + } else if (filebehave == FILE_BZIP && bzbufdesc != NULL) { + nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ); + switch (bzerr) { + case BZ_OK: + case BZ_STREAM_END: + /* No problem, nr will be okay */ + break; + case BZ_DATA_ERROR_MAGIC: + /* + * As opposed to gzread(), which simply returns the + * plain file data, if it is not in the correct + * compressed format, BZ2_bzRead() instead aborts. + * + * So, just restart at the beginning of the file again, + * and use plain reads from now on. + */ + BZ2_bzReadClose(&bzerr, bzbufdesc); + bzbufdesc = NULL; + if (lseek(f->fd, 0, SEEK_SET) == -1) + return (EOF); + nr = read(f->fd, buffer, MAXBUFSIZ); + break; + default: + /* Make sure we exit with an error */ + nr = -1; + } + } else { + nr = read(f->fd, buffer, MAXBUFSIZ); } - return (-1); + + if (nr <= 0) + return (EOF); + + bufrem = nr; + return (0); } -/* - * Returns true if the file position is a EOF, returns false - * otherwise. - */ -int -grep_feof(struct file *f) +static int +grep_lnbufgrow(size_t newlen) { - - switch (filebehave) { - case FILE_STDIO: - return (feof(f->f)); - case FILE_GZIP: - return (gzeof(f->gzf)); - case FILE_BZIP: - return (bzerr == BZ_STREAM_END); - } - return (1); + unsigned char *p; + + if (lnbuflen >= newlen) + return (0); + if ((p = realloc(lnbuf, newlen)) == NULL) + return (EOF); + lnbuf = p; + lnbuflen = newlen; + return (0); } -/* - * At the first call, fills in an internal buffer and checks if the given - * file is a binary file and sets the binary flag accordingly. Then returns - * a single line and sets len to the length of the returned line. - * At any other call returns a single line either from the internal buffer - * or from the file if the buffer is exhausted and sets len to the length - * of the line. - */ char * -grep_fgetln(struct file *f, size_t *len) +grep_fgetln(struct file *f, size_t *lenp) { - struct stat st; - size_t bufsiz, i = 0; - int ch = 0; - - /* Fill in the buffer if it is empty. */ - if (binbufptr == NULL) { - - /* Only pre-read to the buffer if we need the binary check. */ - if (binbehave != BINFILE_TEXT) { - if (f->stdin) - st.st_size = MAXBUFSIZ; - else if (stat(fname, &st) != 0) - err(2, NULL); - - bufsiz = (MAXBUFSIZ > (st.st_size * PREREAD_M)) ? - (st.st_size / 2) : MAXBUFSIZ; - - binbuf = grep_malloc(sizeof(char) * bufsiz); - - while (i < bufsiz) { - ch = grep_fgetc(f); - if (ch == EOF) - break; - binbuf[i++] = ch; - } - - f->binary = memchr(binbuf, (filebehave != FILE_GZIP) ? - '\0' : '\200', i - 1) != NULL; - } - binbufsiz = i; - binbufptr = binbuf; + unsigned char *p; + char *ret; + size_t len; + size_t diff; + size_t off; + + /* Fill the buffer, if necessary */ + if (bufrem == 0 && grep_refill(f)) { +error: + *lenp = 0; + return (NULL); } - /* Read a line whether from the buffer or from the file itself. */ - for (i = 0; !(grep_feof(f) && - (binbufptr == &binbuf[binbufsiz])); i++) { - if (binbufptr == &binbuf[binbufsiz]) { - ch = grep_fgetc(f); - } else { - ch = binbufptr[0]; - binbufptr++; - } - if (i >= lnbuflen) { - lnbuflen *= 2; - lnbuf = grep_realloc(lnbuf, ++lnbuflen); - } - if ((ch == '\n') || (ch == EOF)) { - lnbuf[i] = '\0'; - break; - } else - lnbuf[i] = ch; + /* Look for a newline in the remaining part of the buffer */ + if ((p = memchr(bufpos, '\n', bufrem)) != NULL) { + ++p; /* advance over newline */ + ret = bufpos; + len = p - bufpos; + bufrem -= len; + bufpos = p; + *lenp = len; + return (ret); } - if (grep_feof(f) && (i == 0) && (ch != '\n')) + + /* We have to copy the current buffered data to the line buffer */ + for (len = bufrem, off = 0; ; len += bufrem) { + /* Make sure there is room for more data */ + if (grep_lnbufgrow(len + LNBUFBUMP)) + goto error; + memcpy(lnbuf + off, bufpos, len - off); + off = len; + if (grep_refill(f)) + break; /* EOF or error: return partial line */ + if ((p = memchr(bufpos, '\n', bufrem)) == NULL) + continue; + /* got it: finish up the line (like code above) */ + ++p; + diff = p - bufpos; + len =+ diff; + if (grep_lnbufgrow(len)) + goto error; + memcpy(lnbuf + off, bufpos, diff); + bufrem -= diff; + bufpos = p; + break; + } + *lenp = len; + return lnbuf; +} + +static struct file * +grep_file_init(struct file *f) +{ + if (filebehave == FILE_GZIP && + (gzbufdesc = gzdopen(f->fd, "r")) == NULL) { +error: + if (!f->stdin) + close(f->fd); + free(f); return (NULL); - *len = i; - return (lnbuf); + } + + if (filebehave == FILE_BZIP && + (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL) + goto error; + + /* Fill read buffer, also catches errors early */ + if (grep_refill(f)) + goto error; + + /* Check for binary stuff, if necessary */ + if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL) + f->binary = true; + + return (f); } /* @@ -184,72 +210,45 @@ grep_stdin_open(void) { struct file *f; - snprintf(fname, sizeof fname, "%s", getstr(1)); - f = grep_malloc(sizeof *f); + memset(f, 0, sizeof *f); + f->fd = fileno(stdin); + f->stdin = true; - if ((f->f = fdopen(STDIN_FILENO, "r")) != NULL) { - f->stdin = true; - return (f); - } - - free(f); - return (NULL); + return grep_file_init(f); } /* - * Opens a normal, a gzipped or a bzip2 compressed file for processing. + * Opens a file for processing. */ struct file * grep_open(const char *path) { struct file *f; - snprintf(fname, sizeof fname, "%s", path); - f = grep_malloc(sizeof *f); - - f->stdin = false; - switch (filebehave) { - case FILE_STDIO: - if ((f->f = fopen(path, "r")) != NULL) - return (f); - break; - case FILE_GZIP: - if ((f->gzf = gzopen(fname, "r")) != NULL) - return (f); - break; - case FILE_BZIP: - if ((f->bzf = BZ2_bzopen(fname, "r")) != NULL) - return (f); - break; + memset(f, 0, sizeof *f); + if ((f->fd = open(path, O_RDONLY)) == -1) { + free(f); + return (NULL); } - free(f); - return (NULL); + return grep_file_init(f); } /* - * Closes a normal, a gzipped or a bzip2 compressed file. + * Closes a file. */ void grep_close(struct file *f) { + close(f->fd); - switch (filebehave) { - case FILE_STDIO: - fclose(f->f); - break; - case FILE_GZIP: - gzclose(f->gzf); - break; - case FILE_BZIP: - BZ2_bzclose(f->bzf); - break; - } - - /* Reset read buffer for the file we are closing */ - binbufptr = NULL; - free(binbuf); + /* Reset read buffer and line buffer */ + bufpos = buffer; + bufrem = 0; + free(lnbuf); + lnbuf = NULL; + lnbuflen = 0; } diff --git a/usr.bin/grep/grep.h b/usr.bin/grep/grep.h index bb5e5f9..3988240 100644 --- a/usr.bin/grep/grep.h +++ b/usr.bin/grep/grep.h @@ -77,10 +77,7 @@ extern const char *errstr[]; #define MAX_LINE_MATCHES 32 struct file { - struct mmfile *mmf; - BZFILE *bzf; - FILE *f; - gzFile *gzf; + int fd; bool binary; bool stdin; }; @@ -153,11 +150,9 @@ void clearqueue(void); void grep_close(struct file *f); struct file *grep_stdin_open(void); struct file *grep_open(const char *path); -int grep_feof(struct file *f); -int grep_fgetc(struct file *f); char *grep_fgetln(struct file *f, size_t *len); /* fastgrep.c */ int fastcomp(fastgrep_t *, const char *); void fgrepcomp(fastgrep_t *, const char *); -int grep_search(fastgrep_t *, unsigned char *, size_t, regmatch_t *); +int grep_search(fastgrep_t *, const unsigned char *, size_t, regmatch_t *);