Re: PLEASE TEST: IPI deadlock avoidance patch

From: Julian Elischer <julian_at_elischer.org>
Date: Thu, 26 Aug 2004 12:54:52 -0700
In the same vein, here's a program I use to test filesystems AND disks.
It assumes the file already exists, because it was originally written to
test raw disks (in RAIDs).

It also writes a pass number into each block, so you can tell when data
failed to get written (we actually saw that on one array type) and you
are looking at data from the previous pass.



Dan Nelson wrote:

>In the last episode (Aug 26), Daniel Eriksson said:
>  
>
>>Robert Watson wrote:
>>    
>>
>>>Could you be more specific about "one way" or "another"?  
>>>      
>>>
>>Not at this point. The machine is on the other side of town and needs
>>to be online for the next couple of days.
>>
>>The corrupted files are large (10-400MB) binary files, and the
>>content of the files makes it hard to pinpoint exactly what is
>>corrupted.
>>
>>I will see what I can do about providing more information, but I
>>cannot make any promises right now.
>>    
>>
>
>I used the following two programs to help diagnose some file corruption
>problems in Linux's XFS and NFS code.  Genoffsets creates a 1.5gb file
>called "offsets" with the 4-byte integers 0, 4, 8, 12, etc. in
>network-byte order (so each number "n" starts at file offset "n").  You
>then copy that file (or a part of it) to a remote system and run
>cmpoffsets on it to verify that the file copied ok.
>
>Cmpoffsets just reads the file and checks that the contents are
>correct.  For runs of data that are incorrect, it prints the start and
>end offsets (and whether they're on power-of-2 boundaries).  If the
>corrupted data is zeros, it prints that.  Otherwise it assumes the data
>was copied from another location and prints the start and end offsets
>of the copied data (and whether they're on power-of-2 boundaries).
>
>Run the copy and verify in a loop that exits if cmpoffsets ever returns
>a nonzero result.
>
>  
>

#include <sys/types.h>
#include <sys/uio.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>

/*
 * Variables maintained by getopt(3), which main() uses to parse the
 * command line.
 * NOTE(review): <unistd.h> normally declares these already (optreset being
 * a BSD extension) -- confirm before removing these explicit declarations.
 */
extern char *optarg;
extern int optind;
extern int optopt;
extern int opterr;
extern int optreset;


/*
 * Print the command-line synopsis and the option defaults to stderr, then
 * terminate the program with exit status 1.  This function never returns.
 */
void
usage(void)
{
	static const char *const help[] = {
		"usage: checkdisk [-c count] [-p passes ] [-o offset] -f devicename\n",
		"	default offset is 63 (512 byte blocks)\n",
		"	default count of writes is infinite\n",
		"	default count of passes is infinite\n",
	};
	size_t line;

	for (line = 0; line < sizeof(help) / sizeof(help[0]); line++)
		fputs(help[line], stderr);
	exit(1);
}

/*
 * One 512-byte disk block.  It can be viewed either as raw bytes (chars)
 * or as a record whose leading 128 bytes carry a text descriptor.
 */
union block {
	char chars[512];	/* raw byte view of the whole block */
	struct {
		char desc[128];	/* NUL-terminated text descriptor built from fmtstring */
		long long int blocknumber;	/* not referenced by the code visible in this file */
	} record;
} ;

/*
 * A run of 256 consecutive blocks (256 * 512 bytes = 128 KiB).  main()
 * transfers exactly one of these per write()/read() call.
 */
struct testbuffer {
	union block blocks[256];
};

/*
 * buffer1 holds the expected pattern (it is what gets written to the device
 * and what read-back data is compared against); buffer2 receives the data
 * read back from the device.
 */
struct testbuffer *buffer1, *buffer2;

/* Descriptor template: absolute block number, index within the buffer, pass number. */
char *fmtstring = "block %lld : %d of 256, pass %d";

unsigned int offset = 63;	/* -o: first block to test, in 512-byte blocks */
unsigned int count = 0;		/* -c: buffers transferred per pass; 0 means no limit */
unsigned int pass = 0;		/* number of the pass currently running */
unsigned int passes = 0;	/* -p: number of passes to run; 0 means no limit */
unsigned long long baseblock;	/* block number of the first block in the current buffer */
off_t writebase;		/* byte offset each phase seeks to before it starts */

main(int argc, char **argv)
{
	int ch, fd;
		int blk;
		int retval;
	long long int loopcount;

	buffer1 = (void *)malloc(sizeof(struct testbuffer));
	buffer2 = (void *)malloc(sizeof(struct testbuffer));
	bzero(buffer1, sizeof(struct testbuffer));
	bzero(buffer2, sizeof(struct testbuffer));

	while ((ch = getopt(argc, argv, "?f:o:c:p:")) != -1)
		 switch (ch) {
		 case 'f':
			 if ((fd = open(optarg, O_RDWR, 0666)) < 0)
				err(1, "%s", optarg);
			 break;
		 case 'c':
			 if (optarg[0] < '0' || optarg[0] > '9') {
				usage();
			 } 
			 if ((count = strtoul(optarg, NULL, 10)) == ULONG_MAX)
				usage();
			 break;
		 case 'p':
			 if (optarg[0] < '0' || optarg[0] > '9') {
				usage();
			 } 
			 if ((passes = strtoul(optarg, NULL, 10)) == ULONG_MAX)
				usage();
			 break;
		 case 'o':
			 if (optarg[0] < '0' || optarg[0] > '9') {
				usage();
			 } 
			 if ((offset = strtoul(optarg, NULL, 10)) == ULONG_MAX)
				usage();
			 break;
		 case '?':
		 default:
			 usage();
		 }
	argc -= optind;
	argv += optind;

	for ( pass = 0; passes == 0 || pass < passes; pass++ ) {
		baseblock = offset;
		writebase = offset * 512;

		lseek(fd, writebase, SEEK_SET);
		for ( loopcount = 0; count == 0 || loopcount < count; loopcount++ ) {

			for (blk = 0; blk < 256; blk++) {
				sprintf(buffer1->blocks[blk].record.desc, fmtstring, (baseblock + blk), blk, pass);
			}
			retval = write (fd, buffer1, sizeof(*buffer1));
			if (retval != sizeof(*buffer1)) {
				if (retval == -1) {
					warn("write of block %llx", baseblock);
				} else {
					fprintf(stderr, "write of block %lld wrote %d blocks ", baseblock, retval/512);
		
				}
				break;
			}
			if (((baseblock/256)%10000) == 0) {
				printf("pass %d write at block %lld          \r", pass, baseblock);
				fflush(stdout);
			}
			baseblock += (sizeof(*buffer1)/sizeof(union block));
		}

		baseblock = offset;
		writebase = offset * 512;

		lseek(fd, writebase, SEEK_SET);
		for ( loopcount = 0; count == 0 || loopcount < count; loopcount++ ) {
			char *src, *chk;

			for (blk = 0; blk < 256; blk++) {
				sprintf(buffer1->blocks[blk].record.desc, fmtstring, (baseblock + blk), blk, pass);
			}
			retval = read (fd, buffer2, sizeof(*buffer1));
			if (retval != sizeof(*buffer1)) {
				if (retval == -1) {
					warn("read of block %llx", baseblock);
				} else {
					fprintf(stderr, "read of block %lld delivered %d blocks ", baseblock, retval/512);
				}
				break;
			}
			for (blk = 0; blk < 256; blk++) {
				src = buffer2->blocks[blk].record.desc;
				chk = buffer1->blocks[blk].record.desc;
				if ( strncmp(src, chk, strlen(src))) {
					printf("block %lld failed\n", (baseblock + blk));
					printf("found:\n %s\n", src);
				} else {
					/* printf("block %lld matched\n", (baseblock + blk));*/
				}
			}
			if (((baseblock/256)%10000) == 0) {
				printf("pass %d read at block %lld            \r", pass, baseblock);
				fflush(stdout);
			}
			
			baseblock += (sizeof(*buffer1)/sizeof(union block));
		}
	}
}
Received on Thu Aug 26 2004 - 17:54:53 UTC

This archive was generated by hypermail 2.4.0 : Wed May 19 2021 - 11:38:08 UTC