| /* |
| * Experimental data distribution table generator |
| * Taken from the uncopyrighted NISTnet code (public domain). |
| * |
| * Read in a series of "random" data values, either |
| * experimentally or generated from some probability distribution. |
| * From this, create the inverse distribution table used to approximate |
| * the distribution. |
| */ |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <math.h> |
| #include <malloc.h> |
| #include <string.h> |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| |
| |
| double * |
| readdoubles(FILE *fp, int *number) |
| { |
| struct stat info; |
| double *x; |
| int limit; |
| int n=0, i; |
| |
| fstat(fileno(fp), &info); |
| if (info.st_size > 0) { |
| limit = 2*info.st_size/sizeof(double); /* @@ approximate */ |
| } else { |
| limit = 10000; |
| } |
| |
| x = calloc(limit, sizeof(double)); |
| if (!x) { |
| perror("double alloc"); |
| exit(3); |
| } |
| |
| for (i=0; i<limit; ++i){ |
| fscanf(fp, "%lf", &x[i]); |
| if (feof(fp)) |
| break; |
| ++n; |
| } |
| *number = n; |
| return x; |
| } |
| |
| void |
| arraystats(double *x, int limit, double *mu, double *sigma, double *rho) |
| { |
| int n=0, i; |
| double sumsquare=0.0, sum=0.0, top=0.0; |
| double sigma2=0.0; |
| |
| for (i=0; i<limit; ++i){ |
| sumsquare += x[i]*x[i]; |
| sum += x[i]; |
| ++n; |
| } |
| *mu = sum/(double)n; |
| *sigma = sqrt((sumsquare - (double)n*(*mu)*(*mu))/(double)(n-1)); |
| |
| for (i=1; i < n; ++i){ |
| top += ((double)x[i]- *mu)*((double)x[i-1]- *mu); |
| sigma2 += ((double)x[i-1] - *mu)*((double)x[i-1] - *mu); |
| |
| } |
| *rho = top/sigma2; |
| } |
| |
| /* Create a (normalized) distribution table from a set of observed |
| * values. The table is fixed to run from (as it happens) -4 to +4, |
| * with granularity .00002. |
| */ |
| |
| #define TABLESIZE 16384/4 |
| #define TABLEFACTOR 8192 |
| #ifndef MINSHORT |
| #define MINSHORT -32768 |
| #define MAXSHORT 32767 |
| #endif |
| |
| /* Since entries in the inverse are scaled by TABLEFACTOR, and can't be bigger |
| * than MAXSHORT, we don't bother looking at a larger domain than this: |
| */ |
| #define DISTTABLEDOMAIN ((MAXSHORT/TABLEFACTOR)+1) |
| #define DISTTABLEGRANULARITY 50000 |
| #define DISTTABLESIZE (DISTTABLEDOMAIN*DISTTABLEGRANULARITY*2) |
| |
| static int * |
| makedist(double *x, int limit, double mu, double sigma) |
| { |
| int *table; |
| int i, index, first=DISTTABLESIZE, last=0; |
| double input; |
| |
| table = calloc(DISTTABLESIZE, sizeof(int)); |
| if (!table) { |
| perror("table alloc"); |
| exit(3); |
| } |
| |
| for (i=0; i < limit; ++i) { |
| /* Normalize value */ |
| input = (x[i]-mu)/sigma; |
| |
| index = (int)rint((input+DISTTABLEDOMAIN)*DISTTABLEGRANULARITY); |
| if (index < 0) index = 0; |
| if (index >= DISTTABLESIZE) index = DISTTABLESIZE-1; |
| ++table[index]; |
| if (index > last) |
| last = index +1; |
| if (index < first) |
| first = index; |
| } |
| return table; |
| } |
| |
| /* replace an array by its cumulative distribution */ |
| static void |
| cumulativedist(int *table, int limit, int *total) |
| { |
| int accum=0; |
| |
| while (--limit >= 0) { |
| accum += *table; |
| *table++ = accum; |
| } |
| *total = accum; |
| } |
| |
| static short * |
| inverttable(int *table, int inversesize, int tablesize, int cumulative) |
| { |
| int i, inverseindex, inversevalue; |
| short *inverse; |
| double findex, fvalue; |
| |
| inverse = (short *)malloc(inversesize*sizeof(short)); |
| for (i=0; i < inversesize; ++i) { |
| inverse[i] = MINSHORT; |
| } |
| for (i=0; i < tablesize; ++i) { |
| findex = ((double)i/(double)DISTTABLEGRANULARITY) - DISTTABLEDOMAIN; |
| fvalue = (double)table[i]/(double)cumulative; |
| inverseindex = (int)rint(fvalue*inversesize); |
| inversevalue = (int)rint(findex*TABLEFACTOR); |
| if (inversevalue <= MINSHORT) inversevalue = MINSHORT+1; |
| if (inversevalue > MAXSHORT) inversevalue = MAXSHORT; |
| inverse[inverseindex] = inversevalue; |
| } |
| return inverse; |
| |
| } |
| |
| /* Run simple linear interpolation over the table to fill in missing entries */ |
| static void |
| interpolatetable(short *table, int limit) |
| { |
| int i, j, last, lasti = -1; |
| |
| last = MINSHORT; |
| for (i=0; i < limit; ++i) { |
| if (table[i] == MINSHORT) { |
| for (j=i; j < limit; ++j) |
| if (table[j] != MINSHORT) |
| break; |
| if (j < limit) { |
| table[i] = last + (i-lasti)*(table[j]-last)/(j-lasti); |
| } else { |
| table[i] = last + (i-lasti)*(MAXSHORT-last)/(limit-lasti); |
| } |
| } else { |
| last = table[i]; |
| lasti = i; |
| } |
| } |
| } |
| |
| static void |
| printtable(const short *table, int limit) |
| { |
| int i; |
| |
| printf("# This is the distribution table for the experimental distribution.\n"); |
| |
| for (i=0 ; i < limit; ++i) { |
| printf("%d%c", table[i], |
| (i % 8) == 7 ? '\n' : ' '); |
| } |
| } |
| |
| int |
| main(int argc, char **argv) |
| { |
| FILE *fp; |
| double *x; |
| double mu, sigma, rho; |
| int limit; |
| int *table; |
| short *inverse; |
| int total; |
| |
| if (argc > 1) { |
| if (!(fp = fopen(argv[1], "r"))) { |
| perror(argv[1]); |
| exit(1); |
| } |
| } else { |
| fp = stdin; |
| } |
| x = readdoubles(fp, &limit); |
| if (limit <= 0) { |
| fprintf(stderr, "Nothing much read!\n"); |
| exit(2); |
| } |
| arraystats(x, limit, &mu, &sigma, &rho); |
| #ifdef DEBUG |
| fprintf(stderr, "%d values, mu %10.4f, sigma %10.4f, rho %10.4f\n", |
| limit, mu, sigma, rho); |
| #endif |
| |
| table = makedist(x, limit, mu, sigma); |
| free((void *) x); |
| cumulativedist(table, DISTTABLESIZE, &total); |
| inverse = inverttable(table, TABLESIZE, DISTTABLESIZE, total); |
| interpolatetable(inverse, TABLESIZE); |
| printtable(inverse, TABLESIZE); |
| return 0; |
| } |