#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#define HASHBUCKET_SIZE 1024*1024*184 //Bytes. Four times as many elements.
#define INMEM_RADIX_THRESH 1024*1024*184 //Max memory to use for the in-memory radix sort. Beyond this, fall back to disk method.
#define HASHBYTES 8 //Length of hash. *MUST* be 8. Earlier versions supported 16-byte hashes, but this is now deprecated. I just kept this as a define for readability.

//Tasks:

//#define fopen64 fopen
//#define fseeko64 fseek 
//#define DEBUG 1

//1. Produce the hash-list (Caution, large, so will dump to a file)
//2. Sort the list.
//3. Remove non-duplicates.

void srand(unsigned int seed);
unsigned long long hashfile(unsigned char* inputfile, unsigned char* outputfile, unsigned long long *nonnullblocks);
void inmemradixsort_byte(unsigned char *sortme, unsigned char *temp, unsigned char byte, unsigned int recordlen, unsigned int numblocks);
void inmemradixsort_bit(unsigned char *in, unsigned char *out, unsigned char byte, unsigned char bit, unsigned int recordlen, unsigned int numblocks);
//Part of the modified radix sort, byte-level.
void sortpass(char *infile, char *outfile, int byte, unsigned int recordlen);
//As above, but bit-level.
void sortpass_bit(char *infile, char *outfile, int byte, unsigned char bit, unsigned int recordlen);
void sortpass_bit_improved(char *in1, char *in2, char *out1, char *out2, int byte, unsigned char bit, unsigned int recordlen);
//Takes a sorted block-hash list, passes only those entries that have hashes appearing more than once.
unsigned long long dupesonly(char *infile, char *outfile);
unsigned long long addfirstoccurancedata(char *infilename, char *outfilename, unsigned long long numrecords);
void stage5sort(char *infile, char *outfile, unsigned long long blocks);
int compressfile(char *sourcefile, char *compressedfile, char *finallist, unsigned long long numblocks);
void writenewdata_escaped(unsigned long long newdatablocks, FILE *source, FILE *dest);
int isnulls(unsigned char *buffer);
FILE* fopen64(const char *filename, const char *type);
int fseeko64(FILE *stream, long long offset, int whence);
void outputheader(FILE *output);
unsigned int FNVHash(unsigned char *block);
void setbloombit(char *bits, unsigned int bitnum);
unsigned char getbloombit(char *bits, unsigned int bitnum);
unsigned long long bloomreduce(char *infile, char *outfile, unsigned char hashoffset);
unsigned char *gettempfilename();
void inmemorysort(unsigned char *filenamein, unsigned char *filenameout, unsigned long long numhashes);
void checkcollisions(char *infilename, char *outfilename, char *checkfilename);
void make_fnv_from_block(unsigned char *dest, unsigned char *block);
unsigned long long validatesorted(unsigned char *filename);
void swapends(unsigned long long *num);
void dobigsort(unsigned char *infile, unsigned char *outfile, unsigned long long numhashes);

unsigned int blocksize;
unsigned char *recurbuffer; //Used in the recursive sort routine as a temporary storage area.

/*
 * Program entry point: runs the whole compaction pipeline.
 *
 *   argv[1]  input file to compact
 *   argv[2]  output file to create
 *   -bs <n>  optional block size in bytes (default 512)
 *
 * Stages: hash every block to a temp file, thin the list with repeated
 * bloom-filter passes, sort it, keep only duplicated hashes, attach the
 * first-occurrence block number, re-sort by block number, verify the
 * candidate matches against the real data, then write the output.
 *
 * Returns 0 on success and 1 on a usage error or when no replaceable
 * block exists. Each intermediate temp file is unlinked and its name
 * freed once the following stage has consumed it, including on the
 * early-exit paths.
 */
int main(int argc, unsigned char* argv[]){
  blocksize=512;
  if(argc<3){
    printf("Usage: BLDDcompact <inputfile> <outputfile> [options]\n  Inputfile is a file to compact, usually a disk, filesystem image or .tar archive.\n   Outputfile is a to-create .BLDD file.\n");
    printf("       -bs <size>          Specifies a block size. If in doubt, leave out: The default is 512, which works in all but the most exotic cases. If you know what you're doing, setting this to the allocation unit size of the filesystem you are processing can greatly reduce the time required.\n");
    #ifdef WINDOWS
      printf("During processing a number of files are created in the CWD. These may be many gigabytes in size. If execution is interupted, you should delete these files.\n");
    #else
      printf("During processing a number of files are created in /tmp/. These may be many gigabytes in size. If execution is interupted, you should delete these files.\n");
    #endif
    printf("If you are using this to backup a filesystem for non-forensic purposes, zeroing-out unallocated space will greatly reduce the time required for dedup.\n");
    return(1);
  }
  for(int n=3;n<argc;n++){
    if(strcmp(argv[n], "-bs")==0){
      //The option needs a value; without this check argv[argc] (NULL) would be handed to atoi.
      if(n+1>=argc){
        printf("Option -bs requires a block size.\n");
        return(1);
      }
      int requested=atoi(argv[n+1]);
      //A zero block size would make every fread() return 0 without ever reaching EOF.
      if(requested<=0){
        printf("Invalid block size '%s'.\n", argv[n+1]);
        return(1);
      }
      blocksize=requested;
      printf("Blocksize specified as %u\n", blocksize);
      n++; //Skip the value that was just consumed.
    }
  }

  unsigned long long totalblocks, nonnullblocks=0;
  srand(time(NULL)); //Randomness is used to generate the temporary filenames.
  unsigned char *filenamestage1out=gettempfilename();
  totalblocks=hashfile(argv[1], filenamestage1out, &nonnullblocks);
  printf(" Found %llu blocks in file.\n", totalblocks);
  printf("hashed %llu blocks (The rest skipped as nulls).\n", nonnullblocks);


  unsigned long long numhashes=nonnullblocks;
  printf("Stage 1.5 (Optional): Bloom filter reduction. This will take a little while, but save time later.\n");
  unsigned char *bloomtempfile=gettempfilename();
  unsigned long long bloomcounter;
  //Each iteration filters stage1 -> temp and then temp -> stage1, using a
  //different hash offset each time. A break after the first half only
  //happens when nothing was removed (so stage1 still holds the same
  //records) or when nothing is left at all.
  for(int n=0;n<=5;n+=2){
    bloomcounter=numhashes;
    numhashes=bloomreduce(filenamestage1out, bloomtempfile,n);
    printf("%llu in, %llu remain.\n", bloomcounter, numhashes);
    if(bloomcounter==numhashes||numhashes==0)
      break;
    bloomcounter=numhashes;
    numhashes=bloomreduce(bloomtempfile, filenamestage1out,n+1);
    printf("%llu in, %llu remain.\n", bloomcounter, numhashes);
    if(bloomcounter==numhashes||numhashes==0)
      break;
  }
  unlink(bloomtempfile);
  free(bloomtempfile);

  if(numhashes==0){
    printf("Not a single identical block found. Continuing would be pointless.\n");
    unlink(filenamestage1out); //Do not leave the hash list behind.
    free(filenamestage1out);
    return(1);
  }

  printf("Stage 2: Sorting list.\n  This will be slow. Now would be a good time for:\n  <5gig:   A cup of tea.\n  <20gig:   A good nights sleep.\n  <200gig:  A weekend break.\n");
  printf("Sorting %llu records.\n", numhashes);
  unsigned char *stage2out=gettempfilename();
  dobigsort(filenamestage1out, stage2out, numhashes); //Also deletes filenamestage1out
  free(filenamestage1out);
  //Only the message is printed here; validatesorted() is not invoked.
  printf("Checking the sorted output in case anything went wrong.\n");
  printf("\nStage 3: Eliminating non-duplicated data from the list.\n");
  unsigned char *stage3out=gettempfilename();
  unsigned long long dupblocks=dupesonly(stage2out, stage3out);
  printf("  Done, found %llu duplicated blocks.\n", dupblocks);
  unlink(stage2out);
  free(stage2out);
  printf("Stage 4: Adding the first-occurance field to every record.\n");
  unsigned long long replaceableblocks;
  unsigned char *stage4out=gettempfilename();
  replaceableblocks=addfirstoccurancedata(stage3out, stage4out, dupblocks);
  unlink(stage3out);
  free(stage3out);
  printf(" Found a total of %llu blocks that can be replaced by references.\n", replaceableblocks);
  if(replaceableblocks==0){
    printf("  Not bothering. Not a single replaceable block, so continuing would be pointless.\n");
    unlink(stage4out); //Do not leave the (empty) stage 4 list behind.
    free(stage4out);
    return(1);
  }
  printf("Stage 5: Resorting list by block number.\n");
  unsigned char *stage5out=gettempfilename();
  stage5sort(stage4out, stage5out, replaceableblocks);
  unlink(stage4out);
  free(stage4out);
  printf("Stage 5.1: Checking for collisions.\n");
  unsigned char *finalindexfile=gettempfilename();
  checkcollisions(stage5out, finalindexfile, argv[1]); //This function also does the convertion back to littleendian if required.
  unlink(stage5out);
  free(stage5out);
  printf("Stage 6: Compressing.\n");
  compressfile(argv[1], argv[2], finalindexfile, totalblocks);
  unlink(finalindexfile);
  free(finalindexfile);
  printf("Done.\n");
  return(0);
}

/* Appends every record of the file named srcname to dest, staging the
 * data through the shared recurbuffer (128 records per read). */
static void recursivefilesort_append(FILE *dest, const char *srcname, unsigned int recordlen){
  FILE *src=fopen64(srcname, "rb");
  if(!src){
    printf("Could not open temporary file %s.\n", srcname);
    exit(1);
  }
  size_t gotlines;
  while((gotlines=fread(recurbuffer, recordlen, 128, src))>0)
    fwrite(recurbuffer, recordlen, gotlines, dest);
  fclose(src);
}

/*
 * Sorts a file of fixed-length records on their leading 8 key bytes using
 * sequential file access only.
 *
 *   infile    file to sort. It is unlinked by this function when the data
 *             has to be split on disc, but NOT when the in-memory sort is
 *             used, so callers unlink it themselves afterwards.
 *   outfile   destination; may be the same name as infile.
 *   bit       key bit to split on, 63 = most significant bit of key byte 0,
 *             decreasing by one for each level of recursion.
 *   records   number of records in infile.
 *   recordlen record length in bytes.
 *
 * Below (1024*1024*256)/32 records the work is handed to inmemorysort().
 * Otherwise the records are split into a "bit clear" and a "bit set" temp
 * file, each half is sorted recursively on the next lower bit, and the two
 * halves are concatenated into outfile.
 *
 * Use only on random keys! If the keys are not randomly distributed the
 * recursion goes far too deep, and every level keeps two temp files on
 * disc until it returns.
 */
void recursivefilesort(char *infile, char *outfile, unsigned char bit, unsigned long long records, unsigned int recordlen){
  unsigned int byte=7-((bit&0xF8)>>3);     //Which key byte holds this bit.
  unsigned int subbit=128>>(7-(bit&0x07)); //Mask of the bit inside that byte.
  printf("Recursively sorting %s to %s, bit %u(%u,%u), records %llu\n", infile, outfile, (unsigned int)bit, byte, subbit, records);
  if(records<(1024*1024*256)/32){ //Times 32 to get memory usage in bytes.
    inmemorysort(infile, outfile, records);
    return;
  }

  unsigned char *lowerfilename=gettempfilename();
  unsigned char *upperfilename=gettempfilename();
  //64-bit counters: records is 64-bit, so a half may exceed 2^32 entries.
  unsigned long long lowerfilerecords=0;
  unsigned long long upperfilerecords=0;
  if(!recurbuffer)
    recurbuffer=malloc(recordlen*128);
  if(!recurbuffer){
    printf("Out of memory allocating the sort buffer.\n");
    exit(1);
  }
  FILE *readfile=fopen64(infile, "rb");
  FILE *lowerfile=fopen64(lowerfilename, "wb");
  FILE *upperfile=fopen64(upperfilename, "wb");
  if(!readfile||!lowerfile||!upperfile){
    printf("Could not open %s or a temporary file for sorting.\n", infile);
    exit(1);
  }

  //One pass over the input feeds both halves. Each half still receives its
  //records in input order, exactly as two separate passes would produce.
  size_t gotlines;
  while((gotlines=fread(recurbuffer, recordlen, 128, readfile))>0){
    for(size_t n=0;n<gotlines;n++){
      unsigned char *record=recurbuffer+(n*recordlen);
      if(record[byte]&subbit){
        fwrite(record, recordlen, 1, upperfile);
        upperfilerecords++;
      }else{
        fwrite(record, recordlen, 1, lowerfile);
        lowerfilerecords++;
      }
    }
  }
  fclose(lowerfile);
  fclose(upperfile);
  fclose(readfile);
  unlink(infile);

  //At bit 0 there is nothing left to split on, so the halves are used as they are.
  if(bit){
    if(lowerfilerecords>1)
      recursivefilesort(lowerfilename, lowerfilename, bit-1, lowerfilerecords, recordlen);
    if(upperfilerecords>1)
      recursivefilesort(upperfilename, upperfilename, bit-1, upperfilerecords, recordlen);
  }

  //Everything with the bit clear sorts before everything with it set.
  FILE *writefile=fopen64(outfile, "wb");
  if(!writefile){
    printf("Could not open %s for writing.\n", outfile);
    exit(1);
  }
  recursivefilesort_append(writefile, (const char *)lowerfilename, recordlen);
  recursivefilesort_append(writefile, (const char *)upperfilename, recordlen);
  fclose(writefile);
  unlink(upperfilename);
  unlink(lowerfilename);
  free(lowerfilename);
  free(upperfilename);
}

/*
 * One stable partition pass of the on-disc radix sort.
 *
 * Copies infile to outfile in two sweeps: first every record whose bit
 * `bit` of key byte `byte` is clear, then (after rewinding) every record
 * whose bit is set. Records keep their relative order within each group.
 * recordlen is the record size in bytes. Exits if infile cannot be opened.
 */
void sortpass_bit(char *infile, char *outfile, int byte, unsigned char bit, unsigned int recordlen){
  FILE *readfile=fopen64(infile, "rb");
  FILE *writefile=fopen64(outfile, "wb");
  if(readfile==0){
    printf("Could not open %s.\n", infile);
    exit(1);
  }

  char *buffer=malloc(recordlen*128);
  const int mask=1<<bit;

  //Sweep 0 passes the "bit clear" records, sweep 1 the "bit set" ones.
  for(int sweep=0;sweep<2;sweep++){
    if(sweep)
      fseeko64(readfile, 0, SEEK_SET);
    for(;;){
      //Records are pulled in batches of up to 128 to keep the I/O requests large.
      unsigned int gotlines=fread(buffer, recordlen, 128, readfile);
      for(unsigned int n=0;n<gotlines;n++){
        char *record=buffer+(n*recordlen);
        int bitset=(record[byte]&mask)!=0;
        if(bitset==sweep)
          fwrite(record, recordlen, 1, writefile);
      }
      if(feof(readfile))
        break;
    }
  }

  fclose(readfile);
  fclose(writefile);
  free(buffer);
}

/*
 * Sorts the hash list in infile into outfile.
 *
 * The records are HASHBYTES+8 bytes long and the sort starts at key bit 63.
 * All the work is delegated to recursivefilesort(); infile is unlinked
 * afterwards, so the caller must not expect it to exist on return.
 */
void dobigsort(unsigned char *infile, unsigned char *outfile, unsigned long long numhashes){
  const unsigned char topbit=63;
  const unsigned int recordlen=HASHBYTES+8;

  recursivefilesort((char *)infile, (char *)outfile, topbit, numhashes, recordlen);
  unlink((char *)infile);
}
/*
 * Stable one-bit partition of an in-memory record array.
 *
 * Copies numblocks records of recordlen bytes from `in` to `out`: records
 * whose bit `bit` of byte `byte` is clear come first, followed by those
 * with the bit set. Order within each group is the input order, which the
 * surrounding radix sort depends on. `in` and `out` must not overlap.
 */
void inmemradixsort_bit(unsigned char *in, unsigned char *out, unsigned char byte, unsigned char bit, unsigned int recordlen, unsigned int numblocks){
  const int mask=1<<bit;

  //Count the "bit clear" records so we know where the "bit set" group starts.
  unsigned int clearcount=0;
  for(unsigned int idx=0;idx<numblocks;idx++)
    if(!(in[(idx*recordlen)+byte]&mask))
      clearcount++;

  //Hand out slots from two cursors, one per group, in a single sweep.
  unsigned int clearpos=0;
  unsigned int setpos=clearcount;
  for(unsigned int idx=0;idx<numblocks;idx++){
    unsigned char *record=in+(idx*recordlen);
    unsigned int slot;
    if(record[byte]&mask)
      slot=setpos++;
    else
      slot=clearpos++;
    memcpy(out+(recordlen*slot), record, recordlen);
  }
}


/*
 * Loads numhashes records of HASHBYTES+8 bytes from filenamein, sorts them
 * in RAM on the first HASHBYTES bytes and writes the result to filenameout.
 *
 * The sort is a least-significant-byte-first radix sort built on
 * inmemradixsort_byte(), so it needs a second buffer of the same size.
 * filenamein is fully read and closed before filenameout is opened, which
 * lets callers pass the same name for both. Any allocation or I/O failure
 * terminates the program.
 */
void inmemorysort(unsigned char *filenamein, unsigned char *filenameout, unsigned long long numhashes){
  printf("Using in-memory sort\n");

  unsigned char *buffer1=malloc(numhashes*(HASHBYTES+8));
  unsigned char *buffertemp=malloc(numhashes*(HASHBYTES+8));
  //malloc(0) may legitimately return NULL, so only treat NULL as fatal when data is expected.
  if(numhashes&&(!buffer1||!buffertemp)){
    printf("Out of memory for in-memory sort of %llu records, aborting.\n", numhashes);
    exit(1);
  }
  FILE *readfile=fopen(filenamein, "rb");

  if(!readfile){
    printf("Failed to open temporary file %s, aborting.\n", filenamein);
    exit(1);
  }
  if(fread(buffer1, HASHBYTES+8, numhashes, readfile)!=numhashes){
    printf("opened temporary file %s but it was not of expected length, aborting.\n", filenamein);
    exit(1);
  }
  fclose(readfile);

  //Key bytes from last to first. Each call runs eight single-bit passes,
  //so the data ends up back in buffer1 every time.
  for(int n=HASHBYTES-1;n>=0;n--)
    inmemradixsort_byte(buffer1, buffertemp, n, HASHBYTES+8, numhashes);

  FILE *writefile=fopen(filenameout, "wb");
  if(!writefile){
    printf("Failed to open temporary file %s for writing, aborting.\n", filenameout);
    exit(1);
  }
  if(fwrite(buffer1, HASHBYTES+8, numhashes, writefile)!=numhashes){
    printf("Failed to write sorted data to %s, aborting.\n", filenameout);
    exit(1);
  }
  fclose(writefile);
  free(buffer1);
  free(buffertemp);
}

/*
 * One counting-bloom-filter pass over a hash list.
 *
 * Records are HASHBYTES+8 bytes: the hash first, then an 8-byte block
 * number. The first pass over infile feeds the leading 8 bytes of every
 * record, shifted left by hashoffset, to setbloombit(). The second pass
 * copies to outfile only the records for which getbloombit() reports that
 * their slot was hit more than once. A record whose hash is unique may
 * still survive through a slot collision, but a record whose hash occurs
 * twice is never dropped.
 *
 * Returns the number of records written to outfile. Exits on allocation
 * or file-open failure.
 */
unsigned long long bloomreduce(char *infile, char *outfile, unsigned char hashoffset){
  unsigned char *record=malloc(HASHBYTES+8);
  //The table must start out all-zero: setbloombit() reads a slot before updating it.
  char *bloombits=calloc(HASHBUCKET_SIZE, 1);
  unsigned long long reducedhash;
  if(!record||!bloombits){
    printf("Out of memory creating bloom filter.\n");
    exit(1);
  }
  printf("  Creating bloom filter.\n");
  FILE *readfile=fopen64(infile, "rb");
  if(!readfile){
    printf("Could not open %s.\n", infile);
    exit(1);
  }

  //Pass 1: count. Driving the loop from fread() means an empty file
  //contributes nothing instead of one uninitialised record.
  while(fread(record, HASHBYTES+8, 1, readfile)==1){
    memcpy(&reducedhash, record, sizeof(unsigned long long));
    reducedhash=reducedhash<<hashoffset;
    setbloombit(bloombits, reducedhash);
  }
  fseeko64(readfile, 0, SEEK_SET);

  //Pass 2: keep only the records whose slot was hit at least twice.
  unsigned long long survivingblocks=0;
  FILE *writefile=fopen64(outfile, "wb");
  if(!writefile){
    printf("Could not open %s for writing.\n", outfile);
    exit(1);
  }
  while(fread(record, HASHBYTES+8, 1, readfile)==1){
    memcpy(&reducedhash, record, sizeof(unsigned long long));
    reducedhash=reducedhash<<hashoffset;
    if(getbloombit(bloombits, reducedhash)){
      survivingblocks++;
      fwrite(record, HASHBYTES+8, 1, writefile);
    }
  }

  free(record);
  free(bloombits);
  fclose(writefile);
  fclose(readfile);
  return(survivingblocks);
}


/*
 * Re-sorts the stage 4 list (16-byte records) by its leading 8 bytes.
 *
 * The block numbers are stored big-endian, so sorting from key bit 63
 * downwards orders the records by block number. The work is delegated to
 * recursivefilesort().
 */
void stage5sort(char *infile, char *outfile, unsigned long long blocks){
  const unsigned char topbit=63;   //Most significant bit of the 8-byte key.
  const unsigned int recordlen=16; //Two 8-byte fields per record.

  recursivefilesort(infile, outfile, topbit, blocks, recordlen);
}

/*
 * Records one hit for slot `bitnum` in the two-bit-per-slot table `bits`.
 *
 * Four slots are packed into each byte, so the table holds
 * HASHBUCKET_SIZE*4 slots and bitnum is reduced modulo that. A slot moves
 * 0 -> 1 on its first hit and 1 -> 3 on its second; any other value is
 * left as it is.
 */
void setbloombit(char *bits, unsigned int bitnum){
  unsigned int slot=bitnum%(HASHBUCKET_SIZE * 4);
  int bytenum=(slot&0xFFFFFFFC)>>2; //Four slots per byte.
  int shift=(slot&0x00000003)*2;    //Position of this slot's pair inside the byte.
  unsigned char state=(bits[bytenum]>>shift)&0x03;

  switch(state){
    case 0:
      state=1; //First hit.
      break;
    case 1:
      state=3; //Second hit: this is what getbloombit() looks for.
      break;
    default:
      break;
  }
  //The new state is OR-ed in, so bits that are already set stay set.
  bits[bytenum]=bits[bytenum]|(state<<shift);
}

/*
 * Returns 1 when the two-bit slot `bitnum` of table `bits` holds the value
 * 3, and 0 otherwise. Slots are addressed exactly as in setbloombit():
 * bitnum modulo HASHBUCKET_SIZE*4, four slots per byte.
 */
unsigned char getbloombit(char *bits, unsigned int bitnum){
  unsigned int slot=bitnum%(HASHBUCKET_SIZE * 4);
  int bytenum=(slot&0xFFFFFFFC)>>2;
  int shift=(slot&0x00000003)*2;
  unsigned char state=(bits[bytenum]>>shift)&0x03;

  if(state==3)
    return(1);
  return(0);
}



/*
 * Stage 1: writes one (hash, block number) record per non-null block of
 * inputfile to outputfile.
 *
 * The input is read in chunks of 64 blocks of `blocksize` bytes. Each block
 * that is not all zero bytes gets a HASHBYTES+8 byte record: the 8-byte
 * hash from make_fnv_from_block() followed by the block index, byte-swapped
 * with swapends() so that it sorts correctly byte by byte. All-zero blocks
 * are counted but produce no record. A trailing partial block is not
 * counted, because fread() only reports complete blocks.
 *
 * *nonnullblocks is incremented once per record written. Returns the
 * number of complete blocks read. Exits if a file cannot be opened.
 */
unsigned long long hashfile(unsigned char* inputfile, unsigned char* outputfile, unsigned long long *nonnullblocks){
  printf("Stage 1: Producing block hash list.\n");

  FILE *readfile=fopen64(inputfile, "rb");
  if(readfile==NULL){
    printf("  Unable to open input file %s\n", inputfile);
    exit(1);
  }

  unsigned char *buffer=malloc(blocksize*64);
  if(buffer==NULL){
    printf("  Out of memory allocating block buffer.\n");
    exit(1);
  }
  unsigned long long blockno=0;

  FILE *writefile=fopen64(outputfile, "wb");
  if(writefile==NULL){
    printf("  Unable to create temporary file %s\n", outputfile);
    exit(1);
  }

  do{
    unsigned int gotblocks=fread(buffer, blocksize, 64, readfile);
    for(unsigned int n=0;n<gotblocks;n++){
      unsigned char *blockbuffer=buffer+(blocksize*n);
      //Test THIS block, not the first block of the chunk.
      if(!isnulls(blockbuffer)){
        //Build the record in its own buffer, so block data that has not
        //been examined yet is never overwritten, whatever the block size.
        unsigned char record[HASHBYTES+8];
        unsigned long long storedblockno=blockno;
        make_fnv_from_block(record, blockbuffer);
        swapends(&storedblockno);
        memcpy(record+HASHBYTES, &storedblockno, 8);
        fwrite(record, 1, HASHBYTES+8, writefile);
        (*nonnullblocks)++;
      }
      blockno++;
    }
    if(!(blockno & (unsigned long long)0xFFFFF0))
      printf("Completed block %llu (%llu MB)\n", blockno, (blockno*blocksize)/(1024*1024));
  }while(!feof(readfile)&&!ferror(readfile)); //A read error would otherwise loop forever.

  fclose(writefile);
  fclose(readfile);
  free(buffer);
  printf("  Done, hashed %llu of %llu blocks.\n", *nonnullblocks, blockno);
  return(blockno);
}
/*
void sortpass(char *infile, char *outfile, int byte, unsigned int recordlen){
  //This is a sort routine of my own design, so you probably havn't seen it before.
  //Its almost, but not quite, identical to the radix sort. Just tweeked for more efficient use of files to store intermediate stages.
  //The lists sorted can easily get too large for ram, or even the four-gig limit of virtual memory.
//  printf("(%u/%u)", byte, recordlen);
  unsigned char *tempfile1=gettempfilename();
  unsigned char *tempfile2=gettempfilename();
  sortpass_bit(infile, tempfile1, byte, 0, recordlen);printf("!");fflush(stdout);
  sortpass_bit(tempfile1, tempfile2, byte, 1, recordlen);printf("!");fflush(stdout);
  sortpass_bit(tempfile2, tempfile1, byte, 2, recordlen);printf("!");fflush(stdout);
  sortpass_bit(tempfile1, tempfile2, byte, 3, recordlen);printf("!");fflush(stdout);
  sortpass_bit(tempfile2, tempfile1, byte, 4, recordlen);printf("!");fflush(stdout);
  sortpass_bit(tempfile1, tempfile2, byte, 5, recordlen);printf("!");fflush(stdout);
  sortpass_bit(tempfile2, tempfile1, byte, 6, recordlen);printf("!");fflush(stdout);
  sortpass_bit(tempfile1, outfile, byte, 7, recordlen);printf("!");fflush(stdout);
  unlink(tempfile1);
  unlink(tempfile2);
  free(tempfile1);
  free(tempfile2);
}*/

/*
 * Stable in-memory sort of numblocks records on one key byte.
 *
 * Runs inmemradixsort_bit() for bits 0 through 7 of `byte`, alternating
 * between the two buffers. Eight passes is an even number, so the final
 * result lands back in `sortme`; `temp` is scratch space of the same size.
 */
void inmemradixsort_byte(unsigned char *sortme, unsigned char *temp, unsigned char byte, unsigned int recordlen, unsigned int numblocks){
  unsigned char *from=sortme;
  unsigned char *to=temp;
  for(unsigned char bit=0;bit<8;bit++){
    inmemradixsort_bit(from, to, byte, bit, recordlen, numblocks);
    //The output of this pass is the input of the next one.
    unsigned char *swap=from;
    from=to;
    to=swap;
  }
}




/*
 * The anti-uniq: copies from infile to outfile only the records whose hash
 * (first HASHBYTES bytes) also appears on a neighbouring record.
 *
 * infile must be sorted by hash, so that equal hashes are adjacent. Records
 * are HASHBYTES+8 bytes and are written in their input order. Returns the
 * number of records written. Exits if a file cannot be opened.
 *
 * Only the previous record is remembered: when the current record matches
 * it, the previous one is written first (unless it already has been) and
 * then the current one. No look-ahead buffer is involved, so inputs of
 * zero, one or two records need no special handling.
 */
unsigned long long dupesonly(char *infile, char *outfile){
  char prev[HASHBYTES+8];
  char curr[HASHBYTES+8];
  unsigned long long records=0;
  int haveprev=0;    //Becomes 1 once prev holds a real record.
  int prevwritten=0; //1 when prev has already been written to outfile.

  FILE *readfile=fopen64(infile, "rb");
  FILE *writefile=fopen64(outfile, "wb");
  if(!readfile||!writefile){
    printf("Could not open %s or %s.\n", infile, outfile);
    exit(1);
  }

  while(fread(curr, HASHBYTES+8, 1, readfile)==1){
    if(haveprev&&!memcmp(prev, curr, HASHBYTES)){
      if(!prevwritten){
        fwrite(prev, HASHBYTES+8, 1, writefile);
        records++;
      }
      fwrite(curr, HASHBYTES+8, 1, writefile);
      records++;
      prevwritten=1; //curr becomes prev below and is already in the output.
    }
    else
      prevwritten=0;
    memcpy(prev, curr, HASHBYTES+8);
    haveprev=1;
  }

  fclose(readfile);
  fclose(writefile);
  return records;
}

/*
 * Stage 4: turns the (hash, block) list into a (block, first block) list.
 *
 * The input holds HASHBYTES+8 byte records sorted by hash, so matching
 * hashes are consecutive; within a run of matching hashes the records are
 * expected to be in block-number order. The first record of each run is
 * remembered and produces no output. Every later record of the run
 * produces one 16-byte record: its own 8-byte block number followed by the
 * 8-byte block number of the run's first record. The hash is dropped
 * because the next stage re-orders by block number, which would otherwise
 * destroy the association.
 *
 * numrecords is only used for the progress message. Returns the number of
 * records written. Exits if a file cannot be opened.
 */
unsigned long long addfirstoccurancedata(char *infilename, char *outfilename, unsigned long long numrecords){
  printf("Reformatting %llu records.\n", numrecords);
  char *buffer=malloc(32); //Input record plus room for the appended first-occurrence field.
  char *lastrecord=malloc(HASHBYTES+8);
  FILE *infile=fopen64(infilename, "rb");
  FILE *outfile=fopen64(outfilename, "wb");
  unsigned long long recordswritten=0;
  if(!buffer||!lastrecord||!infile||!outfile){
    printf("Could not open %s or %s, or out of memory.\n", infilename, outfilename);
    exit(1);
  }

  //Driving both reads from fread() means an empty or one-record input
  //never compares uninitialised buffers.
  if(fread(lastrecord, HASHBYTES+8, 1, infile)==1){
    while(fread(buffer, HASHBYTES+8, 1, infile)==1){
      if(memcmp(lastrecord, buffer, HASHBYTES)){
        //New hash: this record starts a new run.
        memcpy(lastrecord, buffer, HASHBYTES+8);
      }
      else
      {
        //Same hash as the start of the run: append that record's block
        //number and emit the 16 bytes that follow the hash.
        memcpy(buffer+HASHBYTES+8, lastrecord+HASHBYTES, 8);
        fwrite(buffer+HASHBYTES, 16, 1, outfile); //16 bytes: Two 8-byte long longs.
        recordswritten++;
      }
    }
  }

  printf("  Wrote %llu records (Duplicate lists minus first occurance).\n", recordswritten);

  fclose(infile);
  fclose(outfile);
  free(buffer);
  free(lastrecord);
  return(recordswritten);
  //For reference outfile now contains a list of all blocks that are copies of a previous block.
  //The format is <block><prev block>. Both unsigned long longs.
}

/*
 * Stage 5.1: verifies every proposed redirect against the real data.
 *
 * infilename holds 16-byte records of two byte-swapped 8-byte block
 * numbers: the block to replace and the block it is meant to duplicate.
 * Both numbers are swapped back with swapends(), both blocks are read from
 * checkfilename and compared byte for byte. A record is written to
 * outfilename, in its swapped-back form, only when the two blocks are
 * identical. This is what makes the short 64-bit hash safe to use. A
 * redirect whose blocks cannot be read is treated as a collision.
 *
 * Prints a summary line. Exits if a file cannot be opened or memory runs
 * out.
 */
void checkcollisions(char *infilename, char *outfilename, char *checkfilename){
  unsigned long long source;
  unsigned long long dest;
  unsigned long long in=0;
  unsigned long long out=0;
  unsigned long long dud=0;
  char *sourceblock=malloc(blocksize);
  char *destblock=malloc(blocksize);
  char *buffer=malloc(16*128);
  FILE *infile=fopen64(infilename, "rb");
  FILE *outfile=fopen64(outfilename, "wb");
  FILE *checkfile=fopen64(checkfilename, "rb");
  if(!sourceblock||!destblock||!buffer||!infile||!outfile||!checkfile){
    printf("Could not open files for collision check, or out of memory.\n");
    exit(1);
  }

  size_t gotlines;
  //Records are read 128 at a time to keep the I/O requests large.
  while((gotlines=fread(buffer, 16, 128, infile))>0){
    in+=gotlines;
    for(size_t n=0;n<gotlines;n++){
      char *entry=buffer+(16*n);
      swapends((unsigned long long *)entry);
      swapends((unsigned long long *)(entry+8));
      memcpy(&dest, entry, 8);
      memcpy(&source, entry+8, 8);
      //A failed seek or read leaves stale data in the buffers, so the
      //comparison is only trusted when all four calls succeeded.
      int readok=
        fseeko64(checkfile, source*(unsigned long long)blocksize, SEEK_SET)==0 &&
        fread(sourceblock, blocksize, 1, checkfile)==1 &&
        fseeko64(checkfile, dest*(unsigned long long)blocksize, SEEK_SET)==0 &&
        fread(destblock, blocksize, 1, checkfile)==1;
      if(readok&&!memcmp(sourceblock, destblock, blocksize)){
        fwrite(entry, 16, 1, outfile);
        out++;
      }
      else
        dud++;
    }
  }
  fclose(infile);
  fclose(outfile);
  fclose(checkfile);
  free(sourceblock);
  free(destblock);
  free(buffer);
  printf("Checked %llu redirects, %llu good, %llu collisions.\n", in, out, dud);
}

/*
 * Stage 6: writes the compacted output file.
 *
 *   sourcefile      the original input
 *   compressedfile  the output file to create
 *   finallist       16-byte records (block, source block), both native
 *                   unsigned long longs, ordered by block
 *   numblocks       number of blocks in sourcefile
 *
 * The source and the list are walked in lock-step. A block named in the
 * list is replaced by a reference: E7 05 when its source is the block
 * after the previously referenced one, E7 02 plus a 4-byte number when the
 * source fits in 32 bits, otherwise E7 01 plus an 8-byte number. Runs of
 * blocks that are not in the list are copied through
 * writenewdata_escaped(). The stream ends with an end-of-file marker.
 *
 * Returns 0 on success and 1 when a file cannot be opened, memory runs
 * out, or the output cannot be written completely. Exits if the first
 * list entry refers to a block beyond the end of the file.
 */
int compressfile(char *sourcefile, char *compressedfile, char *finallist, unsigned long long numblocks){
  FILE *infile=fopen64(sourcefile, "rb");
  FILE *outfile=fopen64(compressedfile, "wb");
  FILE *listfile=fopen64(finallist, "rb");
  char *record=malloc(32);
  char *block=malloc(blocksize);
  if(!outfile||!infile||!listfile||!record||!block){
    if(!outfile)
      printf("Could not open output file.\n");
    else
      printf("Could not open input files for compression, or out of memory.\n");
    if(infile) fclose(infile);
    if(outfile) fclose(outfile);
    if(listfile) fclose(listfile);
    free(record);
    free(block);
    return(1);
  }
  outputheader(outfile);

  //This uses a lock-step approach to compression.
  //Both infile and listfile are read, and kept in-step.
  //The two together are processed to form outfile.
  memset(record, 0xFF, 32);
  unsigned long long recordblock;
  unsigned long long sourceblock;
  unsigned long long outstandingnewblocks=0;
  unsigned long long lastreferencedblock=0; //Used for the n+1 encoding, command byte 0x05.
  printf("Compressing %llu blocks.\n", numblocks);
  if(fread(record, 16, 1, listfile)==1){
    memcpy(&recordblock, record, 8);
    memcpy(&sourceblock, record+8, 8);
    if(sourceblock>numblocks){
      printf("Error: First block appears to be after the file ends. Something went horribly wrong.\n");
      exit(1);
    }
  }else{
    //Empty list: no block number can ever match this sentinel, so the
    //whole file is copied through as new data.
    recordblock=~0ULL;
    sourceblock=0;
  }
  #ifdef DEBUG
    printf("First in the list, %llu references %llu\n", recordblock, sourceblock);
  #endif
  for(unsigned long long blockno=0; blockno<numblocks; blockno++){
    if(blockno!=recordblock){
      //Not in the list: part of a run of new data, flushed later in one go.
      outstandingnewblocks++;
      continue;
    }
    //Start of a run of old data.
    if(outstandingnewblocks){
      writenewdata_escaped(outstandingnewblocks, infile, outfile);
      outstandingnewblocks=0;
    }
    if(sourceblock==(lastreferencedblock+1)){
      fwrite("\xE7", 1, 1, outfile);
      fwrite("\x05", 1, 1, outfile);
      lastreferencedblock++;
      #ifdef DEBUG
      printf("%llu -> %llu (n+1)\n", blockno, sourceblock);
      #endif
    }
    else if(sourceblock<=0xffffffff){
      fwrite("\xE7", 1, 1, outfile);
      fwrite("\x02", 1, 1, outfile);
      unsigned int sourceblockint=sourceblock;
      lastreferencedblock=sourceblock;
      fwrite(&sourceblockint, 4, 1, outfile);
      #ifdef DEBUG
      printf("%llu -> %u (32)\n", blockno, sourceblockint);
      #endif
    }else{
      #ifdef DEBUG
      printf("%llu -> %llu (64)\n", blockno, sourceblock);
      #endif
      lastreferencedblock=sourceblock;
      fwrite("\xE7", 1, 1, outfile);
      fwrite("\x01", 1, 1, outfile);
      fwrite(&sourceblock, 8, 1, outfile);
    }
    fread(block, blocksize, 1, infile); //Just to move it along.
    if(fread(record, 16, 1, listfile)==1){
      memcpy(&recordblock, record, 8);
      memcpy(&sourceblock, record+8, 8);
    }else{
      recordblock=~0ULL; //List exhausted: everything that follows is new data.
    }
  }
  //Copy any outstanding data.
  if(outstandingnewblocks){
    writenewdata_escaped(outstandingnewblocks, infile, outfile);
    outstandingnewblocks=0;
  }
  free(record);
  free(block);
  fwrite("\xe7\x06\x00\x00\x00\x00", 6, 1, outfile); //End-of-file indicator.
  fclose(infile);
  fclose(listfile);
  if(fclose(outfile)!=0){
    //fclose flushes; a failure here means the output is incomplete.
    printf("Error writing output file.\n");
    return(1);
  }
  printf("Compression done.\n");
  return(0);
}

/*
 * Copies newdatablocks blocks of `blocksize` bytes from source to dest
 * using the escaped encoding:
 *   - an all-zero block is written as the two bytes E7 03;
 *   - a block whose first byte is E7 gets an extra E7 in front of it;
 *   - any other block is written unchanged.
 */
void writenewdata_escaped(unsigned long long newdatablocks, FILE *source, FILE *dest){
    #ifdef DEBUG
    printf("Escaping %llu new blocks.\n", newdatablocks);
    #endif
    unsigned char *blockbuffer=malloc(blocksize);
    while(newdatablocks){
      newdatablocks--;
      fread(blockbuffer, blocksize, 1, source);
      if(isnulls(blockbuffer)){
        //Null block: the two-byte marker stands in for the whole block.
        fputc(0xE7, dest);
        fputc(3, dest);
        continue;
      }
      //Double a leading E7 so it is not taken for the start of a command.
      if(blockbuffer[0]==0xE7)
        fputc(0xE7, dest);
      fwrite(blockbuffer, blocksize, 1, dest);
    }
    free(blockbuffer);
}

//void writenewdata_unescaped(unsigned long long newdatablocks, FILE *source, FILE *dest){
//This function was removed due to an inability to skip null blocks without major rearchitecting.
//}

/*
 * Returns 1 when the first `blocksize` bytes of buffer are all zero, and 0
 * as soon as a non-zero byte is found.
 */
int isnulls(unsigned char *buffer){
  const unsigned char *end=buffer+blocksize;
  for(const unsigned char *cursor=buffer;cursor!=end;cursor++)
    if(*cursor!=0)
      return(0);
  return(1);
}

/*
 * Checks that the HASHBYTES+8 byte records in filename are in
 * non-decreasing order of their first HASHBYTES bytes.
 *
 * Returns the number of records in the file. Prints a diagnostic and exits
 * on the first out-of-order pair, or when the file cannot be opened.
 *
 * NOTE(review): every record after the first is also rejected when its
 * second 8-byte field is >= its first 8-byte field. That looks like a
 * check meant for (block, source block) records rather than (hash, block)
 * records; it is kept unchanged here. Confirm the intended record format
 * before re-enabling the call to this function.
 */
unsigned long long validatesorted(unsigned char *filename){
  FILE *checkfile=fopen(filename, "rb");
  if(!checkfile){
    printf("Could not open %s for validation.\n", filename);
    exit(1);
  }
  unsigned char temp[HASHBYTES+8];
  unsigned char last[HASHBYTES+8];
  unsigned long long count=0;

  //Each comparison runs only on a record that was really read, so a
  //one-record file never touches an uninitialised buffer.
  if(fread(last, HASHBYTES+8, 1, checkfile)==1){
    count=1;
    while(fread(temp, HASHBYTES+8, 1, checkfile)==1){
      if(memcmp(temp, last, HASHBYTES)<0){
        //count-1 is the zero-based index of the earlier record of the pair.
        printf("Validation failure - out-of-order records found at record %llu. Something went wrong.\n", count-1);
        for(int n=0;n<8;n++)
          printf("%02X ", last[n]);
        printf("\n");
        for(int n=0;n<8;n++)
          printf("%02X ", temp[n]);
        printf("\n");
        exit(1);
      }
      unsigned long long dest;
      unsigned long long src;
      memcpy(&dest, temp, 8);
      memcpy(&src, temp+8, 8);
      if(src>=dest){
        printf("Dest before src: Something went horribly wrong. %llu %llu\n", dest, src);
        exit(1);
      }
      count++;
      memcpy(last, temp, HASHBYTES+8);
    }
  }

  fclose(checkfile);
  return(count);
}

/*
 * Hashes the first `blocksize` bytes of block to 64 bits and stores the
 * 8 result bytes, in native byte order, at dest.
 *
 * Each input byte is XOR-ed into the running hash, which is then
 * multiplied by the constant below (an FNV-style hash). The hash is fully
 * computed before dest is written, so dest may be the same pointer as
 * block.
 */
void make_fnv_from_block(unsigned char *dest, unsigned char *block){
    unsigned long long digest = 14695981039346656037ull; // Starting value.
    const unsigned char *end = block + blocksize;
    for(const unsigned char *cursor = block; cursor != end; cursor++)
    {
        digest ^= *cursor;
        digest *= 1099511628211; // Multiplier applied after every byte.
    }
    memcpy(dest, &digest, 8);
}

/*
 * Writes the file header to output:
 *   - the 16-byte magic "VDDCompactedFile";
 *   - one version byte, 0x02;
 *   - only when blocksize is not 512: a 4-byte length (8), the tag "BKSZ"
 *     and the 4-byte block size, length and size in native byte order;
 *   - four zero bytes.
 */
void outputheader(FILE *output){
  static const char magic[]="VDDCompactedFile";
  fwrite(magic, 16, 1, output);
  fputc(0x02, output); //Min version.

  if(blocksize!=512){
    unsigned int headerextensionsize=8;
    fwrite(&headerextensionsize, 4, 1, output);
    fwrite("BKSZ", 1, 4, output);
    fwrite(&blocksize, 4, 1, output);
  }

  for(int n=0;n<4;n++)
    fputc(0x00, output);
}

/*
 * Returns a newly allocated temp-file name of the form "BLDD<number>.tmp",
 * where <number> is rand() reduced modulo 0xFFFFFF. The name is placed
 * under /tmp/, or under ./ when WINDOWS is defined.
 *
 * Only the name is produced; no file is created and nothing checks whether
 * the name is already in use. The caller owns the string and frees it.
 */
unsigned char *gettempfilename(){
  enum { NAMEBUFSIZE = 30 };
  #ifdef WINDOWS
    static const char pattern[]="./BLDD%u.tmp";
  #else
    static const char pattern[]="/tmp/BLDD%u.tmp";
  #endif
  char *name=malloc(NAMEBUFSIZE);
  unsigned int suffix=rand() % 0xFFFFFF;
  snprintf(name, NAMEBUFSIZE, pattern, suffix);
  return((unsigned char *)name);
}

/*
 * Reverses the order of the 8 bytes of *num in place. This converts
 * between little-endian and big-endian storage; calling it twice restores
 * the original value. The swap is unconditional: nothing here checks the
 * byte order of the machine it runs on.
 */
void swapends(unsigned long long *num){
  unsigned char *bytes=(unsigned char *)num;
  for(int low=0, high=7;low<high;low++, high--){
    unsigned char held=bytes[low];
    bytes[low]=bytes[high];
    bytes[high]=held;
  }
}
