#!/usr/bin/perl
#********************************************************************************
# IBM Storage Protect
# 
# name: create_seed_file.pl
#
# desc: The create_seed_file.pl utility is used to create test data files that have a
#       predictable response to LZ4 compression.  Optionally, there is also the ability to
#       influence to some degree the resulting average for variable deduplication
#       extent sizes.  The file size and chunk size parameters are specified in bytes.
#
#       On Windows, this script depends on crerand.exe existing in .\bin\windows\
#       relative to the current working directory.
#
# usage:  perl create_seed_file.pl <file path> <file size> <lz4 compression ratio> [<max chunk size>]
#
# Notice: This program is provided as a tool intended for use by IBM Internal, 
#         IBM Business Partners, and IBM Customers. This program is provided as is, 
#         without support, and without warranty of any kind expressed or implied.
#
# (C) Copyright International Business Machines Corp. 2013, 2018
#********************************************************************************

# ------------------------------------------------------------------------------
# Main script: parse and validate the command line, then generate the file.
#
# Arguments (positional):
#   <file path>              file to create; it must not exist yet
#   <file size>              size of the file in bytes
#   <lz4 compression ratio>  number from 0 up to (not including) 100
#   [<max chunk size>]       optional; at least 51200 and at most <file size>
#
# Validation failures end the script with a message and a non-zero exit status.
# ------------------------------------------------------------------------------

if ((@ARGV != 3) && (@ARGV != 4)) {
  die "USAGE: perl create_seed_file.pl <file path> <file size> <lz4 compression ratio> [<max chunk size>] \n";
}

# $maxchunksize stays undefined when the optional fourth argument is omitted.
$filepath = shift(@ARGV);
$filesize = shift(@ARGV);
$lz4cmpratio = shift(@ARGV);
$maxchunksize = shift(@ARGV);

# 64 fixed characters.  When a maximum chunk size is given, generatefile writes
# them into the last 64 bytes of every <max chunk size> window of the file.
# NOTE(review): presumably a recognizable marker that steers variable-size
# deduplication extent boundaries -- confirm.
@FPBuffer = ("y", "k", "x", "q", "r", "i", "o", "n",
             "t", "r", "z", "q", "j", "m", "l", "h",
             "x", "x", "c", "x", "k", "v", "o", "a",
             "t", "m", "b", "u", "y", "u", "w", "h",
             "d", "h", "r", "e", "o", "j", "o", "s",
             "t", "c", "z", "r", "i", "k", "n", "k",
             "h", "g", "f", "i", "x", "x", "o", "e",
             "e", "t", "f", "r", "z", "c", "g", "g");

if (($lz4cmpratio >= 100) || ($lz4cmpratio < 0))
{
  print "ERROR: The compression ratio must be between 0 and 99\n";
  exit 1;
}

# $ss is a package global that code outside this block may read, so its
# values are left exactly as they were.  It is NOT a path separator and is
# therefore no longer used for splitting $filepath below.
if (isWin() == 1)
{
  $ss = ".\\bin\\windows\\";
  if (! -e ".\\bin\\windows\\crerand.exe")
  {
    print "ERROR: Cannot locate the required file crerand.exe\n";
    exit 1;
  }
}
else
{
  $ss = "./";
}

# Find the last path separator in the target path.  "/" is accepted on every
# platform; Windows additionally accepts "\".
my $lastslashpos = rindex($filepath, "/");

if (isWin() == 1)
{
  my $lastbackslashpos = rindex($filepath, "\\");
  if ($lastbackslashpos > $lastslashpos)
  {
    $lastslashpos = $lastbackslashpos;
  }
}

# Only paths that name a directory are checked; a bare file name is created
# in the current working directory.
if ($lastslashpos >= 0)
{
  # Keep the separator itself when the file sits directly under the root,
  # otherwise the directory name would be the empty string.
  my $dirlength = ($lastslashpos > 0) ? $lastslashpos : 1;
  my $dirpath = substr($filepath, 0, $dirlength);
  if (! -d $dirpath )
  {
    print "ERROR: Directory $dirpath does not exist\n";
    exit 1;
  }
}

# Never overwrite an existing file.
if ( -f $filepath )
{
  print "The file $filepath already exists\n";
  exit 1;
}

# generatefile reads $maxchunksizespecified and $maxchunksize as globals.
$maxchunksizespecified = 0;

if (defined($maxchunksize) && ($maxchunksize ne ""))
{
  $maxchunksizespecified = 1;

  if ($maxchunksize < 51200 || $maxchunksize > $filesize)
  {
    die "The maximum chunk size must be at least 51200 and no larger than the file size\n";
  }
}

generatefile($filepath, $filesize, $lz4cmpratio);

# ------------------------------------------------------------------------------
# generatefile(<file name>, <file size>, <compression ratio>)
#
# Writes <file size> bytes to <file name>.  A pool of random bytes covering
# (100 - <compression ratio>) percent of the file size is read first; the file
# is then written in 1000-byte segments, each copied from a segment of that
# pool.  Because the pool is smaller than the file, pool segments repeat.
#
# When the global $maxchunksizespecified is 1, the last 64 bytes of every
# $maxchunksize-byte window are taken from the global @FPBuffer instead of
# from the random pool.
#
# The random pool comes from /dev/urandom, or on Windows from a temporary
# file produced by crerand.exe, which is deleted afterwards.
#
# Dies when the random source or the output file cannot be opened, read or
# written.  Returns nothing useful.
# ------------------------------------------------------------------------------
sub generatefile
{
  my ($filename, $filesize, $cmprratio) = @_;

  my $segmentlength = 1000;

  my $randomlength = int(((100 - $cmprratio) * $filesize) / 100);

  # A tiny file can round the random share down to zero, which would make the
  # modulus below illegal.  Always keep at least one random byte.
  if (($randomlength < 1) && ($filesize > 0))
  {
    $randomlength = 1;
  }

  # generate the random part

  my $randombytes = "";
  my $randomsource;

  if (isWin() == 1)
  {
    my $sizeKB = int($filesize / 1024 + 1);
    my $seed = int(rand $filesize) + 1;
    my $randName = "gen_rand_" . $sizeKB;

    # Run the same executable whose existence the main script verifies.
    my $crerand = ".\\bin\\windows\\crerand.exe";
    `$crerand $randName $sizeKB $seed`;

    $randomsource = $randName;
    open(my $randh, "<", $randName) or die "Unable to open $randName: $!\n";
    # Without binmode, Windows text mode translates CR/LF pairs and stops
    # reading at the first Ctrl-Z byte.
    binmode $randh;
    $randombytes = _generatefile_read_bytes($randh, $randomlength);
    close $randh;
    unlink($randName);
  }
  else
  {
    $randomsource = "/dev/urandom";
    open(my $randh, "<", $randomsource) or die "Unable to open $randomsource: $!\n";
    binmode $randh;
    $randombytes = _generatefile_read_bytes($randh, $randomlength);
    close $randh;
  }

  if (length($randombytes) < $randomlength)
  {
    die "Unable to read $randomlength random bytes from $randomsource\n";
  }

  # Number of 1000-byte segments in the random pool and in the output file,
  # each rounded up to cover a trailing partial segment.
  my $numrandomsegments = int($randomlength / $segmentlength);
  if (($randomlength % $segmentlength) > 0)
  {
    $numrandomsegments++;
  }

  my $numsegments = int($filesize / $segmentlength);
  if (($filesize % $segmentlength) > 0)
  {
    $numsegments++;
  }

  open(my $outh, ">", $filename) or die "Unable to open $filename\n";
  binmode $outh;

  my $k = 0;    # absolute byte offset in the output file
  my $v = 0;    # index of the output segment being written

  while ($k < $filesize)
  {
    # Spread the random segments evenly over the output segments.
    my $selectedseg = int(($v * $numrandomsegments) / $numsegments);
    my $segmentbase = $segmentlength * $selectedseg;
    my $segment = "";

    for (my $u = 0; ($u < $segmentlength) && ($k < $filesize); $u++, $k++)
    {
      if (($maxchunksizespecified == 1) && (($k % $maxchunksize) >= ($maxchunksize - 64)))
      {
        # Inside the last 64 bytes of a chunk window: emit the fixed pattern.
        $segment .= $FPBuffer[64 - ($maxchunksize - ($k % $maxchunksize))];
      }
      else
      {
        $segment .= substr($randombytes, ($segmentbase + $u) % $randomlength, 1);
      }
    }

    print {$outh} $segment or die "Unable to write to $filename: $!\n";
    $v++;
  }

  # Buffered write errors only surface when the handle is closed.
  close($outh) or die "Unable to write to $filename: $!\n";
}

# Reads up to <length> bytes from the open handle <fh> and returns them as a
# string.  The result is shorter than <length> only when the handle reaches
# end of file or a read error occurs; the caller checks the length.
sub _generatefile_read_bytes
{
  my ($fh, $length) = @_;

  my $buffer = "";

  while (length($buffer) < $length)
  {
    my $got = read($fh, $buffer, $length - length($buffer), length($buffer));
    last unless $got;    # undef on error, 0 at end of file
  }

  return $buffer;
}

# ------------------------------------------------------------------------------
# isWin()
#
# Returns 1 when the script runs under a native Windows Perl (the operating
# system name in $^O starts with "MSWin32"), otherwise 0.
# ------------------------------------------------------------------------------
sub isWin ()
{
  # $^O is the built-in variable containing the operating system name.  It is
  # copied into a lexical so that no package global is modified on each call.
  my $osname = $^O;

  return ($osname =~ m#^MSWin32#) ? 1 : 0;
}
