// Green Gandalf's perlin noise shader v5.0 - seamless and basic versions
// Previous version: "perlinNoiseV4d.fx".
//
//   ** This is still work in progress - so be warned!
//
// This is designed to work with a simple 2x2 quad with corners clockwise from top left:
// (-1, 1), (1, 1), (1, -1), (-1, -1) and corresponding UV coordinates (0, 0), (1, 0), (1, 1), (0, 1).
//
// Intended to be used with the DBPro set camera to image command.
//
// Creates a Perlin noise style image with up to 8 octaves (numbered 0 to 7).
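// Contains two techniques (perlinNoiseBasic and its pixel shader are currently commented out
// further down - uncomment both to use it):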
//
//    perlinNoiseBasic - produces a single image that is not tileable, i.e. it is not seamless
//                       (it is slightly faster than the seamless version next)
//    perlinNoiseSeamless - produces a single image that tiles seamlessly and includes an
//                          additional "seed" feature that allows you to create different
//                          images which tile together (as in endless terrain).

// Includes two RNGs:
//
//    random1 - one I borrowed and modified from this site:
//                     http://obge.paradice-insight.us/wiki/Includes_(Effects)
//              unfortunately that site doesn't seem to be available anymore - but it's the only
//              reference I have :(
//    random2 - my own simpler ad hoc RNG (my tests suggest that random1 is slightly slower
//              but gives slightly better results). Currently commented out.
//
// Suggestions for a better random generator welcome.
//
// Created 16 August 2014, modified 24 August 2014, minor edit 11 February 2017.
//
// *** Please note: This shader might not run correctly in the current version of Dark Shader.
// ***              I've no idea why. However, the compilation errors seem to be correct which
// ***              is what I use Dark Shader for.
// ***
// ***              The good news is that the shader seems to load and run correctly in DBPro
// ***              although the initial loading time can be slow (see fxc usage comments next).
// ***
// ***              More importantly, this shader can be compiled using fxc.exe (from the DX9 SDK) in a DOS Command Box.
// ***              Make sure that fxc.exe is in the same folder as your shader.
// ***              Typical fxc usage:
// ***                 fxc perlinNoiseV5.fx /LD /T fx_2_0 /Fo perlinNoiseV5.obj
// ***              Note that this compilation is very slow (about 3 - 4 minutes!) but only needs to be performed once
// ***              since the compiled shader code can be loaded directly into Dark Basic Professional
// ***              using the usual load effect command, e.g.
// ***                 load effect "perlinNoiseV5.obj", 2, 0
// ***              This bypasses the slow compilation step in DBP (which I guess uses the same fx compiler).
// ***              Another undocumented feature of DBP!

// Effect parameters. All of these are plain (non-static) globals, so they can be set by the
// application by name before rendering.

// Combined world-view-projection matrix bound through the WorldViewProjection semantic.
// NOTE(review): none of the shader code visible in this file references mWVP - the vertex
// shader passes the incoming position through without transforming it.
float4x4 mWVP : WorldViewProjection;

// Dark Shader style tweakables have been removed from this version since they can't be used
// in Dark Shader with this shader.

float boundFactor = 2.0; // multiplies the normalised noise before it is centred on 0.5 in the pixel shader
                         // not very sure what would be the best value for this - but this choice seems to work
float octaveDivisor = 0.6; // each octave's weight is the previous octave's weight multiplied by this value
                           // make small adjustments to this to change the appearance of the clouds
                           // should probably be in the range 0.4-0.8
int maxOctaves = 8; // upper bound of the octave loop
                    // don't change this - as the octaves are hard-coded in the shader

// The pixel shader takes its per-octave on/off switches from list1 and list2:
// list1.xyzw select octaves 0-3 and list2.xyzw select octaves 4-7 (a value of 1 enables the octave).
float4 list1 = {1.0, 1.0, 1.0, 1.0}; // used for initialising first four octaves (Dark Shader bug :( )
float4 list2 = {1.0, 1.0, 1.0, 1.0}; // used for initialising last four octaves
int octaveList[8] = {1, 1, 1, 1, 1, 1, 1, 1}; // list of which octaves to use - use all by default

// various seeds for random number calculations
float2 seed = {0.0, 0.0};    // added to every lookup position inside random()
                             // this enables you to get different realisations of the image
float2 seed2 = {0.61, 0.37}; // offset used when generating the second component of each corner's coefficients
                             // this enables you to get different X and Y components for the tangent slopes
                             // and is used to change the interior points of each image
// the next 8 seeds are for initialising the edges in a hopefully intuitive way that enables seamless
// endless terrains to be constructed (eventually!)
// In the seamless pixel shader each of these replaces seed2 for tile corners lying on the
// corresponding edge (N, E, S, W) or corner (NW, NE, SE, SW) of the image.
float2 seedN = {0.0, 0.0};
float2 seedE = {0.0, 0.0};
float2 seedS = {0.0, 0.0};
float2 seedW = {0.0, 0.0};
float2 seedNW = {0.0, 0.0};
float2 seedNE = {0.0, 0.0};
float2 seedSE = {0.0, 0.0};
float2 seedSW = {0.0, 0.0};

// Positions of the four corners of a tile in tile-relative coordinates
// (x increases to the east, y increases to the south).
float2 cornerNW = {0.0, 0.0}; // to simplify referencing the four corners of each tile
float2 cornerSW = {0.0, 1.0}; // but does it result in unnecessary multiplications and additions?
float2 cornerNE = {1.0, 0.0};
float2 cornerSE = {1.0, 1.0};

// The vertex shader shifts the quad by (-x, +y) of this value.
float2 invSourceTextureWidth = {1.0/512.0, 1.0/512.0}; // these should be initialised in the application
                                                       // to match your target image dimensions

// Per-vertex data read by the vertex shader.
struct VSInput
{
  float4 pos : position;   // vertex position
  float2 UV  : texcoord0;  // texture coordinate
};

// Data produced by the vertex shader.
struct VSOutput
{
  float4 pos : position;   // output position
  float2 UV  : texcoord0;  // texture coordinate handed on to the pixel shader
};

// Interpolated data read by the pixel shader.
struct PSInput
{
  float2 UV : texcoord0;   // texture coordinate of the current pixel
};

// Pixel shader result.
struct PSOutput
{
  float4 colour : color;   // colour written to the render target
};

float random1(in float2 p)
// Hash-style pseudo-random number generator.
// Simplified version of rand_1_5 from this page: http://obge.paradice-insight.us/wiki/Includes_(Effects)
// (unfortunately the above link is no longer available :( )
// Works quite well but what's the theory behind this?
//
//   p       - 2D position to hash
//   returns - a pseudo-random float in the range [0, 1)
//             with weak correlation between neighbouring values of p (unless very close? how close??)
{
  // collapse the 2D position to a single number
  float projected = dot(p, float2(25.9796, 156.466));
  // blow the sine up by a large factor and keep only the fractional part
  float scrambled = sin(projected) * 43758.5453;
  return frac(scrambled);
}

/*
float random2(in float2 p)
// my own ad hoc non-linear RNG using almost completely arbitrary numbers
// appears to produce acceptable textures
{ float x = dot(p, float2(57.3791, 493.15937)+13.50987);
  return frac(x * x * 13.50987);
}
*/

float random(in float2 p)
// Single entry point for random numbers, so that only this function needs to change
// if another RNG is used.
//
//   p       - 2D lookup position; the global seed is added and the sum is wrapped with frac()
//             before it is passed to the RNG
//   returns - a pseudo random float in the range 0 to 1
{
  float2 wrapped = frac(p + seed);
  return random1(wrapped);
  // to try the other RNG, pass the same wrapped value to random2 instead
  // (used for testing logic - but random1 may give better results)
}

/*
float4 mod_div(float4 a, float4 b, out float4 d) {
// accurate mod function if necessary - not used but kept here just in case ...
d = floor(a/b);
float4 r = a - d*b;
// handle case where division off by -1 ulp
d = (r<0) ? d-1 : d;
r = (r<0) ? r+b : r;
// handle case where division off by +1 ulp
d = (r<b) ? d : d+1;
r = (r<b) ? r : r-b;
return r;
}
*/

// Vertex shader shared by the techniques in this file.
// The incoming position is not transformed by any matrix; it is only shifted by
// (-invSourceTextureWidth.x, +invSourceTextureWidth.y) and the UV is passed through unchanged.
// NOTE(review): the shift is presumably the Direct3D 9 half-pixel correction for a
// render-target-sized quad - confirm against the application's target dimensions.
VSOutput VShader(VSInput In, VSOutput Out)
{
  float2 shift = float2( -invSourceTextureWidth.x, invSourceTextureWidth.y );
  Out.pos = float4( In.pos.xy + shift, In.pos.zw );
  Out.UV  = In.UV;
  return Out;
};

/*
PSOutput PShaderBasic(PSInput In, PSOutput Out)
{ octaveList[0] = list1.x;    // will this fudge work? [Yes, it does - let's hope the shader compiler extracts this as a pre-shader.]
  octaveList[1] = list1.y;
  octaveList[2] = list1.z;
  octaveList[3] = list1.w;
  octaveList[4] = list2.x;
  octaveList[5] = list2.y;
  octaveList[6] = list2.z;
  octaveList[7] = list2.w;
  float octaveWeight = 1.0;
  float invTileWidth = 0.5;
  float tileWidth = 2.0;
  float result = 0.0; // Perlin noise should range between -1 and +1 so for colour need to centre and rescale at 0.5 at end
  float upperBound = 0.0;
  float2 coeffsNW;
  float2 coeffsSW;
  float2 coeffsNE;
  float2 coeffsSE;
  // several octaves
  for ( int i = 0; i < maxOctaves; i++ )
  { octaveWeight *= octaveDivisor;
    invTileWidth *= 2.0;
    tileWidth *= 0.5;
    float tolerance = tileWidth * 0.5;
    float tolerance2 = 1.0 - tileWidth - tolerance;
    if (octaveList[i] == 1)
    { // find relative position of this pixel within current tile
      float2 imageUV = frac(invTileWidth * In.UV);
      // recalculate the weights
      float2 weights = imageUV * imageUV * imageUV * (6.0 * imageUV * imageUV - 15.0 * imageUV + 10.0); // revised Perlin weights

      // find coordinates of corners of current tile
      float2 offset  = floor(invTileWidth * In.UV) * tileWidth;

      // default UV values of tile corners - change where necessary in seamless technique
      float2 tileUV_NW = cornerNW*tileWidth+offset;
      float2 tileUV_SW = cornerSW*tileWidth+offset;
      float2 tileUV_NE = cornerNE*tileWidth+offset;
      float2 tileUV_SE = cornerSE*tileWidth+offset;

      // finally we can compute the coefficients for the four corner tangents
      coeffsNW = float2 (random(tileUV_NW), random(tileUV_NW+seed2))-0.5.xx; // top left
      coeffsSW = float2 (random(tileUV_SW), random(tileUV_SW+seed2))-0.5.xx; // lower left
      coeffsNE = float2 (random(tileUV_NE), random(tileUV_NE+seed2))-0.5.xx; // top right
      coeffsSE = float2 (random(tileUV_SE), random(tileUV_SE+seed2))-0.5.xx; // lower right

      // find tangents at corners
      float tanNW = dot(coeffsNW, imageUV-cornerNW);
      float tanSW = dot(coeffsSW, imageUV-cornerSW);
      float tanNE = dot(coeffsNE, imageUV-cornerNE);
      float tanSE = dot(coeffsSE, imageUV-cornerSE);

      // interpolate tangents for current intermediate point
      float tA = lerp(tanNW, tanNE, weights.x);
      float tB = lerp(tanSW, tanSE, weights.x);
      result += lerp(tA, tB, weights.y) * octaveWeight; // need to check sensible values for this - e.g. what is max possible value unscaled?
      upperBound += octaveWeight * octaveDivisor;
    }
  }
  // rescale noise component from range (-0.5, 0.5) to range (0, 1) (heuristic rule) to avoid black or whiteout
  result = 0.5 + boundFactor * result/upperBound; // produces standard cloud like noise
 // result = abs(2.0 * boundFactor * result/upperBound); // produces river effect
  Out.colour = float4 (0.0, result, 0.0, 1.0);
  return Out;
};
*/

// Pixel shader for the perlinNoiseSeamless technique.
//
// For every enabled octave i (0..maxOctaves-1) the image is divided into tiles of width
// 2^-i. Each tile corner gets a pseudo-random 2D coefficient pair, the four corner
// "tangents" are blended with the quintic Perlin weights, and the octave is accumulated with
// weight octaveDivisor^(i+1). Tile corners that lie on an edge or corner of the image have
// their across-edge coordinate forced to 0 and use the matching edge/corner seed
// (seedN, seedE, ..., seedNW, ...) in place of seed2.
//
//   In.UV   - texture coordinate of the current pixel
//   returns - Out.colour = (0, noise, 0, 1), where noise is centred on 0.5
//
// Changes from the previous revision:
//   * the octave switches are copied into a local array instead of being written to the global
//     octaveList (newer fxc builds reject writes to globals unless compatibility mode is enabled)
//   * the final normalisation no longer divides by zero when no octave is enabled
PSOutput PShaderSeamless(PSInput In, PSOutput Out)
{ // local copy of the octave switches (float to int conversion as before)
  int octaveEnabled[8];
  octaveEnabled[0] = list1.x;
  octaveEnabled[1] = list1.y;
  octaveEnabled[2] = list1.z;
  octaveEnabled[3] = list1.w;
  octaveEnabled[4] = list2.x;
  octaveEnabled[5] = list2.y;
  octaveEnabled[6] = list2.z;
  octaveEnabled[7] = list2.w;
  float octaveWeight = 1.0;
  float invTileWidth = 0.5; // doubled before first use, so octave 0 uses 1.0
  float tileWidth = 2.0;    // halved before first use, so octave 0 uses 1.0 (a single tile)
  float result = 0.0; // Perlin noise should range between -1 and +1 so for colour need to centre and rescale at 0.5 at end
  float upperBound = 0.0; // sum of (octave weight * octaveDivisor) over the enabled octaves - used to normalise result
  float2 coeffsNW;
  float2 coeffsSW;
  float2 coeffsNE;
  float2 coeffsSE;
  // several octaves
  for ( int i = 0; i < maxOctaves; i++ )
  { octaveWeight *= octaveDivisor;
    invTileWidth *= 2.0;
    tileWidth *= 0.5;
    // offset (below) is a multiple of tileWidth, so comparing it against these half-tile
    // thresholds identifies the first and last tile without relying on exact float equality:
    //   offset < tolerance   <=> first tile (offset 0)
    //   offset > tolerance2  <=> last tile  (offset 1 - tileWidth)
    float tolerance = tileWidth * 0.5;
    float tolerance2 = 1.0 - tileWidth - tolerance;
    if (octaveEnabled[i] == 1)
    { // find relative position within current tile
      float2 imageUV = frac(invTileWidth * In.UV);
      // recalculate the weights
      float2 weights = imageUV * imageUV * imageUV * (6.0 * imageUV * imageUV - 15.0 * imageUV + 10.0); // revised Perlin weights
      // UV of the current tile's top left (NW) corner
      float2 offset  = floor(invTileWidth * In.UV) * tileWidth;

      // default values of UV coordinates for tiles away from edges - change where necessary for seamless effect
      float2 tileUV_NW = cornerNW*tileWidth+offset;
      float2 tileUV_SW = cornerSW*tileWidth+offset;
      float2 tileUV_NE = cornerNE*tileWidth+offset;
      float2 tileUV_SE = cornerSE*tileWidth+offset;

      // introduced in attempt to get seeds working as intended
      float2 tempSeedNW = seed2;
      float2 tempSeedSW = seed2;
      float2 tempSeedNE = seed2;
      float2 tempSeedSE = seed2;

      // test for octave 0 first as it is special ( :) )
      if (i == 0)
      { // all four corners must be set to correct corner seeds
        tileUV_NW = 0.0.xx;
        tileUV_SW = 0.0.xx;
        tileUV_NE = 0.0.xx;
        tileUV_SE = 0.0.xx;
        tempSeedNW = seedNW;
        tempSeedSW = seedSW;
        tempSeedNE = seedNE;
        tempSeedSE = seedSE;
      }
      else
      if ((offset.x < tolerance) && (offset.y < tolerance)) // i.e. top left corner (NW) tile
      { // set seeds to correct corner and edge seeds
        tileUV_NW = 0.0.xx; tempSeedNW = seedNW;
        tileUV_NE.y = 0.0; tempSeedNE = seedN;
        tileUV_SW.x = 0.0; tempSeedSW = seedW;
      }
      else
      if ((offset.x < tolerance) && (offset.y > tolerance2)) // must be lower left corner (SW) tile
      {
        tileUV_NW.x = 0.0; tempSeedNW = seedW;
        tileUV_SW = 0.0.xx; tempSeedSW = seedSW;
        tileUV_SE.y = 0.0; tempSeedSE = seedS;
      }
      else
      if ((offset.x > tolerance2) && (offset.y < tolerance)) // must be upper right (NE) tile
      {
        tileUV_NW.y = 0.0; tempSeedNW = seedN;
        tileUV_NE = 0.0.xx; tempSeedNE = seedNE;
        tileUV_SE.x = 0.0; tempSeedSE = seedE;
      }
      else
      if ((offset.x > tolerance2) && (offset.y > tolerance2)) // must be lower right (SE) tile
      { tileUV_SW.y = 0.0; tempSeedSW = seedS;
        tileUV_NE.x = 0.0; tempSeedNE = seedE;
        tileUV_SE = 0.0.xx; tempSeedSE = seedSE;
      }
      // now include the extra tests for edge tiles between corners
      // each of these will require two coefficients to be reset
      // (the corner tiles have already been handled above, so each test below only needs
      //  to exclude the opposite end of its edge)
      else
      if ((offset.x < tolerance ) && (offset.y < tolerance2)) // must be left hand edge (W) tile
      {
        tileUV_NW.x = 0.0; tempSeedNW = seedW;
        tileUV_SW.x = 0.0; tempSeedSW = seedW;
      }
      else
      if ((offset.x < tolerance2 ) && (offset.y < tolerance)) // must be top edge (N) tile
      {
        tileUV_NW.y = 0.0; tempSeedNW = seedN;
        tileUV_NE.y = 0.0; tempSeedNE = seedN;
      }
      else
      if ((offset.x < tolerance2 ) && (offset.y > tolerance2)) // must be bottom edge (S) tile
      {
        tileUV_SW.y = 0.0; tempSeedSW = seedS;
        tileUV_SE.y = 0.0; tempSeedSE = seedS;
      }
      else
      if ((offset.x > tolerance2 ) && (offset.y < tolerance2)) // must be right hand edge (E) tile
      {
        tileUV_NE.x = 0.0; tempSeedNE = seedE;
        tileUV_SE.x = 0.0; tempSeedSE = seedE;
      }

      // finally we can compute the coefficients for the four corner tangents
      // (each component is shifted from [0, 1) to [-0.5, 0.5))
      coeffsNW = float2 (random(tileUV_NW), random(tileUV_NW+tempSeedNW))-0.5.xx; // top left
      coeffsSW = float2 (random(tileUV_SW), random(tileUV_SW+tempSeedSW))-0.5.xx; // lower left
      coeffsNE = float2 (random(tileUV_NE), random(tileUV_NE+tempSeedNE))-0.5.xx; // top right
      coeffsSE = float2 (random(tileUV_SE), random(tileUV_SE+tempSeedSE))-0.5.xx; // lower right

      // find tangents at corners
      float tanNW = dot(coeffsNW, imageUV-cornerNW);
      float tanSW = dot(coeffsSW, imageUV-cornerSW);
      float tanNE = dot(coeffsNE, imageUV-cornerNE);
      float tanSE = dot(coeffsSE, imageUV-cornerSE);

      // interpolate tangents for current intermediate point
      float tA = lerp(tanNW, tanNE, weights.x);
      float tB = lerp(tanSW, tanSE, weights.x);
      result += lerp(tA, tB, weights.y) * octaveWeight; // need to check sensible values for this - e.g. what is max possible value unscaled?
      upperBound += octaveWeight * octaveDivisor;
    }
  }
  // rescale noise component from range (-0.5, 0.5) to range (0, 1) (heuristic rule) to avoid black or whiteout
  // upperBound stays at 0 when no octave is enabled (or octaveDivisor is 0); in that case there
  // is no noise to normalise, so output the neutral mid value instead of dividing by zero
  float scaledNoise = 0.0;
  if (upperBound != 0.0)
  { scaledNoise = boundFactor * result/upperBound;
  }
  result = 0.5 + scaledNoise; // produces standard cloud like noise
 // result = abs(2.0 * boundFactor * result/upperBound); // produces river effect
  Out.colour = float4 (0.0, result, 0.0, 1.0);
  return Out;
};

/*
technique perlinNoiseBasic
{ pass p0
  { VertexShader = compile vs_3_0 VShader();
    PixelShader = compile ps_3_0 PShaderBasic();
  }
}
*/

// Seamless (tileable) Perlin noise: one pass, both shaders compiled for shader model 3.0.
technique perlinNoiseSeamless
{
  pass p0
  {
    VertexShader = compile vs_3_0 VShader();
    PixelShader  = compile ps_3_0 PShaderSeamless();
  }
}
