diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml new file mode 100644 index 0000000..33c5730 --- /dev/null +++ b/.github/workflows/run_tests.yml @@ -0,0 +1,30 @@ +name: Run all default tests using pytest + +on: [ push, pull_request ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install .[test] +# - name: Install Ruff for linting and formatting +# run: pip install ruff +# - name: Check code formatting with Ruff +# run: ruff format --diff --target-version=py312 . +# continue-on-error: true + - name: Test with pytest + run: | + pytest diff --git a/geostatspy/GSLIB.py b/geostatspy/GSLIB.py index 829aa76..af759a3 100644 --- a/geostatspy/GSLIB.py +++ b/geostatspy/GSLIB.py @@ -33,6 +33,7 @@ image_type = "tif" dpi = 600 + def ndarray2GSLIB_3D(array, data_file, col_name): """Convert 1D or 2D or 3D numpy ndarray to a GSLIB Geo-EAS file for use with GSLIB methods. @@ -107,6 +108,7 @@ def GSLIB2ndarray(data_file, kcol, nx, ny): array[ix] = head.split()[kcol] return array, col_name + def GSLIB2ndarray_3D(data_file, kcol,nreal, nx, ny, nz): """Convert GSLIB Geo-EAS file to a 1D or 2D numpy ndarray for use with Python methods @@ -234,8 +236,8 @@ def hist(array, xmin, xmax, log, cumul, bins, weights, xlabel, title, fig_name): ) plt.title(title) plt.xlabel(xlabel) - if cumul == False: - plt.ylabel("Frequency") + if not cumul: + plt.ylabel("Frequency") else: plt.ylabel("Cumulative Probability") plt.ylim([0.0,1.0]) @@ -277,12 +279,12 @@ def hist_st(array, xmin, xmax, log, cumul, bins, weights, xlabel, title): plt.title(title) plt.xlabel(xlabel) if cumul == False: - plt.ylabel("Frequency") + plt.ylabel("Frequency") else: plt.ylabel("Cumulative Probability") plt.ylim([0.0,1.0]) plt.xlim([xmin,xmax]) - + def locmap( df, xcol, @@ -948,16 +950,19 @@ def affine(array, tmean, tstdev): return array -def nscore(x): +def nscore(x, quiet=False, clean=False): """Normal score transform, wrapper for nscore from GSLIB (.exe must be available in PATH or working directory). :param x: ndarray + :param quiet: if True, suppress terminal output from GSLIB + :param clean: if True, remove temporary files after running GSLIB + :return: ndarray """ nx = np.ma.size(x) ny = 1 - ndarray2GSLIB(x, "nscore.dat", "value") + ndarray2GSLIB_3D(x, "nscore.dat", "value") with open("nscore.par", "w") as f: f.write(" Parameters for NSCORE \n") @@ -973,8 +978,21 @@ def nscore(x): f.write("nscore.out -file for output \n") f.write("nscore.trn -file for output transformation table \n") - os.system("nscore.exe nscore.par") + command = "nscore.exe nscore.par" + # Suppress GSLIB output + if quiet: + command += ' > %s' % os.devnull + os.system(command) + y, name = GSLIB2ndarray("nscore.out", 1, nx, ny) + + # Remove temporary files + if clean: + os.remove('nscore.par') + os.remove('nscore.out') + os.remove('nscore.dat') + os.remove('nscore.trn') + return y @@ -1053,7 +1071,8 @@ def make_variogram( return var -def gamv_2d(df, xcol, ycol, vcol, nlag, lagdist, azi, atol, bstand): +def gamv_2d(df, xcol, ycol, vcol, nlag, lagdist, azi, atol, bstand, + quiet=False, clean=False): """Irregularly sampled variogram, 2D wrapper for gam from GSLIB (.exe must be available in PATH or working directory). @@ -1066,6 +1085,8 @@ def gamv_2d(df, xcol, ycol, vcol, nlag, lagdist, azi, atol, bstand): :param azi: TODO :param atol: TODO :param bstand: TODO + :param quiet: if True, suppress terminal output from GSLIB + :param clean: if True, remove temporary files after running GSLIB :return: TODO """ lag = [] @@ -1094,7 +1115,12 @@ def gamv_2d(df, xcol, ycol, vcol, nlag, lagdist, azi, atol, bstand): f.write("1 -number of variograms \n") f.write("1 1 1 -tail var., head var., variogram type \n") - os.system("gamv.exe gamv.par") + command = "gamv.exe gamv.par" + + # Suppress GSLIB output + if quiet: + command += ' > %s' % os.devnull + os.system(command) with open("gamv.out") as f: next(f) # skip the first line @@ -1105,9 +1131,17 @@ def gamv_2d(df, xcol, ycol, vcol, nlag, lagdist, azi, atol, bstand): gamma.append(float(g)) npair.append(float(n)) + # Remove temporary files + if clean: + os.remove('gamv.par') + os.remove('gamv.out') + os.remove('gamv_out.dat') + return lag, gamma, npair -def gamv_3d(df, xcol, ycol, zcol, vcol, nlag, lagdist,lag_tol, azi, atol, bandh, dip, dtol, bandv, isill): + +def gamv_3d(df, xcol, ycol, zcol, vcol, nlag, lagdist, lag_tol, azi, atol, bandh, dip, dtol, bandv, isill, + quiet=False, clean=False, dry_run=False): """Irregularly sampled variogram, 3D wrapper for gam from GSLIB (.exe must be available in PATH or working directory). @@ -1121,6 +1155,9 @@ def gamv_3d(df, xcol, ycol, zcol, vcol, nlag, lagdist,lag_tol, azi, atol, bandh, :param azi: TODO :param atol: TODO :param bstand: TODO + :param quiet: if True, suppress terminal output from GSLIB + :param clean: if True, remove temporary files after running GSLIB + :param dry_run: if True, do not run GSLIB, just write gamv.par :return: TODO """ lag = [] @@ -1149,7 +1186,17 @@ def gamv_3d(df, xcol, ycol, zcol, vcol, nlag, lagdist,lag_tol, azi, atol, bandh, f.write("1 -number of variograms \n") f.write("1 1 1 -tail var., head var., variogram type \n") - os.system("gamv.exe gamv.par") + # If requested, exit prior to running GSLIB + if dry_run: + return + + command = "gamv.exe gamv.par" + + # Suppress GSLIB output + if quiet: + command += ' > %s' % os.devnull + + os.system(command) with open("gamv.out") as f: next(f) # skip the first line @@ -1160,8 +1207,15 @@ def gamv_3d(df, xcol, ycol, zcol, vcol, nlag, lagdist,lag_tol, azi, atol, bandh, gamma.append(float(g)) npair.append(float(n)) + # Remove temporary files + if clean: + os.remove('gamv.par') + os.remove('gamv.out') + os.remove('gamv_out.dat') + return lag, gamma, npair + def varmapv_2d( df, xcol, @@ -1177,7 +1231,8 @@ def varmapv_2d( vlabel, cmap, fig_name, -): + quiet=False, + clean=False): """Irregular spaced data, 2D wrapper for varmap from GSLIB (.exe must be available in PATH or working directory). @@ -1195,6 +1250,8 @@ def varmapv_2d( :param vlabel: TODO :param cmap: colormap :param fig_name: figure name + :param quiet: if True, suppress terminal output from GSLIB + :param clean: if True, remove temporary files after running GSLIB :return: TODO """ df_ext = pd.DataFrame( @@ -1222,7 +1279,13 @@ def varmapv_2d( f.write("1 -number of variograms \n") f.write("1 1 1 -tail, head, variogram type \n") - os.system("varmap.exe varmap.par") + command = "varmap.exe varmap.par" + + # Suppress GSLIB output + if quiet: + command += ' > %s' % os.devnull + os.system(command) + nnx = nx * 2 + 1 nny = ny * 2 + 1 varmap_, name = GSLIB2ndarray("varmap.out", 0, nnx, nny) @@ -1247,6 +1310,13 @@ def varmapv_2d( cmap, fig_name ) + + # Remove temporary files + if clean: + os.remove('varmap.par') + os.remove('varmap.out') + os.remove('varmap_out.dat') + return varmap_ @@ -1264,6 +1334,8 @@ def varmap( vlabel, cmap, fig_name, + quiet=False, + clean=False, ): """Regular spaced data, 2D wrapper for varmap from GSLIB (.exe must be available in PATH or working directory). @@ -1281,9 +1353,11 @@ def varmap( :param vlabel: TODO :param cmap: colormap :param fig_name: figure name + :param quiet: if True, suppress terminal output from GSLIB + :param clean: if True, remove temporary files after running GSLIB :return: TODO """ - ndarray2GSLIB(array, "varmap_out.dat", "gam.dat") + ndarray2GSLIB_3D(array, "varmap_out.dat", "gam.dat") with open("varmap.par", "w") as f: f.write(" Parameters for VARMAP \n") @@ -1305,7 +1379,13 @@ def varmap( f.write("1 -number of variograms \n") f.write("1 1 1 -tail, head, variogram type \n") - os.system("varmap.exe varmap.par") + command = "varmap.exe varmap.par" + + # Suppress GSLIB output + if quiet: + command += ' > %s' % os.devnull + os.system(command) + nnx = nlagx * 2 + 1 nny = nlagy * 2 + 1 varmap_, name = GSLIB2ndarray("varmap.out", 0, nnx, nny) @@ -1330,78 +1410,159 @@ def varmap( cmap, fig_name ) + + # Remove temporary files + if clean: + os.remove('varmap.par') + os.remove('varmap.out') + os.remove('varmap_out.dat') + return varmap_ -def vmodel( - nlag, - step, - azi, - nug, - nst, - tstr1, - c1, - azi1, - rmaj1, - rmin1, - tstr2=1, - c2=0, - azi2=0, - rmaj2=0, - rmin2=0, -): - """Variogram model, 2D wrapper for vmodel from GSLIB (.exe must be +def write_variogram(output_file, nug, nst, tstr, c, + ang1, ang2, ang3, ahmax, ahmin, ahvert): + """Append variogram information to output file. This function is used to + write the variogram parameters to a file in the format required by GSLIB's + geostatistical programs. + + :param output_file: file to append variogram information to + :param nst: number of nested structures + :param tstr: type of structure (1=spherical, 2=exponential, 3=gaussian, 4=power + model, 5=hole effect). Either a float (if nst==1) or a list of floats (if nst>1). + :param c: contribution of structure(s) to variance. Either a float (if nst==1) + or a list of floats (if nst>1). + :param ang1: angle(s) defining azimuth of geometric anisotropy. Either a float + (if nst==1) or a list of floats (if nst>1). + :param ang2: angle(s) defining dip of geometric anisotropy. Either a float (if + nst==1) or a list of floats (if nst>1). + :param ang3: angle(s) defining tilt of geometric anisotropy. Either a float + (if nst==1) or a list of floats (if nst>1). + :param ahmax: range(s) of structure(s) in direction of anisotropy. Either a + float (if nst==1) or a list of floats (if nst>1). + :param ahmin: range(s) of structure(s) perpendicular to anisotropy. Either a + float (if nst==1) or a list of floats (if nst>1). + :param ahvert: vertical range(s) of structure(s). Either a float (if nst==1) + or a list of floats (if nst>1). + """ + + with open(output_file, 'a') as f: + f.write("%d %f -nst, nugget effect \n" % (nst, nug)) + + # Write structures + for i in range(nst): + f.write("%d %f %f %f %f -it,cc,ang1,ang2,ang3\n" % (tstr[i], c[i], ang1[i], ang2[i], ang3[i])) + f.write(" %f %f %f -a_hmax, a_hmin, a_vert\n" % (ahmax[i], ahmin[i], ahvert[i])) + + +def vmodel(ndir, nlag, azm, dip, lag_dist, nug, nst, + tstr, c, ang1, ang2, ang3, ahmax, ahmin, ahvert, + clean=False, quiet=False, dry_run=False): + """Variogram model, wrapper for vmodel from GSLIB (.exe must be available in PATH or working directory). - :param nlag: TODO - :param step: TODO - :param azi: TODO - :param nug: TODO - :param nst: TODO - :param tstr1: TODO - :param c1: TODO - :param azi1: TODO - :param rmaj1: TODO - :param rmin1: TODO - :param tstr2: TODO - :param c2: TODO - :param azi2: TODO - :param rmaj2: TODO - :param rmin2: TODO - :return: TODO + :param ndir: number of directions in which to calculate modeled variograms + :param nlag: number of lags + :param azm: azimuth of direction(s) in which to calculate variograms. Either + a float (if ndir==1) or a list of floats (if ndir>1). + :param dip: dip of direction(s) in which to calculate variograms. Either a + float (if ndir==1) or a list of floats (if ndir>1). + :param lag_dist: lag distance(s) in which to calculate variograms. Either a + float (if ndir==1) or a list of floats (if ndir>1). + :param nug: nugget effect + :param nst: number of nested structures + :param tstr: type of structure (1=spherical, 2=exponential, 3=gaussian, 4=power + model, 5=hole effect). Either a float (if nst==1) or a list of floats (if nst>1). + :param c: contribution of structure(s) to variance. Either a float (if nst==1) + or a list of floats (if nst>1). + :param ang1: angle(s) defining azimuth of geometric anisotropy. Either a float + (if nst==1) or a list of floats (if nst>1). + :param ang2: angle(s) defining dip of geometric anisotropy. Either a float (if + nst==1) or a list of floats (if nst>1). + :param ang3: angle(s) defining tilt of geometric anisotropy. Either a float + (if nst==1) or a list of floats (if nst>1). + :param ahmax: range(s) of structure(s) in direction of anisotropy. Either a + float (if nst==1) or a list of floats (if nst>1). + :param ahmin: range(s) of structure(s) perpendicular to anisotropy. Either a + float (if nst==1) or a list of floats (if nst>1). + :param ahvert: vertical range(s) of structure(s). Either a float (if nst==1) + or a list of floats (if nst>1). + :param quiet: if True, suppress terminal output from GSLIB + :param clean: if True, remove temporary files after running GSLIB + :return dry_run: if True, do not run GSLIB, just write vmodel.par + # :return var: gamma values, numpy array of shape (nlag, ndir) """ - lag = [] - gamma = [] + + # If only one direction, convert azm, dip and lag_dist to list + if isinstance(azm, float) or isinstance(azm, int): + azm = [azm] + if isinstance(dip, float) or isinstance(dip, int): + dip = [dip] + if isinstance(lag_dist, float) or isinstance(lag_dist, int): + lag_dist = [lag_dist] + + # If only one structure, convert tstr, c, ang1, ang2, ang3, ahmax, ahmin and ahvert to list + if isinstance(tstr, float) or isinstance(tstr, int): + tstr = [tstr] + if isinstance(c, float) or isinstance(c, int): + c = [c] + if isinstance(ang1, float) or isinstance(ang1, int): + ang1 = [ang1] + if isinstance(ang2, float) or isinstance(ang2, int): + ang2 = [ang2] + if isinstance(ang3, float) or isinstance(ang3, int): + ang3 = [ang3] + if isinstance(ahmax, float) or isinstance(ahmax, int): + ahmax = [ahmax] + if isinstance(ahmin, float) or isinstance(ahmin, int): + ahmin = [ahmin] + if isinstance(ahvert, float) or isinstance(ahvert, int): + ahvert = [ahvert] with open("vmodel.par", "w") as f: - f.write(" \n") - f.write(" Parameters for VMODEL \n") - f.write(" ********************* \n") - f.write(" \n") - f.write("START OF PARAMETERS: \n") - f.write("vmodel.var -file for variogram output \n") - f.write("1 " + str(nlag) + " -number of directions and lags \n") - f.write(str(azi) + " 0.0 " + str(step) + " -azm, dip, lag distance \n") - f.write(str(nst) + " " + str(nug) + " -nst, nugget effect \n") - f.write(str(tstr1) + " " + str(c1) + " " + str(azi1) + " 0.0 0.0 0.0 -it,cc,ang1,ang2,ang3 \n") - f.write(str(rmaj1) + " " + str(rmin1) + " 0.0 -a_hmax, a_hmin, a_vert \n") - f.write(str(tstr2) + " " + str(c2) + " " + str(azi2) + " 0.0 0.0 0.0 -it,cc,ang1,ang2,ang3 \n") - f.write(str(rmaj2) + " " + str(rmin2) + " 0.0 -a_hmax, a_hmin, a_vert \n") - - os.system("vmodel.exe vmodel.par") - - with open("vmodel.var") as f: - next(f) # skip the first line + f.write("\n") + f.write(" Parameters for VMODEL\n") + f.write(" *********************\n") + f.write("\n") + f.write("START OF PARAMETERS:\n") + f.write("vmodel.var -file for variogram output\n") + f.write("%d %d -number of directions and lags \n" % (ndir, nlag)) - for line in f: - _, l, g, *_ = line.split() - lag.append(float(l)) - gamma.append(float(g)) + # Write directions in which to calculate variograms + for i in range(ndir): + f.write("%f %f %f -azm, dip, lag distance \n" % (azm[i], dip[i], lag_dist[i])) + + write_variogram("vmodel.par", nug, nst, tstr, c, ang1, ang2, ang3, ahmax, ahmin, ahvert) + + # If requested, exit prior to running GSLIB + if dry_run: + return + + command = "vmodel.exe vmodel.par" + + if quiet: + # If quiet, suppress GSLIB output + command += ' 2&> /dev/null' + os.system(command) + + lag = np.empty((nlag+1, ndir), dtype=float) + gamma = np.empty((nlag+1, ndir), dtype=float) + + # Read variogram output to numpy array + for i in range(ndir): + output = np.loadtxt('vmodel.var', skiprows=i*(nlag+3)+1, usecols=[1, 2], max_rows=(nlag+1)) + lag[:, i] = output[:, 0] + gamma[:, i] = output[:, 1] + + # Remove temporary files + if clean: + os.remove('vmodel.par') + os.remove('vmodel.var') return lag, gamma -def declus(df, xcol, ycol, vcol, cmin, cmax, cnum, bmin): +def declus(df, xcol, ycol, vcol, cmin, cmax, cnum, bmin, quiet=False, clean=False): """Cell-based declustering, 2D wrapper for declus from GSLIB (.exe must be available in PATH or working directory). @@ -1413,6 +1574,8 @@ def declus(df, xcol, ycol, vcol, cmin, cmax, cnum, bmin): :param cmax: TODO :param cnum: TODO :param bmin: TODO + :param quiet: if True, suppress terminal output from GSLIB + :param clean: if True, remove temporary files after running GSLIB :return: TODO """ nrow = len(df) @@ -1447,15 +1610,27 @@ def declus(df, xcol, ycol, vcol, cmin, cmax, cnum, bmin): f.write(str(cnum) + " " + str(cmin) + " " + str(cmax) + " -number of cell sizes, min size, max size \n") f.write("5 -number of origin offsets \n") - os.system("declus.exe declus.par") + command = "declus.exe declus.par" + + # Suppress GSLIB output + if quiet: + command += ' > %s' % os.devnull + os.system(command) + df = GSLIB2Dataframe("declus.out") for irow in range(nrow): weights.append(df.iloc[irow, 3]) + # Remove temporary files + if clean: + os.remove('declus.par') + os.remove('declus.out') + os.remove('declus_out.dat') + return weights -def sgsim_uncond(nreal, nx, ny, hsiz, seed, var, output_file): +def sgsim_uncond(nreal, nx, ny, hsiz, seed, var, output_file, quiet=False, clean=False): """Sequential Gaussian simulation, 2D unconditional wrapper for sgsim from GSLIB (.exe must be available in PATH or working directory). @@ -1466,6 +1641,8 @@ def sgsim_uncond(nreal, nx, ny, hsiz, seed, var, output_file): :param seed: TODO :param var: TODO :param output_file: output file + :param quiet: if True, suppress terminal output from GSLIB + :param clean: if True, remove temporary files after running GSLIB :return: TODO """ nug = var["nug"] @@ -1525,12 +1702,26 @@ def sgsim_uncond(nreal, nx, ny, hsiz, seed, var, output_file): f.write(str(it2) + " " + str(cc2) + " " + str(azi2) + " 0.0 0.0 -it,cc,ang1,ang2,ang3\n") f.write(" " + str(hmaj2) + " " + str(hmin2) + " 1.0 - a_hmax, a_hmin, a_vert \n") - os.system("sgsim.exe sgsim.par") + command = "sgsim.exe sgsim.par" + + # Suppress GSLIB output + if quiet: + command += ' > %s' % os.devnull + os.system(command) + sim_array = GSLIB2ndarray(output_file, 0, nx, ny) + + # Remove temporary files + if clean: + os.remove('sgsim.par') + os.remove(output_file) + os.remove('nonw.dbg') + return sim_array[0] -def kb2d(df, xcol, ycol, vcol, nx, ny, hsiz, var, output_file): +def kb2d(df, xcol, ycol, vcol, nx, ny, hsiz, var, output_file='tmp.dat', ndmin=1, ndmax=30, + radius=None, quiet=False, clean=False, dry_run=False): """Kriging estimation, 2D wrapper for kb2d from GSLIB (.exe must be available in PATH or working directory). @@ -1542,7 +1733,14 @@ def kb2d(df, xcol, ycol, vcol, nx, ny, hsiz, var, output_file): :param ny: TODO :param hsiz: TODO :param var: TODO + :param ndmin: minimum number of data points to use for kriging a block. Default of 1 + :param ndmax: maximum number of data points to use for kriging a block. Default of 30 + :param radius: maximum isotropic search radius. If not provided, use a search radius equal + to the maximum of hmaj1 and hmin1 provided in var :param output_file: output file + :param quiet: if True, suppress terminal output from GSLIB + :param clean: if True, remove temporary files after running GSLIB + :param dry_run: if True, do not run GSLIB, just write kb2d.par :return: TODO """ df_temp = pd.DataFrame({"X": df[xcol], "Y": df[ycol], "Var": df[vcol]}) @@ -1563,6 +1761,10 @@ def kb2d(df, xcol, ycol, vcol, nx, ny, hsiz, var, output_file): max_range = max(hmaj1, hmaj2) hmn = hsiz * 0.5 + # Ensure nx and ny are integers + nx = int(nx) + ny = int(ny) + with open("kb2d.par", "w") as f: f.write(" Parameters for KB2D \n") f.write(" ******************** \n") @@ -1574,110 +1776,229 @@ def kb2d(df, xcol, ycol, vcol, nx, ny, hsiz, var, output_file): f.write("0 -debugging level: 0,1,2,3 \n") f.write("none.dbg -file for debugging output \n") f.write(str(output_file) + " -file for kriged output \n") - f.write(str(nx) + " " + str(hmn) + " " + str(hsiz) + " \n") - f.write(str(ny) + " " + str(hmn) + " " + str(hsiz) + " \n") + f.write(str(nx) + " " + str(hmn) + " " + str(hsiz) + " \n") + f.write(str(ny) + " " + str(hmn) + " " + str(hsiz) + " \n") f.write("1 1 -x and y block discretization \n") - f.write("1 30 -min and max data for kriging \n") + f.write("%d %d -min and max data for kriging \n" % (ndmin, ndmax)) f.write(str(max_range) + " -maximum search radius \n") f.write("1 -9999.9 -0=SK, 1=OK, (mean if SK) \n") f.write(str(nst) + " " + str(nug) + " -nst, nugget effect \n") f.write(str(it1) + " " + str(cc1) + " " + str(azi1) + " " + str(hmaj1) + " " + str(hmin1) + " -it, c ,azm ,a_max ,a_min \n") f.write(str(it2) + " " + str(cc2) + " " + str(azi2) + " " + str(hmaj2) + " " + str(hmin2) + " -it, c ,azm ,a_max ,a_min \n") - os.system("kb2d.exe kb2d.par") + # If requested, exit prior to running GSLIB + if dry_run: + return + + command = "kb2d.exe kb2d.par" + + # Suppress GSLIB output + if quiet: + command += ' > %s' % os.devnull + os.system(command) + est_array = GSLIB2ndarray(output_file, 0, nx, ny) var_array = GSLIB2ndarray(output_file, 1, nx, ny) - return est_array[0], var_array[0] + # Remove temporary files + if clean: + os.remove('kb2d.par') + os.remove(output_file) + os.remove('nonw.dbg') -def sgsim(nreal, df, xcol, ycol, vcol, nx, ny, hsiz, seed, var, output_file): - """Sequential Gaussian simulation, 2D wrapper for sgsim from GSLIB (.exe - must be available in PATH or working directory). + return est_array[0], var_array[0] - :param nreal: TODO - :param df: dataframe - :param xcol: TODO - :param ycol: TODO - :param vcol: TODO - :param nx: TODO - :param ny: TODO - :param hsiz: TODO - :param seed: TODO - :param var: TODO - :param output_file: output file - :return: TODO + +def sgsim(nreal, df, vcol, var, xcol=None, ycol=None, zcol=None, wtcol=None, + nx=1, ny=1, nz=1, dx=1, dy=1, dz=1, xmn=None, ymn=None, zmn=None, + ndmin=0, ndmax=8, ncnode=12, sstrat=0, + hmax=None, hmin=None, hvert=None, sang1=None, sang2=None, sang3=None, + seed=69069, output_file='sgsim_temp.dat', + quiet=False, clean=False, return_array=True, dry_run=False): + """Perform 1D, 2D or 3D sequential Gaussian simulation using GSLIB + + :param nreal: number of realizations to generate + :param df: dataframe containing the coordinates, values and weights, if any + :param vcol: name of the column containing the variable to be simulated + :param var: variogram parameters. This must be a dict in which each value + is a list of floats, with one value for each nested structure. To get a list + of required keys, see the documentation for the write_variogram function. + :param xcol: name of the column containing the x coordinates. Optional, if + not provided, sgsim will ignore this dimension (ie, a 1D or 2D simulation) + :param ycol: name of the column containing the y coordinates. Optional, if + not provided, sgsim will ignore this dimension (ie, a 1D or 2D simulation) + :param zcol: name of the column containing the z coordinates. Optional, if + not provided, sgsim will ignore this dimension (ie, a 1D or 2D simulation) + :param wtcol: name of the column containing the weights. Optional, if not + provided, all data will be weighted equally + :param nx: number of cells in x direction. Optional, default is 1 + :param ny: number of cells in y direction. Optional, default is 1 + :param nz: number of cells in z direction. Optional, default is 1 + :param dx: cell size in x direction. Optional, default is 1 + :param dy: cell size in y direction. Optional, default is 1 + :param dz: cell size in z direction. Optional, default is 1 + :param xmn: x coordinate of the first grid cell. Optional, default is dx/2 + :param ymn: y coordinate of the first grid cell. Optional, default is dy/2 + :param zmn: z coordinate of the first grid cell. Optional, default is dz/2 + :param ndmin: minimum number of original data used to simulate a grid node. + If there are fewer than ndmin data points, the node is not simulated. + Optional, default is 0. + :param ndmax: maximum number of original data used to simulate a grid node. + Optional, default is 8. + :param ncnode: maximum number of previously simulated nodes to use for the + simulation of a new node. Optional, default is 12. + :param sstrat: search strategy. If sstrat=0, search the hard data using a + super block search strategy, then search simulated data using spiral search + strategy. If sstrat=1, assign hard data to grid and search both hard data and + the simulated nodes using a spiral search strategy. Optional, default is 0. + :param hmax: search radius in max horizontal direction. Optional, default is + set by the ellipsoid of the first variogram structure in var + :param hmin: search radius in min horizontal direction. Optional, default is + set by the ellipsoid of the first variogram structure in var + :param hvert: search radius in vertical direction. Optional, default is set + by the ellipsoid of the first variogram structure in var + :param sang1: azimuth of the search ellipsoid. Optional, default is set by + the ellipsoid of the first variogram structure in var + :param sang2: dip of the search ellipsoid. Optional, default is set by the + ellipsoid of the first variogram structure in var + :param sang3: tilt of the search ellipsoid. Optional, default is set by the + ellipsoid of the first variogram structure in var + :param seed: random number seed. Optional, default is 69069 + :param output_file: output file. Optional, default is "sgsim_temp.dat" + :param quiet: if True, suppress output from GSLIB + :param clean: if True, remove sgsim.par and output_file after running sgsim + :param return_array: if True, return ndarray of the simulations. Otherwise, + do not load the array and just generate the simulation in `output_file` + :param dry_run: if True, do not run GSLIB, just write sgsim.par """ - x = df[xcol] - y = df[ycol] + + # Get number of dimensions based on columns provided v = df[vcol] var_min = v.values.min() var_max = v.values.max() - df_temp = pd.DataFrame({"X": x, "Y": y, "Var": v}) + df_temp = pd.DataFrame({"Var": v}) + + # Populate df_temp with dimensions and weights, if requested + if xcol is not None: + df_temp["X"] = df[xcol] + if ycol is not None: + df_temp["Y"] = df[ycol] + if zcol is not None: + df_temp["Z"] = df[zcol] + if wtcol is not None: + df_temp["Wt"] = df[wtcol] + + # Get indices of dimension and weight columns to provide these to GSLIB + # (Note that vcol is always the first column) + ixcol, iycol, izcol, iwtcol = 0, 0, 0, 0 # 0 means that dimension is not used + if xcol is not None: + ixcol = df_temp.columns.get_loc("X") + 1 + if ycol is not None: + iycol = df_temp.columns.get_loc("Y") + 1 + if zcol is not None: + izcol = df_temp.columns.get_loc("Z") + 1 + if wtcol is not None: + iwtcol = df_temp.columns.get_loc("Wt") + 1 + + if ixcol+iycol+izcol == 0: + raise ValueError("Must provide at least one dimension column") + + # If not provided, assume first grid node is 1/2 cell size from origin + if xmn is None: + xmn = dx/2 + if ymn is None: + ymn = dy/2 + if zmn is None: + zmn = dz/2 + Dataframe2GSLIB("data_temp.dat", df_temp) - nug = var["nug"] - nst = var["nst"] - it1 = var["it1"] - cc1 = var["cc1"] - azi1 = var["azi1"] - hmaj1 = var["hmaj1"] - hmin1 = var["hmin1"] - it2 = var["it2"] - cc2 = var["cc2"] - azi2 = var["azi2"] - hmaj2 = var["hmaj2"] - hmin2 = var["hmin2"] - max_range = max(hmaj1, hmaj2) - hmn = hsiz * 0.5 - hctab = int(max_range / hsiz) * 2 + 1 + if hmax is None: + hmax = var["ahmax"][0] + if hmin is None: + hmin = var["ahmin"][0] + if hvert is None: + hvert = var["ahvert"][0] + + if sang1 is None: + sang1 = var["ang1"][0] + if sang2 is None: + sang2 = var["ang2"][0] + if sang3 is None: + sang3 = var["ang3"][0] + + # Calculate size of covariance lookup table based on hmax and cell size + nctx = int(hmax / dx) * 2 + 1 + ncty = int(hmin / dy) * 2 + 1 + nctz = int(hvert / dz) * 2 + 1 with open("sgsim.par", "w") as f: - f.write(" Parameters for SGSIM \n") - f.write(" ******************** \n") - f.write(" \n") - f.write("START OF PARAMETER: \n") - f.write("data_temp.dat -file with data \n") - f.write("1 2 0 3 0 0 - columns for X,Y,Z,vr,wt,sec.var. \n") - f.write("-1.0e21 1.0e21 - trimming limits \n") - f.write("1 -transform the data (0=no, 1=yes) \n") - f.write("none.trn - file for output trans table \n") - f.write("0 - consider ref. dist (0=no, 1=yes) \n") - f.write("none.dat - file with ref. dist distribution \n") - f.write("1 0 - columns for vr and wt \n") - f.write(str(var_min) + " " + str(var_max) + " zmin,zmax(tail extrapolation) \n") - f.write("1 " + str(var_min) + " - lower tail option, parameter \n") - f.write("1 " + str(var_max) + " - upper tail option, parameter \n") - f.write("0 -debugging level: 0,1,2,3 \n") - f.write("nonw.dbg -file for debugging output \n") - f.write(str(output_file) + " -file for simulation output \n") - f.write(str(nreal) + " -number of realizations to generate \n") - f.write(str(nx) + " " + str(hmn) + " " + str(hsiz) + " \n") - f.write(str(ny) + " " + str(hmn) + " " + str(hsiz) + " \n") - f.write("1 0.0 1.0 - nz zmn zsiz \n") - f.write(str(seed) + " -random number seed \n") - f.write("0 8 -min and max original data for sim \n") - f.write("12 -number of simulated nodes to use \n") - f.write("0 -assign data to nodes (0=no, 1=yes) \n") - f.write("1 3 -multiple grid search (0=no, 1=yes),num \n") - f.write("0 -maximum data per octant (0=not used) \n") - f.write(str(max_range) + " " + str(max_range) + " 1.0 -maximum search (hmax,hmin,vert) \n") - f.write(str(azi1) + " 0.0 0.0 -angles for search ellipsoid \n") - f.write(str(hctab) + " " + str(hctab) + " 1 -size of covariance lookup table \n") - f.write("0 0.60 1.0 -ktype: 0=SK,1=OK,2=LVM,3=EXDR,4=COLC \n") - f.write("none.dat - file with LVM, EXDR, or COLC variable \n") - f.write("4 - column for secondary variable \n") - f.write(str(nst) + " " + str(nug) + " -nst, nugget effect \n") - f.write(str(it1) + " " + str(cc1) + " " + str(azi1) + " 0.0 0.0 -it,cc,ang1,ang2,ang3\n") - f.write(" " + str(hmaj1) + " " + str(hmin1) + " 1.0 - a_hmax, a_hmin, a_vert \n") - f.write(str(it2) + " " + str(cc2) + " " + str(azi2) + " 0.0 0.0 -it,cc,ang1,ang2,ang3\n") - f.write(" " + str(hmaj2) + " " + str(hmin2) + " 1.0 - a_hmax, a_hmin, a_vert \n") + f.write(" Parameters for SGSIM \n") + f.write(" ******************** \n") + f.write("\n") + f.write("START OF PARAMETERS: \n") + f.write("data_temp.dat - file with data \n") + f.write("%d %d %d 1 %d 0 - columns for X,Y,Z,vr,wt \n" % (ixcol, iycol, + izcol, iwtcol)) + f.write("-1.0e21 1.0e21 - trimming limits \n") + f.write("0 - transform the data (0=no, 1=yes)\n") + f.write("none.trn - file for output trans table \n") + f.write("0 - consider ref. dist (0=no, 1=yes) \n") + f.write("none.dat - file with ref. dist distribution \n") + f.write("0 0 - columns of vr and wt within none.dat \n") + f.write("%f %f - zmin,zmax \n" % (var_min, var_max)) + f.write("1 %f - lower tail option, parameter \n" % var_min) + f.write("1 %f - upper tail option, parameter \n" % var_max) + f.write("0 - debugging level: 0,1,2,3 \n") + f.write("nonw.dbg - file for debugging output \n") + f.write("%s - file for simulation output \n" % output_file) + f.write("%d - number of realizations to generate \n" % nreal) + f.write("%d %f %f - nx, xmn, xsiz \n" % (nx, xmn, dx)) + f.write("%d %f %f - ny, ymn, ysiz \n" % (ny, ymn, dy)) + f.write("%d %f %f - nz, zmn, zsiz \n" % (nz, zmn, dz)) + f.write("%s - random number seed \n" % seed) + f.write("%d %d - min/max data to use \n" % (ndmin, ndmax)) + f.write("%d - max simulated nodes to use \n" % ncnode) + f.write("%d - assign data to nodes (0=no, 1=yes) \n" % sstrat) + f.write("1 3 - multiple grid search (0=no, 1=yes), nmult \n") + f.write("0 - maximum data per octant (0=not used) \n") + f.write("%f %f %f - search radii \n" % (hmax, hmin, hvert)) + f.write("%.1f %.1f %.1f - search ellipsoid \n" % (sang1, sang2, sang3)) + f.write("%d %d %d - covariance lookup table \n" % (nctx, ncty, nctz)) + f.write("0 0.60 1.0 - ktype: 0=SK,1=OK,2=LVM,3=EXDR,4=COLC \n") + f.write("none.dat - file with LVM, EXDR, or COLC variable \n") + f.write("4 - column for secondary variable \n") + + write_variogram("sgsim.par", **var) + + # If requested, exit prior to running GSLIB + if dry_run: + return + + command = "sgsim sgsim.par" + if quiet: + # If quiet, suppress GSLIB output + command += ' 2&> /dev/null' + os.system(command) + + # Only read and return array if requested + if return_array: + sim_array = GSLIB2ndarray_3D(output_file, 0, nx, ny)[0] + else: + sim_array = None - os.system("sgsim.exe sgsim.par") - sim_array = GSLIB2ndarray(output_file, 0, nx, ny) - return sim_array[0] + # Remove temporary files + if clean: + os.remove('sgsim.par') + os.remove('data_temp.dat') + os.remove('sgsim_temp.dat') + os.remove('nonw.dbg') + + return sim_array -def cosgsim_uncond(nreal, nx, ny, hsiz, seed, var, sec, correl, output_file): +def cosgsim_uncond(nreal, nx, ny, hsiz, seed, var, sec, correl, output_file, + quiet=False, clean=False): """Sequential Gaussian simulation, 2D unconditional wrapper for sgsim from GSLIB (.exe must be available in PATH or working directory). @@ -1690,6 +2011,8 @@ def cosgsim_uncond(nreal, nx, ny, hsiz, seed, var, sec, correl, output_file): :param sec: TODO :param correl: TODO :param output_file: output file + :param quiet: if True, suppress terminal output from GSLIB + :param clean: if True, remove temporary files after running GSLIB :return: TODO """ nug = var["nug"] @@ -1709,7 +2032,7 @@ def cosgsim_uncond(nreal, nx, ny, hsiz, seed, var, sec, correl, output_file): hmn = hsiz * 0.5 hctab = int(max_range / hsiz) * 2 + 1 - ndarray2GSLIB(sec, "sec.dat", "sec_dat") + ndarray2GSLIB_3D(sec, "sec.dat", "sec_dat") with open("sgsim.par", "w") as f: f.write(" Parameters for SGSIM \n") @@ -1752,8 +2075,21 @@ def cosgsim_uncond(nreal, nx, ny, hsiz, seed, var, sec, correl, output_file): f.write(str(it2) + " " + str(cc2) + " " + str(azi2) + " 0.0 0.0 -it,cc,ang1,ang2,ang3 \n") f.write(" " + str(hmaj2) + " " + str(hmin2) + " 1.0 - a_hmax, a_hmin, a_vert \n") - os.system("sgsim.exe sgsim.par") + command = "sgsim.exe sgsim.par" + + # Suppress GSLIB output + if quiet: + command += ' > %s' % os.devnull + os.system(command) + sim_array = GSLIB2ndarray(output_file, 0, nx, ny) + + if clean: + os.remove('sgsim.par') + os.remove('sec.dat') + os.remove(output_file) + os.remove('nonw.dbg') + return sim_array[0] @@ -1987,7 +2323,8 @@ def make_variogram_3D( return var -def sgsim_3D(nreal, df, xcol, ycol, zcol, vcol, nx, ny, nz, hsiz, vsiz, seed, var, output_file): +def sgsim_3D(nreal, df, xcol, ycol, zcol, vcol, nx, ny, nz, hsiz, vsiz, seed, var, output_file, + quiet=False, clean=False): """Sequential Gaussian simulation, 2D wrapper for sgsim from GSLIB (.exe must be available in PATH or working directory). @@ -2002,6 +2339,8 @@ def sgsim_3D(nreal, df, xcol, ycol, zcol, vcol, nx, ny, nz, hsiz, vsiz, seed, va :param seed: TODO :param var: TODO :param output_file: output file + :param quiet: if True, suppress terminal output from GSLIB + :param clean: if True, remove temporary files after running GSLIB :return: TODO """ x = df[xcol] @@ -2073,10 +2412,23 @@ def sgsim_3D(nreal, df, xcol, ycol, zcol, vcol, nx, ny, nz, hsiz, vsiz, seed, va f.write("4 - column for secondary variable \n") f.write(str(nst) + " " + str(nug) + " -nst, nugget effect \n") f.write(str(it1) + " " + str(cc1) + " " + str(azi1) + " " + str(dip1) +" 0.0 -it,cc,ang1,ang2,ang3\n") - f.write(" " + str(hmax1) + " " + str(hmed1) + " " + str(hmin1) + " - a_hmax, a_hmin, a_vert \n") + f.write(" " + str(hmax1) + " " + str(hvert1) + " " + str(hmin1) + " - a_hmax, a_hmin, a_vert \n") f.write(str(it2) + " " + str(cc2) + " " + str(azi2) + " " + str(dip2) +" 0.0 -it,cc,ang1,ang2,ang3\n") - f.write(" " + str(hmax2) + " " + str(hmed2) + " " +str(hmin2) + " - a_hmax, a_hmin, a_vert \n") + f.write(" " + str(hmax2) + " " + str(hvert2) + " " +str(hmin2) + " - a_hmax, a_hmin, a_vert \n") + + command = "sgsim.exe sgsim.par" + + # Suppress GSLIB output + if quiet: + command += ' > %s' % os.devnull + os.system(command) - os.system("sgsim.exe sgsim.par") sim_array = GSLIB2ndarray_3D(output_file, 0, nreal, nx, ny, nz) + + if clean: + os.remove('sgsim.par') + os.remove('data_temp.dat') + os.remove(output_file) + os.remove('nonw.dbg') + return sim_array[0] diff --git a/geostatspy/geostats.py b/geostatspy/geostats.py index 2fb0686..8dcf954 100644 --- a/geostatspy/geostats.py +++ b/geostatspy/geostats.py @@ -1,6 +1,6 @@ """ This file includes the reimplementations of GSLIB functionality in Python. While -this code will not be as well-tested and robust as the original GSLIBctabel it does +this code will not be as well-tested and robust as the original GSLIB, it does provide the opportunity to build 2D spatial modeling projects in Python without the need to rely on compiled Fortran code from GSLIB. If you want to use the GSLIB compiled code called from Python workflows use the functions available @@ -669,17 +669,18 @@ def srchnd(ix,iy,nx,ny,xmn,ymn,xsiz,ysiz,sim,noct,nodmax,ixnode,iynode,nlooku,nc ncnode = ncnode + 1 # moved to account for origin 0 return ncnode, icnode, cnodev, cnodex, cnodey -def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,zval,cdfval): +def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar, + middle,mpar,utail,utpar,zval,cdfval): """GSLIB's BEYOND subroutine (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at - Austin (March, 2019). + Austin (March 2019). Note this was simplified to 2D only. """ EPSLON = 1.0e-20; UNEST=-1.0 # Check for both "zval" and "cdfval" defined or undefined: - ierr = 1; - if zval > UNEST and cdfva > UNEST: + ierr = 1 + if zval > UNEST and cdfval > UNEST: return -1 if zval <= UNEST and cdfval <= UNEST: return - 1 @@ -699,7 +700,7 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar # ipart = 2 - upper tail ierr = 0 ipart = 1 - if zva > UNEST: + if zval > UNEST: if zval <= ccut[0]: ipart = 0 if zval >= ccut[nccut-1]: @@ -758,7 +759,7 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar if idat <= -1 or idat >= ncut-1: # adjusted for 0 origin zval = powint(0.0,cdf[0],zmin,cut[0],cdfval,1.) else: - zval = powint(cdf[idat],cdf[idat+1],cut[dat],cut[idat+1],temp,1.) + zval = powint(cdf[idat],cdf[idat+1],cut[idat],cut[idat+1],temp,1.) else: # Error situation - unacceptable option: @@ -813,7 +814,7 @@ def beyond(ivtype,nccut,ccut,ccdf,ncut,cut,cdf,zmin,zmax,ltail,ltpar,middle,mpar # Straight linear interpolation if no data; otherwise, local linear # interpolation: - if ilow <= -1 or ilow >= ncut-1 or iup <= -1 or iupp >= ncut-1 or iupp < ilow: + if ilow <= -1 or ilow >= ncut-1 or iupp <= -1 or iupp >= ncut-1 or iupp < ilow: zval=powint(ccdf[cclow],ccdf[cchigh],ccut[cclow],ccut[cchigh],cdfval,1.) else: temp=powint(ccdf[cclow],ccdf[cchigh],cdf[ilow],cdf[iupp],cdfval,1.) @@ -1505,7 +1506,7 @@ def ordrel(ivtype,ncut,ccdf): # Return with corrected CDF: return ccdfo -def declus(df, xcol, ycol, vcol, iminmax, noff, ncell, cmin, cmax): +def declus(df, xcol, ycol, vcol, iminmax, noff, ncell, cmin, cmax, verbose=True): """GSLIB's DECLUS program (Deutsch and Journel, 1998) converted from the original Fortran to Python by Michael Pyrcz, the University of Texas at Austin (Jan, 2019). @@ -1520,6 +1521,7 @@ def declus(df, xcol, ycol, vcol, iminmax, noff, ncell, cmin, cmax): :param ncell: number of cell sizes :param cmin: min cell size :param cmax: max cell size + :param verbose: whether to print descriptive statistics :return: TODO """ # Load data and set up arrays @@ -1549,11 +1551,12 @@ def declus(df, xcol, ycol, vcol, iminmax, noff, ncell, cmin, cmax): xcs_mat[0] = 0.0 vrcr_mat[0] = vmean vrop = vmean # include the naive case - - print(f"There are {nd} data with:") - print(f" mean of {vmean} ") - print(f" min and max {vmin} and {vmax}") - print(f" standard dev {vstdev} ") + + if verbose: + print(f"There are {nd} data with:") + print(f" mean of {vmean} ") + print(f" min and max {vmin} and {vmax}") + print(f" standard dev {vstdev} ") # Define a "lower" origin to use for the cell sizes xo1 = xmin - 0.01 @@ -1573,7 +1576,7 @@ def declus(df, xcol, ycol, vcol, iminmax, noff, ncell, cmin, cmax): # Main loop over cell sizes # 0 index is the 0.0 cell, note n + 1 in Fortran - for lp in tqdm(range(1, ncell + 2)): + for lp in tqdm(range(1, ncell + 2), disable=not verbose): xcs = xcs + xinc ycs = ycs + yinc @@ -1879,7 +1882,7 @@ def declus_kriging( if i == j: cov = cov - c0 cbb = cbb + cov - cbb = cbb/real(ndb*ndb) + cbb = cbb/np.real(ndb*ndb) # MAIN LOOP OVER ALL THE BLOCKS IN THE GRID: nk = 0 @@ -1934,7 +1937,7 @@ def declus_kriging( dy = yy - ydb(i) if (dx*dx+dy*dy) < EPSLON: cb = cb - c0 - cb = cb / real(ndb) + cb = cb / np.real(ndb) if ktype == 0: s[0] = cb/cbb est = s[0]*vra[0] + (1.0-s[0])*skmean @@ -1981,7 +1984,7 @@ def declus_kriging( dy = yy - ydb[j1] if (dx*dx+dy*dy) < EPSLON: cb = cb - c0 - cb = cb / real(ndb) + cb = cb / np.real(ndb) r[j] = cb rr[j] = r[j] @@ -2878,7 +2881,7 @@ def kb2d( if i == j: cov = cov - c0 cbb = cbb + cov - cbb = cbb/real(ndb*ndb) + cbb = cbb/np.real(ndb*ndb) # MAIN LOOP OVER ALL THE BLOCKS IN THE GRID: nk = 0 @@ -2933,7 +2936,7 @@ def kb2d( dy = yy - ydb(i) if (dx*dx+dy*dy) < EPSLON: cb = cb - c0 - cb = cb / real(ndb) + cb = cb / np.real(ndb) if ktype == 0: s[0] = cb/cbb est = s[0]*vra[0] + (1.0-s[0])*skmean @@ -2973,7 +2976,7 @@ def kb2d( dy = yy - ydb[j1] if (dx*dx+dy*dy) < EPSLON: cb = cb - c0 - cb = cb / real(ndb) + cb = cb / np.real(ndb) r[j] = cb rr[j] = r[j] @@ -3075,6 +3078,7 @@ def kb3d(df,xcol,ycol,zcol,vcol,tmin,tmax, MAXKRG = MAXKD * MAXKD vario3D_array = variogram3D2ndarray(vario3D) maxcov = vario3D['nug']+vario3D['cc'][0]+vario3D['cc'][1] # assume the sill as maxcov + c0 = vario3D['nug'] #rotmat3D = setrot3D(vario3D) # rotation matrix with anisotropy ratios rotmat3D = setrot3D_array(vario3D_array) # rotation matrix with anisotropy ratios # Allocate the needed memory: @@ -3162,7 +3166,7 @@ def kb3d(df,xcol,ycol,zcol,vcol,tmin,tmax, if i == j: cov = cov - c0 cbb = cbb + cov - cbb = cbb/real(ndb*ndb) + cbb = cbb/np.real(ndb*ndb) # MAIN LOOP OVER ALL THE BLOCKS IN THE GRID: nk = 0 @@ -3244,7 +3248,7 @@ def kb3d(df,xcol,ycol,zcol,vcol,tmin,tmax, dx = xx - xdb(i); dy = yy - ydb(i); dz = zz - zdb(i) if (dx*dx+dy*dy+dz*dz) < EPSLON: cb = cb - c0 - cb = cb / real(ndb) + cb = cb / np.real(ndb) if ktype == 0: #print(cbb) #print(s) @@ -3301,7 +3305,7 @@ def kb3d(df,xcol,ycol,zcol,vcol,tmin,tmax, dz = zz - zdb[j1] if (dx*dx+dy*dy+dz*dz) < EPSLON: cb = cb - c0 - cb = cb / real(ndb) + cb = cb / np.real(ndb) r[j] = cb rr[j] = r[j] @@ -3441,8 +3445,10 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm # Load the data df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax MAXDAT = len(df_extract) + nd = MAXDAT MAXCUT = ncut MAXNST = 2 + UNEST = -999. MAXROT = MAXNST*MAXCUT+ 1 ikout = np.zeros((nx,ny,ncut)) maxcov = np.zeros(ncut) @@ -3654,7 +3660,7 @@ def ik2d(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,nx,xm return ikout def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtcol,zmin,zmax,ltail,ltpar,utail,utpar,nsim, - nx,xmn,xsiz,ny,ymn,ysiz,seed,ndmin,ndmax,nodmax,mults,nmult,noct,ktype,colocorr,sec_map,vario): + nx,xmn,xsiz,ny,ymn,ysiz,nz,zmn,zsiz,seed,ndmin,ndmax,nodmax,mults,nmult,noct,ktype,colocorr,sec_map,vario): # Hard Code Some Parameters for Ease of Use, Fixed Covariance Table - Added Mar. 21, 2024, radius = max(vario['hmaj1'],vario['hmaj2']) @@ -3727,30 +3733,35 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc nums = np.zeros(ndmax,dtype = int) # Perform some quick checks - if nx > MAXX or ny> MAXY: - print('ERROR: available grid size: ' + str(MAXX) + ',' + str(MAXY) + ',' + str(MAXZ) +'.') - print(' you have asked for : ' + str(nx) + ',' + str(ny) + ',' + str(nz) + '.') - return sim + if nx > MAXX or ny> MAXY or nz > MAXZ: + message = 'ERROR: available grid size: ' + str(MAXX) + ',' + str(MAXY) + ',' + str(MAXZ) +'.' + message += '\n you have asked for : ' + str(nx) + ',' + str(ny) + ',' + str(nz) + '.' + raise ValueError(message) + if ltail != 1 and ltail != 2: - print('ERROR invalid lower tail option ' + str(ltail)) - print(' only allow 1 or 2 - see GSLIB manual ') - return sim + message = 'ERROR invalid lower tail option ' + str(ltail) + message += '\n only allow 1 or 2 - see GSLIB manual ' + raise ValueError(message) + if utail != 1 and utail != 2 and utail != 4: - print('ERROR invalid upper tail option ' + str(ltail)) - print(' only allow 1,2 or 4 - see GSLIB manual ') - return sim + message = 'ERROR invalid upper tail option ' + str(ltail) + message += '\n only allow 1,2 or 4 - see GSLIB manual ' + raise ValueError(message) + if utail == 4 and utpar < 1.0: - print('ERROR invalid power for hyperbolic tail' + str(utpar)) - print(' must be greater than 1.0!') - return sim + message = 'ERROR invalid power for hyperbolic tail' + str(utpar) + message += '\n must be greater than 1.0!' + raise ValueError(message) + if ltail == 2 and ltpar < 0.0: - print('ERROR invalid power for power model' + str(ltpar)) - print(' must be greater than 0.0!') - return sim + message = 'ERROR invalid power for power model' + str(ltpar) + message += '\n must be greater than 0.0!' + raise ValueError(message) + if utail == 2 and utpar < 0.0: - print('ERROR invalid power for power model' + str(utpar)) - print(' must be greater than 0.0!') - return sim + message = 'ERROR invalid power for power model' + str(utpar) + message += '\n must be greater than 0.0!' + raise ValueError(message) # Load the data df_extract = df.loc[(df[vcol] >= tmin) & (df[vcol] <= tmax)] # trim values outside tmin and tmax @@ -3871,9 +3882,9 @@ def sgsim(df,xcol,ycol,vcol,wcol,scol,tmin,tmax,itrans,ismooth,dftrans,tcol,twtc # Do we need to get an external drift attribute for the data? if ktype == 3: for idd in range(0,nd): - if sec[i] != UNEST: - ix = getindx(nx,xmn,xsiz,x[idd]) - iy = getindx(ny,ymn,ysiz,y[idd]) + if sec[idd] != UNEST: + ix = getindex(nx,xmn,xsiz,x[idd]) + iy = getindex(ny,ymn,ysiz,y[idd]) ind = ix + (iy)*nx sec[ind] = lvm[ind] @@ -4153,11 +4164,10 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin """ # Checks if utail == 3 or ltail == 3 or middle == 3: - print('ERROR - distribution extrapolation option 3 with table is not available') - return sim_out + raise ValueError('ERROR - distribution extrapolation option 3 with table is not available') + if xcol == "" or ycol == "": - print('ERROR - must have x and y column in the DataFrame') - return sim_out + raise ValueError('ERROR - must have x and y column in the DataFrame') # Hard Code Some Parameters for Ease of Use, Fixed Covariance Table - Added Mar. 21, 2024, radius = 0.0; radius1 = 0.0 @@ -4173,7 +4183,7 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin if ltail == 1: ltpar = zmin if utail == 1: - utpar == zmax + utpar = zmax sim_out = np.zeros((nreal,ny,nx)) @@ -4357,7 +4367,7 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin cov_table,tmp2,order,ixnode,iynode,nlooku,nctx,ncty = ctable(MAXNOD,MAXCXY,MAXCTX,MAXCTY,MXY, xsiz,ysiz,isrot,nx,ny,nst[0],c0[0],cc[0],aa[0],it[0],ang[0],anis[0],global_rotmat,radsqd) - + # print('spiral search number nodes '); print(nlooku) # print('ixnode,iynode'); print(ixnode,iynode) # Initialize accumulators: # not setup yet @@ -4384,86 +4394,86 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin # MAIN LOOP OVER ALL THE SIMULAUTIONS: for ireal in range(0,nreal): -# Work out a random path for this realization: + # Work out a random path for this realization: sim = np.random.rand(nx*ny) order = np.zeros(nxy) ind = 0 - for ixy in range(0,nxy): - order[ixy] = ind + for ixy in range(0,nxy): + order[ixy] = ind ind = ind + 1 -# Multiple grid search works with multiples of 4 (yes, that is -# soat arbitrary): - if mults == 1: - for imult in range(0,nmult): - nny = int(max(1,ny/((imult+1)*4))) - nnx = int(max(1,nx/((imult+1)*4))) + # Multiple grid search works with multiples of 4 (yes, that is + # sort arbitrary): + if mults == 1: + for imult in range(0,nmult): + nny = int(max(1,ny/((imult+1)*4))) + nnx = int(max(1,nx/((imult+1)*4))) # print('multi grid - nnx, nny'); print(nnx,nny) - jy = 1 - jx = 1 - for iy in range(0,nny): - if nny > 0: jy = iy*(imult+1)*4 - for ix in range(0,nnx): - if nnx > 0: jx = ix*(imult+1)*4 - index = jx + (jy-1)*nx - sim[index] = sim[index] - (imult+1) + jy = 1 + jx = 1 + for iy in range(0,nny): + if nny > 0: jy = iy*(imult+1)*4 + for ix in range(0,nnx): + if nnx > 0: jx = ix*(imult+1)*4 + index = jx + (jy-1)*nx + sim[index] = sim[index] - (imult+1) -# Inlize the simulation: - sim, order = dsortem(0,nxy,sim,2,b=order) - sim.fill(UNEST) - tmp.fill(0.0) - print('Working on realization ' + str(ireal)) -# print('Random Path'); print(order) +# Initialize the simulation: + sim, order = dsortem(0,nxy,sim,2,b=order) + sim.fill(UNEST) + tmp.fill(0.0) + print('Working on realization ' + str(ireal)) +# print('Random Path'); print(order) # As the data to the closest grid node: - TINY = 0.0001 - for idd in range(0,nd): + TINY = 0.0001 + for idd in range(0,nd): # print('data'); print(x[idd],y[idd]) - ix = getindex(nx,xmn,xsiz,x[idd]) - iy = getindex(ny,ymn,ysiz,y[idd]) - ind = ix + (iy-1)*nx - xx = xmn + (ix)*xsiz - yy = ymn + (iy)*ysiz + ix = getindex(nx,xmn,xsiz,x[idd]) + iy = getindex(ny,ymn,ysiz,y[idd]) + ind = ix + (iy-1)*nx + xx = xmn + (ix)*xsiz + yy = ymn + (iy)*ysiz # print('xx, yy' + str(xx) + ',' + str(yy)) - test = abs(xx-x[idd]) + abs(yy-y[idd]) + test = abs(xx-x[idd]) + abs(yy-y[idd]) # As this data to the node (unless there is a closer data): - if sstrat == 1 or (sstrat == 0 and test <= TINY): - if sim[ind] > UNEST: - id2 = int(sim[ind]+0.5) - test2 = abs(xx-x[id2]) + abs(yy-y[id2]) - if test <= test2: - sim[ind] = idd - else: + if sstrat == 1 or (sstrat == 0 and test <= TINY): + if sim[ind] > UNEST: + id2 = int(sim[ind]+0.5) + test2 = abs(xx-x[id2]) + abs(yy-y[id2]) + if test <= test2: sim[ind] = idd - -# As a flag so that this node does not get simulated: + else: + sim[ind] = idd +# As a flag so that this node does not get simulated: + # Another data values into the simulated grid: - for ind in range(0,nxy): - idd = int(sim[ind]+0.5) - if idd > 0: - sim[ind] = vr[idd] - else: - tmp[ind] = sim[ind] - sim[ind] = UNEST - irepo = max(1,min((nxy/10),10000)) + for ind in range(0,nxy): + idd = int(sim[ind]+0.5) + if idd > 0: + sim[ind] = vr[idd] + else: + tmp[ind] = sim[ind] + sim[ind] = UNEST + irepo = max(1,min((nxy/10),10000)) # LOOP OVER ALL THE NODES: - for ind in range(0,nxy): - if (int(ind/irepo)*irepo) == ind: - print(' currently on node ' + str(ind)) + for ind in range(0,nxy): + if (int(ind/irepo)*irepo) == ind: + print(' currently on node ' + str(ind)) # Find the index on the random path, check if assigned data and get location - index = int(order[ind]+0.5) - if (sim[index] > (UNEST+EPSLON)) or (sim[index] < (UNEST*2.0)): continue - iy = int((index)/nx) - ix = index - (iy)*nx - xx = xmn + (ix)*xsiz - yy = ymn + (iy)*ysiz - current_node = (yy,xx) + index = int(order[ind]+0.5) + if (sim[index] > (UNEST+EPSLON)) or (sim[index] < (UNEST*2.0)): continue + iy = int((index)/nx) + ix = index - (iy)*nx + xx = xmn + (ix)*xsiz + yy = ymn + (iy)*ysiz + current_node = (yy,xx) # print('Current_node'); print(current_node) # Now we'll simulate the point ix,iy,iz. First, get the close data @@ -4471,23 +4481,23 @@ def sisim(df,xcol,ycol,vcol,ivtype,koption,ncut,thresh,gcdf,trend,tmin,tmax,zmin # we'll only keep the closest "ndmax" data, and look for previously # simulated grid nodes: - if sstrat == 0: + if sstrat == 0: # print('searching for nearest data') - na = -1 # accounting for 0 as first index - if ndmax == 1: - dist = np.zeros(1); nums = np.zeros(1) - dist[0], nums[0] = tree.query(current_node,ndmax) # use kd tree for fast nearest data search - else: - dist, nums = tree.query(current_node,ndmax) + na = -1 # accounting for 0 as first index + if ndmax == 1: + dist = np.zeros(1); nums = np.zeros(1) + dist[0], nums[0] = tree.query(current_node,ndmax) # use kd tree for fast nearest data search + else: + dist, nums = tree.query(current_node,ndmax) # remove any data outside search radius # print('nums'); print(nums) # print('dist'); print(dist) - na = len(dist) - nums = nums[dist 0: - for icut in range(0,ncut): - cnodeiv[icut,:] = np.where((cnodev <= thresh[icut] + 0.5) & (cnodev > thresh[icut] - 0.5), '1', '0') - else: - for icut in range(0,ncut): - cnodeiv[icut,:] = np.where(cnodev <= thresh[icut], '1', '0') - cnodeiv[ncut,:] = cnodev + ncnode, icnode, cnodev, cnodex, cnodey = srchnd(ix,iy,nx,ny,xmn,ymn,xsiz,ysiz,sim,noct,nodmax,ixnode,iynode,nlooku,nctx,ncty,UNEST) + if ncnode > 0: + for icut in range(0,ncut): + cnodeiv[icut,:] = np.where((cnodev <= thresh[icut] + 0.5) & (cnodev > thresh[icut] - 0.5), '1', '0') + else: + for icut in range(0,ncut): + cnodeiv[icut,:] = np.where(cnodev <= thresh[icut], '1', '0') + cnodeiv[ncut,:] = cnodev # print('indicator transformed nearest nodes'); print(cnodeiv) # print('srchnd'); print(ncnode,icnode,cnodev,cnodex,cnodey) # print('Result of srchnd, cnodex = '); print(cnodex) - nclose = na + nclose = na # print('*****srch node, nclose ' + str(nclose) + ', ncnode ' + str(ncnode)) # print('near data'); print(nums) # print('near data distance'); print(dist) # print('nums'); print(nums) # What cdf value are we looking for? - zval = UNEST - cdfval = np.random.rand() + zval = UNEST + cdfval = np.random.rand() # Use the global distribution? # check inputs # print('nst'); print(nst) - if nclose + ncnode <= 0: + if nclose + ncnode <= 0: # print('nclose & ncnode'); print(nclose, ncnode) - zval = beyond(ivtype,ncut,thresh,gcdf,ng,gcut,gcdf,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,zval,cdfval) - else: + zval = beyond(ivtype,ncut,thresh,gcdf,ng,gcut,gcdf,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,zval,cdfval) + else: # print('kriging') # Estimate the local distribution by indicator kriging: # print('maxcov'); print(maxcov) - for ic in range(0,ncut): + for ic in range(0,ncut): # print('check kriging cov model- icut '); print(ic) # print('node data values for kriging'); print(cnodev) # print(cc[ic],aa[ic],it[ic],ang[ic],anis[ic],rotmat[ic],maxcov[ic]) #ccdf([ic] = krige(ix,iy,iz,xx,yy,zz,ic,cdf(ic),MAXCTX,MAXCTY,MAXCTZ,MAXKR1,ccdf(ic),MAXROT) - if ktype == 0: - gmean = gcdf[ic] - elif ktype == 2: - gmean = trend1d[index,ic] - else: - gmean = 0 # if locally variable mean it is set from trend in ikrige, otherwise not used + if ktype == 0: + gmean = gcdf[ic] + elif ktype == 2: + gmean = trend1d[index,ic] + else: + gmean = 0 # if locally variable mean it is set from trend in ikrige, otherwise not used # print('gmean'); print(gmean) - ccdf[ic], cstdev = ikrige(ix,iy,nx,ny,xx,yy,ktype,x,y,vr[:,ic],sec,colocorr,gmean,trend[:,ic],nums,cov_table,nctx,ncty, + ccdf[ic], cstdev = ikrige(ix,iy,nx,ny,xx,yy,ktype,x,y,vr[:,ic],sec,colocorr,gmean,trend[:,ic],nums,cov_table,nctx,ncty, icnode,ixnode,iynode,cnodeiv[ic],cnodex,cnodey,nst[ic],c0[ic],9999.9,cc[ic],aa[ic],it[ic],ang[ic],anis[ic], rotmat[ic],maxcov[ic],MAXCTX,MAXCTY,MAXKR1,MAXKR2) # print('ccdf'); print(ccdf) # Correct order relations: - ccdfo = ordrel(ivtype,ncut,ccdf) + ccdfo = ordrel(ivtype,ncut,ccdf) # Draw from the local distribution: - zval = beyond(ivtype,ncut,thresh,ccdfo,ng,gcut,gcdf,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,zval,cdfval) - sim[index] = zval + zval = beyond(ivtype,ncut,thresh,ccdfo,ng,gcut,gcdf,zmin,zmax,ltail,ltpar,middle,mpar,utail,utpar,zval,cdfval) + sim[index] = zval # print('zval'); print(zval) # END MAIN LOOP OVER SIMULATIONS: - for ind in range(0,nxy): - iy = int((ind)/nx) - ix = ind - (iy)*nx - sim_out[ireal,ny-iy-1,ix] = sim[ind] + for ind in range(0,nxy): + iy = int((ind)/nx) + ix = ind - (iy)*nx + sim_out[ireal,ny-iy-1,ix] = sim[ind] return sim_out @@ -4945,7 +4955,7 @@ def setrot3D(vario3D): alpha = (450.0 - ang_azi) * DEG2RAD beta = -1.0 * ang_dip *DEG2RAD theta = 0.0 * DEG2RAD # assume 0 plunge - + sina = np.sin(alpha) sinb = np.sin(beta) sint = np.sin(theta) @@ -4953,7 +4963,7 @@ def setrot3D(vario3D): cosb = np.cos(beta) cost = np.cos(theta) ### Construct the rotation matrix in the required memory - + afac1 = 1.0/max(anis_hori, EPSLON) afac2 = 1.0/max(anis_vert, EPSLON) rotmat = np.zeros((2,3,3)) @@ -5372,7 +5382,7 @@ def vmodel_3D( MAXROT=MAXNST+1 EPSLON = 1.0e-20 VERSION= 1.01 - + # Declare arrays index = np.zeros(nlag+1) h = np.zeros(nlag+1) @@ -5389,7 +5399,7 @@ def vmodel_3D( dip = np.zeros(nst) anis = np.zeros(nst) anis_v = np.zeros(nst) - + cc = vario['cc'] it = vario['it'] azi = vario['azi'] @@ -5400,7 +5410,7 @@ def vmodel_3D( xoff = math.sin(DEG2RAD*mazm)*math.cos(DEG2RAD*mdip)*xlag yoff = math.cos(DEG2RAD*mazm)*math.cos(DEG2RAD*mdip)*xlag zoff = math.sin(DEG2RAD*mdip)*xlag - print(' x,y,z offsets = ' + str(xoff) + ',' + str(yoff) + ',' + str(zoff)) + print(' x,y,z offsets = ' + str(xoff) + ',' + str(yoff) + ',' + str(zoff)) rotmat = setrot3D(vario) maxcov = vario['nug'] + np.sum(vario['cc']) xx = 0.0; yy = 0.0; zz = 0.0; @@ -5413,7 +5423,7 @@ def vmodel_3D( zz = zz + zoff gam[il] = maxcov - cov[il] ro[il] = cov[il]/maxcov - + # finished return index,h,gam,cov,ro @@ -5472,7 +5482,7 @@ def cova3(x1, y1, z1, x2, y2, z2, vario3D, rotmat, maxcov): :param it: TODO :param ang: TODO: not used :param anis: Horizontal aspect ratio - :param anis_v: Vertical aspect ratio + :param anis_v: Vertical aspect ratio :param rotmat: rotation matrices :param maxcov: TODO :return: TODO @@ -5499,7 +5509,7 @@ def cova3(x1, y1, z1, x2, y2, z2, vario3D, rotmat, maxcov): # dx1 = dx * rotmat[ist,0,0] + dy * rotmat[ist,0,1] + dz * rotmat[ist,0,2] # dy1 = (dx * rotmat[ist,1,0] + dy * rotmat[ist,1,1] + dz * rotmat[ist,1,2] ) / anis_hori # dz1 = (dx * rotmat[ist,2,0] + dy * rotmat[ist,2,1] + dz * rotmat[ist,2,2] ) / anis_vert - + dx1 = dx * rotmat[ist,0,0] + dy * rotmat[ist,0,1] + dz * rotmat[ist,0,2] dy1 = (dx * rotmat[ist,1,0] + dy * rotmat[ist,1,1] + dz * rotmat[ist,1,2] ) dz1 = (dx * rotmat[ist,2,0] + dy * rotmat[ist,2,1] + dz * rotmat[ist,2,2] ) @@ -5522,8 +5532,8 @@ def cova3(x1, y1, z1, x2, y2, z2, vario3D, rotmat, maxcov): # Gaussian model hh = -3.0 * (h * h) / (vario3D["hmaj"][ist] * vario3D["hmaj"][ist]) cova3_ = cova3_ + vario3D["cc"][ist] * np.exp(hh) - return cova3_ - + return cova3_ + @jit(nopython=True) def cova3_array(x1, y1, z1, x2, y2, z2, vario3D_array, rotmat, maxcov): """Calculate the covariance associated with a variogram model specified by a @@ -5565,7 +5575,7 @@ def cova3_array(x1, y1, z1, x2, y2, z2, vario3D_array, rotmat, maxcov): anis_hori = vario3D_array[ist*7+2+5] / vario3D_array[ist*7+2+4] anis_vert = vario3D_array[ist*7+2+6] / vario3D_array[ist*7+2+4] it = vario3D_array[ist*7+2+0] - + # Compute the appropriate structural distance # dx1 = dx * rotmat[ist,0,0] + dy * rotmat[ist,0,1] + dz * rotmat[ist,0,2] # dy1 = (dx * rotmat[ist,1,0] + dy * rotmat[ist,1,1] + dz * rotmat[ist,1,2] ) / anis_hori diff --git a/pyproject.toml b/pyproject.toml index f017627..515c306 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,13 @@ classifiers = [ ] requires-python = ">=3.7" dependencies = [ + "numba", + "scipy", + "numpy", + "pandas", + "matplotlib", + "statsmodels", + "tqdm" ] [project.urls] @@ -35,3 +42,15 @@ include = [ [tool.setuptools.dynamic] version = {attr = "geostatspy.__version__"} + +[tool.pytest.ini_options] +addopts = "-v" +testpaths = [ + "tests" +] + +[project.optional-dependencies] +test = [ + "pytest", + "pytest-cov" +] \ No newline at end of file diff --git a/tests/test_GSLIB.py b/tests/test_GSLIB.py new file mode 100644 index 0000000..1822907 --- /dev/null +++ b/tests/test_GSLIB.py @@ -0,0 +1,186 @@ +import pytest +import os +import pandas as pd +from geostatspy.GSLIB import kb2d, sgsim + + +def test_kb2d(): + # Create dummy data + data = { + 'xcol': [0, 1, 2, 3, 4], + 'ycol': [0, 1, 2, 3, 4], + 'varcol': [0.1, 0.2, 0.3, 0.4, 0.5] + } + df = pd.DataFrame(data) + + # Variogram model parameters + var_model = { + 'nug': 0.0, + 'nst': 2, + 'hmaj1': 600, + 'hmaj2': 2300, + 'hmin1': 500, + 'hmin2': 2200, + 'azi1': 5, + 'azi2': 10, + 'it1': 2, + 'it2': 3, + 'cc1': 0.08, + 'cc2': 0.22 + } + + # Setup kb2d parameters + nx = 10 + ny = 10 + hsiz = 1.0 + + # Call kb2d with dry_run=True to generate parameter file without executing kriging + kb2d(df, 'xcol', 'ycol', 'varcol', nx, ny, hsiz, var_model, dry_run=True) + + # Read back the parameters from the file + params = {} + with open('kb2d.par', 'r') as file: + for i, line in enumerate(file): + split_line = line.strip().split() + if i == 9: + assert split_line[0] == 'tmp.dat', "Incorrect data file in kb2d.par" + elif i == 10: + assert int(split_line[0]) == nx, "Incorrect nx in kb2d.par" + assert float(split_line[1]) == 0.5, "Incorrect xmn in kb2d.par" + assert float(split_line[2]) == hsiz, "Incorrect xsiz in kb2d.par" + elif i == 11: + assert int(split_line[0]) == ny, "Incorrect ny in kb2d.par" + assert float(split_line[1]) == 0.5, "Incorrect ymn in kb2d.par" + assert float(split_line[2]) == hsiz, "Incorrect ysiz in kb2d.par" + elif i == 13: + assert int(split_line[0]) == 1, "Incorrect ndmin in kb2d.par" + assert int(split_line[1]) == 30, "Incorrect ndmax in kb2d.par" + elif i == 14: + assert float(split_line[0]) == 2300, "Incorrect max_radius in kb2d.par" + elif i == 16: + params['nst'] = int(split_line[0]) + params['nug'] = float(split_line[1]) + elif i == 17: + params['it1'] = int(split_line[0]) + params['cc1'] = float(split_line[1]) + params['azi1'] = float(split_line[2]) + params['hmaj1'] = float(split_line[3]) + params['hmin1'] = float(split_line[4]) + elif i == 18: + params['it2'] = int(split_line[0]) + params['cc2'] = float(split_line[1]) + params['azi2'] = float(split_line[2]) + params['hmaj2'] = float(split_line[3]) + params['hmin2'] = float(split_line[4]) + + for key, value in var_model.items(): + assert params[key] == value, f"Incorrect {key} in variogram model in kb2d.par" + + # Clean up temporary files + os.remove('kb2d.par') + os.remove('data_temp.dat') + + +def test_sgsim(): + # Create dummy data + data = { + 'xcol': [0, 1, 2, 3, 4], + 'zcol': [0, 1, 2, 3, 4], + 'ycol': [0, 1, 2, 3, 4], + 'wtcol': [1, 1, 1, 1, 1], + 'varcol': [0.1, 0.2, 0.3, 0.4, 0.5] + } + df = pd.DataFrame(data) + + # Variogram model parameters for sgsim + var_model = { + 'nug': 0.05, + 'nst': 2, + 'c': [0.5, 0.45], + 'tstr': [1, 2], + 'ahmax': [2200, 600], + 'ahmin': [2100, 500], + 'ahvert': [400, 50], + 'ang1': [5, 8], + 'ang2': [80, 83], + 'ang3': [30, 33], + } + + # Setup sgsim parameters + nx, ny, nz = 10, 10, 10 + dx, dy, dz = 0.5, 0.5, 0.5 + + # Call sgsim with dry_run=True to generate sgsim.par + sgsim(1, df, 'varcol', var_model, + xcol='xcol', ycol='ycol', zcol='zcol', wtcol='wtcol', + nx=nx, ny=ny, nz=30, dx=dx, dy=dy, dz=dz, + ndmin=3, ndmax=30, dry_run=True) + + # Read back the parameters from the file + params = {} + with open('sgsim.par', 'r') as file: + for i, line in enumerate(file): + split_line = line.strip().split() + if i == 5: + assert int(split_line[0]) == 2, "Incorrect xcol in sgsim.par" + assert int(split_line[1]) == 3, "Incorrect ycol in sgsim.par" + assert int(split_line[2]) == 4, "Incorrect zcol in sgsim.par" + assert int(split_line[3]) == 1, "Incorrect vcol in sgsim.par" + assert int(split_line[4]) == 5, "Incorrect wtcol in sgsim.par" + elif i == 12: + assert float(split_line[0]) == 0.1, "Incorrect lower tail in sgsim.par" + assert float(split_line[1]) == 0.5, "Incorrect upper tail in sgsim.par" + elif i == 18: + assert int(split_line[0]) == 1, "Incorrect nsim in sgsim.par" + elif i == 19: + assert int(split_line[0]) == 10, "Incorrect nx in sgsim.par" + assert float(split_line[1]) == 0.25, "Incorrect xmin in sgsim.par" + assert float(split_line[2]) == 0.5, "Incorrect dx in sgsim.par" + elif i == 20: + assert int(split_line[0]) == 10, "Incorrect ny in sgsim.par" + assert float(split_line[1]) == 0.25, "Incorrect ymin in sgsim.par" + assert float(split_line[2]) == 0.5, "Incorrect dy in sgsim.par" + elif i == 21: + assert int(split_line[0]) == 30, "Incorrect nz in sgsim.par" + assert float(split_line[1]) == 0.25, "Incorrect zmin in sgsim.par" + assert float(split_line[2]) == 0.5, "Incorrect dz in sgsim.par" + elif i == 23: + assert int(split_line[0]) == 3, "Incorrect ndmin in sgsim.par" + assert int(split_line[1]) == 30, "Incorrect ndmax in sgsim.par" + elif i == 24: + assert int(split_line[0]) == 12, "Incorrect # of simulated nodes in sgsim.par" + elif i == 28: + assert float(split_line[0]) == var_model['ahmax'][0], "Incorrect search radius" + assert float(split_line[1]) == var_model['ahmin'][0], "Incorrect search radius" + assert float(split_line[2]) == var_model['ahvert'][0], "Incorrect search radius" + elif i == 29: + assert float(split_line[0]) == var_model['ang1'][0], "Incorrect search ellipsoid" + assert float(split_line[1]) == var_model['ang2'][0], "Incorrect search ellipsoid" + assert float(split_line[2]) == var_model['ang3'][0], "Incorrect search ellipsoid" + elif i == 34: + assert int(split_line[0]) == 2, "Incorrect nst effect in sgsim.par" + assert float(split_line[1]) == var_model['nug'], "Incorrect nugget in sgsim.par" + elif i == 35: + assert int(split_line[0]) == 1, "Incorrect it1 in sgsim.par" + assert float(split_line[1]) == var_model['c'][0], "Incorrect cc1 in sgsim.par" + assert float(split_line[2]) == var_model['ang1'][0], "Incorrect ang1 in sgsim.par" + assert float(split_line[3]) == var_model['ang2'][0], "Incorrect ang2 in sgsim.par" + assert float(split_line[4]) == var_model['ang3'][0], "Incorrect ang3 in sgsim.par" + elif i == 36: + assert float(split_line[0]) == var_model['ahmax'][0], 'Incorrect ahmax in sgsim.par' + assert float(split_line[1]) == var_model['ahmin'][0], 'Incorrect ahmin in sgsim.par' + assert float(split_line[2]) == var_model['ahvert'][0], 'Incorrect ahvert in sgsim.par' + elif i == 37: + assert int(split_line[0]) == 2, "Incorrect it2 in sgsim.par" + assert float(split_line[1]) == var_model['c'][1], "Incorrect cc2 in sgsim.par" + assert float(split_line[2]) == var_model['ang1'][1], "Incorrect ang1 in sgsim.par" + assert float(split_line[3]) == var_model['ang2'][1], "Incorrect ang2 in sgsim.par" + assert float(split_line[4]) == var_model['ang3'][1], "Incorrect ang3 in sgsim.par" + elif i == 38: + assert float(split_line[0]) == var_model['ahmax'][1], 'Incorrect ahmax in sgsim.par' + assert float(split_line[1]) == var_model['ahmin'][1], 'Incorrect ahmin in sgsim.par' + assert float(split_line[2]) == var_model['ahvert'][1], 'Incorrect ahvert in sgsim.par' + + # Clean up temporary files + os.remove('sgsim.par') + os.remove('data_temp.dat') \ No newline at end of file diff --git a/tests/test_data/test_declus.csv b/tests/test_data/test_declus.csv new file mode 100644 index 0000000..fc64d5a --- /dev/null +++ b/tests/test_data/test_declus.csv @@ -0,0 +1,271 @@ +X,Y,Por +290.0,869.0,19.91895970753067 +140.0,779.0,23.18192978847793 +180.0,619.0,16.87956433428224 +480.0,19.0,14.563101544862107 +130.0,809.0,21.566379372288836 +190.0,849.0,24.778735829018032 +70.0,889.0,21.589075879156816 +750.0,949.0,13.20365355450575 +90.0,749.0,21.635480931531536 +80.0,889.0,27.004395861034922 +20.0,919.0,22.6942213024491 +250.0,769.0,20.646681106584964 +780.0,9.0,20.420621975764533 +50.0,459.0,12.328778980505025 +80.0,959.0,24.648893635394252 +250.0,919.0,26.038081422197514 +170.0,749.0,27.067270659730447 +180.0,999.0,24.599068079427518 +430.0,929.0,19.09418827020901 +440.0,259.0,11.435087304428782 +450.0,9.0,13.715613897280441 +50.0,829.0,22.392680430350254 +120.0,809.0,23.175303436855362 +180.0,969.0,22.356170355870788 +870.0,249.0,16.846657323334178 +210.0,139.0,18.48377870867734 +360.0,169.0,13.29816458196088 +40.0,749.0,19.836252294232022 +420.0,929.0,17.25118275432911 +370.0,639.0,16.23488959359062 +790.0,429.0,15.286817877233489 +830.0,9.0,20.396410298065536 +80.0,979.0,23.590791696701302 +270.0,829.0,21.903816441562032 +0.0,79.0,20.769133262205706 +420.0,989.0,17.086116366945255 +730.0,69.0,13.15974580586624 +310.0,949.0,24.2213229133554 +20.0,229.0,13.637288328530772 +230.0,869.0,21.39334427217903 +100.0,59.0,18.196081500805597 +300.0,959.0,22.215107643014026 +560.0,89.0,13.711914009572384 +410.0,829.0,15.674229399071473 +80.0,689.0,19.579052343283653 +10.0,799.0,24.31949261381442 +130.0,909.0,24.287551641421764 +0.0,59.0,15.713142430095315 +150.0,559.0,12.632539194104517 +290.0,699.0,22.706156688317854 +490.0,159.0,13.531384990700456 +830.0,479.0,16.069237770923092 +360.0,249.0,11.562936540324303 +300.0,639.0,18.378080552616755 +50.0,949.0,25.927230929687283 +750.0,899.0,13.537693955185933 +320.0,709.0,21.6996156510517 +100.0,489.0,13.772092415188236 +870.0,139.0,17.888939768002835 +710.0,49.0,16.151435271267538 +920.0,49.0,19.989605500889624 +870.0,489.0,15.10482191311221 +110.0,79.0,18.841969967589694 +270.0,949.0,21.66430612365115 +340.0,869.0,17.846177321610973 +925.0,325.0,13.532441662584597 +80.0,29.0,21.64699189274554 +740.0,349.0,19.8185179818503 +720.0,289.0,15.168149368484782 +170.0,909.0,24.977843805883772 +10.0,869.0,21.933494492399426 +280.0,259.0,14.534612515318027 +300.0,859.0,20.909028325492198 +320.0,519.0,15.358028958924045 +670.0,729.0,11.25750092466748 +790.0,99.0,19.820810278904283 +100.0,79.0,15.4779387341865 +50.0,989.0,22.719282032766007 +100.0,729.0,14.42787310063987 +610.0,709.0,12.236954488404146 +170.0,869.0,22.248923303172745 +280.0,829.0,23.26994997908278 +90.0,49.0,16.150751023819957 +30.0,979.0,19.644446706544834 +300.0,999.0,19.68751362384349 +410.0,909.0,16.08300904836243 +410.0,439.0,20.610354949135374 +830.0,419.0,13.377973179848198 +125.0,875.0,23.32517077393795 +160.0,699.0,21.77832661207232 +910.0,79.0,20.099189166374728 +790.0,819.0,9.510972147394343 +20.0,639.0,11.225054081529088 +420.0,369.0,14.456974214731904 +610.0,519.0,12.550036926925625 +70.0,599.0,16.977271275018598 +730.0,509.0,13.117774189065267 +50.0,139.0,17.17983056596174 +430.0,669.0,16.118478312478928 +280.0,849.0,20.418969357084826 +240.0,769.0,20.665109737627542 +240.0,689.0,23.403507054786076 +770.0,909.0,15.497229586407842 +940.0,349.0,13.436354645479325 +490.0,179.0,14.283914485521393 +60.0,49.0,18.576034585105177 +160.0,949.0,25.809264194992124 +250.0,259.0,11.274287600571164 +780.0,79.0,15.755543169248291 +120.0,949.0,22.485452686390037 +40.0,79.0,15.62095758794202 +20.0,29.0,17.704460413531557 +340.0,919.0,18.568582497580426 +370.0,969.0,16.136983255373927 +120.0,989.0,21.80816831836151 +120.0,769.0,21.744020292162634 +720.0,119.0,15.705813735898031 +140.0,259.0,15.33269096983388 +200.0,939.0,22.013782784006054 +160.0,969.0,26.023676068001137 +100.0,899.0,20.731537269002402 +630.0,469.0,10.342825783832092 +390.0,99.0,17.47755023464745 +100.0,979.0,23.95676013711359 +180.0,939.0,23.97923099645694 +160.0,419.0,12.679975875912874 +180.0,929.0,23.967664118031216 +910.0,69.0,20.9867325932849 +110.0,749.0,19.60177492219071 +40.0,719.0,15.02077336385892 +90.0,39.0,19.603568031031546 +160.0,869.0,26.728055053270463 +550.0,299.0,9.645082084754822 +270.0,929.0,19.867456656304473 +170.0,689.0,19.19584364364689 +10.0,19.0,18.402477452804586 +610.0,559.0,17.573787545748075 +200.0,989.0,20.89186299701817 +230.0,889.0,19.85807105247154 +380.0,629.0,11.098159098520648 +330.0,269.0,14.310262679485769 +725.0,325.0,20.727860047530523 +230.0,169.0,13.052648625972065 +170.0,499.0,18.04100358781941 +140.0,219.0,14.109193323504474 +590.0,79.0,13.97368724250824 +330.0,399.0,13.343616574519821 +20.0,739.0,15.65983650145937 +75.0,875.0,21.63803085765802 +870.0,59.0,19.41104394690114 +90.0,29.0,20.70594415754922 +110.0,699.0,19.347026862570427 +920.0,129.0,19.715145068739453 +260.0,159.0,18.408813186832447 +40.0,959.0,22.905382574475595 +400.0,49.0,14.884529144228466 +190.0,809.0,21.69659139945754 +750.0,99.0,17.306230774266407 +870.0,939.0,14.077357899907021 +330.0,209.0,17.23432406798434 +230.0,139.0,18.37698611117252 +90.0,919.0,24.593446116103465 +380.0,699.0,18.553936355931086 +230.0,999.0,21.2480384616009 +20.0,989.0,21.531914729842313 +280.0,659.0,19.399546559455825 +0.0,969.0,22.745657388100575 +125.0,275.0,11.632724438820354 +670.0,349.0,19.77105233784861 +350.0,59.0,17.77924079914838 +290.0,449.0,13.199714762838452 +220.0,859.0,21.336353684817823 +150.0,939.0,24.450753661144727 +80.0,799.0,22.648503923675115 +70.0,969.0,22.965596103369883 +860.0,949.0,14.860537860709496 +125.0,925.0,24.265082419809218 +140.0,769.0,23.487903598576665 +250.0,959.0,23.332571286207347 +350.0,809.0,20.094252923131144 +180.0,479.0,11.81491246310133 +180.0,949.0,26.40048020141171 +470.0,659.0,10.358774941331163 +80.0,919.0,26.45893983313578 +630.0,589.0,13.74062883810807 +710.0,269.0,20.971971151392697 +890.0,339.0,12.152673925714609 +270.0,969.0,24.761210161815832 +460.0,289.0,12.263892375504025 +550.0,779.0,14.926871172193001 +80.0,929.0,22.97398748489903 +125.0,225.0,14.312134939808988 +210.0,39.0,15.212822321836109 +460.0,969.0,16.17845185556444 +540.0,89.0,11.343006364463598 +720.0,319.0,15.790511184186308 +790.0,679.0,9.224354317579358 +660.0,399.0,14.690445610388872 +420.0,959.0,18.66267854095298 +860.0,629.0,13.634834070609575 +0.0,809.0,23.04000008767983 +290.0,979.0,22.35329350417443 +150.0,719.0,22.061525032215837 +330.0,939.0,23.013928935708513 +400.0,539.0,11.773805451163012 +370.0,569.0,12.786093159999458 +150.0,829.0,25.315865320989857 +0.0,709.0,17.263970593830805 +110.0,729.0,20.74493242613012 +160.0,79.0,19.787303849866767 +950.0,89.0,15.745460362563568 +660.0,639.0,16.196469804902083 +430.0,539.0,14.17401386173268 +630.0,329.0,16.3385512570378 +370.0,209.0,11.42710137699514 +175.0,925.0,22.707997907227195 +40.0,59.0,20.796081367496537 +380.0,419.0,19.63261615907623 +220.0,29.0,20.749663545376 +300.0,989.0,21.731316625465055 +750.0,169.0,15.05029687112512 +870.0,199.0,16.291435406526197 +750.0,159.0,15.522401882905992 +860.0,889.0,12.507532603047933 +225.0,825.0,24.077199812068045 +110.0,709.0,18.841627781830205 +180.0,709.0,17.8580601936863 +660.0,259.0,18.863479713245294 +880.0,219.0,19.95431912402367 +200.0,99.0,14.205996115856113 +700.0,699.0,12.540393626728205 +310.0,819.0,19.328055788144674 +190.0,969.0,21.37389412664047 +280.0,89.0,17.63081558921223 +320.0,869.0,22.71292029311281 +240.0,979.0,27.594891730048346 +70.0,609.0,11.484916816133676 +790.0,9.0,19.497026454079144 +660.0,429.0,13.796814542763622 +370.0,589.0,12.502070656812164 +330.0,969.0,19.42695392142408 +10.0,189.0,15.161882168479636 +90.0,669.0,13.370517266632213 +180.0,9.0,16.861891781151318 +260.0,979.0,22.084158665807113 +125.0,75.0,18.597037074361882 +110.0,119.0,17.02412811536146 +10.0,839.0,19.061938325322622 +940.0,169.0,17.183274246584432 +160.0,909.0,24.778903844263667 +870.0,129.0,18.231025761839703 +180.0,809.0,22.4887400862943 +610.0,389.0,13.05664383100119 +100.0,779.0,24.309250875393268 +280.0,669.0,17.975969703031634 +290.0,989.0,21.630859657865667 +180.0,589.0,13.374971590261227 +170.0,779.0,23.625849018647962 +160.0,999.0,23.936376546306196 +310.0,849.0,21.46689397897487 +800.0,9.0,22.18001108481895 +570.0,309.0,12.596271663645197 +190.0,989.0,22.336602991891173 +740.0,229.0,19.351773998637483 +710.0,449.0,18.07596954973169 +530.0,539.0,13.339673026507645 +980.0,839.0,10.857180101732546 +220.0,879.0,20.758688676981702 +620.0,549.0,13.495207950133771 +350.0,859.0,19.95718334280078 diff --git a/tests/test_geostats.py b/tests/test_geostats.py new file mode 100644 index 0000000..0e1035e --- /dev/null +++ b/tests/test_geostats.py @@ -0,0 +1,49 @@ +import pytest +import numpy as np +import pandas as pd +from geostatspy.geostats import backtr, gcum, declus + + +def test_backtr(): + # Setup + df = pd.DataFrame({'values': [0.1, 0.5, 0.9]}) + vr = np.array([0, 0.3, 0.6, 1.0]) + vrg = np.array([0, 0.3, 0.7, 1.0]) + zmin, zmax = 0, 1.0 + ltail, ltpar, utail, utpar = 1, 1.5, 1, 1.5 + + # Expected output + expected_backtr = np.array([0.1, 0.5, 0.9]) # Simplified expected result for demonstration + + # Test + result = backtr(df, 'values', vr, vrg, zmin, zmax, ltail, ltpar, utail, utpar) + assert np.allclose(result, expected_backtr, atol=0.1), "Back transformation did not match expected values" + + +def test_gcum(): + # Test value + x_values = np.array([-1.96, 0, 1.96]) + # Expected probabilities for these z-scores + expected_probs = np.array([0.025, 0.5, 0.975]) + + # Test + results = np.array([gcum(x) for x in x_values]) + assert np.allclose(results, expected_probs, atol=0.01), "Cumulative probabilities did not match expected values" + + +def test_declus(): + df = pd.read_csv('tests/test_data/test_declus.csv') + + ncell = 100 + cmin, cmax = 1, 5000 + noff = 10 + iminmax = 1 + + wts, cell_sizes, dmeans = declus(df, 'X', 'Y', 'Por', iminmax=iminmax, noff=noff, + ncell=ncell, cmin=cmin, cmax=cmax, verbose=False) + + correct_wts = np.array([0.35, 0.27, 0.92, 2.06, 0.27]) + correct_dmeans = np.array([18.38, 18.38, 17.14, 16.29, 16.05]) + + assert np.allclose(wts[:5], correct_wts, atol=0.1), "Declus weights did not match expected values" + assert np.allclose(dmeans[:5], correct_dmeans, atol=0.1), "Declus declustered means did not match expected values"