From e1cf6af2fa7979ce9966f813692ea64ec8584f45 Mon Sep 17 00:00:00 2001 From: yisangriB Date: Tue, 10 Mar 2026 22:35:38 -0500 Subject: [PATCH] sy - updating quoFEM with the latest stochastic emulator code --- modules/performFEM/surrogateGP/gpPredict.py | 267 ++++++-- .../performUQ/SimCenterUQ/surrogateBuild.py | 591 +++++++++++------- 2 files changed, 559 insertions(+), 299 deletions(-) diff --git a/modules/performFEM/surrogateGP/gpPredict.py b/modules/performFEM/surrogateGP/gpPredict.py index e73f7df9e..8874bc1b1 100644 --- a/modules/performFEM/surrogateGP/gpPredict.py +++ b/modules/performFEM/surrogateGP/gpPredict.py @@ -147,6 +147,7 @@ def error_warning(msg): did_stochastic = sur['doStochastic'] did_logtransform = sur['doLogtransform'] did_normalization = sur['doNormalization'] + did_multioutput_corr = sur.get('doMultiOutputCorr', False) # noqa: N806 kernel = sur['kernName'] if kernel == 'Radial Basis': @@ -214,7 +215,7 @@ def set_XY2(self, X=None, Y=None, Y_metadata=None): # noqa: N802, N803 self.set_XY(X, Y) - def get_stochastic_variance(X, Y, x, ny): # noqa: C901, N803 + def get_stochastic_variance(X, Y, x, ny, did_multioutput_corr=False, var_pred_train=None, var_pred_test=None): # noqa: C901, N803 # X_unique, X_idx, indices, counts = np.unique(X, axis=0, return_index=True, return_counts=True, return_inverse=True) X_unique, dummy, indices, counts = np.unique( # noqa: N806 X, axis=0, return_index=True, return_counts=True, return_inverse=True @@ -270,53 +271,66 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: C901, N803 X_unique = X # noqa: N806 Y_mean = Y # noqa: N806 indices = range(Y.shape[0]) + if not did_multioutput_corr: - # - # check if we have an old example file - to be deleted in the future - # - old_version = False - for key, val in sur['modelInfo'][g_name_sur[ny] + '_Var'].items(): # noqa: B007, PERF102 - if 'sum' in key: - old_version = True - break - - if old_version: - print( # noqa: T201 - 'The surrogate model was trained using an older version of the tool. Please retrain the model using this version or use older version.', - file=sys.stderr, + # + # check if we have an old example file - to be deleted in the future + # + old_version = False + for key, val in sur['modelInfo'][g_name_sur[ny] + '_Var'].items(): # noqa: B007, PERF102 + if 'sum' in key: + old_version = True + break + + if old_version: + print( # noqa: T201 + 'The surrogate model was trained using an older version of the tool. Please retrain the model using this version or use older version.', + file=sys.stderr, + ) + exit(-1) # noqa: PLR1722 + + log_vars = np.atleast_2d( + sur['modelInfo'][g_name_sur[ny] + '_Var']['TrainingSamplesY'] + ).T + + kernel_var = GPy.kern.Matern52(input_dim=nrv_sur, ARD=True) + + m_var = GPy.models.GPRegression( + X, log_vars, kernel_var, normalizer=True, Y_metadata=None ) - exit(-1) # noqa: PLR1722 - - log_vars = np.atleast_2d( - sur['modelInfo'][g_name_sur[ny] + '_Var']['TrainingSamplesY'] - ).T - kernel_var = GPy.kern.Matern52(input_dim=nrv_sur, ARD=True) + # print("Variance field obtained for ny={}".format(ny)) + for key, val in sur['modelInfo'][g_name_sur[ny] + '_Var'].items(): # noqa: B007, PERF102 + exec('m_var.' + key + '= np.array(val)') # noqa: S102 - m_var = GPy.models.GPRegression( - X, log_vars, kernel_var, normalizer=True, Y_metadata=None - ) + if not did_multioutput_corr: + log_var_pred, dum = m_var.predict(X) + var_pred = np.exp(log_var_pred) + else: + # in fact it was not log + var_pred, dum = m_var.predict(X) - # print("Variance field obtained for ny={}".format(ny)) - for key, val in sur['modelInfo'][g_name_sur[ny] + '_Var'].items(): # noqa: B007, PERF102 - exec('m_var.' + key + '= np.array(val)') # noqa: S102 + if did_normalization: + # Y_normFact = np.var(Y) # noqa: N806, RUF100 + Y_normFact = np.mean(var_pred.T[0]) # noqa: N806 - log_var_pred, dum = m_var.predict(X) - var_pred = np.exp(log_var_pred) + else: + Y_normFact = 1 # noqa: N806 - if did_normalization: - # Y_normFact = np.var(Y) # noqa: N806, RUF100 - Y_normFact = np.mean(var_pred.T[0]) # noqa: N806 + norm_var_str = ( + (var_pred.T[0]) / Y_normFact + ) # if normalization was used.. + log_var_pred_x, dum = m_var.predict(x) + nugget_var_pred_x = np.exp(log_var_pred_x.T[0]) / Y_normFact else: - Y_normFact = 1 # noqa: N806 - - norm_var_str = ( - (var_pred.T[0]) / Y_normFact - ) # if normalization was used.. + if did_normalization: + Y_normFact = np.mean(var_pred_train.T[0]) + else: + Y_normFact = 1 + norm_var_str = (var_pred_train.T[0]/Y_normFact) + nugget_var_pred_x = var_pred_test/Y_normFact - log_var_pred_x, dum = m_var.predict(x) - nugget_var_pred_x = np.exp(log_var_pred_x.T[0]) / Y_normFact return ( X_unique, @@ -326,6 +340,65 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: C901, N803 nugget_var_pred_x, np.var(Y_mean), ) + + + def get_stochastic_correlation(X, Y, x): # noqa: C901, N803 + + y_dim = Y.shape[1] + # + # Tertiary results to reconstruct cov observations.. + # + n_samp = X.shape[0] + n_new_samp = x.shape[0] + + Y_pre_mean = np.zeros((n_samp, y_dim)) + for ny in range(y_dim): + Y_pre_mean[:,ny:ny+1] = np.atleast_2d(sur['modelInfo'][g_name_sur[ny] + '_TertMean']['TrainingSamplesY']).T + + + tril_idx = np.tril_indices(y_dim) + diag_in_vec = np.where(tril_idx[0] == tril_idx[1])[0] + n_elem = len(tril_idx[0]) + + + cov_vec = np.zeros([n_samp, n_elem]) + for ns in range(n_samp): + cov_samp = (Y_pre_mean[ns:ns+1,:]-Y[ns:ns+1,:]).T @ (Y_pre_mean[ns:ns+1,:]-Y[ns:ns+1,:]) + cov_vec[ns,:] = cov_samp[tril_idx] + + kernel_var = GPy.kern.Matern52(input_dim=nrv_sur, ARD=True) + + gps = [] + cov_pred = np.zeros([n_new_samp, n_elem]) + cov_pred_train = np.zeros([n_samp, n_elem]) + for i in range(n_elem): + gp = GPy.models.GPRegression(X.copy(), cov_vec[:, i:i+1], kernel=kernel_var) + + # Use the parameter values of 1st variance (it is parallel GP, so the parameters are the same) + for key, val in sur['modelInfo'][g_name_sur[0] + '_Var'].items(): # noqa: B007, PERF102 + exec('gp.' + key + '= np.array(val)') # noqa: S102 + + gps.append(gp) + cov_pred[:,i:i+1], dum = gp.predict(x) + cov_pred_train[:,i:i+1], dum = gp.predict(X) + + cov_pred, projection_counter = make_psd(cov_pred,1.e-6, tril_idx, y_dim) # ensure PSD + cov_pred_train, projection_counter = make_psd(cov_pred_train,1.e-6, tril_idx, y_dim) # ensure PSD + + + var_pred = cov_pred[:,diag_in_vec] + corr_pred = np.zeros_like(cov_pred) + idx = 0 + for i in range(y_dim): + for j in range(i+1): + corr_pred[:,idx] = cov_pred[:,idx] / np.sqrt(var_pred[:,i] * var_pred[:,j]) + idx += 1 + + + var_pred_test = var_pred + var_pred_train = cov_pred_train[:,diag_in_vec] + return corr_pred, var_pred_train, var_pred_test, tril_idx + # REQUIRED: rv_name, y_var @@ -569,6 +642,10 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: C901, N803 nugget_var_list = [0] * ng_sur if not did_mf: + + if did_multioutput_corr: + cor_vect, var_pred_train, var_pred_test, tril_idx = get_stochastic_correlation(X, Y, rv_val) + for ny in range(ng_sur): if did_stochastic[ny]: m_list = m_list + [ # noqa: RUF005 @@ -586,18 +663,27 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: C901, N803 counts, nugget_var_pred, Y_normFact, # noqa: N806 - ) = get_stochastic_variance(X, Y[:, ny][np.newaxis].T, rv_val, ny) + ) = get_stochastic_variance(X, Y[:, ny][np.newaxis].T, rv_val, ny, did_multioutput_corr, var_pred_train[:,ny:ny+1], var_pred_test[:,ny:ny+1]) Y_metadata = {'variance_structure': norm_var_str / counts} # noqa: N806 m_list[ny].set_XY2(X_unique, Y_mean, Y_metadata=Y_metadata) for key, val in sur['modelInfo'][g_name_sur[ny]].items(): # noqa: B007, PERF102 exec('m_list[ny].' + key + '= np.array(val)') # noqa: S102 + if did_normalization: + norm_var = m_list[ny].normalizer.std**2 + else: + norm_var = 1 + nugget_var_list[ny] = ( m_list[ny].Gaussian_noise.parameters * nugget_var_pred - * Y_normFact + * norm_var ) + # + # Get output + # + else: m_list = m_list + [ # noqa: RUF005 GPy.models.GPRegression( @@ -644,83 +730,119 @@ def get_stochastic_variance(X, Y, x, ny): # noqa: C901, N803 y_q1m = np.zeros([nsamp, y_dim]) y_q3m = np.zeros([nsamp, y_dim]) + y_pred_median_tmp = np.zeros([nsamp, y_dim]) for ny in range(y_dim): y_data_var[:, ny] = np.var(m_list[ny].Y) if ny in constIdx: - y_pred_median_tmp, y_pred_var_tmp[ny], y_pred_var_m_tmp[ny] = ( + y_pred_median_tmp[:,ny], y_pred_var_tmp[ny], y_pred_var_m_tmp[ny] = ( np.ones([nsamp]) * constVal[constIdx.index(ny)], np.zeros([nsamp]), np.zeros([nsamp]), ) else: - y_pred_median_tmp, y_pred_var_tmp_tmp = predict( + y_pred_median_tmp[:,ny:ny+1], y_pred_var_tmp_tmp = predict( m_list[ny], rv_val, did_mf ) # noiseless - y_pred_median_tmp = np.squeeze(y_pred_median_tmp) + y_pred_median_tmp[:,ny] = np.squeeze(y_pred_median_tmp[:,ny]) y_pred_var_tmp_tmp = np.squeeze(y_pred_var_tmp_tmp) if did_linear: y_lin_pred = lin_list[ny].predict(rv_val[:, lin_index]) - y_pred_median_tmp = y_pred_median_tmp + y_lin_pred + y_pred_median_tmp[:,ny] = y_pred_median_tmp[:,ny] + y_lin_pred y_pred_var_tmp[:, ny] = y_pred_var_tmp_tmp y_pred_var_m_tmp[:, ny] = y_pred_var_tmp_tmp + np.squeeze( nugget_var_list[ny] ) - y_samp_tmp = np.random.normal( - y_pred_median_tmp, np.sqrt(y_pred_var_m_tmp[:, ny]) - ) if did_logtransform: - y_pred_median[:, ny] = np.exp(y_pred_median_tmp) + y_pred_median[:, ny] = np.exp(y_pred_median_tmp[:,ny]) y_pred_var[:, ny] = np.exp( - 2 * y_pred_median_tmp + y_pred_var_tmp[:, ny] + 2 * y_pred_median_tmp[:,ny] + y_pred_var_tmp[:, ny] ) * (np.exp(y_pred_var_tmp[:, ny]) - 1) y_pred_var_m[:, ny] = np.exp( - 2 * y_pred_median_tmp + y_pred_var_m_tmp[:, ny] + 2 * y_pred_median_tmp[:,ny] + y_pred_var_m_tmp[:, ny] ) * (np.exp(y_pred_var_m_tmp[:, ny]) - 1) - y_samp[:, ny] = np.exp(y_samp_tmp) y_q1[:, ny] = lognorm.ppf( 0.05, s=np.sqrt(y_pred_var_tmp[:, ny]), - scale=np.exp(y_pred_median_tmp), + scale=np.exp(y_pred_median_tmp[:,ny]), ) y_q3[:, ny] = lognorm.ppf( 0.95, s=np.sqrt(y_pred_var_tmp[:, ny]), - scale=np.exp(y_pred_median_tmp), + scale=np.exp(y_pred_median_tmp[:,ny]), ) y_q1m[:, ny] = lognorm.ppf( 0.05, s=np.sqrt(y_pred_var_m_tmp[:, ny]), - scale=np.exp(y_pred_median_tmp), + scale=np.exp(y_pred_median_tmp[:,ny]), ) y_q3m[:, ny] = lognorm.ppf( 0.95, s=np.sqrt(y_pred_var_m_tmp[:, ny]), - scale=np.exp(y_pred_median_tmp), + scale=np.exp(y_pred_median_tmp[:,ny]), ) else: - y_pred_median[:, ny] = y_pred_median_tmp + y_pred_median[:, ny] = y_pred_median_tmp[:,ny] y_pred_var[:, ny] = y_pred_var_tmp[:, ny] y_pred_var_m[:, ny] = y_pred_var_m_tmp[:, ny] - y_samp[:, ny] = y_samp_tmp y_q1[:, ny] = norm.ppf( - 0.05, loc=y_pred_median_tmp, scale=np.sqrt(y_pred_var_tmp[:, ny]) + 0.05, loc=y_pred_median_tmp[:,ny], scale=np.sqrt(y_pred_var_tmp[:, ny]) ) y_q3[:, ny] = norm.ppf( - 0.95, loc=y_pred_median_tmp, scale=np.sqrt(y_pred_var_tmp[:, ny]) + 0.95, loc=y_pred_median_tmp[:,ny], scale=np.sqrt(y_pred_var_tmp[:, ny]) ) y_q1m[:, ny] = norm.ppf( - 0.05, loc=y_pred_median_tmp, scale=np.sqrt(y_pred_var_m_tmp[:, ny]) + 0.05, loc=y_pred_median_tmp[:,ny], scale=np.sqrt(y_pred_var_m_tmp[:, ny]) ) y_q3m[:, ny] = norm.ppf( - 0.95, loc=y_pred_median_tmp, scale=np.sqrt(y_pred_var_m_tmp[:, ny]) + 0.95, loc=y_pred_median_tmp[:,ny], scale=np.sqrt(y_pred_var_m_tmp[:, ny]) ) + y_samp_tmp = np.zeros([nsamp, y_dim]) + + sample_statistics = [None] * nsamp + for ns in range(nsamp): + sample_statistics[ns] = {} + if not did_multioutput_corr: + cov_mat = np.diag(y_pred_var_m_tmp[ns, :]) + print_extra_outputs = False + else: + print_extra_outputs = True + std_mat = np.sqrt(np.diag(y_pred_var_m_tmp[ns, :])) + cor_mat = np.zeros([y_dim, y_dim]) + cor_mat[tril_idx] = cor_vect[ns, :] + cor_mat = cor_mat + cor_mat.T - np.diag(np.diag(cor_mat)) + cov_mat = std_mat @ cor_mat @ std_mat + y_samp_tmp[ns,:] = np.random.multivariate_normal(y_pred_median_tmp[ns,:], cov_mat) + + if print_extra_outputs: + sample_statistics[ns]["id"] = ns + if did_logtransform: + sample_statistics[ns]["log_mean"] = y_pred_median_tmp[ns,:].tolist() + sample_statistics[ns]["log_corr"] = cor_vect[ns, :].tolist() + sample_statistics[ns]["log_var_epis"] = y_pred_var_tmp[:,ny].tolist() + sample_statistics[ns]["log_var_alea"] = np.squeeze(nugget_var_list[ny]).tolist() + else: + sample_statistics[ns]["mean"] = y_pred_median_tmp[ns,:].tolist() + sample_statistics[ns]["corr"] = cor_vect[ns, :].tolist() + sample_statistics[ns]["var_epis"] = y_pred_var_tmp[:,ny].tolist() + sample_statistics[ns]["var_alea"] = np.squeeze(nugget_var_list[ny]).tolist() + + if print_extra_outputs: + with open("sample_statistics.json", "w") as f: + json.dump(sample_statistics, f) + + if did_logtransform: + y_samp = np.exp(y_samp_tmp) + else: + y_samp = y_samp_tmp + + for ny in range(y_dim): if np.isnan(y_samp[:, ny]).any(): y_samp[:, ny] = np.nan_to_num(y_samp[:, ny]) if np.isnan(y_pred_var[:, ny]).any(): @@ -1030,6 +1152,29 @@ def predict(m, X, did_mf): # noqa: N803, D103 X_list_h = X_list[X.shape[0] :] # noqa: N806 return m.predict(X_list_h) +def make_psd(A_vectorized, tol, tril_idx, dim): + + n_samp = A_vectorized.shape[0] + A_psd_vectorized = A_vectorized.copy() + counter = 0 + for s in range(n_samp): + A = vect_to_mat(A_vectorized[s, :], tril_idx, dim) + A = 0.5 * (A+ A.T) + eigvals, eigvecs = np.linalg.eigh(A) + if np.all(eigvals >= tol): + pass + else: + eigvals[eigvals < tol] = 0.0 + A_psd = eigvecs @ np.diag(eigvals) @ eigvecs.T + A_psd_vectorized[s, :] = A_psd[tril_idx] + counter += 1 + return A_psd_vectorized, counter + +def vect_to_mat(vect, tril_idx, dim): + mat = np.zeros([dim, dim]) + mat[tril_idx] = vect + mat = mat + mat.T - np.diag(np.diag(mat)) + return mat if __name__ == '__main__': error_file = open('../surrogate.err', 'w') # noqa: SIM115, PTH123 diff --git a/modules/performUQ/SimCenterUQ/surrogateBuild.py b/modules/performUQ/SimCenterUQ/surrogateBuild.py index a3702f9c1..2f5a42a98 100644 --- a/modules/performUQ/SimCenterUQ/surrogateBuild.py +++ b/modules/performUQ/SimCenterUQ/surrogateBuild.py @@ -41,6 +41,8 @@ # Jan 31, 2023: let's not use GPy calibration parallel for now, because it seems to give local maxima +new_sto_emul = True + import copy import json import os @@ -49,6 +51,7 @@ import sys import time import warnings +from scipy.optimize import minimize import numpy as np @@ -60,9 +63,6 @@ from UQengine import UQengine # noqa: E402 -# import pip installed modules - - try: moduleName = 'numpy' # noqa: N816 import numpy as np @@ -98,12 +98,12 @@ work_dir_tmp = inputs[1].replace(os.sep, '/') errFileName = os.path.join(work_dir_tmp, 'dakota.err') # noqa: N816, PTH118 -# develop_mode = len(sys.argv) == 7 # a flag for develop mode develop_mode = False # ABS - disable developer mode for running on DesignSafe if develop_mode: import matplotlib - matplotlib.use('TkAgg') + # matplotlib.use('TkAgg') + matplotlib.use('Qt5Agg') import matplotlib.pyplot as plt print('Developer mode') # noqa: T201 @@ -149,6 +149,16 @@ def randomize(self, rand_gen=None, *args, **kwargs): # noqa: D103 self.param_array.flat[unfixlist] = x.view(np.ndarray).ravel()[unfixlist] self.update_model(updates) + @monkeypatch_method(GPy.core.GP) + def set_XY2(self, X=None, Y=None, Y_metadata=None): # noqa: N802, N803 + if Y_metadata is not None: + if self.Y_metadata is None: + self.Y_metadata = Y_metadata + else: + self.Y_metadata.update(Y_metadata) + print('metadata_updated') # noqa: T201 + self.set_XY(X, Y) + # Main function @@ -424,16 +434,6 @@ def gaussian_variance(self, Y_metadata=None): # noqa: N803 else: # noqa: RET505 return self.variance * Y_metadata['variance_structure'] - @monkeypatch_method(GPy.core.GP) - def set_XY2(self, X=None, Y=None, Y_metadata=None): # noqa: N802, N803 - if Y_metadata is not None: - if self.Y_metadata is None: - self.Y_metadata = Y_metadata - else: - self.Y_metadata.update(Y_metadata) - print('metadata_updated') # noqa: T201 - self.set_XY(X, Y) - @monkeypatch_method(GPy.core.GP) def subsample_XY(self, idx): # noqa: N802, N803, RUF100 if self.Y_metadata is not None: @@ -663,7 +663,7 @@ def create_gp_model(self): # noqa: D102 kr = self.create_kernel(x_dim) X_dummy = np.zeros((1, x_dim)) # noqa: N806 - Y_dummy = np.zeros((1, y_dim)) # noqa: N806 + Y_dummy = np.zeros((1, 1)) # noqa: N806 # for single fidelity case self.set_normalizer = True @@ -936,12 +936,12 @@ def set_XY( # noqa: C901, N802, D102 return m_tmp def predictStoVars(self, X_repl, Y_var_repl, X_new, Y_mean, counts): # noqa: ARG002, D102, N802, N803 - my_x_dim = X_repl.shape[1] + x_dim = X_repl.shape[1] # kernel_var = GPy.kern.Matern52( - # input_dim=my_x_dim, ARD=True - # ) + GPy.kern.Linear(input_dim=my_x_dim, ARD=True) + # input_dim=x_dim, ARD=True + # ) + GPy.kern.Linear(input_dim=x_dim, ARD=True) - kernel_var = GPy.kern.Matern52(input_dim=my_x_dim, ARD=True) + kernel_var = GPy.kern.Matern52(input_dim=x_dim, ARD=True) log_vars = np.log(Y_var_repl) m_var = GPy.models.GPRegression( X_repl, log_vars, kernel_var, normalizer=True, Y_metadata=None @@ -1022,32 +1022,309 @@ def predictStoVars(self, X_repl, Y_var_repl, X_new, Y_mean, counts): # noqa: AR plt.show() return Y_metadata, m_var, norm_var_str + + + def predictStoCov(self, X, Y, Y_mean, m_list): # noqa: ARG002, D102, N802, N803 + + + x_dim = X.shape[1] + y_dim = Y.shape[1] + n_samp = X.shape[0] + myrange = np.max(X, axis=0) - np.min(X, axis=0) + + + # ----------------------------- + # Predict mean + # ----------------------------- + + + # Replacing set XY + + if self.do_logtransform: + if np.min(Y) < 0: + msg = 'Error running SimCenterUQ - Response contains negative values. Please uncheck the log-transform option in the UQ tab' + self.exit(msg) + Y = np.log(Y) # noqa: N806 + + if self.do_linear: + self.linear_list = [True] * x_dim # SY - TEMPORARY + + for ny in range(y_dim): + self.lin_list[ny] = LinearRegression().fit(X, Y[:,ny:ny+1]) + y_pred = self.lin_list[ny].predict(X) + Y[:,ny:ny+1] = Y[:,ny:ny+1] - y_pred # noqa: N806 + + else: + self.linear_list = [False] * x_dim # SY - TEMPORARY + + + Y_mean = np.zeros((n_samp, y_dim)) + initial_noise_variance = [0]*y_dim + initial_process_variance = [0]*y_dim + tert_param = [0]*y_dim + for ny in range(y_dim): + kernel_mean = GPy.kern.Matern52(input_dim=x_dim, ARD=True, lengthscale=myrange) + m_mean = GPy.models.GPRegression(X, Y[:,ny:ny+1], kernel_mean, normalizer=True) + for parname in m_mean.parameter_names(): + if parname.endswith('lengthscale'): + for nx in range(X.shape[1]): + + if myrange[nx] == 0: + msg = f'Error training surrogate: input parameter {nx} has zero range. Please remove this parameter or make sure it has non-zero range.' + self.exit(msg) + + + if self.do_linear: + m_mean.kern.lengthscale[[nx]].constrain_bounded( + myrange[nx] / X.shape[0] * 50, + myrange[nx] * 10000, + warning=False, + ) + else: + m_mean.Gaussian_noise.constrain_bounded( + 0.2, 10, warning=False + ) + m_mean.kern.lengthscale[[nx]].constrain_bounded( + myrange[nx] / X.shape[0] * 10, + myrange[nx] * 100, + warning=False, + ) + + print('calibrating tertiary surrogate') # noqa: T201 + m_mean.optimize(messages=True, max_f_eval=1000) + # m_mean = my_optimize_restart(m_mean, self.nopt) + Y_mean[:,ny:ny+1], mean_var = m_mean.predict(X) + initial_noise_variance[ny] = float(m_mean.Gaussian_noise.variance) + initial_process_variance[ny] = float(m_mean.Mat52.variance) + tert_param[ny] = m_mean.param_array + # ----------------------------- + # Prepare covariance data + # ----------------------------- + + #only for no replication case + tril_idx = np.tril_indices(y_dim) + diag_in_vec = np.where(tril_idx[0] == tril_idx[1])[0] + n_elem = len(tril_idx[0]) + + cov_vec = np.zeros([n_samp, n_elem]) + for ns in range(n_samp): + cov_samp = (Y_mean[ns:ns+1,:]-Y[ns:ns+1,:]).T @ (Y_mean[ns:ns+1,:]-Y[ns:ns+1,:]) + cov_vec[ns,:] = cov_samp[tril_idx] + + gps = [] + for i in range(n_elem): + kernel = GPy.kern.Matern52(input_dim=x_dim, variance=1., lengthscale=1., ARD=True) + gp = GPy.models.GPRegression(X.copy(), cov_vec[:, i:i+1], kernel=kernel) + gps.append(gp) + + # + # Define some functions + # + if y_dim >= 6: # do only diagonals + gp_diags = [gps[int(i)] for i in diag_in_vec] + else: + gp_diags = gps + + def joint_objective_and_grad(param_values): + # Update kernel parameters in-place (fast, no per-GP assignment) + kernel.variance[:] = param_values[0] + kernel.lengthscale[:] = param_values[1:1+x_dim] + + nll = 0.0 + grad = np.zeros_like(param_values) + + # Only update Gaussian noise for each GP (cheap) + for gp in gp_diags: + # for gp in gps: + gp.Gaussian_noise.variance[:] = param_values[1 + x_dim] + # gp.update_model(True) # stop auto recompute for now + + # Compute NLL and gradient + for gp in gp_diags: + # for gp in gps: + nll -= gp.log_likelihood() + grad += -gp.gradient + + return nll, grad + + def joint_objective(param_values): + # Update kernel parameters in-place (fast, no per-GP assignment) + kernel.variance[:] = param_values[0] + kernel.lengthscale[:] = param_values[1:1+x_dim] + + nll = 0.0 + grad = np.zeros_like(param_values) + + # Only update Gaussian noise for each GP (cheap) + for gp in gps: + gp.Gaussian_noise.variance[:] = param_values[1 + x_dim] + + # Compute NLL and gradient + for gp in gps: + nll -= gp.log_likelihood() + + return nll + + def make_psd(A_vectorized, tol, tril_idx, dim): + # enforcing positive semi-definiteness + n_samp = A_vectorized.shape[0] + A_psd_vectorized = A_vectorized.copy() + counter = 0 + for s in range(n_samp): + A = vect_to_mat(A_vectorized[s, :], tril_idx, dim) + A = 0.5 * (A+ A.T) + eigvals, eigvecs = np.linalg.eigh(A) + if np.all(eigvals >= tol): + pass + else: + eigvals[eigvals < tol] = 0.0 + A_psd = eigvecs @ np.diag(eigvals) @ eigvecs.T + A_psd_vectorized[s, :] = A_psd[tril_idx] + counter += 1 + return A_psd_vectorized, counter + + def vect_to_mat(vect, tril_idx, dim): + mat = np.zeros([dim, dim]) + mat[tril_idx] = vect + mat = mat + mat.T - np.diag(np.diag(mat)) + return mat + + dx = myrange / 100 + + # ----------------------------- + # Optimize jointly + # ----------------------------- + + bounds = [] + bounds.append((1e-5, None)) # variance + for nx in range(x_dim): + bounds.append((dx[nx], myrange[nx])) # lengthscale + bounds.append((1e-5, None)) # noise variance + + init_val = np.sum(np.array(tert_param), axis=0) + # res = minimize(joint_objective, init_val, method='L-BFGS-B', bounds=bounds) + res = minimize(joint_objective_and_grad,x0=init_val, jac=True, method='L-BFGS-B', bounds=bounds) + + # ----------------------------- + # Update GPs with optimized parameters + # ----------------------------- + + for gp in gps: + gp.kern.variance[:] = res.x[0] + gp.kern.lengthscale[:] = res.x[1:1+x_dim] + gp.Gaussian_noise.variance[:] = res.x[1+x_dim] + + cov_pred = [] + for gp in gps: + mu, var = gp.predict(X) + cov_pred.append(mu) + + print('Calibrating Secondary (cov) surrogate') # noqa: T201 + + cov_pred = np.hstack(cov_pred) # shape (N_new, P) + cov_pred, projection_counter = make_psd(cov_pred,1.e-6, tril_idx, y_dim) # ensure PSD + var_pred = cov_pred[:,diag_in_vec] + + corr_vec = np.zeros_like(cov_vec) + idx = 0 + for i in range(y_dim): + for j in range(i+1): + corr_vec[:,idx] = cov_pred[:,idx] / np.sqrt(var_pred[:,i] * var_pred[:,j]) + idx += 1 + + # ----------------------------- + # Predict variance structure + # ----------------------------- + + Y_metadata = [0]*y_dim + + for ny in range(y_dim): + + norm_var_str = (var_pred[:,ny]) / max(var_pred[:,ny]) + self.var_norm = 1 + if self.set_normalizer: + self.var_norm = np.mean(var_pred[:,ny]) + # norm_var_str = (var_pred.T[0]) / np.var(Y_mean) + norm_var_str = var_pred[:,ny] / self.var_norm + # if normalization was used.. + else: + norm_var_str = var_pred[:,ny] # if normalization was not used.. + + # norm_var_str = (X_new+2)**2/max((X_new+2)**2) + Y_metadata[ny] = {'variance_structure': norm_var_str} + self.var_str[ny] = norm_var_str# noqa: N806 + + # ----------------------------- + # Update GPs with thed optimized variance + # ----------------------------- + + # reset GP for EEUQ + x_current_dim = x_dim + for ny in range(y_dim): + for parname in m_list[ny].parameter_names(): + if parname.endswith('lengthscale'): + + obj = m_list[ny] + for attr in parname.split('.'): + obj = getattr(obj, attr) + x_current_dim = len(obj) + # exec('x_current_dim = len(m_list[ny].' + parname + ')') # noqa: S102 + + if x_current_dim != X.shape[1]: + kr = self.create_kernel(X.shape[1]) + X_dummy = np.zeros((1, X.shape[1])) # noqa: N806 + Y_dummy = np.zeros((1, 1)) # noqa: N806 + m_new = self.create_gpy_model(X_dummy, Y_dummy, kr) + m_list[ny] = m_new.copy() + + self.m_var_list = [0]*y_dim + for ny in range(y_dim): + m_list[ny].set_XY2(X, Y[:,ny:ny+1], Y_metadata=Y_metadata[ny]) + self.m_var_list[ny] = gps[diag_in_vec[ny]] + + self.indices_unique = range(n_samp) + self.n_unique_hf = n_samp + self.Y_mean = [Y[:,ny] for ny in range(y_dim)] + self.init_noise_var = initial_noise_variance + self.init_process_var = initial_process_variance + self.stochastic = [True]*y_dim + + for ny in range(y_dim): + if self.set_normalizer: + self.normMeans[ny] = m_list[ny].normalizer.mean + self.normVars[ny] = m_list[ny].normalizer.std**2 + + self.tertiary_mean = Y_mean.T.tolist() + + if develop_mode: + plt.figure(3) + nx = 0 + plt.scatter(X[:, nx], cov_vec[:,0], alpha=0.1) + plt.scatter(X[:, nx], cov_pred[:,0], alpha=0.1) + plt.show() + plt.figure(1) + + plt.title('Sto Log-var QoI') + plt.scatter(cov_vec[:,0], cov_pred[:,0], alpha=0.5) + plt.scatter(cov_vec[:,0], cov_vec[:,0], alpha=0.5) + plt.xlabel('exact') + plt.ylabel('pred') + plt.grid() + plt.show() + + print(gps[0]) + print(gps[0].kern.lengthscale) + + return m_list + + def predictStoMeans(self, X, Y): # noqa: N802, N803, D102 # under homoscedasticity - my_x_dim = X.shape[1] + x_dim = X.shape[1] myrange = np.max(X, axis=0) - np.min(X, axis=0) kernel_mean = GPy.kern.Matern52( - input_dim=my_x_dim, ARD=True, lengthscale=myrange + input_dim=x_dim, ARD=True, lengthscale=myrange ) - # kernel_mean = GPy.kern.Matern52(input_dim=my_x_dim, ARD=True) + GPy.kern.Linear(input_dim=my_x_dim, ARD=True) - # if self.do_linear and not (self.isEEUQ or self.isWEUQ or self.isHydroUQ): - # kernel_mean = kernel_mean + GPy.kern.Linear(input_dim=my_x_dim, ARD=True) - # - # if sum(self.linear_list)>0: - # - # lin_tmp = LinearRegression().fit(X[:, self.linear_list], Y) - # y_pred = lin_tmp.predict(X[:, self.linear_list]) - # - # # Set GP - # - # m_mean = GPy.models.GPRegression( - # X, Y - y_pred, kernel_mean, normalizer=True, Y_metadata=None - # ) - # - # else: - # m_mean = GPy.models.GPRegression( - # X, Y, kernel_mean, normalizer=True, Y_metadata=None - # ) m_mean = GPy.models.GPRegression( X, Y, kernel_mean, normalizer=True, Y_metadata=None ) @@ -1056,23 +1333,7 @@ def predictStoMeans(self, X, Y): # noqa: N802, N803, D102 for parname in m_mean.parameter_names(): if parname.endswith('lengthscale'): for nx in range(X.shape[1]): - # m_mean.kern.Mat52.lengthscale[[nx]]= myrange[nx]*100 - # m_mean.kern.Mat52.lengthscale[[nx]].constrain_bounded(myrange[nx]/X.shape[0]*50, myrange[nx]*100) - # if self.isEEUQ: - # # m_mean.kern.lengthscale[[nx]] = myrange[nx] * 100 - # # m_mean.kern.lengthscale[[nx]].constrain_bounded( - # # myrange[nx] / X.shape[0] * 50, - # # myrange[nx] * 100, - # # warning=False, - # # ) - # # m_mean.kern.lengthscale[[nx]] = myrange[nx] - # m_mean.kern.lengthscale[[nx]].constrain_bounded( - # myrange[nx] / X.shape[0], - # myrange[nx] * 10000, - # warning=False, - # ) if self.do_linear: - # m_mean.kern.Mat52.lengthscale[[nx]] = myrange[nx] * 5000 m_mean.kern.Mat52.lengthscale[[nx]].constrain_bounded( myrange[nx] / X.shape[0] * 50, myrange[nx] * 10000, @@ -1090,23 +1351,9 @@ def predictStoMeans(self, X, Y): # noqa: N802, N803, D102 ) # """ - # m_mean.optimize(messages=True, max_f_eval=1000) - # # m_mean.Gaussian_noise.variance = np.var(Y) # First calibrate parameters - # m_mean.optimize_restarts( - # self.nopt, parallel=self.is_paralle_opt_safe, num_processes=self.n_processor, verbose=True - # ) # First calibrate parameters print('calibrating tertiary surrogate') # noqa: T201 m_mean = my_optimize_restart(m_mean, self.nopt) - # m_mean.optimize(messages=True, max_f_eval=1000) - - # if self.do_linear: - - # m_mean.Gaussian_noise.variance=m_mean.Mat52.variance+m_mean.Gaussian_noise.variance - # else: - # m_mean.Gaussian_noise.variance=m_mean.RBF.variance+m_mean.Gaussian_noise.variance.variance - # m_mean.optimize_restarts(10,parallel=True) - mean_pred, mean_var = m_mean.predict(X) initial_noise_variance = float(m_mean.Gaussian_noise.variance) @@ -1223,28 +1470,6 @@ def calibrate(self): # noqa: C901, D102, RUF100 plt.ylabel('pred') plt.show() - # because EE-UQ results are more likely to have huge nugget. - # if False: - # if self.isEEUQ: - # if self.heteroscedastic: - # variance_keyword = 'het_Gauss.variance' - # else: - # variance_keyword = 'Gaussian_noise.variance' - # - # for ny in range(self.y_dim): - # for parname in self.m_list[ny].parameter_names(): - # if parname.endswith('variance') and ('Gauss' not in parname): - # exec( # noqa: RUF100, S102 - # 'my_new_var = max(self.m_list[ny].' - # + variance_keyword - # + ', 10*self.m_list[ny].' - # + parname - # + ')' - # ) - # exec('self.m_list[ny].' + variance_keyword + '= my_new_var') # noqa: RUF100, S102 - # - # self.m_list[ny].optimize() - self.calib_time = time.time() - t_opt print(f' Calibration time: {self.calib_time:.2f} s', flush=True) # noqa: T201 Y_preds, Y_pred_vars, Y_pred_vars_w_measures, e2 = ( # noqa: N806 @@ -1392,79 +1617,34 @@ def FEM_batch_lf(X, id_sim): # noqa: N802, N803 self.exit(msg) stoch_idx = [] # noqa: F841 - for i in range(y_dim): - print('Setting up QoI {} among {}'.format(i + 1, y_dim)) # noqa: T201, UP032 - self.m_list[i] = self.set_XY( - self.m_list[i], - i, - self.X_hf, - self.Y_hf[:, i][np.newaxis].transpose(), - self.X_lf, - self.Y_lf[:, i][np.newaxis].transpose(), - ) # log-transform is inside set_XY + Y_mean1 = np.zeros((self.X_hf.shape[0], y_dim), float) + + if new_sto_emul and self.nugget_opt == 'Heteroscedastic': + print('Trying to run the new sto emul algorithm') # noqa: T201, UP032 + self.m_list = self.predictStoCov(self.X_hf, self.Y_hf, Y_mean1, self.m_list) + self.do_multioutput_corr = True + else: + for i in range(y_dim): + print('Setting up QoI {} among {}'.format(i + 1, y_dim)) # noqa: T201, UP032 + self.m_list[i] = self.set_XY( + self.m_list[i], + i, + self.X_hf, + self.Y_hf[:, i][np.newaxis].transpose(), + self.X_lf, + self.Y_lf[:, i][np.newaxis].transpose(), + ) + self.do_multioutput_corr = False + # log-transform is inside set_XY # check stochastic ? - # if self.stochastic[i] and not self.do_mf: - # # see if we can run it parallel - # X_new, X_idx, indices, counts = np.unique( # noqa: N806, RUF100 - # self.X_hf, - # axis=0, - # return_index=True, - # return_counts=True, - # return_inverse=True, - # ) - # n_unique = X_new.shape[0] - # if n_unique == self.X_hf.shape[0]: # no repl - # stoch_idx += [i] - # - # else: - # # - # # original calibration - # # - # print("Setting up QoI {} among {}".format(i+1,y_dim)) - # self.m_list[i] = self.set_XY( - # self.m_list[i], - # i, - # self.X_hf, - # self.Y_hf[:, i][np.newaxis].transpose(), - # self.X_lf, - # self.Y_lf[:, i][np.newaxis].transpose(), - # ) # log-transform is inside set_XY - - # # parllel run - # if len(stoch_idx)>0: - # iterables = ( - # ( - # copy.deepcopy(self.m_list[i]), - # i, - # self.x_dim, - # self.X_hf, - # self.Y_hf[:, i][np.newaxis].transpose(), - # self.create_kernel, - # self.create_gpy_model, - # self.do_logtransform, - # self.predictStoMeans, - # self.set_normalizer - # ) - # for i in stoch_idx - # ) - # result_objs = list(self.pool.starmap(set_XY_indi, iterables)) - # for ny, m_tmp_, Y_mean_, normMeans_, normVars_, m_var_list_, var_str_, indices_unique_, n_unique_hf_ in result_objs: # noqa: N806, RUF100 - # self.m_tmp[ny] = m_tmp_ - # self.Y_mean[ny] = Y_mean_ - # self.normMeans[ny] = normMeans_ - # self.normVars[ny] = normVars_ - # self.m_var_list[ny] = m_var_list_ - # self.var_str[ny] = var_str_ - # self.indices_unique = indices_unique_ - # self.n_unique_hf[ny] = n_unique_hf_ # # Verification measures # self.NRMSE_hist = np.zeros((1, y_dim), float) - self.NRMSE_idx = np.zeros((1, 1), int) + # self.NRMSE_idx = np.zeros((1, 1), int) print('======== RUNNING GP DoE ===========', flush=True) # noqa: T201 @@ -1476,6 +1656,7 @@ def FEM_batch_lf(X, id_sim): # noqa: N802, N803 nc1 = self.nc1 nq = self.nq n_new = 0 + i = 0 while exit_flag == False: # noqa: E712 # Initial calibration @@ -1537,7 +1718,7 @@ def FEM_batch_lf(X, id_sim): # noqa: N802, N803 NRMSE_val = self.normalized_mean_sq_error(self.Y_cvs, Y_hfs) # noqa: N806 self.NRMSE_hist = np.vstack((self.NRMSE_hist, np.array(NRMSE_val))) - self.NRMSE_idx = np.vstack((self.NRMSE_idx, i)) + # self.NRMSE_idx = np.vstack((self.NRMSE_idx, i)) if ( self.n_unique_hf @@ -1745,15 +1926,6 @@ def FEM_batch_lf(X, id_sim): # noqa: N802, N803 plt.grid() plt.show() - # plt.scatter(np.log10(self.Y_hf[:, ny]),np.log10(np.exp(log_Y_cv_sample)), alpha=1,marker='x'); - # plt.plot(np.log10(self.Y_hf[:, ny]),np.log10(self.Y_hf[:, ny]),alpha=1,color='r'); - # mycor = np.corrcoef(self.Y_hf[:, ny], np.exp(log_Y_cv_sample))[1,0] - # plt.title(f"train CV samples rho={round(mycor*100)/100}") - # plt.xlabel("QoI exact"); - # plt.ylabel("QoI pred sample"); - # plt.grid() - # plt.show() - X_test = np.genfromtxt( # noqa: N806 r'C:\Users\SimCenter\Dropbox\SimCenterPC\Stochastic_GP_validation\input_test.csv', delimiter=',', @@ -2203,6 +2375,7 @@ def save_model(self, filename): # noqa: C901, D102, PLR0915 results['doLogtransform'] = self.do_logtransform results['doLinear'] = self.do_linear results['doMultiFidelity'] = self.do_mf + results['doMultiOutputCorr'] = self.do_multioutput_corr results['kernName'] = self.kernel results['terminationCode'] = self.exit_code results['valTime'] = self.sim_time @@ -2335,6 +2508,12 @@ def save_model(self, filename): # noqa: C901, D102, PLR0915 results['modelInfo'][self.g_name[ny]][parname] = list( eval('self.m_list[ny].' + parname) # noqa: S307 ) + # + # Save the mean prediction from tertiary for cov + # + if new_sto_emul and self.nugget_opt == 'Heteroscedastic': + results['modelInfo'][self.g_name[ny] + '_TertMean'] = {} + results['modelInfo'][self.g_name[ny] + '_TertMean']['TrainingSamplesY'] = self.tertiary_mean[ny] # # Save the linear @@ -2892,10 +3071,6 @@ def sampling(N): # noqa: N803 Y_stack = np.vstack([Y_stack, np.zeros((1, 1))]) # noqa: N806 # m_stack.set_XY(X=X_stack, Y=Y_stack) - # if doeIdx.startswith("HF"): - # self.set_XY(m_stack, X_stack, Y_stack) - # elif doeIdx.startswith("LF"): # any variables - # self.set_XY(m_stack, self.X_hf, self.Y_hf, X_stack, Y_stack) if doeIdx == 'HFHF': m_stack = self.set_XY( @@ -3046,6 +3221,11 @@ def get_cross_validation_err(self): # noqa: D102 Y_pred2[ns, ny] = Y_pred_tmp Y_pred_var2[ns, ny] = Y_err_tmp + + # if self.do_linear: + # y_lin_pred = self.lin_list[ny].predict(x_loo) + # Y_pred2[ns, ny] = Y_pred2[ns, ny] + y_lin_pred + if self.do_logtransform: Y_exact = np.log(Y_hf[ns, ny]) # noqa: N806 else: @@ -3085,8 +3265,8 @@ def get_cross_validation_err(self): # noqa: D102 f' Cross validation calculation time: {time.time() - time_tmp:.2f} s', flush=True, ) - return Y_pred, Y_pred_var, Y_pred_var_w_measure, e2 + return Y_pred, Y_pred_var, Y_pred_var_w_measure, e2 def imse(m_tmp, xcandi, xq, phiqr, i, y_idx, doeIdx='HF'): # noqa: ARG001, N803, D103 if doeIdx == 'HF': @@ -3383,26 +3563,6 @@ def resampling(self, X, n): # noqa: N803, D102 return X_samples - # def set_FEM(self, rv_name, do_parallel, y_dim, t_init): - # self.rv_name = rv_name - # self.do_parallel = do_parallel - # self.y_dim = y_dim - # self.t_init = t_init - # self.total_sim_time = 0 - # - # def run_FEM(self,X, id_sim): - # tmp = time.time() - # if self.is_model: - # X, Y, id_sim = self.run_FEM_batch(X, id_sim, self.rv_name, self.do_parallel, self.y_dim, self.t_init, self.thr_t, runIdx=self.runIdx) - # - # else: - # X, Y, id_sim = np.zeros((0, self.x_dim)), np.zeros((0, self.y_dim)), id_sim - # - # self.total_sim_time += tmp - # self.avg_sim_time = self.total_sim_time / id_sim - # - # return X, Y, id_sim - # Additional functions @@ -3513,11 +3673,7 @@ def calibrating( # noqa: C901, D103 exec( # noqa: S102 'm_tmp.' + parname + '[[nx]] = myrange[nx]*1' ) - - # m_tmp[parname][nx] = myrange[nx]*0.1 - # m_tmp[parname][nx].constrain_bounded(myrange[nx] / X.shape[0], myrange[nx]*100) # TODO change the kernel # noqa: TD002, TD004 - # m_tmp[variance_keyword].constrain_bounded(0.05/np.mean(m_tmp.Y_metadata['variance_structure']),2/np.mean(m_tmp.Y_metadata['variance_structure']),warning=False) m_tmp.Gaussian_noise.constrain_bounded( 0.5 * init_noise_var, 2 * init_noise_var, warning=False ) @@ -3531,16 +3687,10 @@ def calibrating( # noqa: C901, D103 m_tmp.gpy_model.mixed_noise.Gaussian_noise_1.unfix() elif nugget_opt_tmp == 'Fixed Values': - # m_tmp.gpy_model.mixed_noise.Gaussian_noise.constrain_fixed(self.nuggetVal[ny]) - # m_tmp.gpy_model.mixed_noise.Gaussian_noise_1.constrain_fixed(self.nuggetVal[ny]) msg = 'Currently Nugget Fixed Values option is not supported' # self.exit(msg) elif nugget_opt_tmp == 'Fixed Bounds': - # m_tmp.gpy_model.mixed_noise.Gaussian_noise.constrain_bounded(self.nuggetVal[ny][0], - # self.nuggetVal[ny][1]) - # m_tmp.gpy_model.mixed_noise.Gaussian_noise_1.constrain_bounded(self.nuggetVal[ny][0], - # self.nuggetVal[ny][1]) msg = 'Currently Nugget Fixed Bounds option is not supported' # self.exit(msg) @@ -3560,24 +3710,6 @@ def calibrating( # noqa: C901, D103 print('Calibrating final surrogate') # noqa: T201 m_tmp = my_optimize_restart(m_tmp, nopt) - # noqa: RUF100, W293 - # if develop_mode: - # print(m_tmp) - # #print(m_tmp.rbf.lengthscale) - # tmp = m_tmp.predict(m_tmp.X) - # plt.title("Original Mean QoI") - # plt.scatter(m_tmp.Y, tmp[0],alpha=0.1) - # plt.scatter(m_tmp.Y, m_tmp.Y,alpha=0.1) - # plt.xlabel("exact") - # plt.ylabel("pred") - # plt.show() - - # m_tmp.optimize_restarts( - # num_restarts=nopt, - # parallel=is_paralle_opt_safe, - # num_processes=n_processor, - # verbose=True, - # ) else: m_tmp.gpy_model.optimize_restarts( num_restarts=nopt, @@ -3664,17 +3796,9 @@ def set_XY2(self, X=None, Y=None, Y_metadata=None): # noqa: N802, N803 ) ) - # import matplotlib.pyplot as plt - # tmp_all = m_subset.predict(X_full); plt.scatter(tmp_all[0][:, 0], Y_full[:, 0],alpha=0.1); plt.scatter(Y_full[:, 0], Y_full[:, 0],alpha=0.1); plt.show() - # tmp_subset = m_subset.predict(X_full[inside_cluster]); plt.scatter(tmp_subset[0][:, 0], Y_full[inside_cluster, 0],alpha=0.1); plt.scatter(Y_full[inside_cluster, 0], Y_full[inside_cluster, 0],alpha=0.1); plt.show() - # best_cluster = np.argmin(errors) # not capturing skedasticity best_cluster = np.argmax(log_likelihoods) m = m_list[best_cluster] - # m.kern.parameters = kernels[best_cluster][0] - # m.Gaussian_noise.parameters = kernels[best_cluster][1] - # m.parameters_changed() - # tmp = m.predict(X_full[inside_cluster]); plt.scatter(tmp[0][:, 0], Y_full[inside_cluster, 0]); plt.scatter(Y_full[inside_cluster, 0], Y_full[inside_cluster, 0]); plt.show() print('Elapsed time: {:.2f} s'.format(time.time() - init)) # noqa: T201, UP032 return m @@ -3804,12 +3928,3 @@ def read_txt(text_dir, exit_fun): # noqa: D103 if __name__ == '__main__': main(sys.argv) sys.stderr.close() - - # try: - # main(sys.argv) - # open(os.path.join(os.getcwd(), errFileName ), 'w').close() - # except Exception: - # f = open(os.path.join(os.getcwd(), errFileName ), 'w') - # traceback.print_exc(file=f) - # f.close() - # exit(1)