From 86325741764a07738a9646c717d86453e6bc2ecc Mon Sep 17 00:00:00 2001 From: IsaacMtz19 Date: Fri, 31 Oct 2025 08:45:19 -0600 Subject: [PATCH] fix: normalize to two decimals --- Murray/main.py | 85 ++++++++++++++++++++++++----------------- Murray/plots.py | 4 +- Murray/post_analysis.py | 70 ++++++++++++++++----------------- 3 files changed, 86 insertions(+), 73 deletions(-) diff --git a/Murray/main.py b/Murray/main.py index 47c1f24..8123e2a 100644 --- a/Murray/main.py +++ b/Murray/main.py @@ -330,6 +330,9 @@ def filter_controls_by_weights(self, control_group, min_weight_threshold=0.001): if np.sum(filtered_weights) > 0: filtered_weights = filtered_weights / np.sum(filtered_weights) + # Round weights to 2 decimal places + filtered_weights = np.round(filtered_weights, 2) + return filtered_control_group, filtered_weights @@ -357,7 +360,7 @@ def evaluate_group( logger.debug(f"Starting evaluation for treatment group: {treatment_group}") treatment_Y = data[data["location"].isin(treatment_group)]["Y"].sum() - holdout_percentage = (1 - (treatment_Y / total_Y)) * 100 + holdout_percentage = round((1 - (treatment_Y / total_Y)) * 100, 2) logger.debug( f"Treatment Y: {treatment_Y}, Holdout percentage: {holdout_percentage:.2f}%" @@ -428,27 +431,31 @@ def evaluate_group( ) logger.debug("Calculating metrics") - MAPE = ( + MAPE = round( np.mean( np.abs( (y_original[split_index:] - counterfactual_full_original[split_index:]) / (y_original[split_index:] + 1e-10) ) ) - * 100 + * 100, + 2 ) - SMAPE_value = smape( - y_original[split_index:], counterfactual_full_original[split_index:] + SMAPE_value = round( + smape( + y_original[split_index:], counterfactual_full_original[split_index:] + ), + 2 ) - observed_conformity = np.mean(y_original - counterfactual_full_original) + observed_conformity = round(float(np.mean(y_original - counterfactual_full_original)), 2) return ( treatment_group, filtered_control_group, MAPE, SMAPE_value, - y_original, - counterfactual_full_original, + np.round(y_original, 2), + np.round(counterfactual_full_original, 2), filtered_weights, observed_conformity, ) @@ -678,7 +685,7 @@ def evaluate_group_exclusive( ) treatment_Y = data[data["location"].isin(treatment_group)]["Y"].sum() - holdout_percentage = (1 - (treatment_Y / total_Y)) * 100 + holdout_percentage = round((1 - (treatment_Y / total_Y)) * 100, 2) logger.debug( f"Treatment Y: {treatment_Y}, Holdout percentage: {holdout_percentage:.2f}%" @@ -750,28 +757,32 @@ def evaluate_group_exclusive( ) logger.debug("Calculating metrics") - MAPE = ( + MAPE = round( np.mean( np.abs( (y_original[split_index:] - counterfactual_full_original[split_index:]) / (y_original[split_index:] + 1e-10) ) ) - * 100 + * 100, + 2 ) - SMAPE_value = smape( - y_original[split_index:], counterfactual_full_original[split_index:] + SMAPE_value = round( + smape( + y_original[split_index:], counterfactual_full_original[split_index:] + ), + 2 ) - observed_conformity = np.mean(y_original - counterfactual_full_original) + observed_conformity = round(float(np.mean(y_original - counterfactual_full_original)), 2) return ( treatment_group, filtered_control_group, MAPE, SMAPE_value, - y_original, - counterfactual_full_original, + np.round(y_original, 2), + np.round(counterfactual_full_original, 2), filtered_weights, observed_conformity, ) @@ -818,10 +829,10 @@ def BetterGroups( """ unique_locations = data["location"].unique() no_locations = len(unique_locations) - # max_group_size = round(no_locations * 0.35) - # min_elements_in_treatment = round(no_locations * 0.20) - max_group_size = round(no_locations * 0.45) - min_elements_in_treatment = round(no_locations * 0.15) + max_group_size = round(no_locations * 0.35) + min_elements_in_treatment = round(no_locations * 0.20) + # max_group_size = round(no_locations * 0.45) + # min_elements_in_treatment = round(no_locations * 0.15) min_holdout = 100 - (maximum_treatment_percentage * 100) total_Y = data["Y"].sum() @@ -1062,7 +1073,7 @@ def BetterGroups( treatment_Y = data[data["location"].isin(best_treatment_group)]["Y"].sum() if total_Y > 0: - holdout_percentage = ((total_Y - treatment_Y) / total_Y) * 100 + holdout_percentage = round(((total_Y - treatment_Y) / total_Y) * 100, 2) else: holdout_percentage = 0.0 @@ -1395,8 +1406,8 @@ def optimize_global_multicell( logger.error(f"STILL HAVE OVERLAP in cell {i+1}: {overlap}") treatment_Y = data[data["location"].isin(treatment_group)]["Y"].sum() - holdout_percentage = ( - ((total_Y - treatment_Y) / total_Y) * 100 if total_Y > 0 else 0.0 + holdout_percentage = round( + ((total_Y - treatment_Y) / total_Y) * 100 if total_Y > 0 else 0.0, 2 ) result_dict = { @@ -1735,17 +1746,17 @@ def simulate_power( null_stats = np.array(null_stats) # Two-sided test - p_value = np.mean(np.abs(null_stats) >= np.abs(observed_stat)) + p_value = round(float(np.mean(np.abs(null_stats) >= np.abs(observed_stat))), 2) p_values.append(p_value) if p_value < significance_level: rejected_tests += 1 - power = rejected_tests / n_power_simulations + power = round(rejected_tests / n_power_simulations, 2) # Calculate confidence interval for power estimate power_se = np.sqrt(power * (1 - power) / n_power_simulations) - power_ci = (max(0, power - 1.95 * power_se), min(1, power + 1.95 * power_se)) + power_ci = (round(max(0, power - 1.95 * power_se), 2), round(min(1, power + 1.95 * power_se), 2)) y_with_lift_sample = apply_lift(y_real, delta, start_treatment, end_treatment) @@ -1753,7 +1764,7 @@ def simulate_power( f"Power simulation completed: power={power:.4f}, CI=({power_ci[0]:.4f}, {power_ci[1]:.4f}), mean p-value={np.mean(p_values):.4f}" ) - return delta, power, power_ci, y_with_lift_sample, np.mean(p_values) + return delta, power, power_ci, y_with_lift_sample, round(float(np.mean(p_values)), 2) def run_simulation( @@ -1914,7 +1925,7 @@ def evaluate_sensitivity( logger.debug(f"Status update failed: {e}") statistical_power = [ - (res[0], res[1], res[2], res[4]) for res in results + (round(res[0], 2), res[1], res[2], res[4]) for res in results ] # (delta, power, power_ci, p_value) mde = next( ( @@ -1925,20 +1936,20 @@ def evaluate_sensitivity( None, ) - p_value = None + mde_p_value = None power_ci = None - power = None + mde_power = None if mde is not None: for delta, power, ci, p_value in statistical_power: if delta == mde: - p_value = p_value + mde_p_value = p_value power_ci = ci - power = power + mde_power = power break # Format values safely for logging - p_value_str = f"{p_value:.4f}" if p_value is not None else "None" - power_str = f"{power:.4f}" if power is not None else "None" + p_value_str = f"{mde_p_value:.4f}" if mde_p_value is not None else "None" + power_str = f"{mde_power:.4f}" if mde_power is not None else "None" power_ci_str = ( f"({power_ci[0]:.4f} - {power_ci[1]:.4f})" if power_ci is not None @@ -1955,9 +1966,9 @@ def evaluate_sensitivity( results_by_period[period] = { "Statistical Power": statistical_power, "MDE": mde, - "P-Value": p_value, + "P-Value": mde_p_value, "MDE_CI": power_ci, - "Power": power, + "Power": mde_power, } sensitivity_results[size] = results_by_period @@ -2093,6 +2104,8 @@ def run_geo_analysis_streamlit_app( periods = list(np.arange(*periods_range)) deltas = np.arange(*deltas_range) + # logger.info(f'Deltas: {deltas}') + # logger.info(f'Periods: {periods}') # Step 1: Generate market correlations logger.info("Step 1: Generating market correlations.....") diff --git a/Murray/plots.py b/Murray/plots.py index 4be9a0a..c40b9d1 100644 --- a/Murray/plots.py +++ b/Murray/plots.py @@ -2146,7 +2146,7 @@ def calculate_confidence_bands( lower = predicted - band_width / 2 upper = predicted + band_width / 2 - return lower, upper + return np.round(lower, 2), np.round(upper, 2) def calculate_optimal_noise_scale(predictions, actual_values, min_relative_scale=0.005): @@ -2167,4 +2167,4 @@ def calculate_optimal_noise_scale(predictions, actual_values, min_relative_scale relative_scale = max(np.median(relative_errors), min_relative_scale) final_scale = max(scale_mad, relative_scale * np.median(np.abs(actual_values))) - return final_scale + return round(float(final_scale), 2) diff --git a/Murray/post_analysis.py b/Murray/post_analysis.py index a4f4859..752d264 100644 --- a/Murray/post_analysis.py +++ b/Murray/post_analysis.py @@ -139,8 +139,8 @@ def smape(A, F): logger.info("Calculating metrics...") logger.info(f"Data shapes - treatment: {treatment.shape}, counterfactual: {counterfactual.shape}") - MAPE = np.mean(np.abs((y_original - counterfactual) / (y_original + 1e-10))) * 100 - SMAPE = smape(y_original, counterfactual) + MAPE = round(np.mean(np.abs((y_original - counterfactual) / (y_original + 1e-10))) * 100, 2) + SMAPE = round(smape(y_original, counterfactual), 2) # Calculate percentage lift (only during treatment period) treatment_period_sum = np.sum(treatment[start_position_treatment:end_position_treatment]) @@ -151,7 +151,7 @@ def smape(A, F): logger.info(f"Counterfactual period sum: {counterfactual_period_sum}") logger.info(f"Lift difference (treatment - counterfactual): {lift_difference}") - percenge_lift = (lift_difference / np.abs(counterfactual_period_sum)) * 100 + percenge_lift = round((lift_difference / np.abs(counterfactual_period_sum)) * 100, 2) def compute_residuals(y_treatment, y_control): return y_treatment - y_control @@ -178,8 +178,8 @@ def stat_func(x): null_stats = np.array(null_stats) logger.info("Permutation test completed, calculating p-value and power...") - p_value = np.mean(abs(null_stats) >= abs(observed_stat)) - power = np.mean(p_value < significance_level) + p_value = round(float(np.mean(abs(null_stats) >= abs(observed_stat))), 2) + power = round(float(np.mean(p_value < significance_level)), 2) length_treatment = len(treatment_group) @@ -193,21 +193,21 @@ def stat_func(x): results_evaluation = { "MAPE": MAPE, "SMAPE": SMAPE, - "counterfactual": counterfactual, - "treatment": treatment, + "counterfactual": np.round(counterfactual, 2), + "treatment": np.round(treatment, 2), "p_value": p_value, "power": power, "percenge_lift": percenge_lift, "control_group": filtered_control_group, - "observed_stat": observed_stat, - "null_stats": null_stats, - "weights": filtered_weights, + "observed_stat": round(float(observed_stat), 2), + "null_stats": np.round(null_stats, 2), + "weights": np.round(filtered_weights, 2), "period": period, - "spend": spend, + "spend": round(float(spend), 2), "length_treatment": length_treatment, # Complete data for plotting (including post-treatment) - "counterfactual_complete": counterfactual_complete, - "treatment_complete": treatment_complete, + "counterfactual_complete": np.round(counterfactual_complete, 2), + "treatment_complete": np.round(treatment_complete, 2), "time_index_full": time_index_full, # Period information for plotting zones "start_position_treatment": start_position_treatment, @@ -307,37 +307,37 @@ def get_evaluation_chart_data( chart_data = { # Base series "dates": dates, - "treatment": treatment.tolist(), - "counterfactual": counterfactual.tolist(), - "point_difference": point_difference.tolist(), - "cumulative_effect": cumulative_effect, + "treatment": np.round(treatment, 2).tolist(), + "counterfactual": np.round(counterfactual, 2).tolist(), + "point_difference": np.round(point_difference, 2).tolist(), + "cumulative_effect": np.round(cumulative_effect, 2).tolist(), # Treatment period data "treatment_dates": dates[start_position_treatment:], - "y_treatment": y_treatment.tolist(), - "point_difference_treatment": point_difference_treatment.tolist(), - "cumulative_effect_treatment": cumulative_effect_treatment, + "y_treatment": np.round(y_treatment, 2).tolist(), + "point_difference_treatment": np.round(point_difference_treatment, 2).tolist(), + "cumulative_effect_treatment": np.round(cumulative_effect_treatment, 2).tolist(), # Confidence bands - "lower_bound": lower_bound.tolist(), - "upper_bound": upper_bound.tolist(), - "lower_bound_pd": lower_bound_pd.tolist(), - "upper_bound_pd": upper_bound_pd.tolist(), - "lower_bound_ce": lower_bound_ce.tolist(), - "upper_bound_ce": upper_bound_ce.tolist(), + "lower_bound": np.round(lower_bound, 2).tolist(), + "upper_bound": np.round(upper_bound, 2).tolist(), + "lower_bound_pd": np.round(lower_bound_pd, 2).tolist(), + "upper_bound_pd": np.round(upper_bound_pd, 2).tolist(), + "lower_bound_ce": np.round(lower_bound_ce, 2).tolist(), + "upper_bound_ce": np.round(upper_bound_ce, 2).tolist(), # Aggregate values - "lower_bound_value": float(lower_bound_value), - "upper_bound_value": float(upper_bound_value), - "prediction_value": float(prediction_value), - "att": float(att), - "incremental": float(incremental), + "lower_bound_value": round(float(lower_bound_value), 2), + "upper_bound_value": round(float(upper_bound_value), 2), + "prediction_value": round(float(prediction_value), 2), + "att": round(float(att), 2), + "incremental": round(float(incremental), 2), # Pre/post treatment periods - "pre_treatment": pre_treatment.tolist(), - "pre_counterfactual": pre_counterfactual.tolist(), - "post_treatment": post_treatment.tolist(), - "post_counterfactual": post_counterfactual.tolist(), + "pre_treatment": np.round(pre_treatment, 2).tolist(), + "pre_counterfactual": np.round(pre_counterfactual, 2).tolist(), + "post_treatment": np.round(post_treatment, 2).tolist(), + "post_counterfactual": np.round(post_counterfactual, 2).tolist(), # Metadata "start_position_treatment": start_position_treatment,