Skip to content
73 changes: 39 additions & 34 deletions bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,14 +389,13 @@ def convert_data(data, dtype, data_order, data_format):
# Secondly, change format of data
if data_format == 'numpy':
return data
elif data_format == 'pandas':
if data_format == 'pandas':
import pandas as pd

if data.ndim == 1:
return pd.Series(data)
else:
return pd.DataFrame(data)
elif data_format == 'cudf':
return pd.DataFrame(data)
if data_format == 'cudf':
import cudf
import pandas as pd

Expand Down Expand Up @@ -512,36 +511,42 @@ def gen_basic_dict(library, algorithm, stage, params, data, alg_instance=None,
def print_output(library, algorithm, stages, params, functions,
                 times, metric_type, metrics, data, alg_instance=None,
                 alg_params=None):
    """Print one JSON report entry per benchmark stage to stdout.

    Nothing is printed unless ``params.output_format`` equals ``'json'``.

    Args:
        library: Forwarded unchanged to ``gen_basic_dict``.
        algorithm: Algorithm name; ``'kmeans'`` and ``'dbscan'`` control
            where ``params.n_clusters`` is stored in the entry.
        stages: Iterable of stage identifiers; one entry is emitted per item.
        params: Object exposing ``output_format`` and, optionally,
            ``n_classes`` / ``n_clusters`` attributes.
        functions: Not used by this function; kept for interface
            compatibility with existing callers.
        times: Sequence indexed by stage position, stored as ``'time[s]'``.
        metric_type: ``None``, a single metric name (``str``), or a list of
            metric names.
        metrics: When ``metric_type`` is a string, indexed as
            ``metrics[stage_index]``; when it is a list, indexed as
            ``metrics[metric_index][stage_index]``.
        data: Sequence indexed by stage position, forwarded to
            ``gen_basic_dict``.
        alg_instance: Optional object forwarded to ``gen_basic_dict``.
        alg_params: Optional value forwarded to ``gen_basic_dict``.

    Returns:
        None. The report is written to stdout as indented JSON.
    """
    # Guard clause: only the JSON output format is handled here.
    if params.output_format != 'json':
        return

    output = []
    for i, stage in enumerate(stages):
        result = gen_basic_dict(library, algorithm, stage, params,
                                data[i], alg_instance, alg_params)
        result.update({'time[s]': times[i]})

        if metric_type is not None:
            if isinstance(metric_type, str):
                result.update({f'{metric_type}': metrics[i]})
            elif isinstance(metric_type, list):
                for ind, val in enumerate(metric_type):
                    # Metrics with no value for this stage are left out.
                    if metrics[ind][i] is not None:
                        result.update({f'{val}': metrics[ind][i]})

        if hasattr(params, 'n_classes'):
            result['input_data'].update({'classes': params.n_classes})
        if hasattr(params, 'n_clusters'):
            # kmeans records the cluster count with the input data,
            # dbscan records it at the top level of the entry.
            if algorithm == 'kmeans':
                result['input_data'].update(
                    {'n_clusters': params.n_clusters})
            elif algorithm == 'dbscan':
                result.update({'n_clusters': params.n_clusters})

        # replace non-string init with string for kmeans benchmarks
        # NOTE(review): 'algorithm_parameters' is only touched when an
        # algorithm instance was supplied; presumably gen_basic_dict adds
        # that key only in this case - confirm against gen_basic_dict.
        if alg_instance is not None:
            alg_parameters = result['algorithm_parameters']
            if 'init' in alg_parameters:
                if not isinstance(alg_parameters['init'], str):
                    alg_parameters['init'] = 'random'
            if 'handle' in alg_parameters:
                del alg_parameters['handle']
        output.append(result)

    print(json.dumps(output, indent=4))


def run_with_context(params, function):
Expand Down
9 changes: 7 additions & 2 deletions configs/xgboost/xgb_gpu_additional_config.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
{
"common": {
"lib": "xgboost",
"data-format": "cudf",
"data-order": "F",
"dtype": "float32",
"algorithm": "gbt",
"tree-method": "gpu_hist",
"count-dmatrix": "",
"max-depth": 8,
"learning-rate": 0.1,
"reg-lambda": 1,
Expand All @@ -15,6 +13,7 @@
"cases": [
{
"objective": "binary:logistic",
"data-format": "pandas",
"scale-pos-weight": 2.1067817411664587,
"dataset": [
{
Expand All @@ -33,6 +32,7 @@
},
{
"objective": "binary:logistic",
"data-format": "cudf",
"scale-pos-weight": 173.63348001466812,
"dataset": [
{
Expand All @@ -51,6 +51,7 @@
},
{
"objective": "multi:softmax",
"data-format": "cudf",
"dataset": [
{
"source": "npy",
Expand All @@ -68,6 +69,7 @@
},
{
"objective": "binary:logistic",
"data-format": "pandas",
"scale-pos-weight": 2.0017715678375363,
"dataset": [
{
Expand All @@ -86,6 +88,7 @@
},
{
"objective": "binary:logistic",
"data-format": "cudf",
"scale-pos-weight": 578.2868020304569,
"dataset": [
{
Expand All @@ -104,6 +107,7 @@
},
{
"objective": "binary:logistic",
"data-format": "cudf",
"scale-pos-weight": 1.8872389605086624,
"dataset": [
{
Expand All @@ -122,6 +126,7 @@
},
{
"objective": "reg:squarederror",
"data-format": "cudf",
"dataset": [
{
"source": "npy",
Expand Down
36 changes: 21 additions & 15 deletions configs/xgboost/xgb_gpu_main_config.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
{
"common": {
"lib": "xgboost",
"data-format": "cudf",
"data-order": "F",
"dtype": "float32",
"algorithm": "gbt",
"tree-method": "gpu_hist",
"count-dmatrix": ""
"tree-method": "gpu_hist"
},
"cases": [
{
"objective": "reg:squarederror",
"data-format": "cudf",
"dataset": [
{
"source": "npy",
Expand All @@ -26,10 +26,11 @@
],
"learning-rate": 0.03,
"max-depth": 6,
"n-estimators": 1000,
"objective": "reg:squarederror"
"n-estimators": 1000
},
{
"objective": "binary:logistic",
"data-format": "pandas",
"dataset": [
{
"source": "npy",
Expand All @@ -53,10 +54,11 @@
"min-child-weight": 0,
"max-depth": 8,
"max-leaves": 256,
"n-estimators": 1000,
"objective": "binary:logistic"
"n-estimators": 1000
},
{
"objective": "binary:logistic",
"data-format": "pandas",
"dataset": [
{
"source": "npy",
Expand All @@ -81,10 +83,11 @@
"max-depth": 8,
"max-leaves": 256,
"n-estimators": 1000,
"objective": "binary:logistic",
"inplace-predict": ""
},
{
"objective": "multi:softprob",
"data-format": "cudf",
"dataset": [
{
"source": "npy",
Expand All @@ -101,10 +104,11 @@
],
"learning-rate": 0.03,
"max-depth": 6,
"n-estimators": 1000,
"objective": "multi:softprob"
"n-estimators": 1000
},
{
"objective": "multi:softprob",
"data-format": "cudf",
"dataset": [
{
"source": "npy",
Expand All @@ -122,10 +126,11 @@
"min-child-weight": 1,
"min-split-loss": 0.1,
"max-depth": 8,
"n-estimators": 200,
"objective": "multi:softprob"
"n-estimators": 200
},
{
"objective": "reg:squarederror",
"data-format": "cudf",
"dataset": [
{
"source": "npy",
Expand All @@ -137,7 +142,6 @@
}
],
"n-estimators": 100,
"objective": "reg:squarederror",
"max-depth": 8,
"scale-pos-weight": 2,
"learning-rate": 0.1,
Expand All @@ -148,6 +152,8 @@
"max-leaves": 256
},
{
"objective": "multi:softprob",
"data-format": "cudf",
"dataset": [
{
"source": "npy",
Expand All @@ -163,12 +169,13 @@
}
],
"n-estimators": 60,
"objective": "multi:softprob",
"max-depth": 7,
"subsample": 0.7,
"colsample-bytree": 0.7
},
{
"objective": "binary:logistic",
"data-format": "cudf",
"dataset": [
{
"source": "npy",
Expand All @@ -184,7 +191,6 @@
}
],
"n-estimators": 10000,
"objective": "binary:logistic",
"max-depth": 1,
"subsample": 0.5,
"eta": 0.1,
Expand Down
Loading