Diffstat (limited to 'gensvm/gridsearch.py')
-rw-r--r-- gensvm/gridsearch.py | 55
1 file changed, 32 insertions(+), 23 deletions(-)
diff --git a/gensvm/gridsearch.py b/gensvm/gridsearch.py
index fbb9168..d62ab1d 100644
--- a/gensvm/gridsearch.py
+++ b/gensvm/gridsearch.py
@@ -99,6 +99,23 @@ class _MockEstimator(ClassifierMixin):
         return self.predictions
 
 
+def _wrap_score(y_pred, y_true, scorers, is_multimetric):
+    start_time = time.time()
+    results = {}
+    # we use -1 to signify missing predictions because numpy has no integer NaN
+    if np.any(y_pred < 0):
+        if is_multimetric:
+            for name in scorers:
+                results[name] = np.nan
+        else:
+            results["score"] = np.nan
+    else:
+        estimator = _MockEstimator(y_pred)
+        results = _score(estimator, None, y_true, scorers, is_multimetric)
+    score_time = time.time() - start_time
+    return results, score_time
+
+
 def _format_results(
     results,
     cv_idx,
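
The new _wrap_score helper folds the repeated mock-estimator scoring dance into one place and maps the -1 sentinel (a fold whose fit produced no predictions) to NaN for every requested metric. Below is a minimal, self-contained sketch of that behavior; wrap_score_sketch is a hypothetical stand-in, and plain metric callables replace sklearn's private _score machinery:

    import time

    import numpy as np
    from sklearn.metrics import accuracy_score


    def wrap_score_sketch(y_pred, y_true, scorers, is_multimetric):
        # Predictions of -1 mark a failed fold; every requested metric
        # becomes NaN instead of the scorer raising on bogus labels.
        start_time = time.time()
        if np.any(y_pred < 0):
            names = scorers if is_multimetric else ["score"]
            results = {name: np.nan for name in names}
        else:
            results = {name: fn(y_true, y_pred) for name, fn in scorers.items()}
            if not is_multimetric:
                # single-metric mode keys its result as "score", like _score does
                results = {"score": next(iter(results.values()))}
        return results, time.time() - start_time


    scorers = {"accuracy": accuracy_score}
    print(wrap_score_sketch(np.array([0, 1, 1]), np.array([0, 1, 0]), scorers, True)[0])
    # -> {'accuracy': 0.666...}
    print(wrap_score_sketch(np.array([-1, -1, -1]), np.array([0, 1, 0]), scorers, True)[0])
    # -> {'accuracy': nan}

Timing the NaN branch as well keeps the returned (results, score_time) pair uniform, so callers can always accumulate score_time without special-casing failed folds.
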
@@ -136,46 +153,38 @@ def _format_results(
     # Out must be a list of dicts of size n_params x n_splits that iterates
     # over the params in the list and for each param iterates over the splits.
-    for param, duration, predictions in zip(
-        results["params"], results["duration"], results["predictions"]
+    for param, durations, predictions in zip(
+        results["params"], results["durations"], results["predictions"]
     ):
-        for test_idx in np.unique(cv_idx):
+        fit_times = durations
+        is_missing = np.any(np.isnan(durations))
+        for test_idx in sorted(np.unique(cv_idx)):
             ret = []
             score_time = 0
             if return_train_score:
                 train_pred = predictions[cv_idx != test_idx,]
                 y_train = true_y[cv_idx != test_idx,]
-                train_mock = _MockEstimator(train_pred)
-                start_time = time.time()
-                train_scores = _score(
-                    train_mock, None, y_train, scorers, is_multimetric
+                train_score, score_t = _wrap_score(
+                    train_pred, y_train, scorers, is_multimetric
                 )
-                score_time += time.time() - start_time
-                ret.append(train_scores)
+                score_time += score_t
+                ret.append(train_score)
             test_pred = predictions[cv_idx == test_idx,]
             y_test = true_y[cv_idx == test_idx,]
-            test_mock = _MockEstimator(test_pred)
-            start_time = time.time()
-            test_scores = _score(
-                test_mock, None, y_test, scorers, is_multimetric
+            test_score, score_t = _wrap_score(
+                test_pred, y_test, scorers, is_multimetric
             )
-            score_time += time.time() - start_time
-            ret.append(test_scores)
+            score_time += score_t
+            ret.append(test_score)
             if return_n_test_samples:
                 ret.append(len(y_test))
             if return_times:
-                # Note, the C library returns the duration for a task (i.e. all
-                # splits). The _skl_format_cv_results() computes the mean of
-                # the values, which should represent the average time per
-                # split. To compute this correctly, we here divide by the
-                # number of splits. Since we calculate the mean later, the mean
-                # is still correct, but this is not the exact fit_time for this
-                # fold.
-                fit_time = duration / n_splits
+                fit_time = fit_times[test_idx]
+                score_time = np.nan if is_missing else score_time
                 ret.extend([fit_time, score_time])
             if return_parameters:
                 ret.append(param)
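
The return_times change is the substantive fix here: the C library now reports one fit duration per split, so fit_time is read directly from durations[test_idx] rather than spreading a single task-level duration evenly over n_splits. A NaN anywhere in a parameter's durations marks a failed fit, and the code propagates that by setting score_time to NaN as well. A small sketch of the indexing, with made-up durations, fold labels, and predictions:

    import numpy as np

    # Hypothetical inputs: one fit duration per CV split (NaN = failed fit),
    # a fold label per sample, and per-sample predictions (-1 = missing).
    durations = np.array([0.12, 0.15, np.nan])
    cv_idx = np.array([0, 0, 1, 1, 2, 2])
    predictions = np.array([0, 1, 1, 0, -1, -1])

    is_missing = np.any(np.isnan(durations))
    for test_idx in sorted(np.unique(cv_idx)):
        test_pred = predictions[cv_idx == test_idx,]  # trailing comma makes a tuple index
        fit_time = durations[test_idx]  # exact per-fold time, no division by n_splits
        score_time = np.nan if is_missing else 0.0
        print(test_idx, test_pred, fit_time, score_time)
    # -> 0 [0 1] 0.12 nan
    # -> 1 [1 0] 0.15 nan
    # -> 2 [-1 -1] nan nan

Note that is_missing is computed over all of a parameter's splits, so one failed fold turns score_time into NaN for every fold of that parameter, while fit_time stays per-fold; since _skl_format_cv_results() later averages these values, the mean score_time for a partially failed parameter comes out NaN instead of a misleading partial average.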