Cross-Validation for Time Series: Avoiding Data Leakage
Standard cross-validation destroys financial models. Here's how to properly validate time series predictions.
The Problem with Random Splitting
In standard K-fold CV, data is randomly shuffled:
Random Split (WRONG for time series):
Fold 1: [Day 5, Day 12, Day 3] → predict [Day 8, Day 1]
Fold 2: [Day 1, Day 8, Day 15] → predict [Day 3, Day 10]
Problems:
- Temporal leakage: future observations inform "predictions" of the past
- Autocorrelation: adjacent samples are highly correlated, so test folds echo the training data
- Overly optimistic metrics: performance looks far better than it ever will in production
The sketch below shows how large the inflation can be.
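A minimal sketch of the effect, assuming synthetic AR(1) data, lagged features, and a random-forest model (all illustrative choices, not from this article): the same model scores very differently under shuffled versus time-ordered splits.

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold, TimeSeriesSplit, cross_val_score

rng = np.random.default_rng(0)
n = 1000
# AR(1) series: each value is mostly yesterday's value plus noise
y = np.zeros(n)
for t in range(1, n):
    y[t] = 0.95 * y[t - 1] + rng.normal(scale=0.1)

# Lagged features; drop the first rows where np.roll wraps around
X = np.column_stack([np.roll(y, k) for k in (1, 2, 3)])[3:]
target = y[3:]

model = RandomForestRegressor(n_estimators=50, random_state=0)
shuffled = cross_val_score(model, X, target,
                           cv=KFold(5, shuffle=True, random_state=0), scoring="r2")
ordered = cross_val_score(model, X, target,
                          cv=TimeSeriesSplit(5), scoring="r2")
print(f"Shuffled K-fold R^2: {shuffled.mean():.3f}")  # typically much higher
print(f"Time-ordered R^2:    {ordered.mean():.3f}")   # the realistic estimate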
Walk-Forward Validation
The gold standard for time series: train on past, test on future.
Expanding Window
Each fold trains on all data up to a cutoff and tests on the period that follows; as the cutoff advances, the training set grows:
Fold 1: [Day 1 ... Day 100] → predict [Day 101 ... Day 120]
Fold 2: [Day 1 ... Day 120] → predict [Day 121 ... Day 140]
Rolling Window
A fixed-size training window slides forward, discarding the oldest data; useful when older market regimes stop being representative:
Fold 1: [Day 1 ... Day 100]  → predict [Day 101 ... Day 120]
Fold 2: [Day 21 ... Day 120] → predict [Day 121 ... Day 140]
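Before writing a custom splitter, note that scikit-learn's built-in TimeSeriesSplit already implements the expanding variant, including a gap parameter in recent versions. A minimal sketch (the toy array is just a stand-in):

from sklearn.model_selection import TimeSeriesSplit
import numpy as np

X = np.arange(100).reshape(-1, 1)  # stand-in feature matrix

# Expanding-window splits with a 5-sample gap between train and test
tscv = TimeSeriesSplit(n_splits=5, gap=5)
for fold, (train_idx, test_idx) in enumerate(tscv.split(X)):
    print(f"Fold {fold + 1}: train [0..{train_idx[-1]}], "
          f"test [{test_idx[0]}..{test_idx[-1]}]")

For the rolling variant and finer control, here is a custom splitter: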
import numpy as np


class WalkForwardCV:
    """
    Walk-forward cross-validation for time series.

    Yields (train_indices, test_indices) pairs in which every
    training sample precedes every test sample.
    """

    def __init__(self, n_splits=5, test_size=None, gap=0, expanding=True):
        """
        Parameters:
            n_splits -- number of folds
            test_size -- size of each test set (None = 1/(n_splits+1) of the data)
            gap -- samples to skip between train and test (embargo period)
            expanding -- if True, the training set grows; if False, a
                         fixed-size window rolls forward
        """
        self.n_splits = n_splits
        self.test_size = test_size
        self.gap = gap
        self.expanding = expanding

    def split(self, X, y=None, groups=None):
        n_samples = len(X)

        if self.test_size is None:
            test_size = n_samples // (self.n_splits + 1)
        else:
            test_size = self.test_size

        # Size of the first training set. The gap is paid once per fold,
        # between the end of training and the start of testing, so it is
        # subtracted only once here.
        min_train_size = n_samples - self.n_splits * test_size - self.gap
        if min_train_size <= 0:
            raise ValueError("Not enough samples for the requested splits")

        for i in range(self.n_splits):
            # Expanding: always start at 0. Rolling: slide the start forward
            # so the training window keeps a constant size.
            train_start = 0 if self.expanding else i * test_size
            train_end = min_train_size + i * test_size
            test_start = train_end + self.gap
            test_end = test_start + test_size

            train_indices = np.arange(train_start, train_end)
            test_indices = np.arange(test_start, min(test_end, n_samples))

            yield train_indices, test_indices

    def get_n_splits(self, X=None, y=None, groups=None):
        return self.n_splits


# Usage example (X and y are a time-indexed DataFrame/Series)
cv = WalkForwardCV(n_splits=5, gap=5, expanding=True)

for fold, (train_idx, test_idx) in enumerate(cv.split(X)):
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    print(f"Fold {fold + 1}:")
    print(f"  Train: {train_idx[0]} to {train_idx[-1]} ({len(train_idx)} samples)")
    print(f"  Test:  {test_idx[0]} to {test_idx[-1]} ({len(test_idx)} samples)")
Purged K-Fold Cross-Validation
When you need test sets spread across the whole sample, K-fold style, but with the temporal structure preserved.
The Embargo Period
Financial labels often span time: a label assigned at day t (say, "did the price rise over the next 5 days?") is not actually known until day t+5. Any training sample whose label window overlaps the test period must be purged, and an additional embargo after the test set guards against serial correlation leaking backwards into training.
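A toy illustration of why the purge is needed; the 5-day label horizon and the dates are assumed for the example:

# Toy illustration (assumed 5-day label horizon): a training sample whose
# label window reaches into the test period leaks test information.
horizon = 5                  # label at day t uses prices from t to t+5
test_start, test_end = 100, 120

for t in (90, 97, 99):
    overlaps = t + horizon > test_start and t < test_end
    print(f"train sample day {t}: label window [{t}, {t + horizon}] "
          f"{'OVERLAPS test -> purge' if overlaps else 'safe'}")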
Implementation
class PurgedKFold:
    """
    Purged K-Fold cross-validation for financial data.

    Removes training samples close enough to the test set to leak
    information into it. Assumes rows are already in time order.
    """

    def __init__(self, n_splits=5, embargo_pct=0.01):
        """
        Parameters:
            n_splits -- number of folds
            embargo_pct -- fraction of the data to drop on each side of the
                           test set (purge before, embargo after)
        """
        self.n_splits = n_splits
        self.embargo_pct = embargo_pct

    def split(self, X, y=None, groups=None):
        n_samples = len(X)
        indices = np.arange(n_samples)
        embargo = int(n_samples * self.embargo_pct)

        test_size = n_samples // self.n_splits

        for i in range(self.n_splits):
            test_start = i * test_size
            test_end = (i + 1) * test_size if i < self.n_splits - 1 else n_samples

            test_indices = indices[test_start:test_end]

            # Train on everything outside the test fold, minus a buffer of
            # `embargo` samples on each side of it
            train_indices = np.concatenate([
                indices[:max(0, test_start - embargo)],
                indices[min(n_samples, test_end + embargo):]
            ])

            yield train_indices, test_indices

    def get_n_splits(self, X=None, y=None, groups=None):
        return self.n_splits
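A quick sanity check of the splitter; the array shape is a placeholder:

# With 1000 rows and a 1% embargo, each fold drops about 10 samples
# on each side of its test block.
X_demo = np.zeros((1000, 3))  # placeholder feature matrix

pkf = PurgedKFold(n_splits=5, embargo_pct=0.01)
for fold, (train_idx, test_idx) in enumerate(pkf.split(X_demo)):
    print(f"Fold {fold + 1}: test [{test_idx[0]}..{test_idx[-1]}], "
          f"{len(train_idx)} train samples after purging")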
Combinatorial Purged Cross-Validation (CPCV)
For more robust estimates, test on every combination of folds rather than a single forward pass; with n_splits=5 and n_test_splits=2 that yields C(5, 2) = 10 purged train/test configurations.
from itertools import combinations
from math import comb


class CombinatorialPurgedCV:
    """
    Combinatorial Purged Cross-Validation.

    Tests on every combination of `n_test_splits` folds out of
    `n_splits`, purging an embargo buffer around the test samples.
    """

    def __init__(self, n_splits=5, n_test_splits=2, embargo_pct=0.01):
        self.n_splits = n_splits
        self.n_test_splits = n_test_splits
        self.embargo_pct = embargo_pct

    def split(self, X, y=None, groups=None):
        n_samples = len(X)
        indices = np.arange(n_samples)
        embargo = int(n_samples * self.embargo_pct)

        # Partition the sample into n_splits contiguous groups
        group_size = n_samples // self.n_splits
        groups_list = [indices[i * group_size:(i + 1) * group_size]
                       for i in range(self.n_splits)]

        # Fold any remainder into the last group
        remainder = n_samples % self.n_splits
        if remainder:
            groups_list[-1] = np.concatenate([
                groups_list[-1],
                indices[-remainder:]
            ])

        # Every combination of groups serves as a test set once
        for test_group_indices in combinations(range(self.n_splits), self.n_test_splits):
            test_indices = np.concatenate([groups_list[i] for i in test_group_indices])
            test_indices.sort()

            # Start from all samples, drop the test set...
            train_mask = np.ones(n_samples, dtype=bool)
            train_mask[test_indices] = False

            # ...then drop an embargo buffer around every test sample
            for idx in test_indices:
                start = max(0, idx - embargo)
                end = min(n_samples, idx + embargo + 1)
                train_mask[start:end] = False

            train_indices = indices[train_mask]

            yield train_indices, test_indices

    def get_n_splits(self, X=None, y=None, groups=None):
        return comb(self.n_splits, self.n_test_splits)
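A quick check of the combinatorics; the array shape is again a placeholder:

cpcv = CombinatorialPurgedCV(n_splits=5, n_test_splits=2, embargo_pct=0.01)
print(cpcv.get_n_splits())  # -> 10, i.e. C(5, 2)

X_demo = np.zeros((1000, 3))  # placeholder feature matrix
n_folds = sum(1 for _ in cpcv.split(X_demo))
print(n_folds)  # -> 10, one split per combination of test groups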
Practical Validation Framework
def validate_model(model, X, y, cv_method='walk_forward', n_splits=5):
    """
    Run a model through one of the leakage-aware CV schemes and report
    classification metrics per fold.
    """
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

    # Select CV method
    if cv_method == 'walk_forward':
        cv = WalkForwardCV(n_splits=n_splits, gap=5)
    elif cv_method == 'purged':
        cv = PurgedKFold(n_splits=n_splits, embargo_pct=0.02)
    elif cv_method == 'cpcv':
        cv = CombinatorialPurgedCV(n_splits=n_splits, n_test_splits=2)
    else:
        raise ValueError(f"Unknown cv_method: {cv_method}")

    results = {
        'accuracy': [],
        'precision': [],
        'recall': [],
        'f1': [],
        'train_size': [],
        'test_size': []
    }

    for train_idx, test_idx in cv.split(X):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        # Train
        model.fit(X_train, y_train)

        # Predict
        y_pred = model.predict(X_test)

        # Metrics
        results['accuracy'].append(accuracy_score(y_test, y_pred))
        results['precision'].append(precision_score(y_test, y_pred, zero_division=0))
        results['recall'].append(recall_score(y_test, y_pred, zero_division=0))
        results['f1'].append(f1_score(y_test, y_pred, zero_division=0))
        results['train_size'].append(len(train_idx))
        results['test_size'].append(len(test_idx))

    # Summary statistics
    print(f"\nCross-Validation Results ({cv_method}, {n_splits} folds):")
    print(f"{'Metric':<12} {'Mean':>8} {'Std':>8} {'Min':>8} {'Max':>8}")
    print("-" * 45)

    for metric in ['accuracy', 'precision', 'recall', 'f1']:
        values = results[metric]
        print(f"{metric:<12} {np.mean(values):>8.4f} {np.std(values):>8.4f} "
              f"{np.min(values):>8.4f} {np.max(values):>8.4f}")

    return results
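An illustrative driver for the framework; the synthetic data and the LogisticRegression model are placeholders standing in for your features, labels, and classifier:

import pandas as pd
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(1000, 4)), columns=list("abcd"))
y = pd.Series((X["a"] + rng.normal(scale=0.5, size=1000) > 0).astype(int))

results = validate_model(LogisticRegression(), X, y,
                         cv_method='purged', n_splits=5)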
Key Takeaways
- Never randomly shuffle time series data
- Walk-forward validation is the gold standard
- Embargo periods prevent leakage from labels that span time
- Use purged K-fold when you need test sets across the whole sample
- Use CPCV for the most robust estimates
- Trust out-of-sample performance; in-sample numbers are always optimistic
Proper validation is the difference between backtested profits and real losses!