Press n or j to go to the next uncovered block, b, p or k for the previous block.
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 | 1x 12x 12x 12x 12x 12x 12x 12x 12x 12x 12x 12x 10x 10x 106x 106x 10x 10x 10x 10x 9x 9x 10x 10x 28x 19x 19x 19x 10x 10x 10x 10x 10x 10x 10x 12x 94x 1x 1x 93x 93x 94x 105x 105x 105x 105x 75x 105x 1x 1x 104x 104x 92x 94x 12x 4x 4x 4x 12x 17x 17x 12x | /**
* One-Hot Encoder for categorical covariates.
*
* Mirrors scikit-learn's `preprocessing.OneHotEncoder` with
* `drop='first'` and `handle_unknown='ignore'`.
*
* This is a pure-TypeScript implementation, avoiding the need for
* an external Python dependency in the XReg pipeline.
*/
/**
* A raw categorical value accepted by the encoder: either a number or a string.
*
* The encoder compares categories by their `String(...)` form, so the number
* `1` and the string `'1'` are treated as the same category.
*/
export type Category = number | string;
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/**
* Everything the encoder learns during `fit()` and needs again in `transform()`.
*/
export interface OneHotEncoderState {
/**
* Sorted list of unique categories seen during fit (before drop).
* `fit()` stores the string form of each category, ordered by the default
* `Array.prototype.sort()` comparison.
*/
categories: Category[];
/** Whether to drop the first category (avoid multicollinearity). */
drop: 'first' | null;
/**
* String form of a category → output column index (after drop).
* When `drop` is `'first'`, the first entry of `categories` has no mapping.
*/
indexMap: Map<string, number>;
/** Number of output columns (after drop). */
numColumns: number;
}
// ---------------------------------------------------------------------------
// Core implementation
// ---------------------------------------------------------------------------
/**
* One-Hot Encoder for categorical features.
*
* Usage:
* ```typescript
* const encoder = new OneHotEncoder({ drop: 'first' });
* encoder.fit(['a', 'b', 'c', 'a']);
* const encoded = encoder.transform(['b', 'd']); // [[1,0], [0,0]]
* ```
*/
export class OneHotEncoder {
  /** Fitted state; stays `null` until `fit()` has been called. */
  private state: OneHotEncoderState | null = null;
  /** Drop strategy chosen at construction time. */
  private readonly _drop: 'first' | null;
  /** Policy for values that were never seen during `fit()`. */
  private readonly _handleUnknown: 'ignore' | 'error';

  /**
   * @param options - Encoder configuration.
   *   - `drop`: `'first'` (default) removes the column of the first sorted
   *     category; `null` keeps one column per category.
   *   - `handleUnknown`: `'ignore'` (default) encodes unseen values as an
   *     all-zero row; `'error'` makes `transform()` throw on them.
   */
  constructor(
    options: {
      drop?: 'first' | null;
      handleUnknown?: 'ignore' | 'error';
    } = {},
  ) {
    // An explicit `null` means "keep every column", so only `undefined` may
    // fall back to the default; `??` would wrongly turn `null` into 'first'.
    this._drop = options.drop !== undefined ? options.drop : 'first';
    this._handleUnknown = options.handleUnknown ?? 'ignore';
  }

  /**
   * Learn the category set from `values`, replacing any previous fit.
   *
   * Categories are compared and ordered by their string form: `1` and `'1'`
   * are the same category, and ordering is the default `Array.prototype.sort()`
   * order (so `'10'` sorts before `'2'`).
   *
   * @param values - Observed category values; duplicates are allowed.
   */
  fit(values: readonly Category[]): void {
    const sorted = Array.from(new Set(values.map((v) => String(v)))).sort();

    // With drop='first' the first sorted category becomes the implicit
    // baseline and receives no output column.
    const retained = this._drop === 'first' ? sorted.slice(1) : sorted;

    const indexMap = new Map<string, number>();
    retained.forEach((category, column) => {
      indexMap.set(category, column);
    });

    this.state = {
      categories: sorted,
      drop: this._drop,
      indexMap,
      numColumns: retained.length,
    };
  }

  /**
   * Transform values into one-hot encoded rows.
   *
   * Each row is an array of length `numColumns`. A category that owns a
   * column yields a single `1` in that column. The row is all zeros for the
   * category removed by `drop='first'`, and for unknown categories when
   * `handleUnknown='ignore'`.
   *
   * @param values - Values to encode.
   * @returns One row per input value, in input order.
   * @throws Error if the encoder has not been fitted.
   * @throws Error if `handleUnknown='error'` and a value was not seen during
   *   `fit()`.
   */
  transform(values: readonly Category[]): number[][] {
    if (!this.state) {
      throw new Error('OneHotEncoder not fitted. Call fit() first.');
    }
    const { categories, drop, indexMap, numColumns } = this.state;

    // The dropped baseline category was seen during fit but has no entry in
    // `indexMap`; it must encode to all zeros instead of being reported as
    // unknown.
    const droppedKey =
      drop === 'first' && categories.length > 0
        ? String(categories[0])
        : undefined;

    const result: number[][] = [];
    for (const value of values) {
      const row = new Array<number>(numColumns).fill(0);
      const key = String(value);
      const idx = indexMap.get(key);
      if (idx !== undefined) {
        row[idx] = 1;
      } else if (key !== droppedKey && this._handleUnknown === 'error') {
        throw new Error(`Unknown category: "${value}"`);
      }
      // Otherwise: dropped baseline, or unknown with handleUnknown='ignore'
      // → the row stays all zeros.
      result.push(row);
    }
    return result;
  }

  /**
   * Fit the encoder on `values` and return their encoding in one call.
   *
   * @param values - Values to learn from and then encode.
   * @returns The same rows `transform(values)` returns after `fit(values)`.
   */
  fitTransform(values: readonly Category[]): number[][] {
    this.fit(values);
    return this.transform(values);
  }

  /**
   * Number of output columns after encoding; `0` before `fit()` is called.
   */
  get numColumns(): number {
    return this.state?.numColumns ?? 0;
  }
}
|