Skip to content

Commit 3b8db71

Browse files
authored
Merge pull request #7 from AlphaQuantJS/dev
fix: improve CSV reader functionality and core modules
2 parents b9afd69 + d0c9589 commit 3b8db71

16 files changed

Lines changed: 642 additions & 284 deletions

File tree

src/core/dataframe/GroupBy.js

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
// src/core/dataframe/GroupBy.js
2+
import { DataFrame } from './DataFrame.js';
3+
import { Series } from './Series.js';
4+
5+
export class GroupBy {
  /**
   * Groups the rows of a DataFrame by the values of one or more columns.
   *
   * Rows are materialised once via `df.toArray()` and bucketed at construction
   * time; later changes to `df` are not reflected in the groups.
   *
   * @param {DataFrame} df - Source DataFrame
   * @param {string|string[]} by - Column(s) to group by
   */
  constructor(df, by) {
    this.df = df;
    this.by = Array.isArray(by) ? by : [by];
    this._groups = this._createGroups();
  }

  /**
   * Builds a collision-free string key for the grouping values of one row.
   *
   * Each value is encoded as a `[typeof value, String(value)]` pair and the
   * list of pairs is JSON-serialised. Unlike a plain `join('|')`, this keeps
   * `1` and `'1'` in separate groups and cannot be confused by values that
   * contain the delimiter. As a consequence `null`, `undefined` and `''` each
   * form their own group.
   *
   * @private
   * @param {Object} row - Row object as produced by `df.toArray()`
   * @returns {string} - Opaque group key
   */
  _groupKey(row) {
    const encoded = this.by.map((col) => {
      const value = row[col];
      return [typeof value, String(value)];
    });
    return JSON.stringify(encoded);
  }

  /**
   * Creates groups based on unique values in the grouping columns
   * @private
   * @returns {Map<string, number[]>} - Map of opaque group keys to row
   *   indices, in order of first appearance
   */
  _createGroups() {
    const groups = new Map();
    const rows = this.df.toArray();

    for (let i = 0; i < rows.length; i++) {
      const key = this._groupKey(rows[i]);
      const bucket = groups.get(key);

      if (bucket === undefined) {
        groups.set(key, [i]);
      } else {
        bucket.push(i);
      }
    }

    return groups;
  }

  /**
   * Creates the empty column-oriented result: grouping columns first, then
   * the requested value columns.
   *
   * @private
   * @param {string[]} valueColumns - Names of the computed columns
   * @returns {Object<string, Array>} - Map of column name to empty array
   * @throws {Error} If a value column has the same name as a grouping column
   *   (both would write into the same array and produce ragged columns)
   */
  _initResult(valueColumns) {
    const result = {};

    for (const col of this.by) {
      result[col] = [];
    }

    for (const col of valueColumns) {
      if (this.by.includes(col)) {
        throw new Error(
          `Cannot aggregate column '${col}': it is used as a grouping column`,
        );
      }
      result[col] = [];
    }

    return result;
  }

  /**
   * Appends the grouping values of a group to the result columns.
   *
   * The values are read from a representative row of the group instead of
   * being parsed back out of the string key, so their original types
   * (number, boolean, Date, ...) are preserved.
   *
   * @private
   * @param {Object<string, Array>} result - Column-oriented result
   * @param {Object} sampleRow - Any row belonging to the group
   */
  _pushKeyValues(result, sampleRow) {
    for (const col of this.by) {
      result[col].push(sampleRow[col]);
    }
  }

  /**
   * Builds the subset DataFrame for one group.
   *
   * @private
   * @param {Object[]} rows - All rows of the source DataFrame
   * @param {number[]} indices - Row indices that belong to the group
   * @returns {DataFrame} - DataFrame containing only the group's rows
   */
  _groupFrame(rows, indices) {
    return DataFrame.fromRows(indices.map((idx) => rows[idx]));
  }

  /**
   * Applies an aggregation function to each group
   * @param {Object} aggregations - Map of column names to aggregation
   *   functions; each function receives `groupDf.col(column)` for the group
   * @returns {DataFrame} - DataFrame with one row per group: the grouping
   *   columns followed by one column per aggregation
   * @throws {TypeError} If an aggregation entry is not a function
   * @throws {Error} If an aggregation targets a grouping column
   */
  agg(aggregations) {
    const specs = Object.entries(aggregations);

    for (const [col, aggFunc] of specs) {
      if (typeof aggFunc !== 'function') {
        throw new TypeError(`Aggregation for column '${col}' must be a function`);
      }
    }

    const result = this._initResult(specs.map(([col]) => col));

    // Materialise the rows once; doing it per index made this quadratic.
    const rows = this.df.toArray();

    for (const indices of this._groups.values()) {
      this._pushKeyValues(result, rows[indices[0]]);

      const groupDf = this._groupFrame(rows, indices);
      for (const [col, aggFunc] of specs) {
        result[col].push(aggFunc(groupDf.col(col)));
      }
    }

    return new DataFrame(result);
  }

  /**
   * Applies a function to each group and returns a DataFrame with the results
   * @param {Function} fn - Function to apply to each group; receives the
   *   group's DataFrame and must return a row object
   * @returns {DataFrame} - DataFrame with one row per group, each row being
   *   the returned object with the grouping values added (grouping values
   *   win on name clashes)
   * @throws {TypeError} If `fn` returns something that is not an object
   */
  apply(fn) {
    const rows = this.df.toArray();
    const results = [];

    for (const indices of this._groups.values()) {
      const result = fn(this._groupFrame(rows, indices));

      if (result === null || typeof result !== 'object') {
        throw new TypeError('GroupBy.apply: fn must return an object for each group');
      }

      const sampleRow = rows[indices[0]];
      const keyFields = {};
      for (const col of this.by) {
        keyFields[col] = sampleRow[col];
      }

      // Copy instead of mutating: fn may return the same object for every
      // group, which would otherwise leave all rows with the last group's key.
      results.push({ ...result, ...keyFields });
    }

    return DataFrame.fromRows(results);
  }

  /**
   * Returns the number of items in each group
   *
   * The size is taken from the group's row indices, so the source DataFrame
   * does not need to contain a column named `count`.
   *
   * @returns {DataFrame} - DataFrame with the grouping columns and a `count`
   *   column
   */
  count() {
    const result = this._initResult(['count']);
    const rows = this.df.toArray();

    for (const indices of this._groups.values()) {
      this._pushKeyValues(result, rows[indices[0]]);
      result.count.push(indices.length);
    }

    return new DataFrame(result);
  }

  /**
   * Returns the sum of values in each group
   *
   * NOTE(review): relies on the object returned by `DataFrame#col` exposing
   * `sum()` - confirm it is still provided after the Series refactoring.
   *
   * @param {string} column - Column to sum
   * @returns {DataFrame} - DataFrame with group sums
   */
  sum(column) {
    return this.agg({ [column]: (series) => series.sum() });
  }

  /**
   * Returns the mean of values in each group
   *
   * NOTE(review): relies on the object returned by `DataFrame#col` exposing
   * `mean()` - confirm it is still provided after the Series refactoring.
   *
   * @param {string} column - Column to average
   * @returns {DataFrame} - DataFrame with group means
   */
  mean(column) {
    return this.agg({ [column]: (series) => series.mean() });
  }
}

src/core/dataframe/Series.js

Lines changed: 8 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -11,22 +11,12 @@ export class Series {
1111
this.name = opts.name || '';
1212

1313
// Create vector from data
14-
if (data?._isVector) {
14+
if (data._isVector) {
1515
this.vector = data;
16-
this._length = data.length;
17-
} else if (Array.isArray(data)) {
18-
// For simplicity in tests, we use a simple array
19-
this._array = data;
20-
this._length = data.length;
21-
} else if (data === undefined) {
22-
// Empty array for initialization
23-
this._array = [];
24-
this._length = 0;
2516
} else {
26-
// For other data types, we try to create a vector
27-
// Note: VectorFactory.from is asynchronous, but we simplify it for tests
28-
this._array = Array.isArray(data) ? data : [];
29-
this._length = this._array.length;
17+
this.vector = VectorFactory.from(data, {
18+
preferArrow: opts.preferArrow ?? shouldUseArrow(data, opts),
19+
});
3020
}
3121
}
3222

@@ -43,52 +33,23 @@ export class Series {
4333
* ------------------------------------------------------------------ */
4434

4535
get length() {
46-
if (this.vector) return this.vector.length;
47-
if (this._array) return this._array.length;
48-
return this._length || 0;
36+
return this.vector.length;
4937
}
5038

5139
get values() {
52-
if (this.vector) return this.vector.toArray();
53-
return this._array || [];
40+
return this.vector.toArray();
5441
}
5542

5643
get(index) {
57-
if (this.vector) return this.vector.get(index);
58-
return this._array ? this._array[index] : undefined;
44+
return this.vector.get(index);
5945
}
6046

6147
/* ------------------------------------------------------------------ *
6248
* Data export *
6349
* ------------------------------------------------------------------ */
6450

6551
toArray() {
66-
if (this.vector) return this.vector.toArray();
67-
return this._array || [];
68-
}
69-
70-
/* ------------------------------------------------------------------ *
71-
* Aggregation methods *
72-
* ------------------------------------------------------------------ */
73-
74-
/**
75-
* Calculates the sum of all values in the Series
76-
* @returns {number} - Sum of all values
77-
*/
78-
sum() {
79-
const data = this.toArray();
80-
return data.reduce((acc, val) => acc + (Number(val) || 0), 0);
81-
}
82-
83-
/**
84-
* Calculates the mean (average) of all values in the Series
85-
* @returns {number} - Mean of all values
86-
*/
87-
mean() {
88-
const data = this.toArray();
89-
if (!data.length) return NaN;
90-
const sum = data.reduce((acc, val) => acc + (Number(val) || 0), 0);
91-
return sum / data.length;
52+
return this.vector.toArray();
9253
}
9354

9455
/* ------------------------------------------------------------------ *

src/core/lazy/optimizer.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ export function optimize(plan) {
2525

2626
/* ---------- 1. Merging filter + filter ---------- */
2727
if (step.op === 'filter' && prev.op === 'filter') {
28-
// Сохраняем оригинальные функции, чтобы избежать циклических ссылок
28+
// Save original functions to avoid circular references
2929
const prevFn = prev.fn;
3030
const stepFn = step.fn;
3131
prev.fn = (row) => prevFn(row) && stepFn(row);

src/core/storage/ArrowVector.js

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ import { ColumnVector } from './ColumnVector.js';
33
import { Vector } from 'apache-arrow';
44

55
/**
6-
* Обёртка над Apache Arrow Vector.
7-
* Поддерживает get / sum / map и сериализацию.
6+
* Wrapper around Apache Arrow Vector.
7+
* Supports get / sum / map and serialization.
88
*/
99
export class ArrowVector extends ColumnVector {
1010
/**
@@ -17,29 +17,29 @@ export class ArrowVector extends ColumnVector {
1717
}
1818

1919
/* -------------------------------------------------- *
20-
* Доступ к элементам *
20+
* Element access *
2121
* -------------------------------------------------- */
2222

2323
get(i) {
2424
return this._arrow.get(i);
2525
}
2626

2727
/* -------------------------------------------------- *
28-
* Агрегаты *
28+
* Aggregates *
2929
* -------------------------------------------------- */
3030

3131
sum() {
32-
// Arrow Vector имеет reduce
32+
// Arrow Vector has reduce
3333
return this._arrow.reduce((acc, v) => acc + (v ?? 0), 0);
3434
}
3535

3636
/* -------------------------------------------------- *
37-
* Трансформации *
37+
* Transformations *
3838
* -------------------------------------------------- */
3939

4040
/**
41-
* Возвращает новый ArrowVector, к которому применена функция fn.
42-
* Arrow JS Vector уже имеет метод map, который создаёт новый Vector.
41+
* Returns a new ArrowVector with the function fn applied.
42+
* Arrow JS Vector already has a map method that creates a new Vector.
4343
* @param fn
4444
*/
4545
map(fn) {
@@ -48,25 +48,25 @@ export class ArrowVector extends ColumnVector {
4848
}
4949

5050
/* -------------------------------------------------- *
51-
* Сериализация / экспорт *
51+
* Serialization / export *
5252
* -------------------------------------------------- */
5353

54-
/** Быстрое преобразование в JS-массив */
54+
/** Fast conversion to JS array */
5555
toArray() {
5656
return this._arrow.toArray();
5757
}
5858

59-
/** Поддержка JSON.stringify(series) */
59+
/** Support for JSON.stringify(series) */
6060
toJSON() {
6161
return this.toArray();
6262
}
6363

64-
/** Совместимость с ColumnVector.toArrow() */
64+
/** Compatibility with ColumnVector.toArrow() */
6565
toArrow() {
6666
return this._arrow;
6767
}
6868

69-
/** Маркер, что это Arrow-бэкенд (для внутренней логики) */
69+
/** Marker indicating that this is the Arrow backend (for internal logic) */
7070
get isArrow() {
7171
return true;
7272
}

0 commit comments

Comments
 (0)