Skip to content

Commit 2802ad8

Browse files
authored
Merge pull request #13 from AlphaQuantJS/dev
fix: mutate method for DataFrame
2 parents 1838874 + 74765b5 commit 2802ad8

2 files changed

Lines changed: 65 additions & 54 deletions

File tree

src/methods/dataframe/transform/mutate.js

Lines changed: 50 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import { Series } from '../../../core/dataframe/Series.js';
2-
import { VectorFactory } from '../../../core/storage/VectorFactory.js';
32

43
/**
54
* Creates new columns or modifies existing columns in a DataFrame by applying functions to each row
@@ -18,35 +17,50 @@ export function mutate(df, columnFunctions, options = {}) {
1817
throw new Error('Column functions must be specified as an object');
1918
}
2019

21-
// Get row count
20+
// Get row count and columns for processing
2221
const rowCount = df.rowCount;
22+
const columns = df.columns;
2323

24-
// Convert DataFrame to array of row objects for processing
25-
const rows = df.toArray();
24+
// Process column functions and create new column arrays
25+
const newColumns = {};
2626

27-
// If inplace=true, modify DataFrame directly
28-
if (inplace) {
29-
// Apply mutation functions to each column
30-
for (const [colName, colFunc] of Object.entries(columnFunctions)) {
31-
if (typeof colFunc !== 'function') {
32-
throw new Error(`Value for column '${colName}' must be a function`);
33-
}
27+
// For each column function
28+
for (const [colName, colFunc] of Object.entries(columnFunctions)) {
29+
if (typeof colFunc !== 'function') {
30+
throw new Error(`Value for column '${colName}' must be a function`);
31+
}
32+
33+
// Create array for new column values
34+
const colValues = new Array(rowCount);
3435

35-
// Create new column by applying function to each row
36-
const values = [];
36+
// Process each row
37+
for (let i = 0; i < rowCount; i++) {
38+
// Build row object for this index
39+
const row = {};
40+
for (const col of columns) {
41+
row[col] = df.col(col).get(i);
42+
}
3743

38-
// Process each row
39-
for (let i = 0; i < rowCount; i++) {
40-
// Apply the transformation function with correct parameters
41-
const result = colFunc(rows[i], i, df);
44+
// Apply the transformation function with correct parameters
45+
let result = colFunc(row, i, df);
4246

43-
// Convert null/undefined to NaN
44-
values.push(result === null || result === undefined ? NaN : result);
47+
// Convert null/undefined to NaN
48+
if (result === null || result === undefined) {
49+
result = NaN;
4550
}
4651

47-
// Create new Series for this column
48-
const vector = VectorFactory.from(values);
49-
const series = new Series(vector, { name: colName });
52+
colValues[i] = result;
53+
}
54+
55+
// Store the column values
56+
newColumns[colName] = colValues;
57+
}
58+
59+
if (inplace) {
60+
// Update existing columns and add new ones
61+
for (const [colName, colValues] of Object.entries(newColumns)) {
62+
// Create a new Series for this column
63+
const series = new Series(colValues, { name: colName });
5064

5165
// Update or add Series to DataFrame
5266
df._columns[colName] = series;
@@ -62,36 +76,27 @@ export function mutate(df, columnFunctions, options = {}) {
6276
// Return the original DataFrame
6377
return df;
6478
} else {
65-
// Create a new object to store all columns
79+
// Create a new DataFrame with all columns
6680
const newData = {};
6781

68-
// Copy existing columns
69-
for (const col of df.columns) {
70-
newData[col] = df.col(col).toArray();
71-
}
72-
73-
// Apply mutation functions to each column
74-
for (const [colName, colFunc] of Object.entries(columnFunctions)) {
75-
if (typeof colFunc !== 'function') {
76-
throw new Error(`Value for column '${colName}' must be a function`);
82+
// Copy existing columns that aren't being modified
83+
for (const col of columns) {
84+
if (!(col in newColumns)) {
85+
newData[col] = df.col(col).toArray();
86+
} else {
87+
// Use the new values for modified columns
88+
newData[col] = newColumns[col];
7789
}
90+
}
7891

79-
// Create new column
80-
newData[colName] = [];
81-
82-
// Process each row
83-
for (let i = 0; i < rowCount; i++) {
84-
// Apply the transformation function with correct parameters
85-
const result = colFunc(rows[i], i, df);
86-
87-
// Convert null/undefined to NaN
88-
newData[colName].push(
89-
result === null || result === undefined ? NaN : result,
90-
);
92+
// Add completely new columns
93+
for (const colName of Object.keys(newColumns)) {
94+
if (!columns.includes(colName)) {
95+
newData[colName] = newColumns[colName];
9196
}
9297
}
9398

94-
// Create a new DataFrame with updated data
99+
// Create a new DataFrame with the updated data
95100
return new df.constructor(newData);
96101
}
97102
}

test/methods/dataframe/transform/mutate.test.js

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ describe('DataFrame.mutate', () => {
4343

4444
// Assert
4545
expect(result.columns).toContain('c');
46-
expect(Array.from(result.col('c'))).toEqual([10, 40, 90]);
46+
expect(result.col('c').toArray()).toEqual([10, 40, 90]);
4747
});
4848

4949
test('modifies an existing column with a function', () => {
@@ -56,7 +56,7 @@ describe('DataFrame.mutate', () => {
5656
const result = df.mutate(columnFunctions);
5757

5858
// Assert
59-
expect(Array.from(result.col('a'))).toEqual([2, 4, 6]);
59+
expect(result.col('a').toArray()).toEqual([2, 4, 6]);
6060
});
6161

6262
test('adds multiple columns with functions', () => {
@@ -72,8 +72,8 @@ describe('DataFrame.mutate', () => {
7272
// Assert
7373
expect(result.columns).toContain('c');
7474
expect(result.columns).toContain('d');
75-
expect(Array.from(result.col('c'))).toEqual([10, 40, 90]);
76-
expect(Array.from(result.col('d'))).toEqual([11, 22, 33]);
75+
expect(result.col('c').toArray()).toEqual([10, 40, 90]);
76+
expect(result.col('d').toArray()).toEqual([11, 22, 33]);
7777
});
7878

7979
test('throws error if column functions are not provided', () => {
@@ -101,7 +101,7 @@ describe('DataFrame.mutate', () => {
101101
const result = df.mutate(columnFunctions);
102102

103103
// Assert
104-
expect(Array.from(result.col('index'))).toEqual([0, 1, 2]);
104+
expect(result.col('index').toArray()).toEqual([0, 1, 2]);
105105
});
106106

107107
test('provides DataFrame as third parameter to column functions', () => {
@@ -114,7 +114,7 @@ describe('DataFrame.mutate', () => {
114114
const result = df.mutate(columnFunctions);
115115

116116
// Assert
117-
expect(Array.from(result.col('colCount'))).toEqual([5, 5, 5]);
117+
expect(result.col('colCount').toArray()).toEqual([5, 5, 5]);
118118
});
119119

120120
test('converts null and undefined to NaN in column functions', () => {
@@ -129,10 +129,16 @@ describe('DataFrame.mutate', () => {
129129

130130
// Assert
131131
expect(
132-
Array.from(result.col('nullValues')).every((v) => Number.isNaN(v)),
132+
result
133+
.col('nullValues')
134+
.toArray()
135+
.every((v) => Number.isNaN(v)),
133136
).toBe(true);
134137
expect(
135-
Array.from(result.col('undefinedValues')).every((v) => Number.isNaN(v)),
138+
result
139+
.col('undefinedValues')
140+
.toArray()
141+
.every((v) => Number.isNaN(v)),
136142
).toBe(true);
137143
});
138144

@@ -148,6 +154,6 @@ describe('DataFrame.mutate', () => {
148154
// Assert
149155
expect(result).toBe(df); // Должен вернуть тот же экземпляр DataFrame
150156
expect(df.columns).toContain('c');
151-
expect(Array.from(df.col('c'))).toEqual([10, 40, 90]);
157+
expect(df.col('c').toArray()).toEqual([10, 40, 90]);
152158
});
153159
});

0 commit comments

Comments
 (0)