Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -460,12 +460,18 @@ private List<ReadOnlyDataFrameBuffer<byte>> CloneNullBitMapBuffers()
public PrimitiveColumnContainer<T> Clone<U>(PrimitiveColumnContainer<U> mapIndices, Type type, bool invertMapIndices = false)
where U : unmanaged
{
PrimitiveColumnContainer<T> ret = new PrimitiveColumnContainer<T>(mapIndices.Length);

if (Buffers.Count == 0)
{
return ret;
}

ReadOnlySpan<T> thisSpan = Buffers[0].ReadOnlySpan;
ReadOnlySpan<byte> thisNullBitMapSpan = NullBitMapBuffers[0].ReadOnlySpan;
long minRange = 0;
long maxRange = DataFrameBuffer<T>.MaxCapacity;
long maxCapacity = maxRange;
PrimitiveColumnContainer<T> ret = new PrimitiveColumnContainer<T>(mapIndices.Length);
for (int b = 0; b < mapIndices.Buffers.Count; b++)
{
int index = b;
Expand Down
42 changes: 42 additions & 0 deletions test/Microsoft.Data.Analysis.Tests/DataFrameAssert.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Xunit;

namespace Microsoft.Data.Analysis.Tests
{
/// <summary>
/// Test-only assertions for comparing two <see cref="DataFrame"/> instances.
/// </summary>
public static class DataFrameAssert
{
    /// <summary>
    /// Asserts that <paramref name="actual"/> matches <paramref name="expected"/>:
    /// same column count, same row count and, column by column, the same name,
    /// the same runtime column type and the same value in every row.
    /// </summary>
    /// <param name="expected">The data frame describing the desired content.</param>
    /// <param name="actual">The data frame produced by the code under test.</param>
    public static void Equal(DataFrame expected, DataFrame actual)
    {
        // Shape is checked first; the loops below are bounded by the expected frame.
        Assert.Equal(expected.Columns.Count, actual.Columns.Count);
        Assert.Equal(expected.Rows.Count, actual.Rows.Count);

        var columnCount = expected.Columns.Count;
        var rowCount = expected.Rows.Count;

        for (int columnIndex = 0; columnIndex < columnCount; columnIndex++)
        {
            var wantColumn = expected.Columns[columnIndex];
            var gotColumn = actual.Columns[columnIndex];

            Assert.Equal(wantColumn.Name, gotColumn.Name);
            Assert.Equal(wantColumn.GetType(), gotColumn.GetType());

            for (int rowIndex = 0; rowIndex < rowCount; rowIndex++)
            {
                var wantCell = wantColumn[rowIndex];
                var gotCell = gotColumn[rowIndex];

                if (wantCell != null && gotCell != null)
                {
                    Assert.Equal(wantCell, gotCell);
                    continue;
                }

                // At least one side is null here, so both sides are required to be null.
                Assert.Null(wantCell);
                Assert.Null(gotCell);
            }
        }
    }
}
}
250 changes: 250 additions & 0 deletions test/Microsoft.Data.Analysis.Tests/DataFrameTests.Merge.cs
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,256 @@ public void TestMerge_Issue5778()
MatchRowsOnMergedDataFrame(merge, left, right, 1, 1, 0);
}

/// <summary>
/// Member data for <c>TestMerge_EmptyDataFrames</c>. Each row is
/// { left, right, left key columns, right key columns, join algorithm, expected output }.
/// The left frame has zero rows in every case and every expected output has zero rows.
/// </summary>
public static IEnumerable<object[]> GenerateData_TestMerge_EmptyDataFrames()
{
    // Each factory builds a brand-new frame so that no test case shares instances with another.
    DataFrame EmptyLeft() => new DataFrame(
        new Int32DataFrameColumn("Index"),
        new Int32DataFrameColumn("L1"),
        new Int32DataFrameColumn("L2"),
        new StringDataFrameColumn("L3")
    );

    DataFrame EmptyRight() => new DataFrame(
        new Int32DataFrameColumn("Index"),
        new Int32DataFrameColumn("R1"),
        new Int32DataFrameColumn("R2"),
        new StringDataFrameColumn("R3")
    );

    // Expected result: left columns followed by right columns, with the shared
    // "Index" column carrying "_left" / "_right" suffixes, and no rows.
    DataFrame EmptyMerged() => new DataFrame(
        new Int32DataFrameColumn("Index_left"),
        new Int32DataFrameColumn("L1"),
        new Int32DataFrameColumn("L2"),
        new StringDataFrameColumn("L3"),
        new Int32DataFrameColumn("Index_right"),
        new Int32DataFrameColumn("R1"),
        new Int32DataFrameColumn("R2"),
        new StringDataFrameColumn("R3")
    );

    // Case 1: empty left frame, populated right frame, left join.
    yield return new object[]
    {
        EmptyLeft(),
        new DataFrame(
            new Int32DataFrameColumn("Index", new[] { 0, 1, 2 }),
            new Int32DataFrameColumn("R1", new[] { 0, 1, 1 }),
            new Int32DataFrameColumn("R2", new[] { 1, 1, 2 }),
            new StringDataFrameColumn("R3", new[] { "Z", "Y", "B" })
        ),
        new string[]{ "L1" },
        new string[]{ "R1" },
        JoinAlgorithm.Left,
        EmptyMerged(),
    };

    // Cases 2-5: both frames empty, one case per join algorithm, in this order.
    JoinAlgorithm[] algorithmsForEmptyInputs =
    {
        JoinAlgorithm.Inner,
        JoinAlgorithm.Left,
        JoinAlgorithm.Right,
        JoinAlgorithm.FullOuter,
    };

    foreach (JoinAlgorithm algorithm in algorithmsForEmptyInputs)
    {
        yield return new object[]
        {
            EmptyLeft(),
            EmptyRight(),
            new string[]{ "L1" },
            new string[]{ "R1" },
            algorithm,
            EmptyMerged(),
        };
    }
}

/// <summary>
/// Merges <paramref name="left"/> with <paramref name="right"/> on the supplied key columns
/// using <paramref name="joinAlgorithm"/> and checks the result against
/// <paramref name="expectedOutput"/> with <c>DataFrameAssert.Equal</c>.
/// </summary>
[Theory]
[MemberData(nameof(GenerateData_TestMerge_EmptyDataFrames))]
public void TestMerge_EmptyDataFrames(DataFrame left, DataFrame right, string[] leftColumns, string[] rightColumns, JoinAlgorithm joinAlgorithm, DataFrame expectedOutput)
{
    // Act
    DataFrame merged = left.Merge(
        right,
        leftColumns,
        rightColumns,
        joinAlgorithm: joinAlgorithm);

    // Assert
    DataFrameAssert.Equal(expectedOutput, merged);
}

/// <summary>
/// Member data for <c>TestMerge_OuterJoinsPreserveUnmatched</c>. Each row is
/// { left, right, left key columns, right key columns, join algorithm, expected output }.
/// In every case the expected output keeps the three rows of the populated side
/// and holds nulls in all columns of the other side.
/// </summary>
public static IEnumerable<object[]> GenerateData_TestMerge_OuterJoinsPreserveUnmatched()
{
    // Three-row columns in which every value is null.
    Int32DataFrameColumn NullInt32Column(string name) =>
        new Int32DataFrameColumn(name, new int?[] { null, null, null });

    StringDataFrameColumn NullStringColumn(string name) =>
        new StringDataFrameColumn(name, new string[] { null, null, null });

    // Populated left input used by the two left-join cases; built fresh on every call.
    DataFrame PopulatedLeft() => new DataFrame(
        new Int32DataFrameColumn("Index", new[] { 0, 1, 2 }),
        new Int32DataFrameColumn("L1", new[] { 1, 2, 3 }),
        new Int32DataFrameColumn("L2", new[] { 1, 2, 1 }),
        new StringDataFrameColumn("L3", new[] { "A", "B", "C" })
    );

    // Expected output of the two left-join cases: left rows kept, right columns all null.
    DataFrame LeftRowsWithNullRight() => new DataFrame(
        new Int32DataFrameColumn("Index_left", new[] { 0, 1, 2 }),
        new Int32DataFrameColumn("L1", new[] { 1, 2, 3 }),
        new Int32DataFrameColumn("L2", new[] { 1, 2, 1 }),
        new StringDataFrameColumn("L3", new[] { "A", "B", "C" }),
        NullInt32Column("Index_right"),
        NullInt32Column("R1"),
        NullInt32Column("R2"),
        NullStringColumn("R3")
    );

    // Case 1: left join where the right frame has rows but none of its R1 keys
    // (10, 11, 11) equals any L1 key (1, 2, 3).
    yield return new object[]
    {
        PopulatedLeft(),
        new DataFrame(
            new Int32DataFrameColumn("Index", new[] { 0, 1, 2 }),
            new Int32DataFrameColumn("R1", new[] { 10, 11, 11 }),
            new Int32DataFrameColumn("R2", new[] { 1, 1, 2 }),
            new StringDataFrameColumn("R3", new[] { "Z", "Y", "B" })
        ),
        new string[]{ "L1" },
        new string[]{ "R1" },
        JoinAlgorithm.Left,
        LeftRowsWithNullRight(),
    };

    // Case 2: left join against a right frame that has no rows.
    yield return new object[]
    {
        PopulatedLeft(),
        new DataFrame(
            new Int32DataFrameColumn("Index"),
            new Int32DataFrameColumn("R1"),
            new Int32DataFrameColumn("R2"),
            new StringDataFrameColumn("R3")
        ),
        new string[]{ "L1" },
        new string[]{ "R1" },
        JoinAlgorithm.Left,
        LeftRowsWithNullRight(),
    };

    // Case 3: right join where the left frame has no rows.
    yield return new object[]
    {
        new DataFrame(
            new Int32DataFrameColumn("Index"),
            new Int32DataFrameColumn("L1"),
            new Int32DataFrameColumn("L2"),
            new StringDataFrameColumn("L3")
        ),
        new DataFrame(
            new Int32DataFrameColumn("Index", new[] { 0, 1, 2 }),
            new Int32DataFrameColumn("R1", new[] { 1, 2, 3 }),
            new Int32DataFrameColumn("R2", new[] { 1, 2, 1 }),
            new StringDataFrameColumn("R3", new[] { "A", "B", "C" })
        ),
        new string[]{ "L1" },
        new string[]{ "R1" },
        JoinAlgorithm.Right,
        new DataFrame(
            NullInt32Column("Index_left"),
            NullInt32Column("L1"),
            NullInt32Column("L2"),
            NullStringColumn("L3"),
            new Int32DataFrameColumn("Index_right", new[] { 0, 1, 2 }),
            new Int32DataFrameColumn("R1", new[] { 1, 2, 3 }),
            new Int32DataFrameColumn("R2", new[] { 1, 2, 1 }),
            new StringDataFrameColumn("R3", new[] { "A", "B", "C" })
        ),
    };
}

/// <summary>
/// Runs <c>Merge</c> for each case from
/// <see cref="GenerateData_TestMerge_OuterJoinsPreserveUnmatched"/> and checks that the
/// merged frame equals <paramref name="expectedOutput"/> via <c>DataFrameAssert.Equal</c>.
/// </summary>
[Theory]
[MemberData(nameof(GenerateData_TestMerge_OuterJoinsPreserveUnmatched))]
public void TestMerge_OuterJoinsPreserveUnmatched(DataFrame left, DataFrame right, string[] leftColumns, string[] rightColumns, JoinAlgorithm joinAlgorithm, DataFrame expectedOutput)
{
    // The merge is evaluated in place as the "actual" argument of the comparison.
    DataFrameAssert.Equal(
        expectedOutput,
        left.Merge(right, leftColumns, rightColumns, joinAlgorithm: joinAlgorithm));
}

[Fact]
//Issue 6127
public void TestMerge_CorrectColumnTypes()
Expand Down
Loading