diff --git a/dbatools.psd1 b/dbatools.psd1 index be793291ac65..547e132af911 100644 --- a/dbatools.psd1 +++ b/dbatools.psd1 @@ -428,6 +428,7 @@ 'Grant-DbaAgPermission', 'Export-DbaCsv', 'Import-DbaCsv', + 'Import-DbaParquet', 'Import-DbaPfDataCollectorSetTemplate', 'Import-DbaRegServer', 'Import-DbaSpConfigure', @@ -436,9 +437,10 @@ 'Install-DbaDarlingData', 'Install-DbaFirstResponderKit', 'Install-DbaInstance', - 'Install-DbaMaintenanceSolution', - 'Install-DbaMultiTool', - 'Install-DbaSqlPackage', + 'Install-DbaMaintenanceSolution', + 'Install-DbaMultiTool', + 'Install-DbaParquet', + 'Install-DbaSqlPackage', 'Install-DbaSqlWatch', 'Install-DbaWhoIsActive', 'Invoke-DbaDbAzSqlTip', diff --git a/dbatools.psm1 b/dbatools.psm1 index 6ea08357ffd5..4a6dfba455ff 100644 --- a/dbatools.psm1 +++ b/dbatools.psm1 @@ -787,6 +787,7 @@ if ($PSVersionTable.PSVersion.Major -lt 5) { 'Resolve-DbaPath', 'Export-DbaCsv', 'Import-DbaCsv', + 'Import-DbaParquet', 'Invoke-DbaDbDataMasking', 'New-DbaDbMaskingConfig', 'Get-DbaDbccSessionBuffer', @@ -883,6 +884,7 @@ if ($PSVersionTable.PSVersion.Major -lt 5) { 'Export-DbaSysDbUserObject', 'Test-DbaDbQueryStore', 'Install-DbaMultiTool', + 'Install-DbaParquet', 'New-DbaAgentOperator', 'Remove-DbaAgentOperator', 'Remove-DbaDbTableData', @@ -1229,4 +1231,4 @@ Register-DbatoolsConfig' } } -[Dataplat.Dbatools.dbaSystem.SystemHost]::ModuleImported = $true \ No newline at end of file +[Dataplat.Dbatools.dbaSystem.SystemHost]::ModuleImported = $true diff --git a/private/configurations/settings/paths.ps1 b/private/configurations/settings/paths.ps1 index 7c2cbf659c80..bad621fea3c7 100644 --- a/private/configurations/settings/paths.ps1 +++ b/private/configurations/settings/paths.ps1 @@ -48,6 +48,9 @@ if (-not (Test-Path -Path $script:AppData)) { # The default path where dbatools stores persistent data Set-DbatoolsConfig -FullName 'Path.DbatoolsData' -Value (Join-DbaPath $script:AppData "PowerShell" "dbatools") -Initialize -Validation string -Handler { } 
-Description "The path where dbatools stores persistent data on a per user basis." +# The default path where dbatools stores Parquet.NET assemblies +Set-DbatoolsConfig -FullName 'Path.DbatoolsParquet' -Value (Join-DbaPath $script:AppData "PowerShell" "dbatools" "parquet") -Initialize -Validation string -Handler { } -Description "The path where dbatools stores Parquet.NET assemblies." + # The default path where dbatools stores temporary data Set-DbatoolsConfig -FullName 'Path.DbatoolsTemp' -Value $temp -Initialize -Validation string -Handler { } -Description "The path where dbatools stores temporary data." @@ -115,4 +118,4 @@ Set-DbatoolsConfig -FullName 'Path.Managed.Temp' -Value $path_Temp -Initialize - Set-DbatoolsConfig -FullName 'Path.Managed.LocalAppData' -Value $path_LocalAppData -Initialize -Validation 'string' -Description "Path pointing at the LocalAppData path. Used with Get-DbatoolsPath." Set-DbatoolsConfig -FullName 'Path.Managed.AppData' -Value $path_AppData -Initialize -Validation 'string' -Description "Path pointing at the AppData path. Used with Get-DbatoolsPath." Set-DbatoolsConfig -FullName 'Path.Managed.ProgramData' -Value $path_ProgramData -Initialize -Validation 'string' -Description "Path pointing at the ProgramData path. Used with Get-DbatoolsPath." -#endregion Managed Path Stuff \ No newline at end of file +#endregion Managed Path Stuff
diff --git a/private/functions/Get-DbaParquetPath.ps1 b/private/functions/Get-DbaParquetPath.ps1
new file mode 100644
index 000000000000..d71a80db918a
--- /dev/null
+++ b/private/functions/Get-DbaParquetPath.ps1
@@ -0,0 +1,90 @@
+function Get-DbaParquetPath {
+    <#
+    .SYNOPSIS
+        Gets the path to the Parquet.NET assembly.
+
+    .DESCRIPTION
+        Finds the Parquet.NET assembly used by Import-DbaParquet. Checks the currently loaded assembly first,
+        then the dbatools data directory populated by Install-DbaParquet, then the legacy bundled module path.
+        Both Parquet.dll and Parquet.Net.dll are accepted as file names in each location.
+
+    .PARAMETER Silent
+        Returns $null quietly when the assembly cannot be found instead of calling Stop-Function.
+
+    .PARAMETER EnableException
+        By default, when something goes wrong we try to catch it, interpret it and give you a friendly warning message.
+        This avoids overwhelming you with "sea of red" exceptions, but is inconvenient because it basically disables advanced scripting.
+        Using this switch turns this "nice by default" feature off and enables you to catch exceptions with your own try/catch.
+
+    .NOTES
+        Tags: Parquet, Import
+        Author: Jovan Popovic, the dbatools team + Claude
+
+        Website: https://dbatools.io
+        Copyright: (c) 2026 by dbatools, licensed under MIT
+        License: MIT https://opensource.org/licenses/MIT
+
+    .OUTPUTS
+        System.String. The path to the Parquet.NET assembly if found, otherwise $null.
+    #>
+    [CmdletBinding()]
+    param (
+        [switch]$Silent,
+        [switch]$EnableException
+    )
+
+    # An assembly already loaded into this AppDomain takes priority over anything on disk.
+    $loadedAssembly = [System.AppDomain]::CurrentDomain.GetAssemblies() | Where-Object { $_.GetName().Name -eq "Parquet" } | Select-Object -First 1
+    if ($loadedAssembly -and $loadedAssembly.Location -and (Test-Path -LiteralPath $loadedAssembly.Location -PathType Leaf)) {
+        Write-Message -Level Verbose -Message "Found loaded Parquet.NET assembly at: $($loadedAssembly.Location)"
+        return $loadedAssembly.Location
+    }
+
+    # Candidate directories in priority order: configured Parquet path, <DbatoolsData>/parquet, <module>/bin/parquet.
+    $searchDirectories = @()
+
+    $configuredPath = Get-DbatoolsConfigValue -FullName "Path.DbatoolsParquet"
+    if ($configuredPath) {
+        $searchDirectories += $configuredPath.TrimEnd("/", "\")
+    }
+
+    $dbatoolsData = Get-DbatoolsConfigValue -FullName "Path.DbatoolsData"
+    if ($dbatoolsData) {
+        $dbatoolsData = $dbatoolsData.TrimEnd("/", "\")
+        $searchDirectories += Join-Path -Path $dbatoolsData -ChildPath "parquet"
+    }
+
+    if ($script:PSModuleRoot) {
+        $searchDirectories += Join-Path -Path $script:PSModuleRoot -ChildPath "bin" | Join-Path -ChildPath "parquet"
+    }
+
+    # The first two locations point at the same directory by default, so drop exact duplicates.
+    # -LiteralPath keeps wildcard characters such as [ ] in a profile path from being treated as
+    # a pattern, and -PathType Leaf ignores a directory that happens to share the file name.
+    foreach ($directory in ($searchDirectories | Select-Object -Unique)) {
+        if (-not $directory) { continue }
+        foreach ($fileName in "Parquet.dll", "Parquet.Net.dll") {
+            $candidate = Join-Path -Path $directory -ChildPath $fileName
+            if (Test-Path -LiteralPath $candidate -PathType Leaf) {
+                Write-Message -Level Verbose -Message "Found Parquet.NET assembly at: $candidate"
+                return $candidate
+            }
+        }
+    }
+
+    if ($Silent) {
+        return $null
+    }
+
+    $message = @"
+Could not find Parquet.NET. Parquet.NET is required for Import-DbaParquet.
+
+To install Parquet.NET, use:
+    Install-DbaParquet
+
+This will download Parquet.NET and its managed dependencies to your dbatools data directory.
+"@
+
+    Stop-Function -Message $message -Target "Parquet.NET" -EnableException $EnableException
+    return $null
+}
diff --git a/public/Import-DbaParquet.ps1 b/public/Import-DbaParquet.ps1 new file mode 100644 index 000000000000..bcde9e9a0747 --- /dev/null +++ b/public/Import-DbaParquet.ps1 @@ -0,0 +1,1114 @@ +function Import-DbaParquet { + <# + .SYNOPSIS + Imports Parquet files into SQL Server tables using high-performance bulk copy operations. + + .DESCRIPTION + Import-DbaParquet uses .NET's SqlBulkCopy class to efficiently load Parquet data into SQL Server tables, handling files of any size from small datasets to multi-gigabyte imports. The function wraps the entire operation in a transaction, so any failure or interruption rolls back all changes automatically. + + Parquet files are read using Parquet.NET, which provides high-performance columnar data access. Unlike CSV, Parquet files contain schema information including column names and data types, which are used automatically during import. + + When the target table doesn't exist, you can use -AutoCreateTable to create it on the fly with string columns using UTF-8 varchar(MAX) by default (or nvarchar(MAX) with -NoUtf8). For production use, create your table first with proper data types and constraints.
The function intelligently maps Parquet columns to table columns by name, with fallback to ordinal position when needed. + + Column mapping lets you import specific columns or rename them during import, while schema detection can automatically place data in the correct schema based on filename patterns. + + Perfect for ETL processes, data migrations, or loading reference data where you need reliable, fast imports with proper error handling and transaction safety. + + .PARAMETER Path + Specifies the file path to Parquet files for import. Supports single files, multiple files, or pipeline input from Get-ChildItem. + + .PARAMETER SqlInstance + The SQL Server Instance to import data into. + + .PARAMETER SqlCredential + Login to the target instance using alternative credentials. Accepts PowerShell credentials (Get-Credential). + + Windows Authentication, SQL Server Authentication, Active Directory - Password, and Active Directory - Integrated are all supported. + + For MFA support, please use Connect-DbaInstance. + + .PARAMETER Database + Specifies the target database for the Parquet import. The database must exist on the SQL Server instance. + Use this to direct your data load to the appropriate database, whether it's a staging, ETL, or production database. + + .PARAMETER Schema + Specifies the target schema for the table. Defaults to 'dbo' if not specified. + If the schema doesn't exist, it will be created automatically when using -AutoCreateTable. This parameter takes precedence over -UseFileNameForSchema. + + .PARAMETER Table + Specifies the destination table name. If omitted, uses the Parquet filename as the table name. + The table will be created automatically with -AutoCreateTable using UTF-8 varchar(MAX) columns for strings by default, but for production use, create the table first with proper data types and constraints. + + .PARAMETER Column + Imports only the specified columns from the Parquet file, ignoring all others. Column names must match exactly. 
+ Use this to selectively load data when you only need certain fields, reducing import time and storage requirements. + + .PARAMETER ColumnMap + Maps Parquet columns to different table column names using a hashtable. Keys are Parquet column names, values are table column names. + Use this when your Parquet headers don't match your table structure or when importing from systems with different naming conventions. + + .PARAMETER KeepOrdinalOrder + Maps columns by position rather than by name matching. The first Parquet column goes to the first table column, second to second, etc. + Use this when column names don't match but the order is correct, or when dealing with files that have inconsistent naming. + + .PARAMETER AutoCreateTable + Creates the destination table automatically if it doesn't exist, using Parquet schema types for SQL column definitions. + String columns are created as UTF-8 varchar(MAX) by default (or nvarchar(MAX) with -NoUtf8), then automatically optimized based on actual data lengths. + For production use with specific constraints, create tables manually with appropriate data types, indexes, and constraints. + + .PARAMETER NoUtf8 + Switches AutoCreateTable string columns from UTF-8 varchar to nvarchar. + By default, string columns are created as varchar(MAX) COLLATE Latin1_General_100_BIN2_UTF8. + Use this switch to create string columns as nvarchar(MAX) instead. + + .PARAMETER NoColumnOptimize + Skips the automatic column size optimization that runs after AutoCreateTable imports. + By default, AutoCreateTable creates string columns as UTF-8 varchar(MAX) (or nvarchar(MAX) with -NoUtf8) and then shrinks them to fit the imported data. + Use this switch when importing multiple Parquet files into the same auto-created table, so that later files + with longer values are not rejected due to columns being shrunk to fit only the first file's data. + + .PARAMETER Truncate + Removes all existing data from the destination table before importing. 
The truncate operation is part of the transaction. + Use this for full data refreshes where you want to replace all existing data with the Parquet contents. + + .PARAMETER NotifyAfter + Sets how often progress notifications are displayed during the import, measured in rows. Defaults to 50,000. + Lower values provide more frequent updates but may slow the import slightly, while higher values reduce overhead for very large files. + + .PARAMETER BatchSize + Controls how many rows are sent to SQL Server in each batch during the bulk copy operation. Defaults to 50,000. + Larger batches are generally more efficient but use more memory, while smaller batches provide better granular control and error isolation. + + .PARAMETER UseFileNameForSchema + Extracts the schema name from the filename using the first period as a delimiter. For example, 'sales.customers.parquet' imports to the 'sales' schema. + If no period is found, defaults to 'dbo'. The schema will be created if it doesn't exist. This parameter is ignored if -Schema is explicitly specified. + + .PARAMETER TableLock + Acquires an exclusive table lock for the duration of the import instead of using row-level locks. + Improves performance for large imports by reducing lock overhead, but blocks other operations on the table during the import. + + .PARAMETER CheckConstraints + Enforces check constraints, foreign keys, and other table constraints during the import. By default, constraints are not checked for performance. + Enable this when data integrity validation is critical, but expect slower import performance. + + .PARAMETER FireTriggers + Executes INSERT triggers on the destination table during the bulk copy operation. By default, triggers are not fired for performance. + Use this when your triggers perform essential business logic like auditing, logging, or cascading updates that must run during import. 
+ + .PARAMETER KeepIdentity + Preserves identity column values from the Parquet file instead of generating new ones. By default, the destination assigns new identity values. + Use this when migrating data and you need to maintain existing primary key values or referential integrity. + + .PARAMETER NoProgress + Disables the progress bar display during import to improve performance, especially for very large files. + Use this in automated scripts or when maximum import speed is more important than visual progress feedback. + + .PARAMETER NoTransaction + Disables the automatic transaction wrapper, allowing partial imports to remain committed even if the operation fails. + Use this for very large imports where you want to commit data in batches, but be aware that failed imports may leave partial data. + + .PARAMETER StaticColumns + A hashtable of static column names and values to add to every row. + Useful for tagging imported data with metadata like source filename or import timestamp. + Keys are column names, values are the static values to insert. + Example: @{ SourceFile = "data.parquet"; ImportDate = (Get-Date) } + + .PARAMETER WhatIf + Shows what would happen if the command were to run. No actions are actually performed. + + .PARAMETER Confirm + Prompts you for confirmation before executing any changing operations within the command. + + .PARAMETER EnableException + By default, when something goes wrong we try to catch it, interpret it and give you a friendly warning message. + This avoids overwhelming you with "sea of red" exceptions, but is inconvenient because it basically disables advanced scripting. + Using this switch turns this "nice by default" feature off and enables you to catch exceptions with your own try/catch. 
+ + .NOTES + Tags: Import, Data, Utility + Author: Jovan Popovic, the dbatools team + Claude + + Website: https://dbatools.io + Copyright: (c) 2026 by dbatools, licensed under MIT + License: MIT https://opensource.org/licenses/MIT + + Requires Parquet.NET. Use Install-DbaParquet to install the external Parquet.NET assemblies if they are not already available. + + .LINK + https://dbatools.io/Import-DbaParquet + + + .OUTPUTS + PSCustomObject + + Returns one object per Parquet file imported. Each object contains comprehensive metrics about the import operation. + + Properties: + - ComputerName: The computer name of the SQL Server instance where the Parquet file was imported + - InstanceName: The SQL Server instance name + - SqlInstance: The full SQL Server instance name (computer\instance format) + - Database: The database name where data was imported + - Table: The table name where Parquet data was loaded + - Schema: The schema name containing the target table + - RowsCopied: The total number of rows successfully copied from the Parquet file (int64) + - Elapsed: The elapsed time for the import operation in elapsed time format (automatically formatted as HH:mm:ss.fff) + - RowsPerSecond: The average import rate calculated as total rows divided by elapsed time in seconds (decimal) + - Path: The full file system path of the imported Parquet file + + .EXAMPLE + PS C:\> Import-DbaParquet -Path C:\temp\housing.parquet -SqlInstance sql001 -Database markets + + Imports the entire housing.parquet to the SQL "markets" database on a SQL Server named sql001. + + Since a table name was not specified, the table name is automatically determined from filename as "housing". + + .EXAMPLE + PS C:\> Get-ChildItem -Path \\FileServer\parquets -Filter *.parquet | Import-DbaParquet -SqlInstance sql001, sql002 -Database tempdb -AutoCreateTable + + Imports every Parquet file in the \\FileServer\parquets path into both sql001 and sql002's tempdb database. 
Each Parquet file will be imported into an automatically determined table name. + + .EXAMPLE + PS C:\> Get-ChildItem -Path \\FileServer\parquets -Filter *.parquet | Import-DbaParquet -SqlInstance sql001, sql002 -Database tempdb -AutoCreateTable -WhatIf + + Shows what would happen if the command were to be executed + + .EXAMPLE + PS C:\> Import-DbaParquet -Path c:\temp\dataset.parquet -SqlInstance sql2016 -Database tempdb -Column Name, Address, Mobile + + Import only Name, Address and Mobile even if other columns exist. All other columns are ignored and therefore null or default values. + + .EXAMPLE + PS C:\> Import-DbaParquet -Path C:\temp\schema.data.parquet -SqlInstance sql2016 -database tempdb -UseFileNameForSchema + + Will import the contents of C:\temp\schema.data.parquet to table 'data' in schema 'schema'. + + .EXAMPLE + PS C:\> Import-DbaParquet -Path C:\temp\schema.data.parquet -SqlInstance sql2016 -database tempdb -UseFileNameForSchema -Table testtable + + Will import the contents of C:\temp\schema.data.parquet to table 'testtable' in schema 'schema'. + + .EXAMPLE + PS C:\> $columns = @{ + >> Text = "FirstName" + >> Number = "PhoneNumber" + >> } + PS C:\> Import-DbaParquet -Path c:\temp\supersmall.parquet -SqlInstance sql2016 -Database tempdb -ColumnMap $columns + + The Parquet field 'Text' is inserted into SQL column 'FirstName' and Parquet field Number is inserted into the SQL Column 'PhoneNumber'. All other columns are ignored and therefore null or default values. + + .EXAMPLE + PS C:\> Import-DbaParquet -Path C:\temp\refresh.parquet -SqlInstance sql001 -Database tempdb -Table LookupData -Truncate + + Performs a full data refresh by truncating the existing table before importing. The truncate and import + operations are wrapped in a transaction, so if the import fails, the original data is preserved. 
+ + .EXAMPLE + PS C:\> $static = @{ SourceFile = "sales_2024.parquet"; ImportDate = (Get-Date); Region = "EMEA" } + PS C:\> Import-DbaParquet -Path C:\temp\sales.parquet -SqlInstance sql001 -Database sales -Table SalesData -StaticColumns $static -AutoCreateTable + + Imports Parquet data and adds three static columns (SourceFile, ImportDate, Region) to every row. + This is useful for tracking data lineage and tagging imported records with metadata. + + .EXAMPLE + PS C:\> Import-DbaParquet -Path C:\temp\quickload.parquet -SqlInstance sql001 -Database tempdb -Table QuickData -AutoCreateTable + + Imports quickload.parquet with AutoCreateTable. After import completes, column sizes are automatically + optimized by querying actual max lengths and altering columns from varchar(MAX) to padded sizes + like varchar(16), varchar(32), varchar(64), etc. + #> + [CmdletBinding(SupportsShouldProcess, ConfirmImpact = "Low")] + param ( + [parameter(ValueFromPipeline)] + [ValidateNotNullOrEmpty()] + [Alias("Parquet", "FullPath")] + [object[]]$Path, + [Parameter(Mandatory)] + [DbaInstanceParameter[]]$SqlInstance, + [PSCredential]$SqlCredential, + [Parameter(Mandatory)] + [string]$Database, + [string]$Table, + [string]$Schema, + [switch]$Truncate, + [int]$BatchSize = 50000, + [int]$NotifyAfter = 50000, + [switch]$TableLock, + [switch]$CheckConstraints, + [switch]$FireTriggers, + [switch]$KeepIdentity, + [string[]]$Column, + [hashtable]$ColumnMap, + [switch]$KeepOrdinalOrder, + [switch]$AutoCreateTable, + [switch]$NoUtf8, + [switch]$NoColumnOptimize, + [switch]$NoProgress, + [switch]$UseFileNameForSchema, + [switch]$NoTransaction, + [hashtable]$StaticColumns, + [switch]$EnableException + ) + begin { + $scriptelapsed = [System.Diagnostics.Stopwatch]::StartNew() + $StoreStringAsUtf8 = -not $NoUtf8 + + if ($PSBoundParameters.UseFileNameForSchema -and $PSBoundParameters.Schema) { + Write-Message -Level Warning -Message "Schema and UseFileNameForSchema parameters both specified. 
UseFileNameForSchema will be ignored."
+        }
+
+        # Load Parquet.NET assembly (skipped when an assembly named "Parquet" is already loaded)
+        $parquetAssembly = [System.AppDomain]::CurrentDomain.GetAssemblies() | Where-Object { $_.GetName().Name -eq "Parquet" }
+        if (-not $parquetAssembly) {
+            $parquetDllPath = Get-DbaParquetPath -EnableException:$EnableException
+            if (-not $parquetDllPath) {
+                return
+            }
+
+            $parquetDirectory = Split-Path -Path $parquetDllPath -Parent
+            # Script-scoped so the resolve handler below, which is registered only once, always
+            # probes the directory chosen by the most recent call.
+            $script:dbatools_ParquetAssemblyPath = $parquetDirectory
+            if (-not $script:dbatools_ParquetAssemblyResolveRegistered) {
+                # Resolves assemblies the runtime could not find by looking for <simple name>.dll
+                # next to Parquet.NET. The first parameter is not named $sender because that is a
+                # PowerShell automatic variable.
+                $script:dbatools_ParquetAssemblyResolve = [System.ResolveEventHandler] {
+                    param($source, $resolveArgs)
+
+                    $assemblyName = (New-Object -TypeName System.Reflection.AssemblyName -ArgumentList $resolveArgs.Name).Name
+                    $candidate = Join-Path -Path $script:dbatools_ParquetAssemblyPath -ChildPath "$assemblyName.dll"
+                    if (Test-Path -Path $candidate) {
+                        return [System.Reflection.Assembly]::LoadFrom($candidate)
+                    }
+                    return $null
+                }
+                [System.AppDomain]::CurrentDomain.add_AssemblyResolve($script:dbatools_ParquetAssemblyResolve)
+                $script:dbatools_ParquetAssemblyResolveRegistered = $true
+            }
+
+            # Best-effort preload of every other DLL in the directory; failures are only logged.
+            Get-ChildItem -Path $parquetDirectory -Filter "*.dll" | Where-Object Name -notin "Parquet.dll", "Parquet.Net.dll" | Sort-Object Name | ForEach-Object {
+                # Capture the file first: inside catch, $PSItem / $_ is the ErrorRecord, not the file.
+                $dependency = $PSItem
+                try {
+                    Add-Type -Path $dependency.FullName -ErrorAction Stop
+                } catch {
+                    Write-Message -Level Verbose -Message "Could not preload Parquet.NET dependency $($dependency.Name): $($_.Exception.Message)"
+                }
+            }
+
+            try {
+                Add-Type -Path $parquetDllPath -ErrorAction Stop
+            } catch {
+                Stop-Function -Message "Could not load Parquet.NET from $parquetDllPath. Run Install-DbaParquet to install the required assemblies."
-ErrorRecord $_ -EnableException $EnableException + return + } + }
+
+        function Get-ParquetReader {
+            # Opens the file at $Path for reading and returns a Parquet.ParquetReader over it.
+            # The async factory is awaited synchronously. If creating the reader fails, the
+            # stream is disposed here before the error is rethrown.
+            param([string]$Path)
+            $stream = [System.IO.File]::OpenRead($Path)
+            try {
+                $reader = [Parquet.ParquetReader]::CreateAsync($stream).GetAwaiter().GetResult()
+                return $reader
+            } catch {
+                $stream.Dispose()
+                throw
+            }
+        }
+
+        function Get-ParquetDataFields {
+            # Returns the data fields of $Reader's schema. Calls Stop-Function with
+            # -EnableException $true on the first field whose CLR type is System.Object or an
+            # array other than byte[].
+            param($Reader)
+            $dataFields = $Reader.Schema.GetDataFields()
+            # Fail-fast on nested/complex types
+            foreach ($df in $dataFields) {
+                # -and and -or share one precedence level in PowerShell and evaluate left to
+                # right, so the intended grouping is written out explicitly.
+                if (($df.ClrType -eq [System.Object]) -or
+                    ($df.ClrType.IsArray -and $df.ClrType -ne [byte[]])) {
+                    Stop-Function -Message "Nested Parquet types not supported: $($df.Name) (type: $($df.ClrType.FullName))" -EnableException $true
+                    return
+                }
+            }
+            return $dataFields
+        }
+
+        function Get-ParquetDataTable {
+            # Materializes one row group of $Reader into a System.Data.DataTable.
+            #   Reader        - Parquet reader to read from
+            #   Column        - optional list of field names to keep; every field when empty
+            #   StaticColumns - optional name/value pairs written into every row (added as
+            #                   string columns when the name is not already a Parquet field)
+            #   RowGroupIndex - index of the row group to read
+            # The table is returned behind a unary comma so the pipeline does not enumerate it.
+            param(
+                $Reader,
+                [string[]]$Column,
+                [hashtable]$StaticColumns,
+                [int]$RowGroupIndex
+            )
+
+            function Convert-ParquetValueForColumn {
+                # Prepares one cell for a DataRow: $null becomes DBNull, values for byte[]
+                # columns are coerced to a real byte[], everything else passes through as is.
+                param(
+                    [object]$Value,
+                    [System.Type]$TargetType,
+                    [string]$ColumnName
+                )
+
+                if ($null -eq $Value) {
+                    return [DBNull]::Value
+                }
+
+                if ($TargetType -eq [byte[]]) {
+                    if ($Value -is [byte[]]) {
+                        return , $Value
+                    }
+
+                    if ($Value -is [System.Array]) {
+                        $converted = New-Object byte[] ($Value.Length)
+                        for ($index = 0; $index -lt $Value.Length; $index++) {
+                            $item = $Value[$index]
+
+                            if ($item -is [byte]) {
+                                $converted[$index] = $item
+                                continue
+                            }
+
+                            if ($item -is [int] -and $item -ge [byte]::MinValue -and $item -le [byte]::MaxValue) {
+                                $converted[$index] = [byte]$item
+                                continue
+                            }
+
+                            Stop-Function -Message "Could not convert value in column $ColumnName to byte array. Element type: $($item.GetType().FullName)." -EnableException $true
+                            return
+                        }
+
+                        return , $converted
+                    }
+
+                    Stop-Function -Message "Could not convert value in column $ColumnName from type $($Value.GetType().FullName) to byte array." -EnableException $true
+                    return
+                }
+
+                return $Value
+            }
+
+            $dataFields = $Reader.Schema.GetDataFields()
+            $dataTable = New-Object System.Data.DataTable
+
+            foreach ($df in $dataFields) {
+                if ($Column -and $Column -notcontains $df.Name) { continue }
+                [void]$dataTable.Columns.Add($df.Name, $df.ClrType)
+            }
+
+            if ($StaticColumns) {
+                foreach ($key in $StaticColumns.Keys) {
+                    if (-not $dataTable.Columns.Contains($key)) {
+                        [void]$dataTable.Columns.Add($key, [string])
+                    }
+                }
+            }
+
+            $rowGroupReader = $Reader.OpenRowGroupReader($RowGroupIndex)
+            try {
+                $columns = @{ }
+                $targetTypes = @{ }
+                $rowCount = 0
+                foreach ($df in $dataFields) {
+                    if ($Column -and $Column -notcontains $df.Name) { continue }
+                    $col = $rowGroupReader.ReadColumnAsync($df).GetAwaiter().GetResult()
+                    $columns[$df.Name] = $col.Data
+                    # Looked up once per column instead of once per cell.
+                    $targetTypes[$df.Name] = $dataTable.Columns[$df.Name].DataType
+                    $rowCount = $col.Data.Length
+                }
+
+                for ($row = 0; $row -lt $rowCount; $row++) {
+                    $dataRow = $dataTable.NewRow()
+                    foreach ($name in $columns.Keys) {
+                        $val = $columns[$name][$row]
+                        $dataRow[$name] = Convert-ParquetValueForColumn -Value $val -TargetType $targetTypes[$name] -ColumnName $name
+                    }
+                    if ($StaticColumns) {
+                        foreach ($key in $StaticColumns.Keys) {
+                            $dataRow[$key] = $StaticColumns[$key]
+                        }
+                    }
+                    [void]$dataTable.Rows.Add($dataRow)
+                }
+            } finally {
+                # Release the row group reader even when a column read or conversion throws.
+                if ($rowGroupReader -is [System.IDisposable]) { $rowGroupReader.Dispose() }
+            }
+
+            return , $dataTable
+        }
+
+        function Convert-ParquetTypeToSqlType {
+            # Maps a Parquet data field to a SQL Server column type string based on its CLR type.
+            # Reads $StoreStringAsUtf8 from the enclosing scope to choose between UTF-8 varchar
+            # and nvarchar for strings. Unsupported CLR types go to Stop-Function with
+            # -EnableException $true.
+            param([object]$DataField)
+            $clrType = $DataField.ClrType
+            switch ($clrType.FullName) {
+                "System.String" {
+                    $stringLength = "MAX"
+                    # A fixed-length field keeps its declared length when it fits a bounded column.
+                    if ($DataField.SchemaElement -and
+                        $DataField.SchemaElement.Type -eq "FIXED_LEN_BYTE_ARRAY" -and
+                        $DataField.SchemaElement.TypeLength -gt 0) {
+                        $maxAllowed = if ($StoreStringAsUtf8) { 8000 } else { 4000 }
+                        $typeLength = [int]$DataField.SchemaElement.TypeLength
+                        if ($typeLength -le $maxAllowed) {
+                            $stringLength = $typeLength
+                        }
+                    }
+
+                    if ($StoreStringAsUtf8) {
+                        return "varchar($stringLength) COLLATE Latin1_General_100_BIN2_UTF8"
+                    }
+                    return "nvarchar($stringLength)"
+                }
+                "System.Int32" { return "int" }
+                "System.Int64" { return "bigint" }
+                "System.Int16" { return "smallint" }
+                "System.Byte" { return "tinyint" }
+                "System.Boolean" { return "bit" }
+                "System.Single" { return "real" }
+                "System.Double" { return "float" }
+                "System.Decimal" {
+                    $precision = 38
+                    $scale = 18
+
+                    if ($DataField.GetType().Name -eq "DecimalDataField") {
+                        $precision = $DataField.Precision
+                        $scale = $DataField.Scale
+                    }
+
+                    # Clamp to what SQL Server's decimal type accepts.
+                    if ($precision -gt 38) {
+                        $precision = 38
+                    }
+
+                    if ($scale -gt $precision) {
+                        $scale = $precision
+                    }
+
+                    return "decimal($precision,$scale)"
+                }
+                "System.DateTime" { return "datetime2(6)" }
+                "System.DateTimeOffset" { return "datetimeoffset" }
+                "System.TimeSpan" { return "time" }
+                "System.Byte[]" {
+                    if ($DataField.SchemaElement -and
+                        $DataField.SchemaElement.Type -eq "FIXED_LEN_BYTE_ARRAY" -and
+                        $DataField.SchemaElement.TypeLength -gt 0) {
+                        return "varbinary($($DataField.SchemaElement.TypeLength))"
+                    }
+                    return "varbinary(MAX)"
+                }
+                "System.Guid" { return "uniqueidentifier" }
+                default {
+                    Stop-Function -Message "Unsupported Parquet type: $($clrType.FullName)" -EnableException $true
+                    return
+                }
+            }
+        }
+
+        function New-SqlTable {
+            <#
+                .SYNOPSIS
+                    Creates new Table using existing SqlCommand.
+
+                    SQL datatypes are inferred from Parquet schema data fields.
+                    String columns use UTF-8 varchar(MAX) by default (or nvarchar(MAX) when requested) and can be post-optimized.
+                    The target table name is taken from the caller's $schema and $table variables.
+
+                .EXAMPLE
+                    New-SqlTable -DataFields $dataFields -SqlConn $sqlconn -Transaction $transaction
+
+                .OUTPUTS
+                    Creates new table
+            #>
+            [Diagnostics.CodeAnalysis.SuppressMessageAttribute("PSUseShouldProcessForStateChangingFunctions", "")]
+            param (
+                [Parameter(Mandatory)]
+                [object[]]$DataFields,
+                [Microsoft.Data.SqlClient.SqlConnection]$sqlconn,
+                [Microsoft.Data.SqlClient.SqlTransaction]$transaction,
+                [hashtable]$StaticColumns
+            )
+
+            $sqldatatypes = @()
+            foreach ($df in $DataFields) {
+                $sqlType = Convert-ParquetTypeToSqlType -DataField $df
+                # Column names come from the file; a ] inside a [delimited] identifier must be
+                # doubled or it terminates the identifier and corrupts the statement.
+                $columnName = $df.Name.Replace("]", "]]")
+                $sqldatatypes += "[$columnName] $sqlType NULL"
+            }
+
+            # Static columns ride along on every row in the DataTable, so they must exist in the
+            # destination too — otherwise SqlBulkCopy.ColumnMappings.Add fails with "does not match
+            # up with any column in the source or destination". Use the same string column shape
+            # the rest of AutoCreateTable picks for parquet strings.
+            if ($StaticColumns) {
+                $stringSqlType = if ($StoreStringAsUtf8) { "varchar(MAX) COLLATE Latin1_General_100_BIN2_UTF8" } else { "nvarchar(MAX)" }
+                foreach ($key in $StaticColumns.Keys) {
+                    $columnName = "$key".Replace("]", "]]")
+                    $sqldatatypes += "[$columnName] $stringSqlType NULL"
+                }
+            }
+
+            $sql = "BEGIN CREATE TABLE [$schema].[$table] ($($sqldatatypes -join ", ")) END"
+            $sqlcmd = New-Object Microsoft.Data.SqlClient.SqlCommand($sql, $sqlconn, $transaction)
+
+            try {
+                $null = $sqlcmd.ExecuteNonQuery()
+            } catch {
+                Stop-Function -Continue -Message "Failed to execute $sql" -ErrorRecord $_
+            }
+
+            Write-Message -Level Verbose -Message "Successfully created table $schema.$table with the following column definitions:`n $($sqldatatypes -join "`n ")"
+            Write-Message -Level Verbose -Message "This is inefficient but allows the script to import without issues."
+            Write-Message -Level Verbose -Message "Consider creating the table first using best practices if the data will be used in production."
+        }
+
+        function Optimize-ColumnSize {
+            <#
+                .SYNOPSIS
+                    Optimizes varchar(MAX) columns to appropriate sizes after import.
+
+                .DESCRIPTION
+                    Measures the largest stored value in each nvarchar(MAX)/varchar(MAX) column and ALTERs the
+                    column to a padded, bounded size of the same base type and collation.
+                    Sizes are measured with DATALENGTH rather than LEN: varchar(n) is declared in bytes (a UTF-8
+                    character can occupy more than one) and nvarchar(n) in byte-pairs, and LEN ignores trailing spaces.
+                    This is called automatically when AutoCreateTable is used.
+
+                .NOTES
+                    Requires SQL Server 2005 or higher. This is not a limitation since varchar(MAX)
+                    was introduced in SQL Server 2005 - the feature this optimizes cannot exist on SQL 2000.
+            #>
+            [Diagnostics.CodeAnalysis.SuppressMessageAttribute("PSUseShouldProcessForStateChangingFunctions", "")]
+            param (
+                [Microsoft.Data.SqlClient.SqlConnection]$SqlConn,
+                [string]$Schema,
+                [string]$Table
+            )
+
+            Write-Message -Level Verbose -Message "Optimizing column sizes for $Schema.$Table..."
+
+            # Get column names and their current types from the table
+            $getColumnsSql = @"
+SELECT c.name AS ColumnName, t.name AS TypeName, c.collation_name AS CollationName
+FROM sys.columns c
+INNER JOIN sys.types t ON c.user_type_id = t.user_type_id
+WHERE c.object_id = OBJECT_ID(@tableName)
+    AND t.name IN ('nvarchar', 'varchar')
+    AND c.max_length = -1
+"@
+            $sqlcmd = New-Object Microsoft.Data.SqlClient.SqlCommand($getColumnsSql, $SqlConn)
+            $null = $sqlcmd.Parameters.AddWithValue("tableName", "[$Schema].[$Table]")
+
+            $columns = @{ }
+            $reader = $sqlcmd.ExecuteReader()
+            try {
+                while ($reader.Read()) {
+                    $columns[$reader["ColumnName"]] = [PSCustomObject]@{
+                        TypeName      = $reader["TypeName"]
+                        CollationName = if ($reader["CollationName"] -is [DBNull]) { $null } else { [string]$reader["CollationName"] }
+                    }
+                }
+            } finally {
+                # An open reader blocks further commands on the connection, so always close it.
+                $reader.Close()
+            }
+
+            if ($columns.Count -eq 0) {
+                Write-Message -Level Verbose -Message "No nvarchar(MAX)/varchar(MAX) columns to optimize."
+                return
+            }
+
+            # Build one query returning the largest stored size per column, expressed in the unit
+            # the column is declared in: bytes for varchar, byte-pairs for nvarchar.
+            $columnNames = @($columns.Keys)
+            $maxLenSelects = foreach ($name in $columnNames) {
+                # A ] inside a [delimited] identifier has to be doubled.
+                $quotedName = $name.Replace("]", "]]")
+                if ($columns[$name].TypeName -eq "nvarchar") {
+                    "MAX(DATALENGTH([$quotedName])) / 2 AS [$quotedName]"
+                } else {
+                    "MAX(DATALENGTH([$quotedName])) AS [$quotedName]"
+                }
+            }
+            $maxLenSql = "SELECT $($maxLenSelects -join ", ") FROM [$Schema].[$Table]"
+
+            $sqlcmd = New-Object Microsoft.Data.SqlClient.SqlCommand($maxLenSql, $SqlConn)
+            $reader = $sqlcmd.ExecuteReader()
+
+            $maxLengths = @{ }
+            try {
+                if ($reader.Read()) {
+                    foreach ($col in $columnNames) {
+                        $val = $reader[$col]
+                        if ($val -is [DBNull] -or $null -eq $val) {
+                            # Empty table or all-NULL column: size it as if it held one character.
+                            $maxLengths[$col] = 1
+                        } else {
+                            # DATALENGTH over a MAX column is bigint.
+                            $maxLengths[$col] = [long]$val
+                        }
+                    }
+                }
+            } finally {
+                $reader.Close()
+            }
+
+            # ALTER each column to appropriate size, preserving original type
+            foreach ($col in $columnNames) {
+                $maxLen = $maxLengths[$col]
+                if ($maxLen -eq 0) { $maxLen = 1 }
+
+                # Preserve the original column type (nvarchar stays nvarchar, varchar stays varchar)
+                # This is safer than trying to detect Unicode - no risk of data loss
+                $baseType = $columns[$col].TypeName
+                $maxAllowed = if ($baseType -eq "nvarchar") { 4000 } else { 8000 }
+
+                if ($maxLen -gt $maxAllowed) {
+                    # Keep as MAX if truly needed
+                    Write-Message -Level Verbose -Message "Column [$col] requires $baseType(MAX) - max length is $maxLen"
+                    continue
+                }
+
+                # Add padding to the length to allow for future data that may be slightly longer
+                # This prevents issues when re-importing to the same table with -Truncate
+                # Round up to common sizes: 16, 32, 64, 128, 256, 512, 1024, 2048, 4000/8000
+                $paddedLen = switch ($true) {
+                    ($maxLen -le 16) { 16; break }
+                    ($maxLen -le 32) { 32; break }
+                    ($maxLen -le 64) { 64; break }
+                    ($maxLen -le 128) { 128; break }
+                    ($maxLen -le 256) { 256; break }
+                    ($maxLen -le 512) { 512; break }
+                    ($maxLen -le 1024) { 1024; break }
+                    ($maxLen -le 2048) { 2048; break }
+                    default { $maxAllowed }
+                }
+                # Ensure we don't exceed the max allowed
+                if ($paddedLen -gt $maxAllowed) { $paddedLen = $maxAllowed }
+
+                $newType = "${baseType}($paddedLen)"
+                $collateClause = ""
+                if ($columns[$col].CollationName) {
+                    $collateClause = " COLLATE $($columns[$col].CollationName)"
+                }
+                # SQL Server 2008 R2 and earlier require NULL/NOT NULL in ALTER COLUMN
+                # Original columns were varchar(MAX) NULL, so we preserve NULL
+                $quotedCol = $col.Replace("]", "]]")
+                $alterSql = "ALTER TABLE [$Schema].[$Table] ALTER COLUMN [$quotedCol] $newType$collateClause NULL"
+
+                Write-Message -Level Verbose -Message "Optimizing [$col]: $baseType(MAX) -> $newType (max data length: $maxLen, padded to: $paddedLen)"
+
+                # Best effort: a column that cannot be altered is left as it is and only reported.
+                try {
+                    $sqlcmd = New-Object Microsoft.Data.SqlClient.SqlCommand($alterSql, $SqlConn)
+                    $null = $sqlcmd.ExecuteNonQuery()
+                } catch {
+                    Write-Message -Level Warning -Message "Failed to optimize column [$col]: $($_.Exception.Message)"
+                }
+            }
+
+            Write-Message -Level Verbose -Message "Column size optimization complete."
+        }
+
+        function ConvertTo-DotnetType {
+            # Maps a SQL data type name to the .NET type used for that column.
+            # Throws for any name that is not listed below.
+            # NOTE(review): "Time" maps to System.DateTime here while Convert-ParquetTypeToSqlType
+            # creates "time" columns from System.TimeSpan - confirm against the caller.
+            param (
+                [string]$DataType
+            )
+
+            switch ($DataType) {
+                "BigInt" { return [System.Int64] }
+                "Binary" { return [System.Byte[]] }
+                "VarBinary" { return [System.Byte[]] }
+                "Bit" { return [System.Boolean] }
+                "Char" { return [System.String] }
+                "VarChar" { return [System.String] }
+                "NChar" { return [System.String] }
+                "NVarChar" { return [System.String] }
+                "DateTime" { return [System.DateTime] }
+                "SmallDateTime" { return [System.DateTime] }
+                "Date" { return [System.DateTime] }
+                "Time" { return [System.DateTime] }
+                "DateTime2" { return [System.DateTime] }
+                "Decimal" { return [System.Decimal] }
+                "Numeric" { return [System.Decimal] }
+                "Money" { return [System.Decimal] }
+                "SmallMoney" { return [System.Decimal] }
+                "Float" { return [System.Double] }
+                "Int" { return [System.Int32] }
+                "Real" { return [System.Single] }
+                "UniqueIdentifier" { return [System.Guid] }
+                "SmallInt" { return [System.Int16] }
+                "TinyInt" { return [System.Byte] }
+                "Xml" { return [System.String] }
+                default { throw "Unsupported SMO DataType: $($DataType)" }
+            }
+        }
+
+ function Get-TableDefinitionFromInfoSchema { + param ( + [string]$table, + [string]$schema, +
$sqlconn, + $transaction + ) + + $query = "SELECT c.COLUMN_NAME, c.DATA_TYPE, c.ORDINAL_POSITION - 1 FROM INFORMATION_SCHEMA.COLUMNS AS c WHERE TABLE_SCHEMA = @schema AND TABLE_NAME = @table;" + $sqlcmd = New-Object Microsoft.Data.SqlClient.SqlCommand($query, $sqlconn, $transaction) + $null = $sqlcmd.Parameters.AddWithValue("schema", $schema) + $null = $sqlcmd.Parameters.AddWithValue("table", $table) + + $result = @() + try { + $reader = $sqlcmd.ExecuteReader() + foreach ($dataRow in $reader) { + $result += [PSCustomObject]@{ + Name = $dataRow[0] + DataType = $dataRow[1] + Index = $dataRow[2] + } + } + $reader.Close() + } catch { + Write-Message -Level Debug -Message "Error querying table definition: $_" + } + + return $result + } + + Write-Message -Level Verbose -Message "Started at $(Get-Date)" + } + process { + foreach ($filename in $Path) { + if (-not $PSBoundParameters.ColumnMap) { + $ColumnMap = $null + } + + if ($filename.FullName) { + $filename = $filename.FullName + } + + if (-not (Test-Path -Path $filename)) { + Stop-Function -Continue -Message "$filename cannot be found" + } + + $file = (Resolve-Path -Path $filename).ProviderPath + + $filename = [IO.Path]::GetFileNameWithoutExtension($file) + + # Automatically generate Table name if not specified + if (-not $PSBoundParameters.Table) { + $periodFound = $false + if ($filename.IndexOf(".") -ne -1) { $periodFound = $true } + + if ($UseFileNameForSchema -and $periodFound -and -not $PSBoundParameters.Schema) { + $table = $filename.Remove(0, $filename.IndexOf(".") + 1) + Write-Message -Level Verbose -Message "Table name not specified, using $table from file name" + } else { + $table = $filename + Write-Message -Level Verbose -Message "Table name not specified, using $table" + } + } + + # Use dbo as schema name if not specified in params, or as first string before a period in filename + if (-not ($PSBoundParameters.Schema)) { + if ($UseFileNameForSchema) { + if ($filename.IndexOf(".") -eq -1) { + $schema = "dbo" 
+ Write-Message -Level Verbose -Message "Schema not specified, and not found in file name, using dbo" + } else { + $schema = $filename.SubString(0, $filename.IndexOf(".")) + Write-Message -Level Verbose -Message "Schema detected in filename, using $schema" + } + } else { + $schema = "dbo" + Write-Message -Level Verbose -Message "Schema not specified, using dbo" + } + } + + foreach ($instance in $SqlInstance) { + $elapsed = [System.Diagnostics.Stopwatch]::StartNew() + # Open Connection to SQL Server + # Detect if user passed an already-open connection that we should preserve + $startedWithAnOpenConnection = $false + try { + # Check if user passed a Server SMO object with an open connection + # Following the pattern from Invoke-DbaQuery.ps1 + if ($instance.InputObject.GetType().Name -eq "Server" -and + (-not $SqlCredential) -and + ($instance.InputObject.ConnectionContext.DatabaseName -eq $Database -or -not $Database)) { + $startedWithAnOpenConnection = $true + Write-Message -Level Debug -Message "User provided an open connection - will preserve it after import" + } + + $server = Connect-DbaInstance -SqlInstance $instance -SqlCredential $SqlCredential -Database $Database -MinimumVersion 9 + $sqlconn = $server.ConnectionContext.SqlConnectionObject + if ($sqlconn.State -ne "Open") { + $sqlconn.Open() + } + } catch { + Stop-Function -Message "Failure" -Category ConnectionError -ErrorRecord $_ -Target $instance -Continue + } + + if (-not $NoTransaction) { + if ($PSCmdlet.ShouldProcess($instance, "Starting transaction in $Database")) { + # Everything will be contained within 1 transaction, even creating a new table if required + # and truncating the table, if specified. 
+ $transaction = $sqlconn.BeginTransaction() + } + } + + # Ensure Schema exists + $sql = "SELECT COUNT(*) FROM sys.schemas WHERE name = @schema" + $sqlcmd = New-Object Microsoft.Data.SqlClient.SqlCommand($sql, $sqlconn, $transaction) + $null = $sqlcmd.Parameters.AddWithValue("schema", $schema) + # If Schema doesn't exist create it + # Defaulting to dbo. + if (($sqlcmd.ExecuteScalar()) -eq 0) { + if (-not $AutoCreateTable) { + Stop-Function -Continue -Message "Schema $Schema does not exist and AutoCreateTable was not specified" + } + $sql = "CREATE SCHEMA [$schema] AUTHORIZATION dbo" + if ($PSCmdlet.ShouldProcess($instance, "Creating schema $schema")) { + $sqlcmd = New-Object Microsoft.Data.SqlClient.SqlCommand($sql, $sqlconn, $transaction) + try { + $null = $sqlcmd.ExecuteNonQuery() + } catch { + Stop-Function -Continue -Message "Could not create $schema" -ErrorRecord $_ + } + } + } + + # Ensure table or view exists + $sql = "SELECT COUNT(*) FROM sys.tables WHERE name = @table AND schema_id = schema_id(@schema)" + $sqlcmd = New-Object Microsoft.Data.SqlClient.SqlCommand($sql, $sqlconn, $transaction) + $null = $sqlcmd.Parameters.AddWithValue("schema", $schema) + $null = $sqlcmd.Parameters.AddWithValue("table", $table) + + $sql2 = "SELECT COUNT(*) FROM sys.views WHERE name = @table AND schema_id=schema_id(@schema)" + $sqlcmd2 = New-Object Microsoft.Data.SqlClient.SqlCommand($sql2, $sqlconn, $transaction) + $null = $sqlcmd2.Parameters.AddWithValue("schema", $schema) + $null = $sqlcmd2.Parameters.AddWithValue("table", $table) + + # Track if we created a "fat" table (varchar(MAX) for all columns) that needs post-import optimization + $createdFatTable = $false + + # Open Parquet reader to get schema information + $parquetReader = $null + try { + $parquetReader = Get-ParquetReader -Path $file + $dataFields = Get-ParquetDataFields -Reader $parquetReader + } catch { + Stop-Function -Continue -Message "Failed to open Parquet file: $file" -ErrorRecord $_ + } + + # Create the 
table if required. Remember, this will occur within a transaction, so if the script fails, the + # new table will no longer exist. + if (($sqlcmd.ExecuteScalar()) -eq 0 -and ($sqlcmd2.ExecuteScalar()) -eq 0) { + if (-not $AutoCreateTable) { + Stop-Function -Continue -Message "Table or view $table does not exist and AutoCreateTable was not specified" + } + Write-Message -Level Verbose -Message "Table does not exist" + + if ($PSCmdlet.ShouldProcess($instance, "Creating table $table")) { + try { + $splatNewSqlTable = @{ + DataFields = $dataFields + SqlConn = $sqlconn + Transaction = $transaction + } + if ($PSBoundParameters.StaticColumns) { + $splatNewSqlTable.StaticColumns = $StaticColumns + } + New-SqlTable @splatNewSqlTable + $createdFatTable = $true + } catch { + Stop-Function -Continue -Message "Failure" -ErrorRecord $_ + } + } + } else { + Write-Message -Level Verbose -Message "Table exists" + } + + # Truncate if specified. Remember, this will occur within a transaction, so if the script fails, the + # truncate will not be committed. 
+ if ($Truncate) { + $sql = "TRUNCATE TABLE [$schema].[$table]" + if ($PSCmdlet.ShouldProcess($instance, "Performing TRUNCATE TABLE [$schema].[$table] on $Database")) { + $sqlcmd = New-Object Microsoft.Data.SqlClient.SqlCommand($sql, $sqlconn, $transaction) + try { + $null = $sqlcmd.ExecuteNonQuery() + } catch { + Stop-Function -Continue -Message "Could not truncate $schema.$table" -ErrorRecord $_ + } + } + } + + # Setup bulk copy + Write-Message -Level Verbose -Message "Starting bulk copy for $(Split-Path $file -Leaf)" + + # Setup bulk copy options + [int]$bulkCopyOptions = ([Microsoft.Data.SqlClient.SqlBulkCopyOptions]::Default) + $options = "TableLock", "CheckConstraints", "FireTriggers", "KeepIdentity" + foreach ($option in $options) { + $optionValue = Get-Variable $option -ValueOnly -ErrorAction SilentlyContinue + if ($optionValue -eq $true) { + $bulkCopyOptions += $([Microsoft.Data.SqlClient.SqlBulkCopyOptions]::$option).value__ + } + } + + if ($PSCmdlet.ShouldProcess($instance, "Performing import from $file")) { + try { + # Create SqlBulkCopy using default options, or options specified in command line. + if ($bulkCopyOptions) { + $bulkcopy = New-Object Microsoft.Data.SqlClient.SqlBulkCopy($sqlconn, $bulkCopyOptions, $transaction) + } else { + $bulkcopy = New-Object Microsoft.Data.SqlClient.SqlBulkCopy($sqlconn, ([Microsoft.Data.SqlClient.SqlBulkCopyOptions]::Default), $transaction) + } + + $bulkcopy.DestinationTableName = "[$schema].[$table]" + $bulkcopy.BulkCopyTimeout = 0 + $bulkCopy.BatchSize = $BatchSize + $bulkCopy.NotifyAfter = $NotifyAfter + $bulkCopy.EnableStreaming = $true + + # Auto-create column mapping from Parquet schema for name-based matching + if (-not $KeepOrdinalOrder -and -not $Column) { + if ($ColumnMap) { + Write-Message -Level Verbose -Message "ColumnMap was supplied. Additional auto-mapping will not be attempted." 
+ } else { + try { + $ColumnMap = @{ } + foreach ($df in $dataFields) { + Write-Message -Level Verbose -Message "Adding $($df.Name) to ColumnMap" + $ColumnMap.Add($df.Name, $df.Name) + } + } catch { + # oh well, we tried + Write-Message -Level Verbose -Message "Couldn't auto create ColumnMap from Parquet schema" + $ColumnMap = $null + } + } + } + + if ($ColumnMap) { + foreach ($columnname in $ColumnMap) { + foreach ($key in $columnname.Keys | Sort-Object) { + #sort added in case of column maps done by ordinal + $null = $bulkcopy.ColumnMappings.Add($key, $columnname[$key]) + } + } + } + + if ($Column) { + foreach ($columnname in $Column) { + $null = $bulkcopy.ColumnMappings.Add($columnname, $columnname) + } + } + + # Add static column mappings for metadata tagging + if ($PSBoundParameters.StaticColumns) { + foreach ($key in $StaticColumns.Keys) { + $null = $bulkcopy.ColumnMappings.Add($key, $key) + } + } + + } catch { + Stop-Function -Continue -Message "Failure" -ErrorRecord $_ + } + + # Write to server + try { + + # The legacy bulk copy library uses a 4 byte integer to track the RowsCopied, so the only option is to use + # integer wrap so that copy operations of row counts greater than [int32]::MaxValue will report accurate numbers. 
+ # See https://github.com/dataplat/dbatools/issues/6927 for more details + $script:prevRowsCopied = [int64]0 + $script:totalRowsCopied = [int64]0 + + # Add rowcount output + $bulkCopy.Add_SqlRowsCopied( { + $script:totalRowsCopied += (Get-AdjustedTotalRowsCopied -ReportedRowsCopied $args[1].RowsCopied -PreviousRowsCopied $script:prevRowsCopied).NewRowCountAdded + + Write-Message -Level Verbose -FunctionName "Import-DbaParquet" -Message " Total rows copied = $($script:totalRowsCopied)" + # save the previous count of rows copied to be used on the next event notification + $script:prevRowsCopied = $args[1].RowsCopied + }) + + for ($rgIndex = 0; $rgIndex -lt $parquetReader.RowGroupCount; $rgIndex++) { + $dataTable = Get-ParquetDataTable -Reader $parquetReader -Column $Column -StaticColumns $StaticColumns -RowGroupIndex $rgIndex + + if (-not $NoProgress) { + $timetaken = [math]::Round($elapsed.Elapsed.TotalSeconds, 2) + $percent = [int]((($rgIndex + 1) / $parquetReader.RowGroupCount) * 100) + Write-ProgressHelper -StepNumber $percent -TotalSteps 100 -Activity "Importing from $file" -Message ([System.String]::Format("Progress: {0} rows {1}% in {2} seconds", $script:totalRowsCopied, $percent, $timetaken)) + } + + $dataReader = $dataTable.CreateDataReader() + $bulkCopy.WriteToServer($dataReader) + $dataReader.Dispose() + $dataTable.Dispose() + } + + $completed = $true + } catch { + $completed = $false + Stop-Function -Continue -Message "Failure" -ErrorRecord $_ + } finally { + try { + if ($parquetReader) { + $parquetReader.Dispose() + $parquetReader = $null + } + } catch { + } + + if (-not $NoTransaction) { + if ($completed) { + try { + $null = $transaction.Commit() + } catch { + } + + # Optimize column sizes after commit if we created a fat table + if ($createdFatTable -and -not $NoColumnOptimize) { + try { + Optimize-ColumnSize -SqlConn $sqlconn -Schema $schema -Table $table + } catch { + Write-Message -Level Warning -Message "Column size optimization failed: 
$($_.Exception.Message)" + } + } + } else { + try { + $null = $transaction.Rollback() + } catch { + } + } + } elseif ($completed -and $createdFatTable -and -not $NoColumnOptimize) { + # NoTransaction mode - still optimize if we created a fat table + try { + Optimize-ColumnSize -SqlConn $sqlconn -Schema $schema -Table $table + } catch { + Write-Message -Level Warning -Message "Column size optimization failed: $($_.Exception.Message)" + } + } + + # Only close connection if we created it (not user-provided) + if (-not $startedWithAnOpenConnection) { + try { + $sqlconn.Close() + $sqlconn.Dispose() + } catch { + } + } + + try { + $bulkCopy.Close() + $bulkcopy.Dispose() + } catch { + } + + $finalRowCountReported = Get-BulkRowsCopiedCount $bulkCopy + + $script:totalRowsCopied += (Get-AdjustedTotalRowsCopied -ReportedRowsCopied $finalRowCountReported -PreviousRowsCopied $script:prevRowsCopied).NewRowCountAdded + + if ($completed) { + Write-Progress -Id 1 -Activity "Inserting $($script:totalRowsCopied) rows" -Status "Complete" -Completed + } else { + Write-Progress -Id 1 -Activity "Inserting $($script:totalRowsCopied) rows" -Status "Failed" -Completed + } + } + } + # Clean up Parquet reader if ShouldProcess was skipped (WhatIf mode) + if ($parquetReader) { + try { $parquetReader.Dispose() } catch { } + $parquetReader = $null + } + if ($PSCmdlet.ShouldProcess($instance, "Finalizing import")) { + if ($completed) { + # "Note: This count does not take into consideration the number of rows actually inserted when Ignore Duplicates is set to ON." 
+ $rowsPerSec = [math]::Round($script:totalRowsCopied / $elapsed.ElapsedMilliseconds * 1000.0, 1) + + Write-Message -Level Verbose -Message "$($script:totalRowsCopied) total rows copied" + + [PSCustomObject]@{ + ComputerName = $server.ComputerName + InstanceName = $server.ServiceName + SqlInstance = $server.DomainInstanceName + Database = $Database + Table = $table + Schema = $schema + RowsCopied = $script:totalRowsCopied + Elapsed = [prettytimespan]$elapsed.Elapsed + RowsPerSecond = $rowsPerSec + Path = $file + } + } else { + Stop-Function -Message "Transaction rolled back." + return + } + } + } + } + } + end { + $totaltime = [math]::Round($scriptelapsed.Elapsed.TotalSeconds, 2) + Write-Message -Level Verbose -Message "Total Elapsed Time for everything: $totaltime seconds" + } +} diff --git a/public/Install-DbaParquet.ps1 b/public/Install-DbaParquet.ps1 new file mode 100644 index 000000000000..e333a4909eea --- /dev/null +++ b/public/Install-DbaParquet.ps1 @@ -0,0 +1,496 @@ +function Install-DbaParquet { + <# + .SYNOPSIS + Installs Parquet.NET assemblies required by Import-DbaParquet. + + .DESCRIPTION + Downloads Parquet.NET from NuGet and installs the netstandard2.0 assemblies into the dbatools data directory. + The installer also downloads and extracts the managed dependency closure declared by the NuGet packages. + + Parquet.NET is a managed .NET library, so the installed assemblies work across Windows, Linux, and macOS as long + as the host PowerShell/.NET runtime can load netstandard2.0 assemblies. + + By default, assemblies are installed to the dbatools data directory for the current user. Use -Path for a custom + portable location, or -LocalFile to install from an already downloaded nupkg, zip, or folder that contains + Parquet.dll or Parquet.Net.dll and its dependencies. + + .PARAMETER Path + Specifies the directory where Parquet.NET assemblies will be installed. + If not specified, defaults to the Path.DbatoolsParquet configuration value. 
+
+    .PARAMETER Version
+        Specifies the Parquet.Net NuGet package version to install. Defaults to 5.5.0, the version used by Import-DbaParquet.
+
+    .PARAMETER LocalFile
+        Specifies a local nupkg, zip file, or directory containing Parquet.dll or Parquet.Net.dll and its dependency DLLs.
+        Use this for offline or pre-approved package installs. Local nupkg files only contain Parquet.NET itself, so
+        dependency DLLs must also be present if installing without internet access.
+
+    .PARAMETER Force
+        Forces re-download and reinstallation even if Parquet.NET already exists in the target location.
+        Cached NuGet packages are downloaded again and confirmation prompts are suppressed.
+
+    .PARAMETER WhatIf
+        Shows what would happen if the command were to run. No actions are actually performed.
+
+    .PARAMETER Confirm
+        Prompts you for confirmation before executing any operations that change state.
+
+    .PARAMETER EnableException
+        By default, when something goes wrong we try to catch it, interpret it and give you a friendly warning message.
+        This avoids overwhelming you with "sea of red" exceptions, but is inconvenient because it basically disables advanced scripting.
+        Using this switch turns this "nice by default" feature off and enables you to catch exceptions with your own try/catch.
+
+    .NOTES
+        Tags: Import, Parquet, Install
+        Author: dbatools team
+
+        Website: https://dbatools.io
+        Copyright: (c) 2026 by dbatools, licensed under MIT
+        License: MIT https://opensource.org/licenses/MIT
+
+    .LINK
+        https://dbatools.io/Install-DbaParquet
+
+    .OUTPUTS
+        PSCustomObject
+
+        Returns installation details when Parquet.NET is installed or already present.
+
+        Properties:
+        - Name: The primary assembly name
+        - Path: The full file path to the Parquet.NET assembly
+        - Version: The installed Parquet.NET file version
+        - Installed: Boolean value of $true indicating the assembly is installed (newly installed or already present)
+        - Notes: Only returned when an existing installation was found and installation was skipped; explains the skip
+
+    .EXAMPLE
+        PS C:\> Install-DbaParquet
+
+        Downloads Parquet.NET and dependencies from NuGet and installs them to the dbatools data directory.
+
+    .EXAMPLE
+        PS C:\> Install-DbaParquet -Path C:\dbatools\parquet
+
+        Installs Parquet.NET and dependencies to C:\dbatools\parquet.
+
+    .EXAMPLE
+        PS C:\> Install-DbaParquet -LocalFile C:\temp\parquet-libs.zip
+
+        Installs Parquet.NET from a local zip file containing Parquet.dll and its dependencies.
+    #>
+    [CmdletBinding(SupportsShouldProcess, ConfirmImpact = "Medium")]
+    param (
+        [string]$Path,
+        [ValidateNotNullOrEmpty()]
+        [string]$Version = "5.5.0",
+        [string]$LocalFile,
+        [switch]$Force,
+        [switch]$EnableException
+    )
+
+    begin {
+        function Resolve-NuGetDependencyVersion {
+            <#
+            .SYNOPSIS
+                Reduces a NuGet dependency version range to a single concrete version string.
+
+            .DESCRIPTION
+                "[1.2.3]" resolves to 1.2.3, "[1.0,2.0)", "(1.0,)" and a bare "1.0" resolve to the
+                lower bound 1.0, and a range with only an upper bound such as "(,2.0]" resolves to
+                that upper bound as a best effort. Anything else is returned trimmed and unchanged.
+
+            .PARAMETER VersionRange
+                The version attribute of a nuspec dependency element.
+            #>
+            param (
+                [Parameter(Mandatory)]
+                [string]$VersionRange
+            )
+
+            $range = $VersionRange.Trim()
+            # Exact pin: [1.2.3]
+            if ($range -match '^\[(?<exact>[^,\]]+)\]$') {
+                return $Matches.exact.Trim()
+            }
+            # Lower-bounded range or bare minimum version
+            if ($range -match '^[\[\(]?\s*(?<minimum>[^,\[\]\(\)\s]+)') {
+                return $Matches.minimum.Trim()
+            }
+            # Upper bound only: there is no minimum to pick, so fall back to the stated bound
+            if ($range -match '^[\[\(]\s*,\s*(?<maximum>[^,\[\]\(\)\s]+)') {
+                return $Matches.maximum.Trim()
+            }
+            return $range
+        }
+
+        function Get-NuGetPackageDownloadUrl {
+            # Builds the nuget.org flat-container URL of a package; id and version are lower-cased.
+            param (
+                [Parameter(Mandatory)]
+                [string]$PackageId,
+                [Parameter(Mandatory)]
+                [string]$PackageVersion
+            )
+
+            $lowerPackageId = $PackageId.ToLowerInvariant()
+            $lowerVersion = $PackageVersion.ToLowerInvariant()
+            "https://api.nuget.org/v3-flatcontainer/$lowerPackageId/$lowerVersion/$lowerPackageId.$lowerVersion.nupkg"
+        }
+
+        function Save-NuGetPackage {
+            <#
+            .SYNOPSIS
+                Returns the path of a package file in the cache, downloading it when needed.
+
+            .DESCRIPTION
+                An existing cached file is reused unless -Force was passed to Install-DbaParquet.
+                The download is written to a ".partial" file and moved into place only after it
+                completes, so an interrupted download is never mistaken for a cached package.
+                Download errors are thrown to the caller.
+            #>
+            param (
+                [Parameter(Mandatory)]
+                [string]$PackageId,
+                [Parameter(Mandatory)]
+                [string]$PackageVersion,
+                [Parameter(Mandatory)]
+                [string]$PackageCache
+            )
+
+            if (-not (Test-Path -Path $PackageCache)) {
+                $null = New-Item -Path $PackageCache -ItemType Directory -Force
+            }
+
+            $packageFileName = "$($PackageId.ToLowerInvariant()).$($PackageVersion.ToLowerInvariant()).nupkg"
+            $packageFile = Join-Path -Path $PackageCache -ChildPath $packageFileName
+            if ((Test-Path -Path $packageFile) -and -not $Force) {
+                Write-Message -Level Verbose -Message "Using cached NuGet package $packageFile"
+                return $packageFile
+            }
+
+            $url = Get-NuGetPackageDownloadUrl -PackageId $PackageId -PackageVersion $PackageVersion
+            Write-Message -Level Verbose -Message "Downloading $PackageId $PackageVersion from $url"
+            $partialFile = "$packageFile.partial"
+            try {
+                Invoke-TlsWebRequest -Uri $url -OutFile $partialFile -UseBasicParsing -ErrorAction Stop
+                Move-Item -LiteralPath $partialFile -Destination $packageFile -Force -ErrorAction Stop
+            } catch {
+                # Do not leave a truncated file behind, then surface the original error
+                Remove-Item -LiteralPath $partialFile -Force -ErrorAction SilentlyContinue
+                throw
+            }
+            return $packageFile
+        }
+
+        function Expand-NuGetPackage {
+            param (
+                [Parameter(Mandatory)]
+                [string]$PackageFile,
+                [Parameter(Mandatory)]
+                [string]$DestinationPath
+            )
+
+            # Use System.IO.Compression rather than Expand-Archive because Expand-Archive is PowerShell v5+
+            # and silently skips entries it cannot translate (some NuGet packages contain files Expand-Archive
+            # quietly drops, leading to "lib not found" failures downstream). dbatools must support PowerShell v3.
+            Add-Type -AssemblyName System.IO.Compression.FileSystem -ErrorAction SilentlyContinue
+            if (Test-Path -Path $DestinationPath) {
+                Remove-Item -LiteralPath $DestinationPath -Recurse -Force -ErrorAction SilentlyContinue
+            }
+            [System.IO.Compression.ZipFile]::ExtractToDirectory($PackageFile, $DestinationPath)
+        }
+
+        function Get-NuGetLibPath {
+            param (
+                [Parameter(Mandatory)]
+                [string]$ExtractPath
+            )
+
+            $libRoot = Join-Path -Path $ExtractPath -ChildPath "lib"
+            if (-not (Test-Path -Path $libRoot)) {
+                return $null
+            }
+
+            # Pick the closest TFM to the host runtime. On .NET Core / .NET 5+ the netstandard2.0 builds
+            # of Parquet.NET hit Span/System.Memory ambiguity and throw NotImplementedException at
+            # runtime, so we walk net*.0 down from the running major version first. On Windows PowerShell
+            # we stay on .NET Framework targets.
+ $psEdition = $PSVersionTable.PSEdition + if (-not $psEdition) { $psEdition = "Desktop" } + + if ($psEdition -eq "Core") { + $preferredFrameworks = @() + $netMajor = [System.Environment]::Version.Major + if ($netMajor -lt 5) { $netMajor = 8 } + for ($i = $netMajor; $i -ge 5; $i--) { + $preferredFrameworks += "net$i.0" + } + $preferredFrameworks += @("netstandard2.1", "netcoreapp3.1", "netcoreapp3.0", "netstandard2.0") + } else { + $preferredFrameworks = @( + "net48", + "net472", + "net471", + "netstandard2.0", + "net462", + "net461", + "net46" + ) + } + + foreach ($framework in $preferredFrameworks) { + $candidate = Join-Path -Path $libRoot -ChildPath $framework + if (Test-Path -Path $candidate) { + return $candidate + } + } + + $fallback = Get-ChildItem -Path $libRoot -Directory | Sort-Object Name | Select-Object -First 1 + if ($fallback) { + return $fallback.FullName + } + return $null + } + + function Get-NuGetDependencies { + param ( + [Parameter(Mandatory)] + [string]$ExtractPath + ) + + $nuspecFile = Get-ChildItem -Path $ExtractPath -Filter "*.nuspec" | Select-Object -First 1 + if (-not $nuspecFile) { + return @() + } + + [xml]$nuspec = Get-Content -Path $nuspecFile.FullName -Raw + $dependencyGroups = @($nuspec.SelectNodes("//*[local-name()='dependencies']/*[local-name()='group']")) + $dependencies = @() + if ($dependencyGroups.Count -gt 0) { + $selectedGroup = $dependencyGroups | Where-Object { $_.targetFramework -in ".NETStandard2.0", "netstandard2.0", "NETStandard2.0" } | Select-Object -First 1 + if (-not $selectedGroup) { + $selectedGroup = $dependencyGroups | Where-Object { -not $_.targetFramework } | Select-Object -First 1 + } + if (-not $selectedGroup) { + $selectedGroup = $dependencyGroups | Where-Object { $_.targetFramework -match "netstandard" } | Sort-Object targetFramework | Select-Object -First 1 + } + if ($selectedGroup) { + $dependencies = @($selectedGroup.SelectNodes("*[local-name()='dependency']")) + } + } else { + $dependencies = 
@($nuspec.SelectNodes("//*[local-name()='dependencies']/*[local-name()='dependency']")) + } + + foreach ($dependency in $dependencies) { + if ($dependency.id -and $dependency.version) { + [PSCustomObject]@{ + Id = [string]$dependency.id + Version = Resolve-NuGetDependencyVersion -VersionRange ([string]$dependency.version) + } + } + } + } + + function Compare-NuGetVersion { + param ( + [Parameter(Mandatory)] + [string]$Left, + [Parameter(Mandatory)] + [string]$Right + ) + + $leftClean = ($Left -split "-", 2)[0].Trim() + $rightClean = ($Right -split "-", 2)[0].Trim() + + try { + $leftVer = [System.Version]$leftClean + $rightVer = [System.Version]$rightClean + return $leftVer.CompareTo($rightVer) + } catch { + return [string]::Compare($leftClean, $rightClean, [System.StringComparison]::OrdinalIgnoreCase) + } + } + + function Resolve-NuGetPackageGraph { + param ( + [Parameter(Mandatory)] + [string]$PackageId, + [Parameter(Mandatory)] + [string]$PackageVersion, + [Parameter(Mandatory)] + [string]$PackageCache, + [Parameter(Mandatory)] + [string]$ExtractRoot, + [Parameter(Mandatory)] + [hashtable]$ResolvedPackages + ) + + # NuGet "highest minimum" resolution: when multiple deps require the same package id, + # keep the highest minimum version instead of letting DFS order decide. 
+ $lowerId = $PackageId.ToLowerInvariant() + if ($ResolvedPackages.ContainsKey($lowerId)) { + $comparison = Compare-NuGetVersion -Left $PackageVersion -Right $ResolvedPackages[$lowerId].Version + if ($comparison -le 0) { + return + } + } + + $packageFile = Save-NuGetPackage -PackageId $PackageId -PackageVersion $PackageVersion -PackageCache $PackageCache + $extractPath = Join-Path -Path $ExtractRoot -ChildPath "$lowerId.$($PackageVersion.ToLowerInvariant())" + Expand-NuGetPackage -PackageFile $packageFile -DestinationPath $extractPath + + $ResolvedPackages[$lowerId] = [PSCustomObject]@{ + Id = $PackageId + Version = $PackageVersion + ExtractPath = $extractPath + } + + foreach ($dependency in Get-NuGetDependencies -ExtractPath $extractPath) { + Resolve-NuGetPackageGraph -PackageId $dependency.Id -PackageVersion $dependency.Version -PackageCache $PackageCache -ExtractRoot $ExtractRoot -ResolvedPackages $ResolvedPackages + } + } + + function Copy-ResolvedAssemblies { + param ( + [Parameter(Mandatory)] + [hashtable]$ResolvedPackages, + [Parameter(Mandatory)] + [string]$DestinationPath + ) + + # Collect every candidate DLL from the resolved package set, keyed by filename. + # When the same DLL ships in multiple packages, keep the highest file version so + # transitive dependencies cannot regress newer assemblies pulled in by leaf packages. 
+ $dllsByName = @{ } + foreach ($package in $ResolvedPackages.Values) { + $libPath = Get-NuGetLibPath -ExtractPath $package.ExtractPath + if (-not $libPath) { + Write-Message -Level Verbose -Message "No lib assets found for $($package.Id) $($package.Version)" + continue + } + + Get-ChildItem -Path $libPath -Filter "*.dll" | ForEach-Object { + $key = $PSItem.Name.ToLowerInvariant() + if (-not $dllsByName.ContainsKey($key)) { + $dllsByName[$key] = $PSItem + return + } + + $existingVersion = [System.Version]"0.0.0.0" + $candidateVersion = [System.Version]"0.0.0.0" + try { $existingVersion = [System.Version]$dllsByName[$key].VersionInfo.FileVersion } catch { } + try { $candidateVersion = [System.Version]$PSItem.VersionInfo.FileVersion } catch { } + if ($candidateVersion -gt $existingVersion) { + $dllsByName[$key] = $PSItem + } + } + } + + foreach ($file in $dllsByName.Values) { + Copy-Item -Path $file.FullName -Destination $DestinationPath -Force + Write-Message -Level Verbose -Message "Installed $($file.Name) ($($file.VersionInfo.FileVersion))" + } + } + + function Install-LocalParquetAssemblies { + param ( + [Parameter(Mandatory)] + [string]$SourcePath, + [Parameter(Mandatory)] + [string]$DestinationPath, + [Parameter(Mandatory)] + [string]$ExtractRoot + ) + + if (Test-Path -Path $SourcePath -PathType Container) { + $sourceRoot = $SourcePath + } else { + $sourceRoot = Join-Path -Path $ExtractRoot -ChildPath "local" + Expand-NuGetPackage -PackageFile $SourcePath -DestinationPath $sourceRoot + } + + $libPath = Get-NuGetLibPath -ExtractPath $sourceRoot + if ($libPath) { + Get-ChildItem -Path $libPath -Filter "*.dll" | Copy-Item -Destination $DestinationPath -Force + } else { + Get-ChildItem -Path $sourceRoot -Filter "*.dll" -Recurse | Copy-Item -Destination $DestinationPath -Force + } + } + + function Remove-DbaParquetTempDirectory { + param ( + [string]$TempPath + ) + + if (-not $TempPath) { + return + } + + try { + $resolvedTempPath = 
[System.IO.Path]::GetFullPath($TempPath) + $systemTempPath = [System.IO.Path]::GetFullPath([System.IO.Path]::GetTempPath()) + if ($resolvedTempPath.StartsWith($systemTempPath, [System.StringComparison]::OrdinalIgnoreCase)) { + Remove-Item -LiteralPath $resolvedTempPath -Recurse -Force -ErrorAction SilentlyContinue + } + } catch { + } + } + } + + process { + if ($Force) { $ConfirmPreference = "none" } + + if ($LocalFile -and $LocalFile.StartsWith("http")) { + Stop-Function -Message "LocalFile cannot be a URL. It must be a local file path." + return + } + + if ($LocalFile -and -not (Test-Path -Path $LocalFile)) { + Stop-Function -Message "LocalFile $LocalFile does not exist." + return + } + + Write-Progress -Activity "Installing Parquet.NET" -Status "Checking for existing installation..." -PercentComplete 0 + + $installedPath = Get-DbaParquetPath -Silent + if ($installedPath -and -not $Force) { + Write-Progress -Activity "Installing Parquet.NET" -Completed + $notes = "Parquet.NET already exists at $installedPath. Skipped installation. Use -Force to overwrite." 
+ Write-Message -Level Verbose -Message $notes + [PSCustomObject]@{ + Name = Split-Path -Path $installedPath -Leaf + Path = $installedPath + Version = (Get-Item -Path $installedPath).VersionInfo.FileVersion + Installed = $true + Notes = $notes + } + return + } + + if (-not $Path) { + $Path = Get-DbatoolsConfigValue -FullName "Path.DbatoolsParquet" + if (-not $Path) { + $dbatoolsData = Get-DbatoolsConfigValue -FullName "Path.DbatoolsData" + $dbatoolsData = $dbatoolsData.TrimEnd("/", "\") + $Path = Join-Path -Path $dbatoolsData -ChildPath "parquet" + } + } else { + Set-DbatoolsConfig -FullName "Path.DbatoolsParquet" -Value $Path + } + + $tempRoot = Join-Path -Path ([System.IO.Path]::GetTempPath()) -ChildPath "dbatools-parquet-$([System.Guid]::NewGuid().ToString())" + try { + if (-not $PSCmdlet.ShouldProcess($Path, "Install Parquet.NET assemblies")) { + Write-Progress -Activity "Installing Parquet.NET" -Completed + return + } + + Write-Progress -Activity "Installing Parquet.NET" -Status "Preparing installation directory..." -PercentComplete 10 + if (-not (Test-Path -Path $Path)) { + $null = New-Item -Path $Path -ItemType Directory -Force + } + + if (-not (Test-Path -Path $tempRoot)) { + $null = New-Item -Path $tempRoot -ItemType Directory -Force + } + + if ($LocalFile) { + Write-Progress -Activity "Installing Parquet.NET" -Status "Installing local assemblies..." -PercentComplete 40 + Install-LocalParquetAssemblies -SourcePath $LocalFile -DestinationPath $Path -ExtractRoot $tempRoot + } else { + Write-Progress -Activity "Installing Parquet.NET" -Status "Resolving NuGet dependencies..." 
-PercentComplete 35 + $packageCache = Join-Path -Path $Path -ChildPath "packages" + $extractRoot = Join-Path -Path $tempRoot -ChildPath "packages" + $resolvedPackages = @{ } + Resolve-NuGetPackageGraph -PackageId "Parquet.Net" -PackageVersion $Version -PackageCache $packageCache -ExtractRoot $extractRoot -ResolvedPackages $resolvedPackages + Write-Progress -Activity "Installing Parquet.NET" -Status "Installing assemblies..." -PercentComplete 70 + Copy-ResolvedAssemblies -ResolvedPackages $resolvedPackages -DestinationPath $Path + } + + Write-Progress -Activity "Installing Parquet.NET" -Status "Verifying installation..." -PercentComplete 90 + $parquetDllPath = Join-Path -Path $Path -ChildPath "Parquet.dll" + if (-not (Test-Path -Path $parquetDllPath)) { + $parquetDllPath = Join-Path -Path $Path -ChildPath "Parquet.Net.dll" + } + if (Test-Path -Path $parquetDllPath) { + Write-Progress -Activity "Installing Parquet.NET" -Completed + [PSCustomObject]@{ + Name = Split-Path -Path $parquetDllPath -Leaf + Path = $parquetDllPath + Version = (Get-Item -Path $parquetDllPath).VersionInfo.FileVersion + Installed = $true + } + } else { + Write-Progress -Activity "Installing Parquet.NET" -Completed + Stop-Function -Message "Parquet.NET installation failed. Parquet.dll was not found in $Path." + } + } catch { + Write-Progress -Activity "Installing Parquet.NET" -Completed + Stop-Function -Message "Failed to install Parquet.NET. 
$_" -ErrorRecord $_
+        } finally {
+            Remove-DbaParquetTempDirectory -TempPath $tempRoot
+        }
+    }
+}
diff --git a/tests/Import-DbaParquet.Tests.ps1 b/tests/Import-DbaParquet.Tests.ps1
new file mode 100644
index 000000000000..29f8013c28ee
--- /dev/null
+++ b/tests/Import-DbaParquet.Tests.ps1
@@ -0,0 +1,427 @@
+#Requires -Module @{ ModuleName="Pester"; ModuleVersion="5.0" }
+param(
+    $ModuleName = "dbatools",
+    $CommandName = "Import-DbaParquet",
+    $PSDefaultParameterValues = $TestConfig.Defaults
+)
+
+if ($null -eq $PSDefaultParameterValues) { # fall back to an empty table so the indexed assignments further down have something to write to
+    $PSDefaultParameterValues = @{ }
+}
+
+$hasIntegrationConfig = $false # flips to $true only when the lab repo, the target instance and all three fixture files are present
+if ($TestConfig -and $TestConfig.appveyorlabrepo -and $TestConfig.InstanceMulti1) {
+    $parquetFixturePath = Join-Path -Path $TestConfig.appveyorlabrepo -ChildPath "parquet"
+    $pathEcdc = Join-Path -Path $parquetFixturePath -ChildPath "ecdc_cases.parquet"
+    $pathBoundaries = Join-Path -Path $parquetFixturePath -ChildPath "world-administrative-boundaries.parquet"
+    $pathMixedTypes = Join-Path -Path $parquetFixturePath -ChildPath "mixed_types.parquet"
+    $hasIntegrationConfig = (Test-Path $pathEcdc) -and (Test-Path $pathBoundaries) -and (Test-Path $pathMixedTypes)
+}
+
+Describe $CommandName -Tag UnitTests { # signature-only checks: nothing in this Describe connects to SQL Server
+    Context "Parameter validation" {
+        It "Should have the expected parameters" {
+            $hasParameters = (Get-Command $CommandName).Parameters.Values.Name | Where-Object { $PSItem -notin ("WhatIf", "Confirm") }
+            $expectedParameters = @(
+                "Path",
+                "SqlInstance",
+                "SqlCredential",
+                "Database",
+                "Table",
+                "Schema",
+                "Truncate",
+                "BatchSize",
+                "NotifyAfter",
+                "TableLock",
+                "CheckConstraints",
+                "FireTriggers",
+                "KeepIdentity",
+                "Column",
+                "ColumnMap",
+                "KeepOrdinalOrder",
+                "AutoCreateTable",
+                "NoUtf8",
+                "NoColumnOptimize",
+                "NoProgress",
+                "UseFileNameForSchema",
+                "NoTransaction",
+                "StaticColumns",
+                "EnableException"
+            )
+            ($expectedParameters | Where-Object { $PSItem -notin $hasParameters }) | Should -BeNullOrEmpty # one-directional: fails only when an expected name is missing, extra parameters are tolerated
+        }
+
+        It "Should not have any CSV-only parameters" {
+            $csvOnlyParams = @(
+                "NoHeaderRow",
+                "Delimiter",
+                "SingleColumn",
+                "KeepNulls",
+                "Quote",
+                "Escape",
+                "Comment",
+                "TrimmingOption",
+                "BufferSize",
+                "ParseErrorAction",
+                "Encoding",
+                "NullValue",
+                "MaxQuotedFieldLength",
+                "SkipEmptyLine",
+                "SupportsMultiline",
+                "UseColumnDefault",
+                "MaxDecompressedSize",
+                "SkipRows",
+                "QuoteMode",
+                "DuplicateHeaderBehavior",
+                "MismatchedFieldAction",
+                "DistinguishEmptyFromNull",
+                "NormalizeQuotes",
+                "CollectParseErrors",
+                "MaxParseErrors",
+                "DateTimeFormats",
+                "Culture",
+                "SampleRows",
+                "DetectColumnTypes"
+            )
+            $commandParams = (Get-Command $CommandName).Parameters.Keys
+            foreach ($csvParam in $csvOnlyParams) { # one assertion per name, so a failure identifies the offending parameter
+                $commandParams | Should -Not -Contain $csvParam
+            }
+        }
+    }
+}
+
+Describe $CommandName -Tag IntegrationTests -Skip:(-not $hasIntegrationConfig) { # skipped as a whole when the prerequisites computed at the top of the file are not met
+    BeforeAll {
+        $PSDefaultParameterValues["*-Dba*:EnableException"] = $true
+
+        $null = Install-DbaParquet # make sure the Parquet.NET assemblies are installed before any import is attempted
+
+        # Set up Parquet file paths for testing (same values as the script-level assignments above; NOTE(review): presumably repeated because Pester 5 does not carry discovery-time variables into the run phase - confirm)
+        $parquetFixturePath = Join-Path -Path $TestConfig.appveyorlabrepo -ChildPath "parquet"
+        $pathEcdc = Join-Path -Path $parquetFixturePath -ChildPath "ecdc_cases.parquet"
+        $pathBoundaries = Join-Path -Path $parquetFixturePath -ChildPath "world-administrative-boundaries.parquet"
+        $pathMixedTypes = Join-Path -Path $parquetFixturePath -ChildPath "mixed_types.parquet"
+
+        $PSDefaultParameterValues.Remove("*-Dba*:EnableException") # stop forcing -EnableException for the commands executed inside the tests
+    }
+
+    AfterAll {
+        $PSDefaultParameterValues["*-Dba*:EnableException"] = $true
+
+        # Cleanup test tables (safety net: every table name used anywhere in this Describe, in addition to the per-Context cleanups)
+        Get-DbaDbTable -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Table ecdc_cases, "world-administrative-boundaries", ecdc_cases_static, ecdc_cases_ordinal, ecdc_cases_notxn, ecdc_cases_utf8, ecdc_cases_utf16, mixed_types, mixed_types_columns, mixed_types_column_map, world_boundaries_exact -ErrorAction SilentlyContinue | Remove-DbaDbTable -ErrorAction SilentlyContinue
+
+        $PSDefaultParameterValues.Remove("*-Dba*:EnableException")
+    }
+
+    Context "Auto-create table path" { # no -Table is passed here; the assertions expect the table name to match the file name
+        It "imports a Parquet file with AutoCreateTable" {
+            $result = Import-DbaParquet -Path $pathEcdc -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -AutoCreateTable
+
+            $result | Should -Not -BeNullOrEmpty
+            $result.RowsCopied | Should -BeGreaterThan 0
+            $result.Database | Should -Be "tempdb"
+            $result.Table | Should -Be "ecdc_cases"
+        }
+
+        It "imports binary parquet columns with AutoCreateTable" {
+            $result = Import-DbaParquet -Path $pathBoundaries -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -AutoCreateTable
+
+            $result | Should -Not -BeNullOrEmpty
+            $result.RowsCopied | Should -BeGreaterThan 0
+            $result.Database | Should -Be "tempdb"
+            $result.Table | Should -Be "world-administrative-boundaries"
+        }
+
+        It "creates SQL column types from Parquet schema in AutoCreateTable mode" {
+            $null = Import-DbaParquet -Path $pathEcdc -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -AutoCreateTable -NoColumnOptimize -Truncate # NOTE(review): dbo.ecdc_cases may already exist from the first test in this Context - confirm the asserted types do not depend on test order
+
+            $sql = @"
+SELECT
+    c.name AS ColumnName,
+    t.name AS TypeName
+FROM sys.columns c
+INNER JOIN sys.types t
+    ON c.user_type_id = t.user_type_id
+WHERE c.object_id = OBJECT_ID('dbo.ecdc_cases')
+    AND c.name IN ('date_rep', 'day', 'pop_data_2018')
+"@
+            $types = Invoke-DbaQuery -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Query $sql
+            ($types | Where-Object ColumnName -eq "date_rep").TypeName | Should -Be "datetime2"
+            ($types | Where-Object ColumnName -eq "day").TypeName | Should -Be "smallint"
+            ($types | Where-Object ColumnName -eq "pop_data_2018").TypeName | Should -Be "int"
+        }
+
+        AfterAll {
+            $PSDefaultParameterValues["*-Dba*:EnableException"] = $true
+            Get-DbaDbTable -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Table ecdc_cases -ErrorAction SilentlyContinue | Remove-DbaDbTable -ErrorAction SilentlyContinue
+            $PSDefaultParameterValues.Remove("*-Dba*:EnableException")
+        }
+    }
+
+    Context "Deterministic lab fixtures" { # these tests assert exact row counts and values, so they rely on the fixture files having fixed, known contents
+        It "imports mixed-type fixture rows and preserves exact values" {
+            $result = Import-DbaParquet -Path $pathMixedTypes -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -AutoCreateTable -Table mixed_types -NoColumnOptimize -NoUtf8
+
+            $result | Should -Not -BeNullOrEmpty
+            $result.RowsCopied | Should -Be 3
+
+            $data = Invoke-DbaQuery -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Query "SELECT [key], label, CAST(amount AS decimal(10,2)) AS amount, CONVERT(date, imported_on) AS imported_on FROM dbo.mixed_types ORDER BY [key]"
+            $data.Count | Should -Be 3
+            $data[0]."key" | Should -Be 10
+            $data[0].label | Should -Be "alpha"
+            $data[0].amount | Should -Be ([decimal]"12.34")
+            $data[0].imported_on | Should -Be ([datetime]"2024-05-01")
+            $data[2]."key" | Should -Be 30
+            $data[2].label | Should -Be "gamma"
+            $data[2].amount | Should -Be ([decimal]"90.12")
+            $data[2].imported_on | Should -Be ([datetime]"2024-05-03")
+        }
+
+        It "imports a selected-column projection into a pre-created table" {
+            Invoke-DbaQuery -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Query "IF OBJECT_ID('dbo.mixed_types_columns') IS NOT NULL DROP TABLE dbo.mixed_types_columns; CREATE TABLE dbo.mixed_types_columns ([key] int NULL, [label] nvarchar(20) NULL);"
+
+            $result = Import-DbaParquet -Path $pathMixedTypes -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Table mixed_types_columns -Column key, label
+
+            $result | Should -Not -BeNullOrEmpty
+            $result.RowsCopied | Should -Be 3
+
+            $metadata = Invoke-DbaQuery -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Query "SELECT COUNT(*) AS cnt FROM sys.columns WHERE object_id = OBJECT_ID('dbo.mixed_types_columns') AND name IN ('amount', 'imported_on')"
+            $metadata.cnt | Should -Be 0 # the columns that were not selected must not have been added to the target table
+
+            $summary = Invoke-DbaQuery -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Query "SELECT COUNT(*) AS cnt, MIN([key]) AS min_key, MAX([key]) AS max_key, MAX(label) AS max_label FROM dbo.mixed_types_columns"
+            $summary.cnt | Should -Be 3
+            $summary.min_key | Should -Be 10
+            $summary.max_key | Should -Be 30
+            $summary.max_label | Should -Be "gamma"
+        }
+
+        It "maps parquet columns into differently named SQL columns" {
+            Invoke-DbaQuery -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Query "IF OBJECT_ID('dbo.mixed_types_column_map') IS NOT NULL DROP TABLE dbo.mixed_types_column_map; CREATE TABLE dbo.mixed_types_column_map ([identifier] int NULL, [display_name] nvarchar(20) NULL);"
+
+            $columnMap = @{ # Parquet column name -> SQL column name, matching the table created just above
+                key   = "identifier"
+                label = "display_name"
+            }
+            $result = Import-DbaParquet -Path $pathMixedTypes -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Table mixed_types_column_map -ColumnMap $columnMap
+
+            $result | Should -Not -BeNullOrEmpty
+            $result.RowsCopied | Should -Be 3
+
+            $row = Invoke-DbaQuery -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Query "SELECT identifier, display_name FROM dbo.mixed_types_column_map WHERE identifier = 20"
+            $row.display_name | Should -Be "beta"
+        }
+
+        It "imports binary fixture bytes and preserves their lengths" {
+            $result = Import-DbaParquet -Path $pathBoundaries -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -AutoCreateTable -Table world_boundaries_exact -NoColumnOptimize -NoUtf8
+
+            $result | Should -Not -BeNullOrEmpty
+            $result.RowsCopied | Should -Be 2
+
+            $lengths = Invoke-DbaQuery -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Query "SELECT objectid, DATALENGTH(shape) AS shape_length FROM dbo.world_boundaries_exact ORDER BY objectid"
+            $lengths[0].shape_length | Should -Be 3
+            $lengths[1].shape_length | Should -Be 4
+        }
+
+        AfterAll {
+            $PSDefaultParameterValues["*-Dba*:EnableException"] = $true
+            Get-DbaDbTable -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Table mixed_types, mixed_types_columns, mixed_types_column_map, world_boundaries_exact -ErrorAction SilentlyContinue | Remove-DbaDbTable -ErrorAction SilentlyContinue
+            $PSDefaultParameterValues.Remove("*-Dba*:EnableException")
+        }
+    }
+
+    Context "Import into existing table" { # the import under test runs without -AutoCreateTable against a table prepared in BeforeAll
+        BeforeAll {
+            $PSDefaultParameterValues["*-Dba*:EnableException"] = $true
+            # First create table via auto-create, then truncate
+            $null = Import-DbaParquet -Path $pathEcdc -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -AutoCreateTable
+            $null = Invoke-DbaQuery -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Query "TRUNCATE TABLE ecdc_cases"
+            $PSDefaultParameterValues.Remove("*-Dba*:EnableException")
+        }
+
+        It "imports into a pre-existing table" {
+            $result = Import-DbaParquet -Path $pathEcdc -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Table ecdc_cases
+
+            $result | Should -Not -BeNullOrEmpty
+            $result.RowsCopied | Should -BeGreaterThan 0
+            $result.Database | Should -Be "tempdb"
+            $result.Table | Should -Be "ecdc_cases"
+        }
+
+        AfterAll {
+            $PSDefaultParameterValues["*-Dba*:EnableException"] = $true
+            Get-DbaDbTable -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Table ecdc_cases -ErrorAction SilentlyContinue | Remove-DbaDbTable -ErrorAction SilentlyContinue
+            $PSDefaultParameterValues.Remove("*-Dba*:EnableException")
+        }
+    }
+
+    Context "Truncate path" { # BeforeAll loads the file once so the test can prove a second, truncating import does not double the rows
+        BeforeAll {
+            $PSDefaultParameterValues["*-Dba*:EnableException"] = $true
+            $null = Import-DbaParquet -Path $pathEcdc -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -AutoCreateTable
+            $PSDefaultParameterValues.Remove("*-Dba*:EnableException")
+        }
+
+        It "truncates and re-imports correctly" {
+            $result = Import-DbaParquet -Path $pathEcdc -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Table ecdc_cases -Truncate
+
+            $result | Should -Not -BeNullOrEmpty
+            $result.RowsCopied | Should -BeGreaterThan 0
+
+            # Verify row count equals single import, not doubled
+            $count = (Invoke-DbaQuery -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Query "SELECT COUNT(*) AS cnt FROM ecdc_cases").cnt
+            $count | Should -Be $result.RowsCopied
+        }
+
+        AfterAll {
+            $PSDefaultParameterValues["*-Dba*:EnableException"] = $true
+            Get-DbaDbTable -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Table ecdc_cases -ErrorAction SilentlyContinue | Remove-DbaDbTable -ErrorAction SilentlyContinue
+            $PSDefaultParameterValues.Remove("*-Dba*:EnableException")
+        }
+    }
+
+    Context "Static columns" {
+        It "adds static columns to imported data" {
+            $result = Import-DbaParquet -Path $pathEcdc -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -AutoCreateTable -Table ecdc_cases_static -StaticColumns @{ ImportSource = "test" }
+
+            $result | Should -Not -BeNullOrEmpty
+            $result.RowsCopied | Should -BeGreaterThan 0
+
+            $data = Invoke-DbaQuery -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Query "SELECT TOP 1 ImportSource FROM ecdc_cases_static"
+            $data.ImportSource | Should -Be "test" # the key of the -StaticColumns hashtable is expected as a column, its value as the row content
+        }
+
+        AfterAll {
+            $PSDefaultParameterValues["*-Dba*:EnableException"] = $true
+            Get-DbaDbTable -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Table ecdc_cases_static -ErrorAction SilentlyContinue | Remove-DbaDbTable -ErrorAction SilentlyContinue
+            $PSDefaultParameterValues.Remove("*-Dba*:EnableException")
+        }
+    }
+
+    Context "Ordinal mapping" { # smoke test only: checks that rows are copied, not how the columns were mapped
+        It "imports with KeepOrdinalOrder" {
+            $result = Import-DbaParquet -Path $pathEcdc -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -AutoCreateTable -Table ecdc_cases_ordinal -KeepOrdinalOrder
+
+            $result | Should -Not -BeNullOrEmpty
+            $result.RowsCopied | Should -BeGreaterThan 0
+        }
+
+        AfterAll {
+            $PSDefaultParameterValues["*-Dba*:EnableException"] = $true
+            Get-DbaDbTable -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Table ecdc_cases_ordinal -ErrorAction SilentlyContinue | Remove-DbaDbTable -ErrorAction SilentlyContinue
+            $PSDefaultParameterValues.Remove("*-Dba*:EnableException")
+        }
+    }
+
+    Context "Non-transaction path" { # smoke test only: checks that rows are copied when -NoTransaction is used
+        It "imports with NoTransaction" {
+            $result = Import-DbaParquet -Path $pathEcdc -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -AutoCreateTable -Table ecdc_cases_notxn -NoTransaction
+
+            $result | Should -Not -BeNullOrEmpty
+            $result.RowsCopied | Should -BeGreaterThan 0
+        }
+
+        AfterAll {
+            $PSDefaultParameterValues["*-Dba*:EnableException"] = $true
+            Get-DbaDbTable -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Table ecdc_cases_notxn -ErrorAction SilentlyContinue | Remove-DbaDbTable -ErrorAction SilentlyContinue
+            $PSDefaultParameterValues.Remove("*-Dba*:EnableException")
+        }
+    }
+
+    Context "UseFileNameForSchema" {
+        BeforeAll {
+            $PSDefaultParameterValues["*-Dba*:EnableException"] = $true
+            $schemaTestFile = Join-Path $TestDrive "staging.ecdc_parquet_test.parquet" # file name follows the <schema>.<table>.parquet shape that the test below asserts on
+            Copy-Item $pathEcdc $schemaTestFile
+            # Create the staging schema if it doesn't exist
+            Invoke-DbaQuery -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Query "IF NOT EXISTS (SELECT 1 FROM sys.schemas WHERE name = 'staging') EXEC('CREATE SCHEMA staging')"
+            $PSDefaultParameterValues.Remove("*-Dba*:EnableException")
+        }
+
+        It "derives schema from filename" {
+            $result = Import-DbaParquet -Path $schemaTestFile -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -AutoCreateTable -UseFileNameForSchema
+
+            $result | Should -Not -BeNullOrEmpty
+            $result.Schema | Should -Be "staging"
+            $result.Table | Should -Be "ecdc_parquet_test"
+            $result.RowsCopied | Should -BeGreaterThan 0
+        }
+
+        AfterAll {
+            $PSDefaultParameterValues["*-Dba*:EnableException"] = $true
+            Invoke-DbaQuery -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Query "IF OBJECT_ID('staging.ecdc_parquet_test') IS NOT NULL DROP TABLE staging.ecdc_parquet_test" -ErrorAction SilentlyContinue # only the table is dropped; the staging schema itself is left in place
+            $PSDefaultParameterValues.Remove("*-Dba*:EnableException")
+        }
+    }
+
+    Context "NoUtf8 behavior" { # NOTE(review): the first test expects a UTF8 collation on the created column - confirm the target instance version supports UTF-8 collations
+        It "creates UTF-8 varchar columns by default" {
+            $splatImport = @{
+                Path             = $pathEcdc
+                SqlInstance      = $TestConfig.InstanceMulti1
+                Database         = "tempdb"
+                AutoCreateTable  = $true
+                Table            = "ecdc_cases_utf8"
+                NoColumnOptimize = $true
+            }
+            $result = Import-DbaParquet @splatImport
+
+            $result | Should -Not -BeNullOrEmpty
+            $result.RowsCopied | Should -BeGreaterThan 0
+
+            $utf8ColumnSql = @"
+SELECT TOP 1
+    t.name AS TypeName,
+    c.collation_name AS CollationName
+FROM sys.columns c
+INNER JOIN sys.types t
+    ON c.user_type_id = t.user_type_id
+WHERE c.object_id = OBJECT_ID('dbo.ecdc_cases_utf8')
+    AND t.name IN ('varchar', 'nvarchar')
+ORDER BY c.column_id
+"@
+            $utf8Column = Invoke-DbaQuery -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Query $utf8ColumnSql
+            $utf8Column.TypeName | Should -Be "varchar"
+            $utf8Column.CollationName | Should -Match "UTF8"
+        }
+
+        It "creates nvarchar columns when NoUtf8 is specified" {
+            $splatImport = @{
+                Path             = $pathEcdc
+                SqlInstance      = $TestConfig.InstanceMulti1
+                Database         = "tempdb"
+                AutoCreateTable  = $true
+                Table            = "ecdc_cases_utf16"
+                NoColumnOptimize = $true
+                NoUtf8           = $true
+            }
+            $result = Import-DbaParquet @splatImport
+
+            $result | Should -Not -BeNullOrEmpty
+            $result.RowsCopied | Should -BeGreaterThan 0
+
+            $utf16ColumnSql = @"
+SELECT TOP 1
+    t.name AS TypeName,
+    c.collation_name AS CollationName
+FROM sys.columns c
+INNER JOIN sys.types t
+    ON c.user_type_id = t.user_type_id
+WHERE c.object_id = OBJECT_ID('dbo.ecdc_cases_utf16')
+    AND t.name IN ('varchar', 'nvarchar')
+ORDER BY c.column_id
+"@
+            $utf16Column = Invoke-DbaQuery -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Query $utf16ColumnSql
+            $utf16Column.TypeName | Should -Be "nvarchar"
+            $utf16Column.CollationName | Should -Not -Match "UTF8"
+        }
+
+        AfterAll {
+            $PSDefaultParameterValues["*-Dba*:EnableException"] = $true
+            Get-DbaDbTable -SqlInstance $TestConfig.InstanceMulti1 -Database tempdb -Table ecdc_cases_utf8, ecdc_cases_utf16 -ErrorAction SilentlyContinue | Remove-DbaDbTable -ErrorAction SilentlyContinue
+            $PSDefaultParameterValues.Remove("*-Dba*:EnableException")
+        }
+    }
+
+}
diff --git a/tests/Install-DbaParquet.Tests.ps1 b/tests/Install-DbaParquet.Tests.ps1
new file mode 100644
index 000000000000..2178a2c986bd
--- /dev/null
+++
b/tests/Install-DbaParquet.Tests.ps1
@@ -0,0 +1,61 @@
+#Requires -Module @{ ModuleName="Pester"; ModuleVersion="5.0" }
+param(
+    $ModuleName = "dbatools",
+    $CommandName = "Install-DbaParquet",
+    $PSDefaultParameterValues = $TestConfig.Defaults
+)
+
+# Unit tests: inspect the command signature only, nothing is installed.
+Describe $CommandName -Tag UnitTests {
+    Context "Parameter validation" {
+        It "Should have the expected parameters" {
+            $requiredNames = "Path", "Version", "LocalFile", "Force", "EnableException"
+            $actualNames = (Get-Command $CommandName).Parameters.Values.Name | Where-Object { @("WhatIf", "Confirm") -notcontains $PSItem }
+
+            # Passes when every required name is present; additional parameters are tolerated.
+            $missingNames = $requiredNames | Where-Object { $actualNames -notcontains $PSItem }
+            $missingNames | Should -BeNullOrEmpty
+        }
+    }
+}
+
+# Integration tests: run a real installation into the Pester TestDrive.
+Describe $CommandName -Tag IntegrationTests {
+    BeforeAll {
+        # Install-DbaParquet stores a supplied -Path in this setting, so keep the old value for AfterAll.
+        $script:savedParquetPath = Get-DbatoolsConfigValue -FullName "Path.DbatoolsParquet"
+    }
+
+    AfterAll {
+        Set-DbatoolsConfig -FullName "Path.DbatoolsParquet" -Value $script:savedParquetPath
+    }
+
+    Context "NuGet installation" {
+        It "installs Parquet.NET and managed dependencies to a custom path" {
+            $targetFolder = Join-Path -Path $TestDrive -ChildPath "parquet"
+            $splatInstall = @{
+                Path            = $targetFolder
+                Force           = $true
+                EnableException = $true
+            }
+
+            $installation = Install-DbaParquet @splatInstall
+
+            $installation | Should -Not -BeNullOrEmpty
+            $installation.Installed | Should -BeTrue
+            @("Parquet.dll", "Parquet.Net.dll") | Should -Contain $installation.Name
+            Test-Path -Path $installation.Path | Should -BeTrue
+
+            $dependencyFiles = @(
+                "CommunityToolkit.HighPerformance.dll",
+                "K4os.Compression.LZ4.dll",
+                "Snappier.dll",
+                "ZstdSharp.dll"
+            )
+            foreach ($dependencyFile in $dependencyFiles) {
+                $dependencyPath = Join-Path -Path $targetFolder -ChildPath $dependencyFile
+                Test-Path -Path $dependencyPath | Should -BeTrue
+            }
+        }
+    }
+}
diff --git a/tests/pester.groups.ps1 b/tests/pester.groups.ps1
index 3127fcaf5d31..6855bc4f9ea4 100644
--- a/tests/pester.groups.ps1
+++ b/tests/pester.groups.ps1
@@ -206,6 +206,7 @@ $TestsRunGroups = @{
         'Get-DbaWaitStatistic',
         'Get-DbaWindowsLog',
         'Import-DbaCsv',
+        'Import-DbaParquet',
         'Import-DbaPfDataCollectorSetTemplate',
         'Import-DbaRegServer',
         'Install-DbaDarlingData',