@@ -264,7 +264,7 @@ module Expand =
264264 if length = 0 then
265265 segmentPointers.Free processor
266266
267- None
267+ length , None
268268 else
269269 // expand
270270 let leftMatrixValues , rightMatrixValues , columns , rows =
@@ -281,26 +281,28 @@ module Expand =
281281 columns.Free processor
282282 rows.Free processor
283283
284- mulResult
285- |> Option.bind
286- ( fun ( resultValues , resultColumns , resultRows ) ->
287- // sort
288- let sortedValues , sortedColumns , sortedRows =
289- sort processor resultValues resultColumns resultRows
284+ let result =
285+ mulResult
286+ |> Option.bind
287+ ( fun ( resultValues , resultColumns , resultRows ) ->
288+ // sort
289+ let sortedValues , sortedColumns , sortedRows =
290+ sort processor resultValues resultColumns resultRows
290291
291- resultValues.Free processor
292- resultColumns.Free processor
293- resultRows.Free processor
292+ resultValues.Free processor
293+ resultColumns.Free processor
294+ resultRows.Free processor
294295
295- // addition
296- let reduceResult =
297- reduce processor allocationMode sortedValues sortedColumns sortedRows
296+ // addition
297+ let reduceResult =
298+ reduce processor allocationMode sortedValues sortedColumns sortedRows
298299
299- sortedValues.Free processor
300- sortedColumns.Free processor
301- sortedRows.Free processor
300+ sortedValues.Free processor
301+ sortedColumns.Free processor
302+ sortedRows.Free processor
302303
303- reduceResult)
304+ reduceResult)
305+ length, result
304306
305307 let runOneStep opAdd opMul ( clContext : ClContext ) workGroupSize =
306308
@@ -323,7 +325,7 @@ module Expand =
323325 Columns = leftMatrix.Columns
324326 Values = leftMatrix.Values }
325327
326- let result =
328+ let _ , result =
327329 runCOO processor allocationMode rightMatrixRowsNNZ rightMatrix leftMatrixCOO
328330
329331 rows.Free processor
@@ -343,7 +345,7 @@ module Expand =
343345 let gather = Gather.run clContext workGroupSize
344346
345347 let upperBound =
346- ClArray.upperBoundAndValue clContext workGroupSize
348+ ClArray.upperBound clContext workGroupSize
347349
348350 let set = ClArray.set clContext workGroupSize
349351
@@ -378,9 +380,13 @@ module Expand =
378380 clContext.CreateClCell( workOffset + maxAllocSize: int)
379381
380382 // find largest row that fit into maxAllocSize
381- let endRow , value =
383+ let upperBound =
382384 ( upperBound currentBound) .ToHostAndFree processor
383385
386+ let endRow = upperBound - 2
387+
388+ currentBound.Free processor
389+
384390 // TODO(handle largest rows)
385391 // (we can split row, multiply and merge them but merge path needed)
386392 if endRow = beginRow then
@@ -389,12 +395,15 @@ module Expand =
389395 // extract matrix TODO(Transfer overhead)
390396 let subMatrix =
391397 subMatrix beginRow ( endRow - beginRow) leftMatrix
398+
392399 // compute sub result
393- let result = runCOO subMatrix
400+ let length , result = runCOO subMatrix
401+ // increase workOffset according to previous expand
402+ let workOffset = workOffset + length
394403
395404 match result with
396- | Some result -> helper endRow value <| result :: previousResult
397- | None -> helper endRow value previousResult
405+ | Some result -> helper endRow workOffset <| result :: previousResult
406+ | None -> helper endRow workOffset previousResult
398407 else
399408 previousResult
400409
0 commit comments