Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions .vscode/launch.json
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these changes were made to suit your local working environment and should not be pushed to main. Please revert them.

Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,18 @@
"program": "generateNetwork.py",
"console": "integratedTerminal",
"cwd": "${workspaceFolder}/DeeployTest",
"env": {
"PYTHONPATH": "${workspaceFolder}"
},
"subProcess": true,
"justMyCode": false,
"args":
"-p${input:platformUntiled} -t${input:model} ${input:additionalArgsUntiled}"
"args": [
"-p",
"${input:platformUntiled}",
"-t",
"${input:model}",
"${input:additionalArgsUntiled}"
]
},
{
"name": "Deeploy Generate Tiled",
Expand All @@ -22,9 +31,18 @@
"program": "testMVP.py",
"console": "integratedTerminal",
"cwd": "${workspaceFolder}/DeeployTest",
"env": {
"PYTHONPATH": "${workspaceFolder}"
},
"subProcess": true,
"justMyCode": false,
"args":
"-p${input:platformTiled} -t${input:model} ${input:additionalArgsTiled}"
"args": [
"-p",
"${input:platformTiled}",
"-t",
"${input:model}",
"${input:additionalArgsTiled}"
]
}
],
"inputs": [
Expand Down Expand Up @@ -85,7 +103,7 @@
"id": "additionalArgsTiled",
"type": "promptString",
"description": "Additional Arguments",
"default": "-v --doublebuffer"
"default": "--doublebuffer"
}
]
}
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only changes to this file are blank lines. Please revert them completely.

Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ def apply(self,

for buffer in inputs + transients:
assert buffer._live == True, f"Tried to deallocate already dead buffer {buffer.name}"

buffer._live = False
# Don't deallocate if it's an alias of a live buffer
if not buffer.has_live_aliases(ctxt):
Expand Down Expand Up @@ -363,7 +364,6 @@ def apply(self,

for buffer in inputs + transients:
assert buffer._live == True, f"Tried to deallocate already dead buffer {buffer.name}"

memoryLevel = "None" if not hasattr(buffer, "_memoryLevel") else buffer._memoryLevel
if memoryLevel not in ctxt._dynamicSize:
ctxt._dynamicSize[memoryLevel] = 0
Expand Down
84 changes: 60 additions & 24 deletions Deeploy/DeeployTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,37 @@ class CodeGenVerbosity:
_backendPostParsingFilename = 'backend_post_parsing'
_backendPostBindingFilename = 'backend_post_binding'


def _deeployTypeToNpType(ty: Type[BaseType]):
    """Map a Deeploy type class to the matching NumPy dtype.

    Pointer types that expose ``referencedType`` are resolved through the type they point to; every other
    type is inspected directly. Integer immediates map to ``int<width>`` or ``uint<width>`` depending on
    their signedness. Float immediates map to ``float16``/``float32``/``float64`` for widths 16/32/64 and
    to ``float32`` for any other width.

    Returns
    -------
    np.dtype or None
        The NumPy dtype, or ``None`` if the type is neither an integer nor a float immediate (nor a
        pointer to one).

    """

    def _integerDtype(intTy: Type[IntegerImmediate]):
        prefix = "int" if intTy.signed else "uint"
        return np.dtype(getattr(np, prefix + str(intTy.typeWidth)))

    def _floatDtype(floatTy: Type[FloatImmediate]):
        for width, npFloat in ((16, np.float16), (32, np.float32), (64, np.float64)):
            if floatTy.typeWidth == width:
                return np.dtype(npFloat)
        # Widths without a dedicated entry above fall back to single precision.
        return np.dtype(np.float32)

    # A pointer with a known referenced type is judged solely by that referenced type.
    isTypedPointer = issubclass(ty, Pointer) and hasattr(ty, "referencedType")
    scalarTy = ty.referencedType if isTypedPointer else ty

    if issubclass(scalarTy, IntegerImmediate):
        return _integerDtype(scalarTy)
    if issubclass(scalarTy, FloatImmediate):
        return _floatDtype(scalarTy)
    return None


_ctxtExtension = '.pkl'
_graphExtension = '.onnx'
_dataExtension = '.data'
Expand Down Expand Up @@ -415,7 +446,12 @@ def __eq__(self, other):
def _valueString(self) -> str:
values = list(self.values.reshape(-1))
if self._type.typeName == 'float32_t*':
strValues = [f'{value}f' for value in values]
strValues = []
for value in values:
literal = f"{float(value):.9g}"
if "e" not in literal and "." not in literal:
literal += ".0"
strValues.append(literal + "f")
elif self._type.typeName == 'int8_t*':
strValues = [f'{int(value)}' for value in values]
else:
Expand Down Expand Up @@ -977,8 +1013,6 @@ def hoistConstant(self,
Returns the name of the newly registered ConstantBuffer

"""
assert len(constant.outputs) <= 1, f"Constant {constant.name} has more than one output"

name = name if name is not None else constant.name

# LMACAN: The shape needs to be copied into a tuple for pickling to work. Don't ask me why..
Expand Down Expand Up @@ -2027,25 +2061,7 @@ def parse(self, ctxt: NetworkContext, default_channels_first: bool) -> Tuple[Net
return ctxt, False

def _broadcastToNpType(self, ty: Type[BaseType]):

def _broadcastInteger(ty: Type[IntegerImmediate]):
if ty.signed:
return np.dtype(getattr(np, "int" + str(ty.typeWidth)))
else:
return np.dtype(getattr(np, "uint" + str(ty.typeWidth)))

def _broadcastFloat(ty: Type[FloatImmediate]):
return np.dtype(getattr(np, "double"))

if issubclass(ty, Pointer) and hasattr(ty, "referencedType"):
if issubclass(ty.referencedType, IntegerImmediate):
return _broadcastInteger(ty.referencedType)
elif issubclass(ty, IntegerImmediate):
return _broadcastInteger(ty)
elif issubclass(ty, FloatImmediate):
return _broadcastFloat(ty)

return None
return _deeployTypeToNpType(ty)

def typeCheck(self, ctxt: NetworkContext) -> Tuple[NetworkContext, bool]:
"""Invokes the mapper's typeCheck method
Expand Down Expand Up @@ -2106,8 +2122,9 @@ def bind(self, ctxt: NetworkContext) -> Tuple[NetworkContext, bool]:
elif ctxt.is_global(node.name):
npType = self._broadcastToNpType(ctxt.globalObjects[node.name]._type)
if isinstance(ctxt.globalObjects[node.name], ConstantBuffer):
if isinstance(node, gs.Constant):
if isinstance(node, gs.Constant) and npType is not None:
node.values = node.values.astype(npType)
node.export_dtype = npType
else:
node.shape = ctxt.globalObjects[node.name].shape
if npType is not None:
Expand Down Expand Up @@ -2856,7 +2873,17 @@ def generateInferenceInitializationCode(self) -> str:

name = node.name
node.name = self.ctxt._mangle(node.name)
callStack += node.init()

if ("TILING_CODEGEN" not in node.name and isinstance(node, VariableBuffer) and hasattr(node, "_type")
and issubclass(node._type, Pointer)):
# Local inference buffers are late-bound by the generated layer code. Initializing them to NULL keeps
# clang from flagging false-positive uninitialized reads on paths where the assignment is emitted in a
# separate closure, and marking them unused avoids noise for scratch buffers that are reserved
# generically but optimized away for a specific layer instance.
typeName = node._instance.typeName if hasattr(node, "_instance") else node._type.typeName
callStack += f"{typeName} {node.name} __attribute__((unused)) = NULL;\n"
else:
callStack += node.init()
node.name = name

return callStack
Expand Down Expand Up @@ -3121,6 +3148,15 @@ def _exportGraph(self, folderPath, fileName):
# VJUNG: ONNX-Graphsurgeon needs tensors to be in their export types
constTensors = [tensor for tensor in self.graph.tensors().values() if isinstance(tensor, gs.Constant)]
for tensor in constTensors:
if tensor.name in self.ctxt.globalObjects:
ctxtTensor = self.ctxt.globalObjects[tensor.name]
if isinstance(ctxtTensor, ConstantBuffer) and hasattr(ctxtTensor, "_type"):
npType = _deeployTypeToNpType(ctxtTensor._type)
if npType is not None:
tensor.values = tensor.values.astype(npType)
tensor.export_dtype = npType
continue

if tensor.dtype != tensor.export_dtype:
tensor.values = tensor.values.astype(tensor.export_dtype)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ def apply(self, graph: gs.Graph) -> Tuple[gs.Graph]:
engine = self.engineMapper.mapNodeToEngine(node, graph)
if engine is not None:
node.attrs["engine"] = engine.name
if hasattr(engine, "n_cores"):
node.attrs["n_cores"] = engine.n_cores
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a good approach IMO. The number of cores is not a node attribute (conceptually, node attributes should mirror the ones that exist in the real ONNX nodes). Besides, passing the number of cores should already be solved: the value should already exist in the operator representation; it is passed here. If the value does not get passed in your case, we should identify the root cause.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking into it a little more, maybe you need to add NeurekaEngine to the list here.

return graph


Expand Down
42 changes: 33 additions & 9 deletions Deeploy/Targets/Generic/Bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@
ConvTransposeTemplate, DebugPrintTemplate, DequantTemplate, DummyTemplate, DWConvTemplate, FloatAddTemplate, \
FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, FloatGELUTemplate, FloatGemmTemplate, \
FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, \
FloatPowTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, \
GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, \
MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \
FloatPowTemplate, FloatReduceLogSumExpTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, \
FloatSqrtTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, \
MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \
RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, TransposeTemplate, \
iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate
from Deeploy.Targets.Generic.TypeCheckers import AddChecker, BatchNormChecker, ConcatChecker, ConvChecker, \
DebugPrintChecker, DequantChecker, DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, \
LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceMeanChecker, \
ReduceSumChecker, ReluChecker, RequantShiftChecker, ReshapeChecker, RQIntegerDivChecker, SliceChecker, \
SoftmaxChecker, TransposeChecker
LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceLogSumExpChecker, \
ReduceMeanChecker, ReduceSumChecker, ReluChecker, RequantShiftChecker, ReshapeChecker, RQIntegerDivChecker, \
SliceChecker, SoftmaxChecker, TransposeChecker

BasicTransformer = CodeTransformation([ArgumentStructGeneration(), MemoryManagementGeneration(), FutureGeneration()])

Expand Down Expand Up @@ -227,6 +227,11 @@
BasicTransformer) for type in SignedIntegerDataTypes
]

# ReduceLogSumExp binding: a single float32 input and a single float32 output, rendered with the
# float reference template and the BasicTransformer code transformation.
BasicReduceLogSumExpBindings = [
NodeBinding(ReduceLogSumExpChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
FloatReduceLogSumExpTemplate.referenceTemplate, BasicTransformer)
]

BasicReluBinding = NodeBinding(ReluChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
FloatReluTemplate.referenceTemplate, BasicTransformer)

Expand Down Expand Up @@ -286,6 +291,9 @@
# Concat bindings: one binding per integer data type plus one float32 binding. Each takes two inputs
# and one output of the same pointer type and uses the shared Concat reference template.
BasicConcatBindings = [
NodeBinding(ConcatChecker([PointerClass(type), PointerClass(type)], [PointerClass(type)]),
ConcatTemplate.referenceTemplate, BasicTransformer) for type in IntegerDataTypes
] + [
NodeBinding(ConcatChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]),
ConcatTemplate.referenceTemplate, BasicTransformer)
]

BasicQuantBindings = [
Expand All @@ -312,18 +320,34 @@
for type in FloatDataTypes
]

BasicConvTransposeBindings = [
BasicConvTranspose1DBindings = [
NodeBinding(
ConvChecker(
[PointerClass(type), PointerClass(type), PointerClass(type)], # input, weight, bias
[PointerClass(type)]),
ConvTransposeTemplate.reference1DTemplate,
BasicTransformer) for type in FloatDataTypes
] + [
NodeBinding(
ConvChecker(
[PointerClass(type), PointerClass(type)], # input, weight
[PointerClass(type)]),
ConvTransposeTemplate.reference1DTemplate,
BasicTransformer) for type in FloatDataTypes
]

BasicConvTranspose2DBindings = [
NodeBinding(
ConvChecker(
[PointerClass(type), PointerClass(type), PointerClass(type)], # input, weight, bias
[PointerClass(type)]),
ConvTransposeTemplate.referenceTemplate,
ConvTransposeTemplate.reference2DTemplate,
BasicTransformer) for type in FloatDataTypes
] + [
NodeBinding(
ConvChecker(
[PointerClass(type), PointerClass(type)], # input, weight
[PointerClass(type)]),
ConvTransposeTemplate.referenceTemplate,
ConvTransposeTemplate.reference2DTemplate,
BasicTransformer) for type in FloatDataTypes
]
27 changes: 27 additions & 0 deletions Deeploy/Targets/Generic/Layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,12 @@ def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorReprese
if inputShapes[1] == () or inputShapes[1] == []:
inputShapes[1] = (1,)

# Scalars and singletons should broadcast to the tensor operand,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why?

# not shrink the tensor shape to (1,).
if tuple(inputShapes[1]) == (1,):
inputShapes[1] = inputShapes[0]
return (inputShapes, outputShapes)

if len(inputShapes[0]) > len(inputShapes[1]):
inputShapes[1] = inputShapes[0]
else:
Expand Down Expand Up @@ -438,6 +444,27 @@ def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorReprese
return (inputShapes, outputShapes)


class ReduceLogSumExpLayer(ONNXLayer):
    """Layer for the ReduceLogSumExp operator.

    Apart from the constructor, which only forwards to ``ONNXLayer``, this class overrides shape
    computation.
    """

    def __init__(self, maps: List[NodeMapper]):
        super().__init__(maps)

    def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorRepresentation,
                      channels_first) -> Tuple[Shape, Shape]:
        """Compute the output shape from the first input shape and the reduction axis.

        Only the first entry of ``operatorRepresentation['axes']`` is used. With ``keepdims`` set, the
        reduced dimension is kept with size 1; otherwise it is removed, and a fully reduced tensor is
        reported with shape ``[1]``.

        Parameters
        ----------
        inputShapes : Shape
            Shapes of the layer inputs; only ``inputShapes[0]`` is read and it is not modified.
        outputShapes : Shape
            Unused; the output shape is derived from the input shape.
        operatorRepresentation
            Must provide ``'axes'`` (indexable) and ``'keepdims'``.
        channels_first
            Unused.

        Returns
        -------
        Tuple[Shape, Shape]
            The unchanged input shapes and a one-element list holding the computed output shape.

        """
        inputShape = list(copy.deepcopy(inputShapes[0]))

        axis = operatorRepresentation['axes'][0]
        # Negative axes count from the last dimension. Normalise them first: slicing with a negative
        # axis below would otherwise build a wrong shape (for axis == -1, ``inputShape[axis + 1:]``
        # is the whole list).
        if axis < 0:
            axis += len(inputShape)

        if operatorRepresentation['keepdims']:
            outputShape = inputShape
            outputShape[axis] = 1
        else:
            outputShape = inputShape[:axis] + inputShape[axis + 1:]
            # A reduction over the only dimension is represented as a single-element tensor.
            if len(outputShape) == 0:
                outputShape = [1]

        return (inputShapes, [outputShape])


class ReluLayer(ONNXLayer):

def __init__(self, maps: List[NodeMapper]):
Expand Down
Loading