diff --git a/.gitignore b/.gitignore index bd13656..1eae22b 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,4 @@ tools/vocab_builder/builder.dSYM/Contents/Resources/Relocations/aarch64/builder. tools/vocab_builder/builder.dSYM/Contents/Info.plist tools/vocab_builder/builder tools/vocab_builder/builder.dSYM +out.dot diff --git a/README.md b/README.md index 0c49284..9b62f82 100644 --- a/README.md +++ b/README.md @@ -215,6 +215,43 @@ by h g wells i the time traveller for so it will be convenient to speak of him w This model was trained for 8 epochs using the full text of The Time Machine novel. +## handwritten_recognition + +To verify some functions more quickly, I have introduced a handwritten digit recognition program. + +``` +./handwritten_recognition +images magic : 2051 +label magic : 2049 +lables_num : 60000 +data loaded. +Actions: +... +evaluating : [10000/10000] correct : 9501 +epoch : 9 [50000/50000] loss : 0.150985 +evaluating : [10000/10000] correct : 9493 +``` + +### graphviz supported + +You can add a line of code like this to the program to output an out.dot file that records the tensor computation topology. For example, in mnist.cpp: + +``` +printAllActions(); +printDotGraph(); // here +allocMemAndInitTensors(); +``` + +If you have Graphviz installed, you can use the following command to convert the out.dot file into a PNG image: + +``` +dot -Tpng out.dot -o out.png +``` + +Here's an example from my side where a PNG file is generated as output. + +![alt text](handwritten_recognition_topo.png) + ## legacy version [v1](https://github.com/freelw/cpp-transformer/tree/v1_freeze_20250529) diff --git a/graph/actions.cpp b/graph/actions.cpp index 30b1496..39338d8 100644 --- a/graph/actions.cpp +++ b/graph/actions.cpp @@ -4,10 +4,12 @@ #include #include #include +#include #include "backends/backend_ops.h" #include "optimizers/parameter.h" extern bool g_training; +int g_action_id_counter = 0; bool Action::executed_once() const { return exec_times > 0; @@ -21,6 +23,20 @@ int Action::get_exec_times() const { return exec_times; } +std::string Action::get_dot_string() const { + std::ostringstream oss; + if (lhs) { + oss << "Tensor_" << lhs->get_id() << " -> " << "Action_" << action_id << ";" << std::endl; + } + if (rhs) { + oss << "Tensor_" << rhs->get_id() << " -> " << "Action_" << action_id << ";" << std::endl; + } + if (res) { + oss << "Action_" << action_id << " -> " << "Tensor_" << res->get_id() << ";" << std::endl; + } + return oss.str(); +} + std::ostream& operator<<(std::ostream& output, const Action& a) { output << a.to_string(); return output; @@ -150,6 +166,14 @@ std::string AddEqAction::to_string() const { return oss.str(); } +std::string AddEqAction::get_dot_string() const { + std::ostringstream oss; + assert(lhs != nullptr); + oss << "Action_" << action_id << " -> Tensor_" << lhs->get_id() << ";" << std::endl; + oss << "Tensor_" << rhs->get_id() << " -> Action_" << action_id << ";" << std::endl; + return oss.str(); +} + void ExpandAddAction::execute() { assert(lhs != nullptr); assert(rhs != nullptr); @@ -431,25 +455,19 @@ void FillWeightAction::execute() { assert(lhs != nullptr); if (init_type == "gauss") { g_backend_ops->init_weight_gauss(lhs, mean, sigma); - } - else if (init_type == "uniform") { + } else if (init_type == "uniform") { g_backend_ops->init_weight_uniform(lhs, sigma); - } - else if (init_type == "xavier") { + } else if (init_type == "xavier") { assert(false); // g_backend_ops->xavier(lhs); - } - else if (init_type == "kaiming") { + } else if (init_type == "kaiming") { assert(false); // g_backend_ops->kaiming(lhs); - } - else if (init_type == "dbg") { + } else if (init_type == "dbg") { g_backend_ops->init_weight_for_dbg(lhs, sigma); - } - else if (init_type == "fill") { + } else if (init_type == "fill") { g_backend_ops->fill(lhs, sigma); - } - else { + } else { std::cerr << "Error: Unknown initialization type: " << init_type << std::endl; abort(); } @@ -473,6 +491,13 @@ std::string InitWeightAction::to_string() const { return oss.str(); } +std::string InitWeightAction::get_dot_string() const { + std::ostringstream oss; + assert(lhs != nullptr); + oss << "Action_" << action_id << " -> Tensor_" << lhs->get_id() << ";" << std::endl; + return oss.str(); +} + void BoundaryAction::execute() { // Do nothing } @@ -829,6 +854,13 @@ std::string ClearAction::to_string() const { return oss.str(); } +std::string ClearAction::get_dot_string() const { + assert(lhs != nullptr); + std::ostringstream oss; + oss << "Action_" << action_id << " -> Tensor_" << lhs->get_id() << ";" << std::endl; + return oss.str(); +} + std::vector g_actions; std::vector getOnceActions() { @@ -953,6 +985,51 @@ void printAllActions() { } } +void printDotGraph() { + // save in out.dot + + std::ofstream out("out.dot"); + out << "digraph G {" << std::endl; + + for (Tensor* tensor : g_tensors) { + out << "Tensor_" << tensor->get_id() << " [shape=\"ellipse\" label=\"" << tensor->get_meta_info() << "\"];" << std::endl; + } + + for (Tensor* tensor_view : g_tensor_views) { + out << "Tensor_" << tensor_view->get_id() << " [shape=\"ellipse\" color=\"blue\" label=\"" << tensor_view->get_meta_info() << "\"];" << std::endl; + } + + for (Tensor* c_tensor : g_c_tensors) { + out << "Tensor_" << c_tensor->get_id() << " [shape=\"ellipse\" color=\"green\" label=\"" << c_tensor->get_meta_info() << "\"];" << std::endl; + } + + for (Tensor* grad_tensor : g_grad_tensors) { + out << "Tensor_" << grad_tensor->get_id() << " [shape=\"ellipse\" color=\"yellow\" label=\"" << grad_tensor->get_meta_info() << "\"];" << std::endl; + } + + for (Action* action : g_actions) { + out << "Action_" << action->get_id() << " [shape=\"box\" label=\"" << action->get_name() << "\"];" << std::endl; + } + + for (Tensor* tensor_view : g_tensor_views) { + // build edge + auto parent = tensor_view->get_parent(); + if (parent != nullptr) { + out << "Tensor_" << parent->get_id() << " -> Tensor_" << tensor_view->get_id() << ";" << std::endl; + } + } + + for (Action* action : g_actions) { + std::string dot_string = action->get_dot_string(); + if (dot_string.empty()) { + continue; // skip actions that do not have a dot string + } + out << dot_string; + } + + out << "}" << std::endl; +} + void freeAllActions() { for (Action* action : g_actions) { delete action; diff --git a/graph/actions.h b/graph/actions.h index 417ebcd..7940b05 100644 --- a/graph/actions.h +++ b/graph/actions.h @@ -7,13 +7,22 @@ class Parameter; +extern int g_action_id_counter; + class Action { public: Action(Tensor* _lhs, const Tensor* _rhs, Tensor* _res) - : lhs(_lhs), rhs(_rhs), res(_res), exec_times(0) { + : lhs(_lhs), rhs(_rhs), res(_res), exec_times(0), + action_id(g_action_id_counter++) { } virtual ~Action() = default; + int get_id() const { + return action_id; + } virtual void execute() = 0; + virtual std::string get_name() const { + return "Action"; + } virtual std::string to_string() const { return "Action not implemented"; } @@ -32,6 +41,7 @@ class Action { virtual bool is_init_weight() const { return false; } + virtual std::string get_dot_string() const; bool executed_once() const; void increase_exec_times(); int get_exec_times() const; @@ -41,12 +51,16 @@ class Action { const Tensor* rhs; Tensor* res; int exec_times; + int action_id; }; class AddAction : public Action { public: AddAction(Tensor* _lhs, const Tensor* _rhs, Tensor* _res); void execute() override; + std::string get_name() const override { + return "AddAction"; + } std::string to_string() const override; private: Tensor* lhs_shape; @@ -60,7 +74,11 @@ class AddEqAction : public Action { public: AddEqAction(Tensor* _lhs, const Tensor* _rhs); void execute() override; + std::string get_name() const override { + return "AddEqAction"; + } std::string to_string() const override; + std::string get_dot_string() const override; private: Tensor* lhs_shape; Tensor* lhs_strides; @@ -73,6 +91,9 @@ class ExpandAddAction : public Action { : Action(_lhs, _rhs, _res) { } void execute() override; + std::string get_name() const override { + return "ExpandAddAction"; + } std::string to_string() const override; }; @@ -82,6 +103,9 @@ class ExpandMulAction : public Action { : Action(_lhs, _rhs, _res) { } void execute() override; + std::string get_name() const override { + return "ExpandMulAction"; + } std::string to_string() const override; }; @@ -91,6 +115,9 @@ class AtAction : public Action { : Action(_lhs, _rhs, _res) { } void execute() override; + std::string get_name() const override { + return "AtAction"; + } std::string to_string() const override; }; @@ -98,6 +125,9 @@ class MulAction : public Action { public: MulAction(Tensor* _lhs, const Tensor* _rhs, Tensor* _res); void execute() override; + std::string get_name() const override { + return "MulAction"; + } std::string to_string() const override; private: Tensor* lhs_shape; @@ -112,6 +142,9 @@ class SumAction : public Action { : Action(_lhs, nullptr, _res), dim(_dim) { } void execute() override; + std::string get_name() const override { + return "SumAction"; + } std::string to_string() const override; private: int dim; @@ -123,6 +156,9 @@ class ReluAction : public Action { : Action(_lhs, nullptr, _res) { } void execute() override; + std::string get_name() const override { + return "ReluAction"; + } std::string to_string() const override; }; @@ -132,6 +168,9 @@ class ReluPrimeAction : public Action { : Action(_lhs, nullptr, _res) { } void execute() override; + std::string get_name() const override { + return "ReluPrimeAction"; + } std::string to_string() const override; }; @@ -141,6 +180,9 @@ class CrossEntropyAction : public Action { : Action(_lhs, labels, _res), maxs(_maxs), sums(_sums) { } void execute() override; + std::string get_name() const override { + return "CrossEntropyAction"; + } std::string to_string() const override; private: Tensor* maxs; @@ -153,6 +195,9 @@ class CrossEntropyBackwardAction : public Action { : Action(_lhs, labels, _res), maxs(_maxs), sums(_sums) { } void execute() override; + std::string get_name() const override { + return "CrossEntropyBackwardAction"; + } std::string to_string() const override; private: Tensor* maxs; @@ -165,6 +210,9 @@ class CalcAllGradNormAction : public Action { : Action(nullptr, nullptr, _norm), grads(_grads) { } void execute() override; + std::string get_name() const override { + return "CalcAllGradNormAction"; + } std::string to_string() const override; private: std::vector grads; @@ -176,6 +224,9 @@ class ClipGradAction : public Action { : Action(_grad, _norm, nullptr), grad_clip_val(_grad_clip_val) { } void execute() override; + std::string get_name() const override { + return "ClipGradAction"; + } std::string to_string() const override; private: float grad_clip_val; @@ -187,6 +238,9 @@ class AdamStepAction : public Action { : Action(nullptr, nullptr, nullptr), param(_param), lr(_lr), beta1(_beta1), beta2(_beta2), epsilon(_epsilon) { } void execute() override; + std::string get_name() const override { + return "AdamStepAction"; + } std::string to_string() const override; private: Parameter* param; @@ -202,6 +256,9 @@ class ZeroGradAction : public Action { : Action(nullptr, nullptr, nullptr) { } void execute() override; + std::string get_name() const override { + return "ZeroGradAction"; + } std::string to_string() const override; bool is_zero_grad() const override { return true; @@ -217,6 +274,9 @@ class ZeroCTensorsAction : public Action { return true; } void execute() override; + std::string get_name() const override { + return "ZeroCTensorsAction"; + } std::string to_string() const override; }; @@ -226,6 +286,9 @@ class PrintNoZeroTensorNamesAction : public Action { : Action(nullptr, nullptr, nullptr) { } void execute() override; + std::string get_name() const override { + return "PrintNoZeroTensorNamesAction"; + } std::string to_string() const override; }; @@ -235,6 +298,9 @@ class FillWeightAction : public Action { : Action(_lhs, nullptr, nullptr), init_type(_init_type), sigma(_sigma), mean(_mean) { } void execute() override; + std::string get_name() const override { + return "FillWeightAction"; + } std::string to_string() const override; protected: std::string init_type; @@ -253,7 +319,11 @@ class InitWeightAction : public FillWeightAction { bool is_init_weight() const override { return true; } + std::string get_name() const override { + return "InitWeightAction"; + } std::string to_string() const override; + std::string get_dot_string() const override; }; class BoundaryAction : public Action { @@ -263,6 +333,9 @@ class BoundaryAction : public Action { } void execute() override; bool is_backward_boundary() const override; + std::string get_name() const override { + return "BoundaryAction"; + } std::string to_string() const override; }; @@ -276,6 +349,9 @@ class AssignShapeAndStridesAction : public Action { ); virtual ~AssignShapeAndStridesAction(); void execute() override; + std::string get_name() const override { + return "AssignShapeAndStridesAction"; + } std::string to_string() const override; private: int32_t* shape_data; @@ -287,6 +363,9 @@ class AssignValueAction : public Action { AssignValueAction(Tensor* tensor, float value); virtual ~AssignValueAction(); void execute() override; + std::string get_name() const override { + return "AssignValueAction"; + } std::string to_string() const override; private: float value; @@ -301,6 +380,9 @@ class ReshapeDeepCpAction : public Action { shape(_shape), strides(_strides) { } void execute() override; + std::string get_name() const override { + return "ReshapeDeepCpAction"; + } std::string to_string() const override; private: const Tensor* shape; @@ -313,6 +395,9 @@ class RepeatInterleaveAction : public Action { : Action(_lhs, nullptr, _res), n(_n) { } void execute() override; + std::string get_name() const override { + return "RepeatInterleaveAction"; + } std::string to_string() const override; private: int n; @@ -324,6 +409,9 @@ class SequenceMaskAction : public Action { : Action(_lhs, _rhs, _res), value(_value) { } void execute() override; + std::string get_name() const override { + return "SequenceMaskAction"; + } std::string to_string() const override; private: float value; @@ -335,6 +423,9 @@ class SoftmaxAction : public Action { : Action(_lhs, nullptr, _res) { } void execute() override; + std::string get_name() const override { + return "SoftmaxAction"; + } std::string to_string() const override; }; @@ -344,6 +435,9 @@ class SoftmaxBackwardAction : public Action { : Action(_lhs, _softmax_res, grad) { } void execute() override; + std::string get_name() const override { + return "SoftmaxBackwardAction"; + } std::string to_string() const override; }; @@ -355,6 +449,9 @@ class LazyDivAction : public Action { assert(value->get_shape()[0] == 1); } void execute() override; + std::string get_name() const override { + return "LazyDivAction"; + } std::string to_string() const override; private: Tensor* value; @@ -364,6 +461,9 @@ class DropoutMaskAction : public Action { public: DropoutMaskAction(Tensor* mask, float _p); void execute() override; + std::string get_name() const override { + return "DropoutMaskAction"; + } std::string to_string() const override; private: float p; @@ -377,6 +477,9 @@ class EmbeddingAction : public Action { : Action(_lhs, indices, _res) { } void execute() override; + std::string get_name() const override { + return "EmbeddingAction"; + } std::string to_string() const override; }; @@ -386,6 +489,9 @@ class EmbeddingBackwardAction : public Action { : Action(_lhs, indices, _res) { } void execute() override; + std::string get_name() const override { + return "EmbeddingBackwardAction"; + } std::string to_string() const override; }; @@ -395,6 +501,9 @@ class PosEncodingAction : public Action { : Action(nullptr, nullptr, res) { } void execute() override; + std::string get_name() const override { + return "PosEncodingAction"; + } std::string to_string() const override; bool is_do_once() const override { return true; @@ -407,6 +516,9 @@ class AvgAction : public Action { : Action(_lhs, nullptr, _res) { } void execute() override; + std::string get_name() const override { + return "AvgAction"; + } std::string to_string() const override; }; @@ -416,6 +528,9 @@ class VarAction : public Action { : Action(_lhs, avg, _res) { } void execute() override; + std::string get_name() const override { + return "VarAction"; + } std::string to_string() const override; }; @@ -425,6 +540,9 @@ class NormAction : public Action { : Action(avg, var, _res), src(_src) { } void execute() override; + std::string get_name() const override { + return "NormAction"; + } std::string to_string() const override; private: Tensor* src; @@ -436,6 +554,9 @@ class NormBackwardAction : public Action { : Action(_grad, norm_res, _res), var_tensor(_var_tensor) { } void execute() override; + std::string get_name() const override { + return "NormBackwardAction"; + } std::string to_string() const override; private: Tensor* var_tensor; @@ -447,6 +568,9 @@ class DbgPrintAction : public Action { : Action(_lhs, nullptr, nullptr), msg(_msg), expected_name(_expected_name) { } void execute() override; + std::string get_name() const override { + return "DbgPrintAction"; + } std::string to_string() const override; private: std::string msg; @@ -459,6 +583,9 @@ class MemCpAction : public Action { : Action(_lhs, _rhs, nullptr), offset_l(_offset_l), offset_r(_offset_r), size(_size) { } void execute() override; + std::string get_name() const override { + return "MemCpAction"; + } std::string to_string() const override; private: int offset_l; @@ -472,6 +599,9 @@ class MulSVAction : public Action { : Action(_lhs, nullptr, _res), value(_value) { } void execute() override; + std::string get_name() const override { + return "MulSVAction"; + } std::string to_string() const override; private: float value; @@ -483,7 +613,11 @@ class ClearAction : public Action { : Action(_lhs, nullptr, nullptr) { } void execute() override; + std::string get_name() const override { + return "ClearAction"; + } std::string to_string() const override; + std::string get_dot_string() const override; }; std::vector getOnceActions(); @@ -493,6 +627,7 @@ void gDoOnceActions(); void gDoForwardActions(bool training = false); void gDoBackwardActions(); void printAllActions(); +void printDotGraph(); void freeAllActions(); void disableInitWeightAction(); diff --git a/handwritten_recognition_topo.png b/handwritten_recognition_topo.png new file mode 100644 index 0000000..2e92dc9 Binary files /dev/null and b/handwritten_recognition_topo.png differ diff --git a/mnist.cpp b/mnist.cpp index d2e3d45..680a965 100644 --- a/mnist.cpp +++ b/mnist.cpp @@ -69,6 +69,7 @@ void train(int epochs, float lr, int batch_size) { optimizer.clip_grad(1.0f); optimizer.step(); printAllActions(); + printDotGraph(); allocMemAndInitTensors(); float* inputs_tmp_buffer = static_cast(::malloc(inputs->size())); int32_t* labels_tmp_buffer = static_cast(::malloc(labels->size())); diff --git a/tensor/tensor.cpp b/tensor/tensor.cpp index 2a880f4..816c130 100644 --- a/tensor/tensor.cpp +++ b/tensor/tensor.cpp @@ -19,7 +19,7 @@ std::string TensorDtype_to_string(TensorDType dtype) { } Tensor::Tensor(const std::vector& _shape, const std::string& _name, TensorDType _dtype) - : shape(_shape), name(_name), dtype(_dtype), own_storage(true), offset(0), id(0) { + : shape(_shape), name(_name), dtype(_dtype), own_storage(true), offset(0), id(0), parent(nullptr) { strides.resize(shape.size()); strides[shape.size() - 1] = 1; for (int i = shape.size() - 2; i >= 0; --i) { @@ -41,11 +41,13 @@ Tensor::Tensor( const std::string& _name, TensorDType _dtype, TensorStorage* _storage, - int _offset) + int _offset, + const Tensor* _parent) : shape(_shape), strides(_strides), name(_name), dtype(_dtype), own_storage(false), storage(_storage), - offset(_offset), id(0) { + offset(_offset), id(0), + parent(_parent) { assert(shape.size() == strides.size()); assert(_storage != nullptr); assert(_offset >= 0); @@ -54,17 +56,17 @@ Tensor::Tensor( id = gen_id(); } -Tensor::Tensor( - const std::vector& _shape, - const std::vector& _strides, - const std::string& _name, - TensorDType _dtype, - TensorStorage* _storage) - : Tensor( - _shape, _strides, _name, _dtype, _storage, 0 - ) { - assert(shape.size() == strides.size()); -} +// Tensor::Tensor( +// const std::vector& _shape, +// const std::vector& _strides, +// const std::string& _name, +// TensorDType _dtype, +// TensorStorage* _storage) +// : Tensor( +// _shape, _strides, _name, _dtype, _storage, 0 +// ) { +// assert(shape.size() == strides.size()); +// } Tensor::~Tensor() { if (own_storage) { @@ -426,7 +428,8 @@ Tensor* allocTensorView( Tensor* tensor_view = new Tensor( shape, strides, name, parent->get_dtype(), parent->get_storage(), - parent->get_offset() + offset + parent->get_offset() + offset, + parent ); g_tensor_views.push_back(tensor_view); return tensor_view; diff --git a/tensor/tensor.h b/tensor/tensor.h index c47bb63..4a66a2b 100644 --- a/tensor/tensor.h +++ b/tensor/tensor.h @@ -37,12 +37,12 @@ class Tensor { Tensor( const std::vector& _shape, const std::vector& _strides, const std::string& _name, TensorDType _dtype, TensorStorage* _storage, - int _offset - ); - Tensor( - const std::vector& _shape, const std::vector& _strides, - const std::string& _name, TensorDType _dtype, TensorStorage* _storage + int _offset, const Tensor* _parent ); + // Tensor( + // const std::vector& _shape, const std::vector& _strides, + // const std::string& _name, TensorDType _dtype, TensorStorage* _storage + // ); virtual ~Tensor(); virtual void set_data(void* ptr, void* ctx); virtual void* get_data() const; @@ -57,6 +57,7 @@ class Tensor { virtual int get_dim() const { return shape.size(); } TensorDType get_dtype() const { return dtype; } virtual std::string get_name() const { return name; } + const Tensor* get_parent() const { return parent; } Tensor* transpose(int a = 0, int b = 1); Tensor* permute(const std::vector& dims); Tensor* reshape(const std::vector& shape) const; @@ -81,6 +82,7 @@ class Tensor { std::vector strides; std::string name; TensorDType dtype; + const Tensor* parent; private: const bool own_storage; TensorStorage* storage;