Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ tools/vocab_builder/builder.dSYM/Contents/Resources/Relocations/aarch64/builder.
tools/vocab_builder/builder.dSYM/Contents/Info.plist
tools/vocab_builder/builder
tools/vocab_builder/builder.dSYM
out.dot
37 changes: 37 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,43 @@ by h g wells i the time traveller for so it will be convenient to speak of him w

This model was trained for 8 epochs using the full text of The Time Machine novel.

## handwritten_recognition

To make it quicker to verify individual features, I have added a handwritten digit recognition program.

```
./handwritten_recognition
images magic : 2051
label magic : 2049
lables_num : 60000
data loaded.
Actions:
...
evaluating : [10000/10000] correct : 9501
epoch : 9 [50000/50000] loss : 0.150985
evaluating : [10000/10000] correct : 9493
```

### Graphviz support

To write the tensor computation topology to an `out.dot` file, add a call to `printDotGraph()` to the program. For example, in mnist.cpp:

```
printAllActions();
printDotGraph(); // here
allocMemAndInitTensors();
```

If you have Graphviz installed, you can use the following command to convert the out.dot file into a PNG image:

```
dot -Tpng out.dot -o out.png
```

Here is an example PNG generated this way for the handwritten_recognition program:

![alt text](handwritten_recognition_topo.png)

## legacy version

[v1](https://github.com/freelw/cpp-transformer/tree/v1_freeze_20250529)
Expand Down
101 changes: 89 additions & 12 deletions graph/actions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
#include <stdlib.h>
#include <assert.h>
#include <sstream>
#include <fstream>
#include "backends/backend_ops.h"
#include "optimizers/parameter.h"

// Declared here, defined elsewhere.
// NOTE(review): presumably toggles training vs. evaluation behavior — confirm at the definition.
extern bool g_training;
// Global counter for action ids, starting at 0.
// NOTE(review): presumably the source of each Action's action_id — confirm where Actions are constructed.
int g_action_id_counter = 0;

bool Action::executed_once() const {
return exec_times > 0;
Expand All @@ -21,6 +23,20 @@ int Action::get_exec_times() const {
return exec_times;
}

// Default DOT rendering of an action's edges: every operand that is set
// (lhs, then rhs) points into this action's node, and the action's node
// points to the result tensor when one is set. Returns an empty string
// when none of lhs/rhs/res is set.
std::string Action::get_dot_string() const {
    std::ostringstream edges;

    // Input edges: operand tensor -> action.
    if (lhs != nullptr) {
        edges << "Tensor_" << lhs->get_id() << " -> Action_" << action_id << ";\n";
    }
    if (rhs != nullptr) {
        edges << "Tensor_" << rhs->get_id() << " -> Action_" << action_id << ";\n";
    }

    // Output edge: action -> result tensor.
    if (res != nullptr) {
        edges << "Action_" << action_id << " -> Tensor_" << res->get_id() << ";\n";
    }

    return edges.str();
}

std::ostream& operator<<(std::ostream& output, const Action& a) {
output << a.to_string();
return output;
Expand Down Expand Up @@ -150,6 +166,14 @@ std::string AddEqAction::to_string() const {
return oss.str();
}

// DOT edges for an in-place add: the action writes into lhs, so the edge
// is drawn action -> lhs, while rhs is a pure input drawn rhs -> action.
// Both operands are dereferenced, so both must be set.
std::string AddEqAction::get_dot_string() const {
    assert(lhs != nullptr);
    assert(rhs != nullptr);  // rhs is dereferenced below as well
    std::ostringstream oss;
    oss << "Action_" << action_id << " -> Tensor_" << lhs->get_id() << ";\n";
    oss << "Tensor_" << rhs->get_id() << " -> Action_" << action_id << ";\n";
    return oss.str();
}

void ExpandAddAction::execute() {
assert(lhs != nullptr);
assert(rhs != nullptr);
Expand Down Expand Up @@ -431,25 +455,19 @@ void FillWeightAction::execute() {
assert(lhs != nullptr);
if (init_type == "gauss") {
g_backend_ops->init_weight_gauss(lhs, mean, sigma);
}
else if (init_type == "uniform") {
} else if (init_type == "uniform") {
g_backend_ops->init_weight_uniform(lhs, sigma);
}
else if (init_type == "xavier") {
} else if (init_type == "xavier") {
assert(false);
// g_backend_ops->xavier(lhs);
}
else if (init_type == "kaiming") {
} else if (init_type == "kaiming") {
assert(false);
// g_backend_ops->kaiming(lhs);
}
else if (init_type == "dbg") {
} else if (init_type == "dbg") {
g_backend_ops->init_weight_for_dbg(lhs, sigma);
}
else if (init_type == "fill") {
} else if (init_type == "fill") {
g_backend_ops->fill(lhs, sigma);
}
else {
} else {
std::cerr << "Error: Unknown initialization type: " << init_type << std::endl;
abort();
}
Expand All @@ -473,6 +491,13 @@ std::string InitWeightAction::to_string() const {
return oss.str();
}

// DOT edge for weight initialization: the action has no tensor input in
// the graph and only writes into lhs, so a single edge action -> lhs is
// emitted. lhs must be set.
std::string InitWeightAction::get_dot_string() const {
    assert(lhs != nullptr);
    std::ostringstream edge;
    edge << "Action_" << action_id << " -> Tensor_" << lhs->get_id() << ";\n";
    return edge.str();
}

// Intentional no-op: executing a BoundaryAction performs no work.
void BoundaryAction::execute() {}
Expand Down Expand Up @@ -829,6 +854,13 @@ std::string ClearAction::to_string() const {
return oss.str();
}

// DOT edge for a clear: the action only writes into lhs, so a single
// edge action -> lhs is emitted. lhs must be set.
std::string ClearAction::get_dot_string() const {
    assert(lhs != nullptr);
    std::ostringstream edge;
    edge << "Action_" << action_id << " -> Tensor_" << lhs->get_id() << ";\n";
    return edge.str();
}

// Global list of Action pointers. printDotGraph() iterates it to emit the
// action nodes and their edges, and freeAllActions() deletes each entry.
std::vector<Action*> g_actions;

std::vector<Action*> getOnceActions() {
Expand Down Expand Up @@ -953,6 +985,51 @@ void printAllActions() {
}
}

void printDotGraph() {
// save in out.dot

std::ofstream out("out.dot");
out << "digraph G {" << std::endl;

for (Tensor* tensor : g_tensors) {
out << "Tensor_" << tensor->get_id() << " [shape=\"ellipse\" label=\"" << tensor->get_meta_info() << "\"];" << std::endl;
}

for (Tensor* tensor_view : g_tensor_views) {
out << "Tensor_" << tensor_view->get_id() << " [shape=\"ellipse\" color=\"blue\" label=\"" << tensor_view->get_meta_info() << "\"];" << std::endl;
}

for (Tensor* c_tensor : g_c_tensors) {
out << "Tensor_" << c_tensor->get_id() << " [shape=\"ellipse\" color=\"green\" label=\"" << c_tensor->get_meta_info() << "\"];" << std::endl;
}

for (Tensor* grad_tensor : g_grad_tensors) {
out << "Tensor_" << grad_tensor->get_id() << " [shape=\"ellipse\" color=\"yellow\" label=\"" << grad_tensor->get_meta_info() << "\"];" << std::endl;
}

for (Action* action : g_actions) {
out << "Action_" << action->get_id() << " [shape=\"box\" label=\"" << action->get_name() << "\"];" << std::endl;
}

for (Tensor* tensor_view : g_tensor_views) {
// build edge
auto parent = tensor_view->get_parent();
if (parent != nullptr) {
out << "Tensor_" << parent->get_id() << " -> Tensor_" << tensor_view->get_id() << ";" << std::endl;
}
}

for (Action* action : g_actions) {
std::string dot_string = action->get_dot_string();
if (dot_string.empty()) {
continue; // skip actions that do not have a dot string
}
out << dot_string;
}

out << "}" << std::endl;
}

void freeAllActions() {
for (Action* action : g_actions) {
delete action;
Expand Down
Loading