Skip to content

Commit eac9698

Browse files
authored
Merge pull request #41 from freelw/wangli_dev_20250617_2
Fix #35
2 parents 451ce7a + dcc55d4 commit eac9698

2 files changed

Lines changed: 24 additions & 7 deletions

File tree

backends/gpu/metal/metal_ops.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,15 @@ MetalOps::MetalOps() : commandBuffer(nullptr), cur_int_args(0), cur_float_args(0
3535
}
3636

3737
bufferIntArgs = device->newBuffer(TOTAL_INT_ARGS * sizeof(int), MTL::ResourceStorageModeShared);
38+
if (!bufferIntArgs) {
39+
std::cerr << "Failed to create buffer for int args!" << std::endl;
40+
throw std::runtime_error("Failed to create buffer for int args");
41+
}
3842
bufferFloatArgs = device->newBuffer(TOTAL_FLOAT_ARGS * sizeof(float), MTL::ResourceStorageModeShared);
43+
if (!bufferFloatArgs) {
44+
std::cerr << "Failed to create buffer for float args!" << std::endl;
45+
throw std::runtime_error("Failed to create buffer for float args");
46+
}
3947
load_kernel_metal();
4048

4149
addOps = new MetalKops("tensor_add_kernel", library);
@@ -115,10 +123,10 @@ void MetalOps::prepare() {
115123
cur_float_args = 0;
116124
}
117125

118-
int calc_offset(const Tensor* t) {
126+
unsigned int calc_offset(const Tensor* t) {
119127
char* base = reinterpret_cast<char*>(reinterpret_cast<MTL::Buffer*>(t->get_storage()->ctx)->contents());
120128
char* pos = reinterpret_cast<char*>(t->get_data());
121-
auto offset_res = pos - base;
129+
unsigned int offset_res = pos - base;
122130
return offset_res;
123131
}
124132

@@ -1428,6 +1436,10 @@ void MetalOps::mulSV(Tensor* dst, Tensor* src, float value) {
14281436

14291437
void* MetalOps::alloc(size_t size, void** ctx) {
14301438
MTL::Buffer* buffer = device->newBuffer(size, MTL::ResourceStorageModeShared);
1439+
if (!buffer) {
1440+
std::cerr << "Error allocating buffer of size " << size << std::endl;
1441+
abort();
1442+
}
14311443
*ctx = (void*)buffer;
14321444
return buffer->contents();
14331445
}

lm.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ std::vector<uint> trim_or_padding(const std::vector<uint>& src, uint max_len, ui
4141
std::vector<uint> res = src;
4242
if (src.size() > max_len) {
4343
res.resize(max_len);
44-
} else {
44+
}
45+
else {
4546
res.resize(max_len, pad_id);
4647
}
4748
return res;
@@ -98,7 +99,7 @@ int main(int argc, char* argv[]) {
9899

99100
int opt;
100101
int epochs = 10;
101-
int batch_size = 4;
102+
int batch_size = 16;
102103
int gpu = 1;
103104
int max_words_cnt = 256;
104105
float lr = 0.001f;
@@ -225,6 +226,7 @@ int main(int argc, char* argv[]) {
225226
adam.clip_grad(1.0f);
226227
adam.step();
227228
graph::validateAllNodesRefCnt(0);
229+
// printAllTensors();
228230
// printAllActions();
229231
allocMemAndInitTensors();
230232
std::cout << "Allocating memory " << std::endl
@@ -264,7 +266,8 @@ int main(int argc, char* argv[]) {
264266
auto origin_size = src_token_ids.size();
265267
if (src_token_ids.size() < num_steps) {
266268
src_token_ids.resize(num_steps, loader.get_pad_id());
267-
} else if (src_token_ids.size() > num_steps) {
269+
}
270+
else if (src_token_ids.size() > num_steps) {
268271
src_token_ids.erase(src_token_ids.begin(), src_token_ids.end() - num_steps);
269272
}
270273
auto cur_step = origin_size - 1;
@@ -302,15 +305,17 @@ int main(int argc, char* argv[]) {
302305
if (cur_step >= num_steps - 1) {
303306
src_token_ids.push_back(max_index);
304307
src_token_ids.erase(src_token_ids.begin(), src_token_ids.end() - num_steps);
305-
} else {
308+
}
309+
else {
306310
src_token_ids[++cur_step] = max_index;
307311
}
308312
}
309313
std::cout << std::endl;
310314
std::cout << "-----------------" << std::endl;
311315
::free(res_buffer);
312316
}
313-
} else {
317+
}
318+
else {
314319
init_dec_valid_lens_for_training(dec_valid_lens);
315320
signal(SIGINT, signal_callback_handler);
316321
int epoch = 0;

0 commit comments

Comments (0)