@@ -2088,7 +2088,7 @@ class joint_matrix {
20882088// / \param [in] trans Indicates whether the matrix is to be loaded transposed
20892089// / \param [in] mat The matrix index to be loaded
20902090template <typename T>
2091- void ldmatrix (uintptr_t addr, T *m, bool trans = false , unsigned mat = 0 ) {
2091+ inline void ldmatrix (uintptr_t addr, T *m, bool trans = false , unsigned mat = 0 ) {
20922092 auto sg = sycl::ext::oneapi::this_work_item::get_sub_group ();
20932093 int lane = sg.get_local_linear_id ();
20942094
@@ -2165,7 +2165,7 @@ void ldmatrix(uintptr_t addr, T *m, bool trans = false, unsigned mat = 0) {
21652165// / to 2 b16 type elements.
21662166// / \param [in] trans Indicates whether the matrix is to be loaded transposed
21672167template <typename T>
2168- void ldmatrix (uintptr_t addr, T *m1, T *m2, bool trans = false ) {
2168+ inline void ldmatrix (uintptr_t addr, T *m1, T *m2, bool trans = false ) {
21692169 // Load 1st matrix
21702170 ldmatrix (addr, m1, trans, 0 );
21712171 // Load 2nd matrix
@@ -2207,7 +2207,7 @@ void ldmatrix(uintptr_t addr, T *m1, T *m2, bool trans = false) {
22072207// / to 2 b16 type elements.
22082208// / \param [in] trans Indicates whether the matrix is to be loaded transposed
22092209template <typename T>
2210- void ldmatrix (uintptr_t addr, T *m1, T *m2, T *m3, T *m4, bool trans = false ) {
2210+ inline void ldmatrix (uintptr_t addr, T *m1, T *m2, T *m3, T *m4, bool trans = false ) {
22112211 // Load 1st matrix
22122212 ldmatrix (addr, m1, trans, 0 );
22132213 // Load 2nd matrix
@@ -2248,7 +2248,7 @@ void ldmatrix(uintptr_t addr, T *m1, T *m2, T *m3, T *m4, bool trans = false) {
22482248// / \param [in] trans Indicates whether the matrix is to be stored transposed
22492249// / \param [in] mat The matrix index to be stored
22502250template <typename T>
2251- void stmatrix (uintptr_t addr, T m, bool trans = false , unsigned mat = 0 ) {
2251+ inline void stmatrix (uintptr_t addr, T m, bool trans = false , unsigned mat = 0 ) {
22522252 auto sg = sycl::ext::oneapi::this_work_item::get_sub_group ();
22532253 int lane = sg.get_local_linear_id ();
22542254
@@ -2325,7 +2325,7 @@ void stmatrix(uintptr_t addr, T m, bool trans = false, unsigned mat = 0) {
23252325// / to 2 b16 type elements.
23262326// / \param [in] trans Indicates whether the matrix is to be stored transposed
23272327template <typename T>
2328- void stmatrix (uintptr_t addr, T m1, T m2, bool trans = false ) {
2328+ inline void stmatrix (uintptr_t addr, T m1, T m2, bool trans = false ) {
23292329 // Store 1st matrix
23302330 stmatrix (addr, m1, trans, 0 );
23312331 // Store 2nd matrix
@@ -2367,7 +2367,7 @@ void stmatrix(uintptr_t addr, T m1, T m2, bool trans = false) {
23672367// / to 2 b16 type elements.
23682368// / \param [in] trans Indicates whether the matrix is to be stored transposed
23692369template <typename T>
2370- void stmatrix (uintptr_t addr, T m1, T m2, T m3, T m4, bool trans = false ) {
2370+ inline void stmatrix (uintptr_t addr, T m1, T m2, T m3, T m4, bool trans = false ) {
23712371 // Store 1st matrix
23722372 stmatrix (addr, m1, trans, 0 );
23732373 // Store 2nd matrix
@@ -2412,7 +2412,7 @@ template <typename T> struct MMAType {
24122412// / \param [in] c_mat_frag The fragment of the input matrix C to be added with
24132413// / the result of A * B fragments
24142414template <int M, int N, int K, typename ABType, typename CDType>
2415- void mma (volatile void **d_mat_frag, void *a_mat_frag, void *b_mat_frag,
2415+ inline void mma (volatile void **d_mat_frag, void *a_mat_frag, void *b_mat_frag,
24162416 void *c_mat_frag) {
24172417 auto d = reinterpret_cast <volatile CDType **>(d_mat_frag);
24182418 auto a = reinterpret_cast <typename MMAType<ABType>::PackType *>(a_mat_frag);
@@ -2865,7 +2865,7 @@ void mma(volatile void **d_mat_frag, void *a_mat_frag, void *b_mat_frag,
28652865// / b16 type elements.
28662866// / \param [in] input: The register to store the matrix fragment. It refers to 2 b16
28672867// / type elements.
2868- void movmatrix (uint32_t &output, uint32_t &input) {
2868+ inline void movmatrix (uint32_t &output, uint32_t &input) {
28692869 auto sg = sycl::ext::oneapi::this_work_item::get_sub_group ();
28702870 int laneid = sg.get_local_linear_id ();
28712871
0 commit comments