diff --git a/Changelog.md b/Changelog.md index bd2f995..0d614fe 100644 --- a/Changelog.md +++ b/Changelog.md @@ -7,7 +7,7 @@ behaviour. - Improve copying of comments from .proto files into ocaml code using omd to parse markdown - +- Fix problem with deserializing large singed integers (#45) ## 6.1.0: 2024-04-25 - Fix name resolution leading to wrongly mapped names diff --git a/src/ocaml_protoc_plugin/deserialize.ml b/src/ocaml_protoc_plugin/deserialize.ml index 7c5d607..39cdc4a 100644 --- a/src/ocaml_protoc_plugin/deserialize.ml +++ b/src/ocaml_protoc_plugin/deserialize.ml @@ -30,13 +30,13 @@ let error_required_field_missing index spec = Result.raise (`Required_field_miss let decode_zigzag v = let open Infix.Int64 in match v land 0x01L = 0L with - | true -> v / 2L - | false -> (v / 2L * -1L) - 1L + | true -> v lsr 1 + | false -> (v lsr 1 * -1L) - 1L let decode_zigzag_unboxed v = match v land 0x01 = 0 with - | true -> v / 2 - | false -> (v / 2 * -1) - 1 + | true -> v lsr 1 + | false -> (v lsr 1 * -1) - 1 let int_of_uint32 = let open Int32 in @@ -326,3 +326,59 @@ let deserialize_fast: type constr a. (constr, a) compound_list -> constr -> Read let extension_ranges = extension_ranges spec in let values = make_values spec in fun reader -> deserialize_fast extension_ranges values constr reader + +let%expect_test "zigzag decoding" = + let n2l = Int64.of_int in + let i2l = Int64.of_int32 in + let test_values = + [Int64.min_int; n2l Int.min_int; i2l Int32.min_int; -2L; + 0L; 3L; i2l Int32.max_int; n2l Int.max_int; Int64.max_int] + |> List.map ~f:( + let open Infix.Int64 in + function + | v when v > 0L -> [pred v; v] + | v -> [v; succ v] + ) + |> List.concat + in + List.iter ~f:(fun vl -> Printf.printf "zigzag_decoding(0x%016Lx) = 0x%016Lx\n" vl (decode_zigzag vl)) test_values; + List.iter ~f:(fun v -> Printf.printf "zigzag_decoding_unboxed(0x%016x) = 0x%016x\n" (Int64.to_int v) (Int64.to_int v |> decode_zigzag_unboxed)) test_values; + (* The results should display alternation between positive and negative numbers. If the right most bit is set, the number is negative *) + [%expect {| + zigzag_decoding(0x8000000000000000) = 0x4000000000000000 + zigzag_decoding(0x8000000000000001) = 0xbfffffffffffffff + zigzag_decoding(0xc000000000000000) = 0x6000000000000000 + zigzag_decoding(0xc000000000000001) = 0x9fffffffffffffff + zigzag_decoding(0xffffffff80000000) = 0x7fffffffc0000000 + zigzag_decoding(0xffffffff80000001) = 0x800000003fffffff + zigzag_decoding(0xfffffffffffffffe) = 0x7fffffffffffffff + zigzag_decoding(0xffffffffffffffff) = 0x8000000000000000 + zigzag_decoding(0x0000000000000000) = 0x0000000000000000 + zigzag_decoding(0x0000000000000001) = 0xffffffffffffffff + zigzag_decoding(0x0000000000000002) = 0x0000000000000001 + zigzag_decoding(0x0000000000000003) = 0xfffffffffffffffe + zigzag_decoding(0x000000007ffffffe) = 0x000000003fffffff + zigzag_decoding(0x000000007fffffff) = 0xffffffffc0000000 + zigzag_decoding(0x3ffffffffffffffe) = 0x1fffffffffffffff + zigzag_decoding(0x3fffffffffffffff) = 0xe000000000000000 + zigzag_decoding(0x7ffffffffffffffe) = 0x3fffffffffffffff + zigzag_decoding(0x7fffffffffffffff) = 0xc000000000000000 + zigzag_decoding_unboxed(0x0000000000000000) = 0x0000000000000000 + zigzag_decoding_unboxed(0x0000000000000001) = 0x7fffffffffffffff + zigzag_decoding_unboxed(0x4000000000000000) = 0x2000000000000000 + zigzag_decoding_unboxed(0x4000000000000001) = 0x5fffffffffffffff + zigzag_decoding_unboxed(0x7fffffff80000000) = 0x3fffffffc0000000 + zigzag_decoding_unboxed(0x7fffffff80000001) = 0x400000003fffffff + zigzag_decoding_unboxed(0x7ffffffffffffffe) = 0x3fffffffffffffff + zigzag_decoding_unboxed(0x7fffffffffffffff) = 0x4000000000000000 + zigzag_decoding_unboxed(0x0000000000000000) = 0x0000000000000000 + zigzag_decoding_unboxed(0x0000000000000001) = 0x7fffffffffffffff + zigzag_decoding_unboxed(0x0000000000000002) = 0x0000000000000001 + zigzag_decoding_unboxed(0x0000000000000003) = 0x7ffffffffffffffe + zigzag_decoding_unboxed(0x000000007ffffffe) = 0x000000003fffffff + zigzag_decoding_unboxed(0x000000007fffffff) = 0x7fffffffc0000000 + zigzag_decoding_unboxed(0x3ffffffffffffffe) = 0x1fffffffffffffff + zigzag_decoding_unboxed(0x3fffffffffffffff) = 0x6000000000000000 + zigzag_decoding_unboxed(0x7ffffffffffffffe) = 0x3fffffffffffffff + zigzag_decoding_unboxed(0x7fffffffffffffff) = 0x4000000000000000 + |}] diff --git a/src/ocaml_protoc_plugin/infix.ml b/src/ocaml_protoc_plugin/infix.ml index a78c58f..ca474b6 100644 --- a/src/ocaml_protoc_plugin/infix.ml +++ b/src/ocaml_protoc_plugin/infix.ml @@ -9,6 +9,8 @@ module Int64 = struct let (/) = div let ( * ) = mul let (-) = sub + let succ = succ + let pred = pred end module Int = struct @@ -22,4 +24,6 @@ module Int = struct let (/) = div let ( * ) = mul let (-) = sub + let succ = succ + let pred = pred end diff --git a/src/ocaml_protoc_plugin/serialize.ml b/src/ocaml_protoc_plugin/serialize.ml index 52b9ad3..42dbb58 100644 --- a/src/ocaml_protoc_plugin/serialize.ml +++ b/src/ocaml_protoc_plugin/serialize.ml @@ -16,7 +16,7 @@ let write_fixed64 ~f v = let write_fixed32 ~f v = Writer.write_fixed32_value (f v) -let zigzag_encoding v = +let encode_zigzag v = let open Infix.Int64 in let v = match v < 0L with | true -> v lsl 1 lxor (-1L) @@ -24,7 +24,7 @@ let zigzag_encoding v = in v -let zigzag_encoding_unboxed v = +let encode_zigzag_unboxed v = let v = match v < 0 with | true -> v lsl 1 lxor (-1) | false -> v lsl 1 @@ -55,16 +55,16 @@ let write_value : type a b. (a, b) spec -> a -> Writer.t -> unit = function | SFixed32_int -> write_fixed32 ~f:Int32.of_int | Int64 -> Writer.write_varint_value | UInt64 -> Writer.write_varint_value - | SInt64 -> write_varint ~f:zigzag_encoding + | SInt64 -> write_varint ~f:encode_zigzag | Int32 -> write_varint_unboxed ~f:Int32.to_int | UInt32 -> write_varint_unboxed ~f:Int32.to_int - | SInt32 -> write_varint_unboxed ~f:(Int32.to_int @@ zigzag_encoding_unboxed) + | SInt32 -> write_varint_unboxed ~f:(Int32.to_int @@ encode_zigzag_unboxed) | Int64_int -> Writer.write_varint_unboxed_value | UInt64_int -> Writer.write_varint_unboxed_value | Int32_int -> Writer.write_varint_unboxed_value | UInt32_int -> Writer.write_varint_unboxed_value - | SInt64_int -> write_varint_unboxed ~f:zigzag_encoding_unboxed - | SInt32_int -> write_varint_unboxed ~f:zigzag_encoding_unboxed + | SInt64_int -> write_varint_unboxed ~f:encode_zigzag_unboxed + | SInt32_int -> write_varint_unboxed ~f:encode_zigzag_unboxed | Bool -> write_varint_unboxed ~f:(function true -> 1 | false -> 0) | String -> fun v -> Writer.write_length_delimited_value ~data:v ~offset:0 ~len:(String.length v) @@ -177,28 +177,56 @@ let rec serialize: type a. (a, unit) compound_list -> Writer.t -> a = function cont writer let%expect_test "zigzag encoding" = - let test vl = - let v = Int64.to_int vl in - Printf.printf "zigzag_encoding(%LdL) = %LdL\n" vl (zigzag_encoding vl); - Printf.printf "zigzag_encoding_unboxed(%d) = %d\n" v (zigzag_encoding_unboxed v); + let n2l = Int64.of_int in + let i2l = Int64.of_int32 in + let test_values = + [Int64.min_int; n2l Int.min_int; i2l Int32.min_int; -2L; + 0L; 3L; i2l Int32.max_int; n2l Int.max_int; Int64.max_int] + |> List.map ~f:( + let open Infix.Int64 in + function + | v when v > 0L -> [pred v; v] + | v -> [v; succ v] + ) + |> List.concat in - List.iter ~f:test [0L; -1L; 1L; -2L; 2L; 2147483647L; -2147483648L; Int64.max_int; Int64.min_int; ]; + List.iter ~f:(fun vl -> Printf.printf "zigzag_encoding 0x%016Lx = 0x%016Lx\n" vl (encode_zigzag vl)) test_values; + List.iter ~f:(fun v -> Printf.printf "zigzag_encoding_unboxed 0x%016x = 0x%016x\n" (Int64.to_int v) (Int64.to_int v |> encode_zigzag_unboxed)) test_values; [%expect {| - zigzag_encoding(0L) = 0L - zigzag_encoding_unboxed(0) = 0 - zigzag_encoding(-1L) = 1L - zigzag_encoding_unboxed(-1) = 1 - zigzag_encoding(1L) = 2L - zigzag_encoding_unboxed(1) = 2 - zigzag_encoding(-2L) = 3L - zigzag_encoding_unboxed(-2) = 3 - zigzag_encoding(2L) = 4L - zigzag_encoding_unboxed(2) = 4 - zigzag_encoding(2147483647L) = 4294967294L - zigzag_encoding_unboxed(2147483647) = 4294967294 - zigzag_encoding(-2147483648L) = 4294967295L - zigzag_encoding_unboxed(-2147483648) = 4294967295 - zigzag_encoding(9223372036854775807L) = -2L - zigzag_encoding_unboxed(-1) = 1 - zigzag_encoding(-9223372036854775808L) = -1L - zigzag_encoding_unboxed(0) = 0 |}] + zigzag_encoding 0x8000000000000000 = 0xffffffffffffffff + zigzag_encoding 0x8000000000000001 = 0xfffffffffffffffd + zigzag_encoding 0xc000000000000000 = 0x7fffffffffffffff + zigzag_encoding 0xc000000000000001 = 0x7ffffffffffffffd + zigzag_encoding 0xffffffff80000000 = 0x00000000ffffffff + zigzag_encoding 0xffffffff80000001 = 0x00000000fffffffd + zigzag_encoding 0xfffffffffffffffe = 0x0000000000000003 + zigzag_encoding 0xffffffffffffffff = 0x0000000000000001 + zigzag_encoding 0x0000000000000000 = 0x0000000000000000 + zigzag_encoding 0x0000000000000001 = 0x0000000000000002 + zigzag_encoding 0x0000000000000002 = 0x0000000000000004 + zigzag_encoding 0x0000000000000003 = 0x0000000000000006 + zigzag_encoding 0x000000007ffffffe = 0x00000000fffffffc + zigzag_encoding 0x000000007fffffff = 0x00000000fffffffe + zigzag_encoding 0x3ffffffffffffffe = 0x7ffffffffffffffc + zigzag_encoding 0x3fffffffffffffff = 0x7ffffffffffffffe + zigzag_encoding 0x7ffffffffffffffe = 0xfffffffffffffffc + zigzag_encoding 0x7fffffffffffffff = 0xfffffffffffffffe + zigzag_encoding_unboxed 0x0000000000000000 = 0x0000000000000000 + zigzag_encoding_unboxed 0x0000000000000001 = 0x0000000000000002 + zigzag_encoding_unboxed 0x4000000000000000 = 0x7fffffffffffffff + zigzag_encoding_unboxed 0x4000000000000001 = 0x7ffffffffffffffd + zigzag_encoding_unboxed 0x7fffffff80000000 = 0x00000000ffffffff + zigzag_encoding_unboxed 0x7fffffff80000001 = 0x00000000fffffffd + zigzag_encoding_unboxed 0x7ffffffffffffffe = 0x0000000000000003 + zigzag_encoding_unboxed 0x7fffffffffffffff = 0x0000000000000001 + zigzag_encoding_unboxed 0x0000000000000000 = 0x0000000000000000 + zigzag_encoding_unboxed 0x0000000000000001 = 0x0000000000000002 + zigzag_encoding_unboxed 0x0000000000000002 = 0x0000000000000004 + zigzag_encoding_unboxed 0x0000000000000003 = 0x0000000000000006 + zigzag_encoding_unboxed 0x000000007ffffffe = 0x00000000fffffffc + zigzag_encoding_unboxed 0x000000007fffffff = 0x00000000fffffffe + zigzag_encoding_unboxed 0x3ffffffffffffffe = 0x7ffffffffffffffc + zigzag_encoding_unboxed 0x3fffffffffffffff = 0x7ffffffffffffffe + zigzag_encoding_unboxed 0x7ffffffffffffffe = 0x0000000000000003 + zigzag_encoding_unboxed 0x7fffffffffffffff = 0x0000000000000001 + |}] diff --git a/test/int_types_native_test.ml b/test/int_types_native_test.ml index 59d176a..eb19556 100644 --- a/test/int_types_native_test.ml +++ b/test/int_types_native_test.ml @@ -5,7 +5,7 @@ let proto_file = "int_types_native.proto" let test_signed64 (type t) ~(create : Int64.t -> t) (module T : Test_lib.T with type t = t) = Printf.printf "Test %s\n%!" (T.name ()); - let values = [-1073741823L; -2L; -1L; 0L; 1L; 2L; 1073741823L] in + let values = [Int64.min_int; Int64.succ Int64.min_int; -1073741823L; -2L; -1L; 0L; 1L; 2L; 1073741823L; Int64.pred Int64.max_int; Int64.max_int] in List.iter ~f:(fun v -> Test_lib.test_encode ~proto_file (module T) (create v)) values @@ -19,14 +19,14 @@ let test_unsigned64 (type t) ~(create : Int64.t -> t) (module T : Test_lib.T wit let test_signed32 (type t) ~(create : Int32.t -> t) (module T : Test_lib.T with type t = t) = Printf.printf "Test %s\n%!" (T.name ()); - let values = [-1073741823l; -2l; -1l; 0l; 1l; 2l; 1073741823l] in + let values = [Int32.min_int; Int32.succ Int32.min_int; -1073741823l; -2l; -1l; 0l; 1l; 2l; 1073741823l; Int32.pred Int32.max_int; Int32.max_int] in List.iter ~f:(fun v -> Test_lib.test_encode ~proto_file (module T) (create v)) values let test_unsigned32 (type t) ~(create : Int32.t -> t) (module T : Test_lib.T with type t = t) = Printf.printf "Test %s\n%!" (T.name ()); - let values = [0l; 1l; 2l; 2147483647l] in + let values = [0l; 1l; 2l; 1073741823l; Int32.pred Int32.max_int; Int32.max_int] in List.iter ~f:(fun v -> Test_lib.test_encode ~proto_file (module T) (create v)) values @@ -37,12 +37,17 @@ let%expect_test _ = test_signed64 ~create (module T); [%expect {| Test .int_types_native.SInt64 + i: -9223372036854775808 + i: -9223372036854775807 i: -1073741823 i: -2 i: -1 i: 1 i: 2 - i: 1073741823 |}] + i: 1073741823 + i: 9223372036854775806 + i: 9223372036854775807 + |}] let%expect_test _ = let module T = Int_types_native.SInt32 in @@ -50,12 +55,17 @@ let%expect_test _ = test_signed32 ~create (module T); [%expect {| Test .int_types_native.SInt32 + i: -2147483648 + i: -2147483647 i: -1073741823 i: -2 i: -1 i: 1 i: 2 - i: 1073741823 |}] + i: 1073741823 + i: 2147483646 + i: 2147483647 + |}] let%expect_test _ = let module T = Int_types_native.Int64 in @@ -63,12 +73,17 @@ let%expect_test _ = test_signed64 ~create (module T); [%expect {| Test .int_types_native.Int64 + i: -9223372036854775808 + i: -9223372036854775807 i: -1073741823 i: -2 i: -1 i: 1 i: 2 - i: 1073741823 |}] + i: 1073741823 + i: 9223372036854775806 + i: 9223372036854775807 + |}] let%expect_test _ = let module T = Int_types_native.Int32 in @@ -77,12 +92,17 @@ let%expect_test _ = [%expect {| Test .int_types_native.Int32 + i: -2147483648 + i: -2147483647 i: -1073741823 i: -2 i: -1 i: 1 i: 2 - i: 1073741823 |}] + i: 1073741823 + i: 2147483646 + i: 2147483647 + |}] let%expect_test _ = let module T = Int_types_native.UInt64 in @@ -102,4 +122,7 @@ let%expect_test _ = Test .int_types_native.UInt32 i: 1 i: 2 - i: 2147483647 |}] + i: 1073741823 + i: 2147483646 + i: 2147483647 + |}]