diff --git a/src/dev_b32_name.erl b/src/dev_b32_name.erl index e35c2200d..af05b393d 100644 --- a/src/dev_b32_name.erl +++ b/src/dev_b32_name.erl @@ -1,7 +1,9 @@ %%% @doc Allows Arweave message IDs to be used via their base32 encoding as %%% subdomains on a HyperBEAM node. -module(dev_b32_name). --export([info/1]). +-export([info/1, decode/1]). +%%% Public helpers. +-export([ encode/1]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). diff --git a/src/dev_cache.erl b/src/dev_cache.erl index 5dde59030..b7ba552cd 100644 --- a/src/dev_cache.erl +++ b/src/dev_cache.erl @@ -16,17 +16,39 @@ expected_response(Base, Req, Opts) -> {ok, Response} ?= hb_maps:find(<<"body">>, Req, Opts), {ok, Expected} ?= hb_maps:find(<<"expected">>, Base, Opts), true ?= check_response_matches_expected(Response, Expected, Opts), - dev_hook:on( - [<<"~cache@1.0">>, <<"admissible-response">>], - Response, - Opts - ), + fire_admissible_response_hook(Base, Expected, Opts), {ok, true} else Reason -> ?event(debug_admissible, {expected_response_error, Reason}), {ok, false} end. +%% @doc Fire the `admissible-response' hook. Spawns by default so the cache +%% read returns without waiting for downstream handlers. +%% Set `admissible_response_hook_async => false' in `Opts' to run synchronously +fire_admissible_response_hook(Base, Expected, Opts) -> + Run = + fun() -> + dev_hook:on( + [<<"~cache@1.0">>, <<"admissible-response">>], + #{ <<"body">> => admissible_response_body(Base, Expected, Opts) }, + Opts + ) + end, + case hb_opts:get(admissible_response_hook_async, true, Opts) of + false -> Run(); + _ -> + spawn(fun() -> + try Run() + catch C:R:S -> + ?event(error, + {admissible_response_hook_async_error, + {class, C}, {reason, R}, + {stacktrace, {trace, S}}}) + end + end) + end. + %% @doc Verify that a response from a remote cache matches the expected ID. %% There are three cases: %% 1. The response is a raw binary (e.g., a direct content-addressed read @@ -84,6 +106,24 @@ check_response_matches_expected(Response, Expected, Opts) -> true end. +%% @doc Build the body for the `admissible-response' hook. Optionally signs it +%% when `commit_hook_response' is set, so downstream handlers can verify the +%% node attested to admissibility. +admissible_response_body(Base, ID, Opts) -> + Ref = hb_maps:get(<<"http-reference">>, Base, <<>>, Opts), + Body = #{ + <<"reference">> => Ref, + <<"status-class">> => <<"success">>, + <<"event">> => <<"is_admissible">> + }, + ?event(admissible_short, {response_success, {id, ID}, {responded_by, Ref}}), + case hb_opts:get(commit_hook_response, false, Opts) of + true -> + hb_message:commit(Body, Opts#{ priv_wallet => hb:wallet() }); + _ -> + Body + end. + %% @doc Read data from the cache. %% Retrieves data corresponding to a key from a local store. %% The key is extracted from the incoming message under <<"target">>. diff --git a/src/dev_chance.erl b/src/dev_chance.erl new file mode 100644 index 000000000..6d99ad969 --- /dev/null +++ b/src/dev_chance.erl @@ -0,0 +1,30 @@ +%%% @doc A generic probabilistic gate device. When accessed as +%%% `~chance@1.0/N', the key `N' is parsed as an integer rate. +%%% A 1-in-N random check is performed: +%%% +%%% {ok, Req} -- check passed, hook chain continues +%%% {error, _} -- check failed, hook chain halts +%%% +%%% This device uses the 4-arity default handler pattern so that the +%%% rate is supplied via the URL path rather than a config key. +-module(dev_chance). +-export([info/1]). +-include("include/hb.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +info(_) -> #{ default => fun default/4 }. + +default(Key, _Base, Req, _Opts) -> + try binary_to_integer(Key) of + Rate when Rate > 0 -> + ?event({request, {rate, Rate}}), + case rand:uniform(Rate) =:= 1 of + true -> {ok, Req}; + false -> {error, <<"Filtered by chance gate.">>} + end; + _ -> + {error, <<"chance@1.0 rate must be a positive integer.">>} + catch + error:badarg -> + {error, <<"chance@1.0 rate must be a valid integer.">>} + end. diff --git a/src/dev_codec_httpsig_siginfo.erl b/src/dev_codec_httpsig_siginfo.erl index 493f9421e..5f857e698 100644 --- a/src/dev_codec_httpsig_siginfo.erl +++ b/src/dev_codec_httpsig_siginfo.erl @@ -284,9 +284,15 @@ decoding_nested_map_binary(Bin) -> lists:foldl( fun (X, Acc) -> case binary:split(X, <<":">>, [global]) of - [ID, Key, Value] -> + [ID | Rest] when length(Rest) >= 2 -> + {KeyParts, [Value]} = + lists:split(length(Rest) - 1, Rest), + Key = + iolist_to_binary( + lists:join(<<":">>, KeyParts) + ), Acc#{ - ID => #{ + ID => #{ <<"name">> => Key, <<"value">> => hb_util:decode(Value) } diff --git a/src/dev_manifest.erl b/src/dev_manifest.erl index 395cc2492..62c13ed96 100644 --- a/src/dev_manifest.erl +++ b/src/dev_manifest.erl @@ -87,50 +87,77 @@ route(Key, M1, M2, Opts) -> %% @doc Implement the `on/request' hook for the `manifest@1.0' device, finding %% requests for legacy (non-device-tagged) manifests and casting them to -%% `manifest@1.0' before execution. Allowing `/ID/path` style access for old data. +%% `manifest@1.0' before execution. When a manifest is hit on the root domain +%% it redirects to its canonical b32 subdomain so browsers resolve absolute +%% asset paths correctly. request(Base, Req, Opts) -> ?event({on_req_manifest_detector, {base, Base}, {req, Req}}), maybe {ok, [PrimaryMsg|Rest]} ?= hb_maps:find(<<"body">>, Req, Opts), {ok, Loaded} ?= load(PrimaryMsg, Opts), ?event(debug_manifest, {loaded, Loaded}), - % Must handle three cases: - % 1. The maybe_cast is not a manifest, so we return the *loaded* request, - % such that the work to load it is not wasted. - % 2. The maybe_cast is a manifest, and there are no other elements of - % the path, so we add the `index' path and return. - % 3. The maybe_cast is a manifest, and there are other elements of - % the path, so we return the original request sequence with the first - % message replaced with the casted manifest. - case {Rest, maybe_cast_manifest(Loaded, Opts)} of - {_, ignored} -> - ?event( - debug_manifest, - {non_manifest_returning_loaded, {loaded, Loaded}, {rest, Rest}}), + case maybe_cast_manifest(Loaded, Opts) of + ignored -> {ok, Req#{ <<"body">> => [Loaded|Rest] }}; - {[], {ok, Casted}} -> - ?event(debug_manifest, {manifest_returning_index, {req, Req}}), - {ok, Req#{ <<"body">> => [Casted, #{<<"path">> => <<"index">>}] }}; - {_, {ok, Casted}} -> - ?event(debug_manifest, {manifest_returning_subpath, {req, Req}}), - {ok, Req#{ <<"body">> => [Casted|Rest] }} + {ok, Casted} -> + serve_or_redirect(PrimaryMsg, Casted, Rest, Req, Opts) end else {error, not_found} -> - ?event(debug_manifest, {not_found_on_load, {req, Req}}), - { - error, - #{ - <<"status">> => 404, - <<"body">> => <<"Not Found">> - } - }; - Error -> - ?event(debug_manifest, {request_ignored, {unexpected, Error}}), - % On other errors, we return the original request. + {error, #{<<"status">> => 404, <<"body">> => <<"Not Found">>}}; + _ -> {ok, Req} end. +serve_or_redirect(TxId, Casted, Rest, Req, Opts) -> + case needs_b32_redirect(TxId, Rest, Req, Opts) of + {redirect, Url} -> + redirect_response(Url); + no_redirect when Rest =:= [] -> + {ok, Req#{<<"body">> => [Casted, #{<<"path">> => <<"index">>}]}}; + no_redirect -> + {ok, Req#{<<"body">> => [Casted|Rest]}} + end. + +%% @doc A redirect is needed when the URL is `//...' on the root domain +%% and no device invocations in the tail, host present, and not already on the +%% canonical `.' subdomain. +needs_b32_redirect(TxId, Rest, Req, Opts) when ?IS_ID(TxId) -> + ReqInner = hb_maps:get(<<"request">>, Req, #{}, Opts), + Host = hb_maps:get(<<"host">>, ReqInner, <<>>, Opts), + B32 = dev_b32_name:encode(TxId), + Already = dev_name:name_from_host(Host, hb_opts:get(node_host, no_host, Opts)), + PlainTail = lists:all(fun(X) -> plain_path_segment(X, Opts) end, Rest), + case {Host =/= <<>>, PlainTail, Already} of + {true, true, NotCanonical} when NotCanonical =/= {ok, B32} -> + {redirect, b32_url(B32, ReqInner, Opts)}; + _ -> no_redirect + end; +needs_b32_redirect(_, _, _, _) -> no_redirect. + +%% @doc A plain path segment is a bare ID binary or a map without a device key. +plain_path_segment(X, _Opts) when is_binary(X) -> true; +plain_path_segment(M, Opts) when is_map(M) -> + not hb_maps:is_key(<<"device">>, M, Opts); +plain_path_segment(_,_) -> false. + +b32_url(B32, ReqInner, Opts) -> + Host = hb_maps:get(<<"host">>, ReqInner, <<>>, Opts), + Path = hb_maps:get(<<"path">>, ReqInner, <<"/">>, Opts), + HostPort = case hb_maps:get(<<"port">>, ReqInner, undefined, Opts) of + P when P == undefined; P == 80; P == 443 -> Host; + P -> <> + end, + <<"//", B32/binary, ".", HostPort/binary, Path/binary>>. + +redirect_response(Url) -> + ?event(debug_manifest, {b32_redirect, {url, Url}}), + {error, #{ + <<"status">> => 302, + <<"location">> => Url, + <<"body">> => <<"Redirecting to canonical manifest subdomain: ", Url/binary>> + }}. + %% @doc Cast a message to `manifest@1.0` if it has the correct content-type but %% no other device is specified. load(Msg, _Opts) when is_map(Msg) -> {ok, Msg}; diff --git a/src/dev_name.erl b/src/dev_name.erl index 1331e3f2d..04655bc32 100644 --- a/src/dev_name.erl +++ b/src/dev_name.erl @@ -4,9 +4,9 @@ %%% match the key against each resolver in turn, and return the value of the %%% first resolver that matches. -module(dev_name). --export([info/1, request/3]). +-export([info/1, request/3, name_from_host/2]). %%% Public helpers. --export([test_arns_opts/0]). +-export([test_arns_opts/0, name_from_host/2]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). diff --git a/src/dev_relay.erl b/src/dev_relay.erl index c5782ff1f..9760fb63c 100644 --- a/src/dev_relay.erl +++ b/src/dev_relay.erl @@ -36,10 +36,10 @@ call(M1, RawM2, Opts) -> RelayPath = hb_ao:get_first( [ - {M1, <<"path">>}, - {{as, <<"message@1.0">>, BaseTarget}, <<"path">>}, {RawM2, <<"relay-path">>}, - {M1, <<"relay-path">>} + {M1, <<"relay-path">>}, + {M1, <<"path">>}, + {{as, <<"message@1.0">>, BaseTarget}, <<"path">>} ], Opts ), diff --git a/src/hb_http.erl b/src/hb_http.erl index fefaf4f26..61ac40dfc 100644 --- a/src/hb_http.erl +++ b/src/hb_http.erl @@ -283,9 +283,10 @@ http_response_to_httpsig(Status, HeaderMap, Body, Opts) -> 0 -> #{}; _ -> #{ <<"body">> => Body } end, - ConvertFrom = + NormalizedHeaders = lowercase_header_keys(HeaderMap), + ConvertFrom = hb_maps:merge( - HeaderMap#{ <<"status">> => BinStatus }, + NormalizedHeaders#{ <<"status">> => BinStatus }, BodyMap, Opts ), @@ -296,6 +297,19 @@ http_response_to_httpsig(Status, HeaderMap, Body, Opts) -> Opts ))#{ <<"status">> => hb_util:int(Status) }. +%% @doc Lowercase all binary header keys. Applied at every inbound transport +%% boundary so codecs see stable keys regardless of peer or proxy casing. +%% HTTP/1.1 (RFC 7230) field names are case-insensitive; HTTP/2 (RFC 7540) +%% mandates lowercase. Values are left untouched. +lowercase_header_keys(Headers) when is_map(Headers) -> + maps:fold( + fun(K, V, Acc) when is_binary(K) -> Acc#{string:lowercase(K) => V}; + (K, V, Acc) -> Acc#{K => V} + end, + #{}, + Headers + ). + %% @doc Given a message, return the information needed to make the request. message_to_request(M, Opts) -> % Get the route for the message @@ -529,6 +543,7 @@ reply(InitReq, TABMReq, RawStatus, RawMessage, Opts) -> ?event(http_server_short, {sent, {status, Status}, + {host, get_host(TABMReq, Opts)}, {duration, EndTime - hb_maps:get(start_time, Req, undefined, Opts)}, {body_size, byte_size(EncodedBody)}, {method, cowboy_req:method(Req)}, @@ -854,7 +869,7 @@ req_to_tabm_singleton(Req, Body, Opts) -> "?", (cowboy_req:qs(Req))/binary >>, - Headers = cowboy_req:headers(Req), + Headers = lowercase_header_keys(cowboy_req:headers(Req)), {ok, _Path, QueryKeys} = hb_singleton:from_path(FullPath), PrimitiveMsg = maps:merge(Headers, QueryKeys), Codec = @@ -1096,7 +1111,8 @@ normalize_unsigned(PrimMsg, Req = #{ headers := RawHeaders }, Msg, Opts) -> Device -> WithPrivIP#{<<"device">> => Device} end, Host = cowboy_req:host(Req), - WithDevice#{<<"host">> => Host}. + Port = cowboy_req:port(Req), + WithDevice#{<<"host">> => Host, <<"port">> => Port}. %% @doc Determine the caller, honoring the `x-real-ip' header if present. real_ip(Req = #{ headers := RawHeaders }, Opts) -> @@ -1112,6 +1128,18 @@ real_ip(Req = #{ headers := RawHeaders }, Opts) -> IP -> IP end. +get_host(TABMReq, Opts) -> + Host = hb_maps:get(<<"host">>, TABMReq, <<"no_host">>, Opts), + MsgNode = hb_opts:get(node_host, hb_opts:get(host, no_host, Opts), Opts), + case dev_name:name_from_host(Host, MsgNode) of + {ok, Name} -> + case dev_b32_name:decode(Name) of + error -> Name; + TXID -> {decoded, {explicit, TXID}} + end; + {skip, _} -> no_host + end. + %%% Metrics init_prometheus() -> diff --git a/src/hb_http_multi.erl b/src/hb_http_multi.erl index 5795acfa2..29d680296 100644 --- a/src/hb_http_multi.erl +++ b/src/hb_http_multi.erl @@ -129,7 +129,7 @@ multirequest_opt(Key, Config, Message, Default, Opts) -> %% %% If the response is `ok', we check the status and the response message against %% the configuration. -is_admissible(ok, Res, Admissible, Statuses, Opts) -> +is_admissible(ok, Res, Admissible, Statuses, Node, Opts) -> ?event(debug_multi, {is_admissible, {response, Res}, @@ -139,10 +139,11 @@ is_admissible(ok, Res, Admissible, Statuses, Opts) -> ), AdmissibleStatus = admissible_status(Res, Statuses), ?event(debug_multi, {admissible_status, {result, AdmissibleStatus}}), - AdmissibleResponse = admissible_response(Res, Admissible, Opts), + NodeOpts = hb_maps:get(<<"opts">>, Node, Opts), + AdmissibleResponse = admissible_response(Res, Admissible, NodeOpts, Opts), ?event(debug_multi, {admissible_response, {result, AdmissibleResponse}}), AdmissibleStatus andalso AdmissibleResponse; -is_admissible(_, _, _, _, _) -> false. +is_admissible(_, _, _, _, _, _) -> false. %% @doc Serially request a message, collecting responses until the required %% number of responses have been gathered. Ensure that the statuses are @@ -153,7 +154,7 @@ serial_multirequest(_Nodes, 0, _Method, _Path, _Message, _Admissible, _Statuses, serial_multirequest([], _, _Method, _Path, _Message, _Admissible, _Statuses, _Opts) -> {[], []}; serial_multirequest([Node|Nodes], Remaining, Method, Path, Message, Admissible, Statuses, Opts) -> {ErlStatus, Res} = hb_http:request(Method, Node, Path, Message, Opts), - case is_admissible(ErlStatus, Res, Admissible, Statuses, Opts) of + case is_admissible(ErlStatus, Res, Admissible, Statuses, Node, Opts) of true -> ?event(debug_http, {admissible_status, {response, Res}}), {AdmissibleAcc, AllAcc} = serial_multirequest( @@ -200,12 +201,12 @@ start_workers(Count, Ref, Nodes, Method, Path, Message, Opts) -> fun(Node) -> spawn( fun() -> - Res = + {Status, Res} = try hb_http:request(Method, Node, Path, Message, Opts) catch C:R -> {error, {worker_crash, C, R}} end, receive no_reply -> stopping - after 0 -> Parent ! {Ref, self(), Res} + after 0 -> Parent ! {Ref, self(), {Status, Res, Node}} end end ) @@ -236,8 +237,8 @@ admissible_status(Status, Statuses) when is_list(Statuses) -> %% @doc If an `admissable` message is set for the request, check if the response %% adheres to it. Else, return `true'. -admissible_response(_Response, undefined, _Opts) -> true; -admissible_response(Response, IsAdmissible, Opts) -> +admissible_response(_Response, undefined, _NodeOpts, _Opts) -> true; +admissible_response(Response, IsAdmissible, NOpts, Opts) -> Req = IsAdmissible#{ <<"path">> => @@ -248,7 +249,7 @@ admissible_response(Response, IsAdmissible, Opts) -> Opts ), <<"body">> => Response, - <<"http-reference">> => hb_opts:get(http_reference, undefined, Opts) + <<"http-reference">> => hb_opts:get(http_reference, undefined, NOpts) }, ?event(debug_admissible, {admissible_response, {request, Req}, {opts, Opts}}), try hb_ao:resolve(Req, Opts#{ hashpath => ignore }) of @@ -287,13 +288,13 @@ parallel_responses(AdmissibleRes, AllRes, Procs, _, _, Ref, 0, true, _Admissible {AdmissibleRes, AllRes}; parallel_responses(AdmissibleRes, AllRes, Procs, Queue, {Method, Path, Message}, Ref, Awaiting, StopAfter, Admissible, Statuses, Opts) -> receive - {Ref, Pid, {Status, NewRes}} -> + {Ref, Pid, {Status, NewRes, Node}} -> WorkersWithoutPid = lists:delete(Pid, Procs), {RefilledWorkers, NewQueue} = start_workers(1, Ref, Queue, Method, Path, Message, Opts), NewProcs = RefilledWorkers ++ WorkersWithoutPid, NewAllRes = [{Status, NewRes} | AllRes], - case is_admissible(Status, NewRes, Admissible, Statuses, Opts) of + case is_admissible(Status, NewRes, Admissible, Statuses, Node, Opts) of true -> parallel_responses( [{Status, NewRes} | AdmissibleRes], diff --git a/src/hb_opts.erl b/src/hb_opts.erl index 4d169cf88..3fdcbacc2 100644 --- a/src/hb_opts.erl +++ b/src/hb_opts.erl @@ -205,6 +205,7 @@ default_message() -> #{<<"name">> => <<"message@1.0">>, <<"module">> => dev_message}, #{<<"name">> => <<"meta@1.0">>, <<"module">> => dev_meta}, #{<<"name">> => <<"monitor@1.0">>, <<"module">> => dev_monitor}, + #{<<"name">> => <<"chance@1.0">>, <<"module">> => dev_chance}, #{<<"name">> => <<"multipass@1.0">>, <<"module">> => dev_multipass}, #{<<"name">> => <<"name@1.0">>, <<"module">> => dev_name}, #{<<"name">> => <<"node-process@1.0">>, <<"module">> => dev_node_process}, diff --git a/src/hb_store_remote_node.erl b/src/hb_store_remote_node.erl index 7845c0e54..17483538f 100644 --- a/src/hb_store_remote_node.erl +++ b/src/hb_store_remote_node.erl @@ -95,16 +95,22 @@ read(StoreOpts = #{ <<"nodes">> := Nodes }, Key) -> error -> #{} end, ?event({remote_read, {request, HTTPReq}, {hooks, MaybeHooks}}), + MaybeCommitHookRes = + case maps:find(<<"commit-hook-response">>, StoreOpts) of + {ok, true} -> MaybeHooks#{ commit_hook_response => true }; + _ -> MaybeHooks + end, HTTPRes = hb_http:request( HTTPReq, - MaybeHooks#{ + MaybeCommitHookRes#{ cache_control => [<<"no-cache">>, <<"no-store">>], routes => [ #{ <<"template">> => <<"/~cache@1.0/read">>, - <<"nodes">> => Nodes + <<"nodes">> => Nodes, + <<"parallel">> => true } ] } @@ -281,10 +287,16 @@ multinode_env() -> <<"store-module">> => hb_store_remote_node, <<"max-retries">> => 0, <<"nodes">> => [ - #{ <<"prefix">> => Node1, <<"http-reference">> => <<"node1">> }, - #{ <<"prefix">> => Node2, <<"http-reference">> => <<"node2">> } + #{ + <<"prefix">> => Node1, + <<"opts">> => #{ <<"http-reference">> => <<"node1">> } + }, + #{ + <<"prefix">> => Node2, + <<"opts">> => #{ <<"http-reference">> => <<"node2">> } + } ], - <<"parallel">> => 1 + <<"parallel">> => true }, #{ ids_single => [ID1, ID2],