Skip to content

Commit ad23a45

Browse files
committed
poc: rewrite urlgetter using step-by-step
This diff shows how we could incrementally rewrite urlgetter using a step-by-step measurement style. Additionally, this diff modifies the facebook_messanger experiment to show what changes are required to upgrade it. The general idea of these changes is to incrementally move experiments away from depending on ./internal/experiment/urlgetter, and instead use a near drop-in replacement implementation, implemented in ./internal/urlgetter, which uses step-by-step to measure. Because ./internal/experiment/urlgetter depends on ./internal/legacy/netx and, instead, ./internal/urlgetter depends on ./internal/measurexlite, by performing this kind of migration we make ./internal/legacy/netx unnecessary. Also, because most users of ./internal/experiment/urlgetter only use limited functionality, incremental refactoring would be possible. Reference issue: ooni/probe#2751.
1 parent 5be3a9a commit ad23a45

16 files changed

Lines changed: 1394 additions & 39 deletions

File tree

internal/experiment/fbmessenger/fbmessenger.go

Lines changed: 39 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ import (
88
"math/rand"
99
"time"
1010

11-
"github.com/ooni/probe-cli/v3/internal/experiment/urlgetter"
1211
"github.com/ooni/probe-cli/v3/internal/model"
1312
"github.com/ooni/probe-cli/v3/internal/netxlite"
13+
"github.com/ooni/probe-cli/v3/internal/urlgetter"
1414
)
1515

1616
const (
@@ -78,15 +78,21 @@ type Analysis struct {
7878
}
7979

8080
// Update updates the TestKeys using the given MultiOutput result.
81-
func (tk *TestKeys) Update(v urlgetter.MultiOutput) {
81+
func (tk *TestKeys) Update(v *urlgetter.MultiResult) {
82+
// handle the case where there are no test keys
83+
if v.TestKeys.Err != nil {
84+
return
85+
}
86+
8287
// Update the easy to update entries first
83-
tk.NetworkEvents = append(tk.NetworkEvents, v.TestKeys.NetworkEvents...)
84-
tk.Queries = append(tk.Queries, v.TestKeys.Queries...)
85-
tk.Requests = append(tk.Requests, v.TestKeys.Requests...)
86-
tk.TCPConnect = append(tk.TCPConnect, v.TestKeys.TCPConnect...)
87-
tk.TLSHandshakes = append(tk.TLSHandshakes, v.TestKeys.TLSHandshakes...)
88+
tk.NetworkEvents = append(tk.NetworkEvents, v.TestKeys.Value.NetworkEvents...)
89+
tk.Queries = append(tk.Queries, v.TestKeys.Value.Queries...)
90+
tk.Requests = append(tk.Requests, v.TestKeys.Value.Requests...)
91+
tk.TCPConnect = append(tk.TCPConnect, v.TestKeys.Value.TCPConnect...)
92+
tk.TLSHandshakes = append(tk.TLSHandshakes, v.TestKeys.Value.TLSHandshakes...)
93+
8894
// Set the status of endpoints
89-
switch v.Input.Target {
95+
switch v.Target.URL {
9096
case ServiceSTUN:
9197
var ignored *bool
9298
tk.ComputeEndpointStatus(v, &tk.FacebookSTUNDNSConsistent, &ignored)
@@ -117,16 +123,22 @@ var (
117123
)
118124

119125
// ComputeEndpointStatus computes the DNS and TCP status of a specific endpoint.
120-
func (tk *TestKeys) ComputeEndpointStatus(v urlgetter.MultiOutput, dns, tcp **bool) {
126+
func (tk *TestKeys) ComputeEndpointStatus(v *urlgetter.MultiResult, dns, tcp **bool) {
121127
// start where all is unknown
122128
*dns, *tcp = nil, nil
129+
130+
// handle the case where there are no test keys
131+
if v.TestKeys.Err != nil {
132+
return
133+
}
134+
123135
// process DNS first
124-
if v.TestKeys.FailedOperation != nil && *v.TestKeys.FailedOperation == netxlite.ResolveOperation {
136+
if v.TestKeys.Value.FailedOperation.UnwrapOr("") == netxlite.ResolveOperation {
125137
tk.FacebookDNSBlocking = &trueValue
126138
*dns = &falseValue
127139
return // we know that the DNS has failed
128140
}
129-
for _, query := range v.TestKeys.Queries {
141+
for _, query := range v.TestKeys.Value.Queries {
130142
for _, ans := range query.Answers {
131143
if ans.ASN != FacebookASN {
132144
tk.FacebookDNSBlocking = &trueValue
@@ -137,7 +149,7 @@ func (tk *TestKeys) ComputeEndpointStatus(v urlgetter.MultiOutput, dns, tcp **bo
137149
}
138150
*dns = &trueValue
139151
// now process connect
140-
if v.TestKeys.FailedOperation != nil && *v.TestKeys.FailedOperation == netxlite.ConnectOperation {
152+
if v.TestKeys.Value.FailedOperation.UnwrapOr("") == netxlite.ConnectOperation {
141153
tk.FacebookTCPBlocking = &trueValue
142154
*tcp = &falseValue
143155
return // because connect failed
@@ -151,9 +163,6 @@ type Measurer struct {
151163
// Config contains the experiment settings. If empty we
152164
// will be using default settings.
153165
Config Config
154-
155-
// Getter is an optional getter to be used for testing.
156-
Getter urlgetter.MultiGetter
157166
}
158167

159168
// ExperimentName implements ExperimentMeasurer.ExperimentName
@@ -173,24 +182,32 @@ func (m Measurer) Run(ctx context.Context, args *model.ExperimentArgs) error {
173182
sess := args.Session
174183
ctx, cancel := context.WithTimeout(ctx, 60*time.Second)
175184
defer cancel()
176-
urlgetter.RegisterExtensions(measurement)
185+
//urlgetter.RegisterExtensions(measurement) // TODO(bassosimone)
186+
177187
// generate targets
178-
var inputs []urlgetter.MultiInput
188+
var inputs []*urlgetter.EasyTarget
179189
for _, service := range Services {
180-
inputs = append(inputs, urlgetter.MultiInput{Target: service})
190+
inputs = append(inputs, &urlgetter.EasyTarget{URL: service})
181191
}
182-
rnd := rand.New(rand.NewSource(time.Now().UnixNano())) // #nosec G404 -- not really important
183-
rnd.Shuffle(len(inputs), func(i, j int) {
192+
rand.Shuffle(len(inputs), func(i, j int) {
184193
inputs[i], inputs[j] = inputs[j], inputs[i]
185194
})
195+
186196
// measure in parallel
187-
multi := urlgetter.Multi{Begin: time.Now(), Getter: m.Getter, Session: sess}
197+
multi := &urlgetter.MultiHandle{
198+
Begin: time.Now(),
199+
IndexGen: &urlgetter.IndexGen{},
200+
Session: sess,
201+
}
188202
testkeys := new(TestKeys)
189203
testkeys.Agent = "redirect"
190204
measurement.TestKeys = testkeys
191-
for entry := range multi.Collect(ctx, inputs, "facebook_messenger", callbacks) {
205+
results := urlgetter.MultiCollect(callbacks, 0, len(inputs),
206+
"facebook_messenger", multi.Run(ctx, inputs...))
207+
for entry := range results {
192208
testkeys.Update(entry)
193209
}
210+
194211
// if we haven't yet determined the status of DNS blocking and TCP blocking
195212
// then no blocking has been detected and we can set them
196213
if testkeys.FacebookDNSBlocking == nil {

internal/experiment/fbmessenger/fbmessenger_test.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,16 @@ package fbmessenger_test
22

33
import (
44
"context"
5-
"io"
65
"net/url"
76
"testing"
87

98
"github.com/google/go-cmp/cmp"
109
"github.com/google/gopacket/layers"
1110
"github.com/ooni/netem"
1211
"github.com/ooni/probe-cli/v3/internal/experiment/fbmessenger"
13-
"github.com/ooni/probe-cli/v3/internal/experiment/urlgetter"
14-
"github.com/ooni/probe-cli/v3/internal/legacy/tracex"
1512
"github.com/ooni/probe-cli/v3/internal/mocks"
1613
"github.com/ooni/probe-cli/v3/internal/model"
1714
"github.com/ooni/probe-cli/v3/internal/netemx"
18-
"github.com/ooni/probe-cli/v3/internal/netxlite"
1915
"github.com/ooni/probe-cli/v3/internal/runtimex"
2016
)
2117

@@ -326,6 +322,7 @@ func TestMeasurerRun(t *testing.T) {
326322
})
327323
}
328324

325+
/*
329326
func TestComputeEndpointStatsTCPBlocking(t *testing.T) {
330327
failure := io.EOF.Error()
331328
operation := netxlite.ConnectOperation
@@ -385,6 +382,7 @@ func TestComputeEndpointStatsDNSIsLying(t *testing.T) {
385382
t.Fatal("invalid FacebookTCPBlocking")
386383
}
387384
}
385+
*/
388386

389387
func TestSummaryKeysWithNils(t *testing.T) {
390388
measurement := &model.Measurement{TestKeys: &fbmessenger.TestKeys{}}

internal/measurexlite/conn.go

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,16 @@ package measurexlite
66

77
import (
88
"fmt"
9+
"io"
910
"net"
1011
"time"
1112

1213
"github.com/ooni/probe-cli/v3/internal/model"
1314
"github.com/ooni/probe-cli/v3/internal/netxlite"
1415
)
1516

16-
// MaybeClose is a convenience function for closing a [net.Conn] when it is not nil.
17-
func MaybeClose(conn net.Conn) (err error) {
17+
// MaybeClose is a convenience function for closing a [io.Closer] when it is not nil.
18+
func MaybeClose(conn io.Closer) (err error) {
1819
if conn != nil {
1920
err = conn.Close()
2021
}
@@ -39,18 +40,21 @@ type connTrace struct {
3940

4041
var _ net.Conn = &connTrace{}
4142

42-
type remoteAddrProvider interface {
43+
// RemoteAddrProvider is something returning the remote address.
44+
type RemoteAddrProvider interface {
4345
RemoteAddr() net.Addr
4446
}
4547

46-
func safeRemoteAddrNetwork(rap remoteAddrProvider) (result string) {
48+
// SafeRemoteAddrNetwork is a safe accessor to get the remote addr network.
49+
func SafeRemoteAddrNetwork(rap RemoteAddrProvider) (result string) {
4750
if addr := rap.RemoteAddr(); addr != nil {
4851
result = addr.Network()
4952
}
5053
return result
5154
}
5255

53-
func safeRemoteAddrString(rap remoteAddrProvider) (result string) {
56+
// SafeRemoteAddrString is a safe accessor to get the remote addr string representation.
57+
func SafeRemoteAddrString(rap RemoteAddrProvider) (result string) {
5458
if addr := rap.RemoteAddr(); addr != nil {
5559
result = addr.String()
5660
}
@@ -60,8 +64,8 @@ func safeRemoteAddrString(rap remoteAddrProvider) (result string) {
6064
// Read implements net.Conn.Read and saves network events.
6165
func (c *connTrace) Read(b []byte) (int, error) {
6266
// collect preliminary stats when the connection is surely active
63-
network := safeRemoteAddrNetwork(c)
64-
addr := safeRemoteAddrString(c)
67+
network := SafeRemoteAddrNetwork(c)
68+
addr := SafeRemoteAddrString(c)
6569
started := c.tx.TimeSince(c.tx.ZeroTime())
6670

6771
// perform the underlying network operation
@@ -117,8 +121,8 @@ func (tx *Trace) CloneBytesReceivedMap() (out map[string]int64) {
117121

118122
// Write implements net.Conn.Write and saves network events.
119123
func (c *connTrace) Write(b []byte) (int, error) {
120-
network := safeRemoteAddrNetwork(c)
121-
addr := safeRemoteAddrString(c)
124+
network := SafeRemoteAddrNetwork(c)
125+
addr := SafeRemoteAddrString(c)
122126
started := c.tx.TimeSince(c.tx.ZeroTime())
123127

124128
count, err := c.Conn.Write(b)

internal/measurexlite/conn_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ func TestRemoteAddrProvider(t *testing.T) {
1919
return nil
2020
},
2121
}
22-
if safeRemoteAddrNetwork(conn) != "" {
22+
if SafeRemoteAddrNetwork(conn) != "" {
2323
t.Fatal("expected empty network")
2424
}
25-
if safeRemoteAddrString(conn) != "" {
25+
if SafeRemoteAddrString(conn) != "" {
2626
t.Fatal("expected empty string")
2727
}
2828
})
@@ -40,10 +40,10 @@ func TestRemoteAddrProvider(t *testing.T) {
4040
}
4141
},
4242
}
43-
if safeRemoteAddrNetwork(conn) != "tcp" {
43+
if SafeRemoteAddrNetwork(conn) != "tcp" {
4444
t.Fatal("unexpected network")
4545
}
46-
if safeRemoteAddrString(conn) != "1.1.1.1:443" {
46+
if SafeRemoteAddrString(conn) != "1.1.1.1:443" {
4747
t.Fatal("unexpected string")
4848
}
4949
})

internal/urlgetter/config.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package urlgetter
2+
3+
// Config contains the configuration.
4+
type Config struct {
5+
// HTTPHost allows overriding the default HTTP host.
6+
HTTPHost string `ooni:"Force using specific HTTP Host header"`
7+
8+
// HTTPReferer sets the HTTP referer value.
9+
HTTPReferer string `ooni:"Force using the specific HTTP Referer header"`
10+
11+
// Method selects the HTTP method to use.
12+
Method string `ooni:"Force HTTP method different than GET"`
13+
14+
// NoFollowRedirects disables following redirects.
15+
NoFollowRedirects bool `ooni:"Disable following redirects"`
16+
17+
// TLSNextProtos is an OPTIONAL comma separated ALPN list.
18+
TLSNextProtos string `ooni:"Comma-separated list of next protocols for ALPN"`
19+
20+
// TLSServerName is the OPTIONAL SNI value.
21+
TLSServerName string `ooni:"SNI value to use"`
22+
}
23+
24+
// Clone returns a deep copy of the given [*Config].
25+
func (cx *Config) Clone() *Config {
26+
return &Config{
27+
HTTPHost: cx.HTTPHost,
28+
HTTPReferer: cx.HTTPReferer,
29+
Method: cx.Method,
30+
NoFollowRedirects: cx.NoFollowRedirects,
31+
TLSNextProtos: cx.TLSNextProtos,
32+
TLSServerName: cx.TLSServerName,
33+
}
34+
}

internal/urlgetter/dnslookup.go

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
package urlgetter
2+
3+
import (
4+
"context"
5+
"net"
6+
"net/url"
7+
"time"
8+
9+
"github.com/ooni/probe-cli/v3/internal/logx"
10+
"github.com/ooni/probe-cli/v3/internal/measurexlite"
11+
"github.com/ooni/probe-cli/v3/internal/netxlite"
12+
)
13+
14+
// DNSLookup measures a dnslookup://domain/ URL.
15+
func (rx *Runner) DNSLookup(ctx context.Context, config *Config, URL *url.URL) error {
16+
_, err := rx.DNSLookupOp(ctx, config, URL)
17+
return err
18+
}
19+
20+
// DNSLookupResult contains the results of a DNS lookup.
21+
type DNSLookupResult struct {
22+
// Address is the resolved address.
23+
Address string
24+
25+
// Config is the original config.
26+
Config *Config
27+
28+
// URL is the original URL.
29+
URL *url.URL
30+
}
31+
32+
// endpoint returns an endpoint given the address and the URL scheme.
33+
func (rx *DNSLookupResult) endpoint() (string, error) {
34+
// handle the case where there is an explicit port
35+
if port := rx.URL.Port(); port != "" {
36+
return net.JoinHostPort(rx.Address, port), nil
37+
}
38+
39+
// use the scheme to deduce the port
40+
switch rx.URL.Scheme {
41+
case "http":
42+
return net.JoinHostPort(rx.Address, "80"), nil
43+
case "https":
44+
return net.JoinHostPort(rx.Address, "443"), nil
45+
case "dot":
46+
return net.JoinHostPort(rx.Address, "853"), nil
47+
default:
48+
return "", ErrUnknownURLScheme
49+
}
50+
}
51+
52+
// DNSLookupOp resolves a domain name using the configured resolver.
53+
func (rx *Runner) DNSLookupOp(ctx context.Context, config *Config, URL *url.URL) ([]*DNSLookupResult, error) {
54+
// TODO(bassosimone): choose the proper DNS resolver depending on configuration.
55+
return rx.DNSLookupGetaddrinfoOp(ctx, config, URL)
56+
}
57+
58+
// DNSLookupGetaddrinfoOp performs a DNS lookup using getaddrinfo.
59+
func (rx *Runner) DNSLookupGetaddrinfoOp(ctx context.Context, config *Config, URL *url.URL) ([]*DNSLookupResult, error) {
60+
// enforce timeout
61+
const timeout = 4 * time.Second
62+
ctx, cancel := context.WithTimeout(ctx, timeout)
63+
defer cancel()
64+
65+
// obtain the next trace index
66+
index := rx.IndexGen.Next()
67+
68+
// create trace using the given underlying network
69+
trace := measurexlite.NewTrace(index, rx.Begin)
70+
trace.Netx = &netxlite.Netx{Underlying: rx.UNet}
71+
72+
// obtain logger
73+
logger := rx.Session.Logger()
74+
75+
// create resolver
76+
reso := trace.NewStdlibResolver(logger)
77+
78+
// the domain to resolve is the URL's hostname
79+
domain := URL.Hostname()
80+
81+
// start operation logger
82+
ol := logx.NewOperationLogger(logger, "[#%d] lookup %s using getaddrinfo", index, domain)
83+
84+
// perform the lookup
85+
addrs, err := reso.LookupHost(ctx, domain)
86+
87+
// append the DNS lookup results
88+
rx.TestKeys.AppendQueries(trace.DNSLookupsFromRoundTrip()...)
89+
90+
// stop the operation logger
91+
ol.Stop(err)
92+
93+
// manually set the failure and failed operation
94+
if err != nil {
95+
rx.TestKeys.MaybeSetFailedOperation(netxlite.DNSRoundTripOperation)
96+
rx.TestKeys.MaybeSetFailure(err.Error())
97+
return nil, err
98+
}
99+
100+
// emit results
101+
var results []*DNSLookupResult
102+
for _, addr := range addrs {
103+
results = append(results, &DNSLookupResult{
104+
Address: addr,
105+
Config: config,
106+
URL: URL,
107+
})
108+
}
109+
return results, nil
110+
}

0 commit comments

Comments
 (0)