Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ jobs:
GH_REPOSITORY: ${{ github.repository }}
GH_LABELS: ${{ format('ci-storage-test-{0}-{1}', github.run_id, github.run_attempt) }}
TZ: America/Los_Angeles
FORWARD_HOST: host.docker.internal
FORWARD_HOST: "host.docker.internal:42 host.docker.internal:4242"

# Test the job with ci-storage-test tag which is initially queued, but then is
# picked up by the ci-runner container booted in the previous job. In the end,
Expand Down
6 changes: 5 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,9 @@
"strerror",
"tmpfs",
"topo"
]
],
"python.languageServer": "Default",
"cursorpyright.analysis.typeCheckingMode": "basic",
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true
}
6 changes: 3 additions & 3 deletions docker/ci-runner/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ RUN true \
# https://forums.docker.com/t/etc-init-d-docker-62-ulimit-error-setting-limit-invalid-argument-problem/139424
RUN true \
&& install -m 0755 -d /etc/apt/keyrings \
&& curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc \
&& curl -fsSL --retry 3 --retry-all-errors https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc \
&& chmod a+r /etc/apt/keyrings/docker.asc \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list \
&& apt-get update -y \
Expand Down Expand Up @@ -54,8 +54,8 @@ RUN true \
aarch64|arm64) arch=linux-arm64 ;; \
*) echo >&2 "unsupported architecture: $arch"; exit 1 ;; \
esac \
&& runner_version=$(curl --silent "https://api.github.com/repos/actions/runner/releases/latest" | jq -r ".tag_name[1:]") \
&& curl --no-progress-meter -L https://github.com/actions/runner/releases/download/v$runner_version/actions-runner-$arch-$runner_version.tar.gz | tar xz \
&& runner_version=$(curl -fsSL --retry 3 --retry-all-errors "https://api.github.com/repos/actions/runner/releases/latest" | jq -r ".tag_name[1:]") \
&& curl -fsSL --retry 3 --retry-all-errors --no-progress-meter https://github.com/actions/runner/releases/download/v$runner_version/actions-runner-$arch-$runner_version.tar.gz | tar xz \
&& date > .updated_at

# Install OS dependencies needed by the action runner.
Expand Down
9 changes: 7 additions & 2 deletions docker/ci-runner/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,15 @@ self-hosted runners as you want. An example scenario:
- `GH_LABELS` (required): labels added to this runner, comma-separated
- `TZ` (optional): timezone name
- `FORWARD_HOST` (optional): some ports at localhost (provided in
FORWARD_PORTS) will be forwarded to this host
FORWARD_PORTS) will be forwarded to this host; can also be a
space-separated list of hosts, in which case the 1st host acts as the
primary and the rest are backups (when the primary is unavailable, the
first available backup server receives all traffic)
- `FORWARD_PORTS` (optional): a space-delimited list of forwarded TCP or UDP
ports; any port number may be suffixed with "/udp" to forward UDP, e.g.
"12345/udp"
"12345/udp"; if a port is suffixed with "/tcp-backup", e.g.
"12345/tcp-backup", then the primary and backup hosts are flipped (i.e. the
traffic is first sent to the backup host and only then, if it's not
available, to the primary host)
- `CI_STORAGE_HOST` (optional): the host which the initial ci-storage run
will pull the data from; it is often set to "127.0.0.1:10022", where
10022 is an example of SSH port forwarded via FORWARD_HOST/FORWARD_PORTS
Expand Down
4 changes: 2 additions & 2 deletions docker/ci-runner/guest/entrypoint.03-update.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ if [[ ! -f "$updated_at_file" || "$(find . -name "$updated_at_file" -mtime +21)"
esac

say "Getting the latest runner version using HEAD to avoid rate limiting (previously updated at $(cat $updated_at_file))..."
runner_location=$(curl --head -sS --fail https://github.com/actions/runner/releases/latest | sed 's/\r$//' | grep -i "location:")
runner_location=$(curl -fsSL --retry 3 --retry-all-errors --head https://github.com/actions/runner/releases/latest | sed 's/\r$//' | grep -i "location:")
runner_version="${runner_location##*/tag/v}"

if [[ "$runner_version" == *.*.* ]]; then
Expand All @@ -32,7 +32,7 @@ if [[ ! -f "$updated_at_file" || "$(find . -name "$updated_at_file" -mtime +21)"

if [[ ! -r "$path" ]]; then
say "Downloading $url to $CACHE_DIR..."
curl --no-progress-meter -L "$url" > "$path.tmp"
curl -fsSL --retry 3 --retry-all-errors --no-progress-meter "$url" > "$path.tmp"
mv -f "$path.tmp" "$path"
else
say "Using previously downloaded $path"
Expand Down
10 changes: 5 additions & 5 deletions docker/ci-runner/root/entrypoint.01-validate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,19 @@ fi

export TZ
if [[ "${TZ:=}" != "" && ! "$TZ" =~ ^[-+_/a-zA-Z0-9]+$ ]]; then
say "If TZ is passed, it must be a valid TZ Idenfitier from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones"
say "If TZ is passed, it must be a valid TZ Identifier from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones"
exit 1
fi

export FORWARD_HOST
if [[ "${FORWARD_HOST:=}" != "" && ! "$FORWARD_HOST" =~ ^[-.[:alnum:]]+(:[0-9]+)?$ ]]; then
say "If FORWARD_HOST is passed, it must be a hostname."
if [[ "${FORWARD_HOST:=}" != "" && ! "$FORWARD_HOST" =~ ^([-.[:alnum:]]+(:[0-9]+)?[[:space:]]*)+$ ]]; then
say "If FORWARD_HOST is passed, it must be a hostname or a space-separated list of hostnames."
exit 1
fi

export FORWARD_PORTS
if [[ "${FORWARD_PORTS:=}" != "" && ! "$FORWARD_PORTS" =~ ^([[:space:]]*[0-9]+(/tcp|/udp)?[[:space:]]*)+$ ]]; then
echo 'If FORWARD_PORTS is passed, it must be in the form of (example): "123 456/udp 789/tcp".';
if [[ "${FORWARD_PORTS:=}" != "" && ! "$FORWARD_PORTS" =~ ^([[:space:]]*[0-9]+(/tcp|/tcp-backup|/udp)?[[:space:]]*)+$ ]]; then
echo 'If FORWARD_PORTS is passed, it must be in the form of (example): "123 789/tcp 123/tcp-backup 456/udp".';
exit 1
fi

Expand Down
37 changes: 32 additions & 5 deletions docker/ci-runner/root/entrypoint.09-forward.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,51 @@
#
# Sets up port forwarding to the storage host.
#
# Format for each entry in FORWARD_PORTS:
# - 1234 (implies tcp)
# - 1234/udp
# - 1234/tcp
# - 1234/tcp-backup (flips primary server with backup in FORWARD_HOST list)
#
set -u -e

if [[ "$FORWARD_HOST" != "" && "$FORWARD_PORTS" != "" ]]; then
FORWARD_HOST="${FORWARD_HOST%%:*}"
# Remove port numbers from the FORWARD_HOST list, in case the client passed
# them. Sometimes, it's easier to erase the port numbers here than on the
# client's side, where FORWARD_HOST is passed as host:ignored_port from some
# other data source.
FORWARD_HOST=$(echo "$FORWARD_HOST" | sed -E 's/:[0-9]+//g')

tcp_lines=()
udp_lines=()
for spec in $FORWARD_PORTS; do
hosts=$(echo "$FORWARD_HOST" | xargs)
port=${spec%%/*}
proto=${spec##*/}
[[ "$proto" == "$port" ]] && proto=tcp
if [[ "$proto" == "$port" ]]; then
proto=tcp
fi
if [[ "$proto" == "tcp-backup" ]]; then
proto="tcp"
hosts=$(echo "$FORWARD_HOST" | awk '{for(i=NF;i>0;i--) printf "%s ", $i; print ""}' | xargs)
fi
if [[ "$proto" == udp ]]; then
udp_lines+=("127.0.0.1 $port/$proto $FORWARD_HOST $port/$proto")
# UDP forwarding doesn't support backup servers, so use the first host.
udp_lines+=("127.0.0.1 $port/$proto ${hosts%% *} $port/$proto")
else
tcp_lines+=("listen ${proto}_${port}")
tcp_lines+=(" bind 127.0.0.1:$port")
# ipv4 is needed for e.g. host.docker.internal
tcp_lines+=(" server server1 $FORWARD_HOST:$port resolvers res resolve-prefer ipv4")
i=0
for host in $hosts; do
# ipv4 is needed for e.g. host.docker.internal
tcp_line=" server server$i $host:$port resolvers res resolve-prefer ipv4 check inter 10s fall 6 rise 6"
if [[ $i == 0 ]]; then
tcp_lines+=("$tcp_line")
else
tcp_lines+=("$tcp_line backup")
fi
i=$((i+1))
done
tcp_lines+=(" mode $proto")
fi
done
Expand Down
6 changes: 5 additions & 1 deletion docker/ci-scaler/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ FROM $BASE_IMAGE
ENV GH_TOKEN=""
ENV ASGS=""
ENV DOMAIN=""
ENV DYNAMODB_TABLE_PREFIX=""
ENV AWS_ENDPOINT_URL=""
ENV AWS_ACCESS_KEY_ID=""
ENV AWS_SECRET_ACCESS_KEY=""
ENV TZ=""

ENV DEBIAN_FRONTEND=noninteractive
Expand All @@ -12,7 +16,7 @@ RUN true \
&& apt-get update -y \
&& apt-get install -y --no-install-recommends \
awscli jq rsync python3 python3-yaml rsyslog systemctl tzdata gosu less mc git curl wget pv psmisc unzip vim nano telnet net-tools apt-transport-https ca-certificates locales gnupg lsb-release \
&& curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | gpg --dearmor -o /usr/share/keyrings/githubcli-archive-keyring.gpg \
&& curl -fsSL --retry 3 --retry-all-errors https://cli.github.com/packages/githubcli-archive-keyring.gpg | gpg --dearmor -o /usr/share/keyrings/githubcli-archive-keyring.gpg \
&& echo "deb [signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" > /etc/apt/sources.list.d/github-cli.list \
&& apt-get update -y \
&& apt-get install -y --no-install-recommends gh \
Expand Down
7 changes: 7 additions & 0 deletions docker/ci-scaler/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ To use:
"{owner}/{repo}:{label}:{asg_name}"
- `DOMAIN`: domain of API Gateway which listens for GitHub webhook
requests via HTTPS and forwards all requests to this container's port 8088
- `DYNAMODB_TABLE_PREFIX`: if set, use DynamoDB tables to store the state
across webhook requests; useful when running multiple instances of
ci-scaler
- `AWS_ENDPOINT_URL`, `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`:
optionally, you may pass these variables to access the AWS API; mostly
used for debugging
- `TZ` (optional): timezone name

Example for docker compose:
Expand All @@ -33,6 +39,7 @@ services:
- GH_TOKEN
- ASGS
- DOMAIN
- DYNAMODB_TABLE_PREFIX
- TZ
```

Expand Down
3 changes: 2 additions & 1 deletion docker/ci-scaler/guest/entrypoint.99-run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ set -u -e
if [[ "$ASGS" != "" ]]; then
exec python3 ./scaler/main.py \
--asgs="$ASGS" \
--domain="$DOMAIN"
--domain="$DOMAIN" \
--dynamodb-table-prefix="$DYNAMODB_TABLE_PREFIX"
else
exec sleep 1000000000
fi
12 changes: 11 additions & 1 deletion docker/ci-scaler/guest/scaler/api_aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,19 @@ def aws(
input: str | None = None,
) -> str | None:
region = aws_region()
endpoint_url = os.environ.get("AWS_ENDPOINT_URL")
if args[0] == "dynamodb" and endpoint_url:
region = "us-east-1"
if not region:
return None
return check_output(["aws", f"--region={region}", *args], input=input)
cmd = [
"aws",
f"--region={region}",
*([f"--endpoint-url={endpoint_url}"] if endpoint_url else ()),
*args,
]
out = check_output(cmd, input=input)
return out


def aws_json(
Expand Down
10 changes: 6 additions & 4 deletions docker/ci-scaler/guest/scaler/api_gh.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import traceback
import yaml
from helpers import Runner, RateLimits, check_output
from typing import Any, cast
from typing import Any, Literal, cast


def gh(
Expand Down Expand Up @@ -98,7 +98,7 @@ def gh_webhook_ensure_exists(
url: str,
secret: str,
events: list[str],
):
) -> Literal["created", "already_exists"]:
try:
gh_api(
"-XPOST",
Expand All @@ -113,9 +113,11 @@ def gh_webhook_ensure_exists(
"active": True,
},
)
return "created"
except subprocess.CalledProcessError as e:
if "Hook already exists" not in e.stdout:
raise
if "Hook already exists" in e.stdout:
return "already_exists"
raise


def gh_webhook_ensure_absent(
Expand Down
8 changes: 5 additions & 3 deletions docker/ci-scaler/guest/scaler/handler_idle_runners.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@
AsgSpec,
Runner,
RunnersRegistry,
ExpiringDict,
logged_result,
)
from typing import Literal
from storage import StorageFactory

REVISIT_TERMINATED_INSTANCE_SEC = datetime.timedelta(minutes=10).total_seconds()

Expand All @@ -26,12 +25,15 @@ def __init__(
*,
asg_spec: AsgSpec,
max_idle_age_sec: int,
storage: StorageFactory,
):
super().__init__(asg_spec=asg_spec)
self.max_idle_age_sec = max_idle_age_sec
self.idle_runners = RunnersRegistry()
self.terminated_instance_ids = ExpiringDict[str, Literal[True]](
self.terminated_instance_ids = storage.create(
bool,
ttl=REVISIT_TERMINATED_INSTANCE_SEC,
name="terminated-instance-ids",
)

def handle(self, runners: list[Runner]) -> None:
Expand Down
Loading
Loading