diff --git a/README.md b/README.md index 351a160a62..54867e9347 100644 --- a/README.md +++ b/README.md @@ -143,7 +143,7 @@ Join our discord community via [this invite link](https://discord.gg/bxgXW8jJGh) | [instance\_profile\_path](#input\_instance\_profile\_path) | The path that will be added to the instance\_profile, if not set the environment name will be used. | `string` | `null` | no | | [instance\_target\_capacity\_type](#input\_instance\_target\_capacity\_type) | Default lifecycle used for runner instances, can be either `spot` or `on-demand`. | `string` | `"spot"` | no | | [instance\_termination\_watcher](#input\_instance\_termination\_watcher) | Configuration for the instance termination watcher. This feature is Beta, changes will not trigger a major release as long in beta.

`enable`: Enable or disable the spot termination watcher.
`features`: Enable or disable features of the termination watcher.
`memory_size`: Memory size limit in MB of the lambda.
`s3_key`: S3 key for syncer lambda function. Required if using S3 bucket to specify lambdas.
`s3_object_version`: S3 object version for syncer lambda function. Useful if S3 versioning is enabled on source bucket.
`timeout`: Time out of the lambda in seconds.
`zip`: File location of the lambda zip file. |
object({
enable = optional(bool, false)
features = optional(object({
enable_spot_termination_handler = optional(bool, true)
enable_spot_termination_notification_watcher = optional(bool, true)
}), {})
memory_size = optional(number, null)
s3_key = optional(string, null)
s3_object_version = optional(string, null)
timeout = optional(number, null)
zip = optional(string, null)
})
| `{}` | no | -| [instance\_types](#input\_instance\_types) | List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux and Windows Server Core for win). | `list(string)` |
[
"m5.large",
"c5.large"
]
| no | +| [instance\_types](#input\_instance\_types) | List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux, macOS Sequoia for osx, Windows Server Core for win). | `list(string)` |
[
"m5.large",
"c5.large"
]
| no | | [job\_queue\_retention\_in\_seconds](#input\_job\_queue\_retention\_in\_seconds) | The number of seconds the job is held in the queue before it is purged. | `number` | `86400` | no | | [job\_retry](#input\_job\_retry) | Experimental! Can be removed / changed without trigger a major release.Configure job retries. The configuration enables job retries (for ephemeral runners). After creating the instances a message will be published to a job retry queue. The job retry check lambda is checking after a delay if the job is queued. If not the message will be published again on the scale-up (build queue). Using this feature can impact the rate limit of the GitHub app.

`enable`: Enable or disable the job retry feature.
`delay_in_seconds`: The delay in seconds before the job retry check lambda will check the job status.
`delay_backoff`: The backoff factor for the delay.
`lambda_memory_size`: Memory size limit in MB for the job retry check lambda.
`lambda_timeout`: Time out of the job retry check lambda in seconds.
`max_attempts`: The maximum number of attempts to retry the job. |
object({
enable = optional(bool, false)
delay_in_seconds = optional(number, 300)
delay_backoff = optional(number, 2)
lambda_memory_size = optional(number, 256)
lambda_timeout = optional(number, 30)
max_attempts = optional(number, 1)
})
| `{}` | no | | [key\_name](#input\_key\_name) | Key pair name | `string` | `null` | no | @@ -198,10 +198,11 @@ Join our discord community via [this invite link](https://discord.gg/bxgXW8jJGh) | [runner\_hook\_job\_completed](#input\_runner\_hook\_job\_completed) | Script to be ran in the runner environment at the end of every job | `string` | `""` | no | | [runner\_hook\_job\_started](#input\_runner\_hook\_job\_started) | Script to be ran in the runner environment at the beginning of every job | `string` | `""` | no | | [runner\_iam\_role\_managed\_policy\_arns](#input\_runner\_iam\_role\_managed\_policy\_arns) | Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role | `list(string)` | `[]` | no | +| [runner\_license\_specifications](#input\_runner\_license\_specifications) | The license specifications for the instance. See https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/launch_template#license_specification for details. |
list(object({
license_configuration_arn = string
}))
| `[]` | no | | [runner\_log\_files](#input\_runner\_log\_files) | (optional) List of logfiles to send to CloudWatch, will only be used if `enable_cloudwatch_agent` is set to true. Object description: `log_group_name`: Name of the log group, `prefix_log_group`: If true, the log group name will be prefixed with `/github-self-hosted-runners/`, `file_path`: path to the log file, `log_stream_name`: name of the log stream, `log_class`: The log class of the log group. Valid values are `STANDARD` or `INFREQUENT_ACCESS`. Defaults to `STANDARD`. |
list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
log_class = optional(string, "STANDARD")
}))
| `null` | no | | [runner\_metadata\_options](#input\_runner\_metadata\_options) | Metadata options for the ec2 runner instances. By default, the module uses metadata tags for bootstrapping the runner, only disable `instance_metadata_tags` when using custom scripts for starting the runner. | `map(any)` |
{
"http_endpoint": "enabled",
"http_put_response_hop_limit": 1,
"http_tokens": "required",
"instance_metadata_tags": "enabled"
}
| no | | [runner\_name\_prefix](#input\_runner\_name\_prefix) | The prefix used for the GitHub runner name. The prefix will be used in the default start script to prefix the instance name when register the runner in GitHub. The value is available via an EC2 tag 'ghr:runner\_name\_prefix'. | `string` | `""` | no | -| [runner\_os](#input\_runner\_os) | The EC2 Operating System type to use for action runner instances (linux,windows). | `string` | `"linux"` | no | +| [runner\_os](#input\_runner\_os) | The EC2 Operating System type to use for action runner instances (linux, osx, windows). | `string` | `"linux"` | no | | [runner\_placement](#input\_runner\_placement) | The placement options for the instance. See https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/launch_template#placement for details. |
object({
affinity = optional(string)
availability_zone = optional(string)
group_id = optional(string)
group_name = optional(string)
host_id = optional(string)
host_resource_group_arn = optional(string)
spread_domain = optional(string)
tenancy = optional(string)
partition_number = optional(number)
})
| `null` | no | | [runner\_run\_as](#input\_runner\_run\_as) | Run the GitHub actions agent as user. | `string` | `"ec2-user"` | no | | [runners\_ebs\_optimized](#input\_runners\_ebs\_optimized) | Enable EBS optimization for the runner instances. | `bool` | `false` | no | @@ -224,6 +225,7 @@ Join our discord community via [this invite link](https://discord.gg/bxgXW8jJGh) | [syncer\_lambda\_s3\_object\_version](#input\_syncer\_lambda\_s3\_object\_version) | S3 object version for syncer lambda function. Useful if S3 versioning is enabled on source bucket. | `string` | `null` | no | | [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no | | [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
| `{}` | no | +| [use\_dedicated\_host](#input\_use\_dedicated\_host) | Use a dedicated host for the runner instances. | `bool` | `false` | no | | [user\_agent](#input\_user\_agent) | User agent used for API calls by lambda functions. | `string` | `"github-aws-runners"` | no | | [userdata\_content](#input\_userdata\_content) | Alternative user-data content, replacing the templated one. By providing your own user\_data you have to take care of installing all required software, including the action runner and registering the runner. Be-aware configuration parameters in SSM as well as tags are treated as internals. Changes will not trigger a breaking release. | `string` | `null` | no | | [userdata\_post\_install](#input\_userdata\_post\_install) | Script to be ran after the GitHub Actions runner is installed on the EC2 instances | `string` | `""` | no | diff --git a/docs/configuration.md b/docs/configuration.md index 8ec7e4caef..802317b859 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -282,6 +282,10 @@ In case the setup does not work as intended, trace the events through this seque ## Experimental features +### macOS Runners + +This feature is in early stage and should be considered experimental. The module supports macOS-based GitHub Actions self-hosted runners on AWS EC2 Mac instances (`mac1.metal`, `mac2.metal`, `mac2-m2.metal`). macOS runners require dedicated hosts due to Apple's licensing requirements and have longer boot times (6–20 minutes). Set `runner_os = "osx"` and `use_dedicated_host = true` to enable. See the full [macOS Runners documentation](mac-runners.md) for details. + ### Termination watcher This feature is in early stage and therefore disabled by default. To enable the watcher, set `instance_termination_watcher.enable = true`. 
diff --git a/docs/examples/dedicated-mac-hosts.md b/docs/examples/dedicated-mac-hosts.md new file mode 100644 index 0000000000..fd2caa4291 --- /dev/null +++ b/docs/examples/dedicated-mac-hosts.md @@ -0,0 +1 @@ +--8<-- "examples/dedicated-mac-hosts/README.md" diff --git a/docs/examples/index.md b/docs/examples/index.md index ac611575fc..f095e68ad9 100644 --- a/docs/examples/index.md +++ b/docs/examples/index.md @@ -9,4 +9,5 @@ Examples are located in the [examples](https://github.com/github-aws-runners/ter - _[Prebuilt Images](prebuilt.md)_: Example usages of deploying runners with a custom prebuilt image. - _[Windows](windows.md)_: Example usage of creating a runner using Windows as the OS. - _[Termination watcher](termination-watcher.md)_: Example usages of termination watcher. +- _[Dedicated Mac Hosts](dedicated-mac-hosts.md)_: Example usage of setting up dedicated hosts for macOS runners. - _[Externally managed SSM secrets](external-managed-ssm-secrets.md)_: Example usage of externally managed SSM secrets for the GitHub App credentials. diff --git a/docs/mac-runners.md b/docs/mac-runners.md new file mode 100644 index 0000000000..509d045751 --- /dev/null +++ b/docs/mac-runners.md @@ -0,0 +1,188 @@ +# macOS Runners (Experimental) + +!!! warning + This feature is in early stage and should be considered experimental. macOS runners on AWS have unique constraints compared to Linux and Windows runners. Please review all sections below before deploying. + +## Overview + +The module supports provisioning macOS-based GitHub Actions self-hosted runners on AWS using [Amazon EC2 Mac instances](https://aws.amazon.com/ec2/instance-types/mac/). macOS runners use the `osx` value for the `runner_os` variable and require **dedicated hosts** due to Apple's macOS licensing requirements. + +Key differences from Linux/Windows runners: + +- **Dedicated hosts required.** EC2 Mac instances must run on dedicated hosts. Each dedicated host can run **only one Mac VM at a time** (1:1 ratio). 
The module uses `RunInstances` directly instead of the `CreateFleet` API when `use_dedicated_host` is enabled. +- **Longer boot times.** macOS instances can take 6–20 minutes to launch, significantly longer than Linux (~1 min) or Windows (~5 min). The default `minimum_running_time_in_minutes` for `osx` is set to 20 minutes to prevent premature scale-down. +- **~50 minute host recycle time.** After an EC2 Mac instance is terminated, AWS performs install cleanup and software upgrades on the dedicated host before it becomes available again. This process takes approximately **50 minutes**, during which the host cannot launch a new instance. +- **ARM64 (Apple Silicon) and x64 (Intel) support.** The `mac1.metal` (Intel), `mac2.metal` (M1), and `mac2-m2.metal` (M2) instance types are all supported. Set `runner_architecture` accordingly (`x64` or `arm64`). +- **Only ephemeral mode is recommended.** Due to the long host allocation time and dedicated host cost model, we recommend using ephemeral runners. + +## Scaling caveats + +Running macOS runners at scale introduces challenges that do not exist with Linux or Windows runners: + +1. **1:1 host-to-VM ratio.** Unlike Linux where many instances share underlying hardware, each Mac VM requires its own dedicated host. To run N concurrent macOS jobs, you need at least N dedicated hosts. +2. **Host recycle delay.** After a Mac instance is terminated, the dedicated host enters a ~50 minute cleanup cycle (scrubbing, software updates). During this window the host is unavailable. For bursty workloads, you need additional hosts to absorb demand while others recycle. +3. **Capacity planning.** As a rule of thumb, if you expect N peak concurrent macOS jobs and each job takes T minutes, account for the extra ~50 minutes of host downtime per cycle when sizing your host pool. + +## Prerequisites + +Before deploying macOS runners, you must set up dedicated host infrastructure. 
There are two approaches: + +### Option A: Single dedicated host + +The simplest setup — allocate a single dedicated host and reference it directly. This works for low-scale or testing scenarios, but you must update the Terraform configuration whenever you replace the host. + +1. **Dedicated Host** — Allocate an EC2 dedicated host for your Mac instance type in the target availability zone. + +### Option B: Host resource group (recommended for scale) + +A host resource group allows you to associate **multiple dedicated hosts within an availability zone** into a logical group. When launching a Mac instance, AWS randomly selects an available host from the group. This means you can add, release, or replace individual dedicated hosts **without changing Terraform state or module inputs** — you only reference the group ARN, not individual host ARNs. + +This approach requires three resources: + +1. **Dedicated Hosts** — Allocate one or more EC2 dedicated hosts for Mac instance types in your target availability zones. +2. **Host Resource Group** — Create an AWS Resource Groups group of type `AWS::EC2::HostManagement` and add your dedicated hosts as members. +3. **License Configuration** — Create an AWS License Manager license configuration for Mac dedicated hosts (counting type: `Socket`). Associate it with the macOS AMI and the host resource group. The license configuration ARN is passed to the module via the `runner_license_specifications` input (named `license_specifications` inside a multi-runner `runner_config`). + +The [dedicated-mac-hosts example](examples/dedicated-mac-hosts.md) provides a ready-to-use Terraform configuration for all three resources. 
+ +## Configuration + +### Basic setup + +```hcl +module "runners" { + source = "github-aws-runners/github-runners/aws" + + # macOS-specific settings + runner_os = "osx" + runner_architecture = "arm64" # or "x64" for Intel Mac instances + instance_types = ["mac2.metal"] + + # Dedicated host settings (required for macOS) + use_dedicated_host = true + runner_placement = { + host_resource_group_arn = "" + } + runner_license_specifications = [{ license_configuration_arn = "" }] + + # Recommended: ephemeral mode with a pool + enable_ephemeral_runners = true + delay_webhook_event = 0 + enable_job_queued_check = true + + # ...other common settings... +} +``` + +### AMI selection + +By default, the module selects an Amazon EC2 macOS Sequoia (macOS 15) AMI: + +- **ARM64:** `amzn-ec2-macos-15.*-arm64` +- **x64:** `amzn-ec2-macos-15.*` + +You can override the AMI using filters or an SSM parameter: + +```hcl +# Custom AMI filter +ami = { + filter = { + name = ["amzn-ec2-macos-14.*-arm64"] + state = ["available"] + } + owners = ["amazon"] +} + +# Or via SSM parameter +ami = { + id_ssm_parameter_arn = "arn:aws:ssm:region:account:parameter/path/to/mac/ami" +} +``` + +### Multi-runner setup + +When using the multi-runner module, you can add a macOS runner configuration alongside Linux and Windows runners: + +```hcl +multi_runner_config = { + "mac-arm64" = { + runner_config = { + runner_os = "osx" + runner_architecture = "arm64" + instance_types = ["mac2.metal"] + use_dedicated_host = true + placement = { + host_resource_group_arn = "" + } + license_specifications = [{ license_configuration_arn = "" }] + runner_extra_labels = ["osx", "arm64"] + } + matcherConfig = { + labelMatchers = [["self-hosted", "osx", "arm64"]] + exactMatch = false + } + } +} +``` + +## Instance launch behavior + +Because EC2 Fleet (`CreateFleet`) does not support launching instances onto dedicated hosts for `mac*.metal` instance types, the scale-up lambda automatically falls back to using `RunInstances` when `use_dedicated_host` is `true`. 
This is handled transparently — no additional configuration is needed. + +## User data and scripts + +The module uses macOS-specific templates for provisioning: + +| Script | Description | +| --- | --- | +| `user-data-osx.sh` | Boot script for macOS instances. Uses `ec2-user` and supports Homebrew. | +| `install-runner-osx.sh` | Downloads and installs the GitHub Actions runner agent to `/opt/actions-runner`. | +| `start-runner-osx.sh` | Registers the runner with GitHub and handles ephemeral cleanup. | + +Custom pre/post install scripts and job hooks (`hook_job_started`, `hook_job_completed`) work the same as on Linux. + +## Scale-down considerations + +macOS instances have a default minimum running time of **20 minutes** (vs. 5 for Linux, 15 for Windows) to account for the longer boot cycle. Adjust `minimum_running_time_in_minutes` if needed, but setting it too low risks terminating instances before they can execute a job. + +Additionally, remember that after an instance is terminated, the dedicated host enters a **~50 minute cleanup cycle** before it can launch a new instance. Aggressive scale-down can leave you with no available hosts during this window. + +```hcl +# Override the minimum running time (not recommended to go below 20 for macOS) +minimum_running_time_in_minutes = 25 +``` + +## Cost considerations + +!!! note + macOS dedicated hosts have a **minimum allocation period of 24 hours**. You are billed for the dedicated host for the full 24-hour period, regardless of instance usage. Plan your host allocation accordingly. + +- **Dedicated host costs**: Billed per-host, per-hour with a 24-hour minimum. Each host supports only one Mac VM at a time. See [EC2 Dedicated Hosts Pricing](https://aws.amazon.com/ec2/dedicated-hosts/pricing/). +- **Instance costs**: Mac instances are billed on-demand only (no spot pricing available for Mac instances). 
+- **Over-provisioning for recycle time**: Because hosts are unavailable for ~50 minutes after instance termination, you may need more dedicated hosts than your peak concurrency to avoid queuing. Factor this into your cost model. +- **Pool sizing**: Keep pool sizes minimal to control costs, but large enough to avoid cold-start delays. + +## Known limitations + +- **No spot instance support.** EC2 Mac instances do not support the spot lifecycle. Runners always use on-demand pricing. +- **1:1 host-to-VM ratio.** Each dedicated host can run only one Mac instance at a time. +- **~50 minute host recycle time.** After instance termination, AWS performs cleanup and software upgrades on the dedicated host. The host is unavailable for approximately 50 minutes during this process. +- **24-hour minimum host allocation.** Dedicated hosts cannot be released within 24 hours of allocation. +- **Limited instance types.** Only `mac1.metal` (Intel x86), `mac2.metal` (M1 ARM64), and `mac2-m2.metal` (M2 ARM64) are available. Instance type availability varies by region. +- **Longer startup.** Boot times of 6–20 minutes mean jobs will queue longer when no warm runners are available. +- **No SSM Session Manager.** Unlike Linux instances, connecting via AWS Session Manager may not be available depending on your AMI. +- **GHES limited testing.** macOS runner support has only been validated against GitHub Enterprise Server 3.17.3. + +## Debugging + +- Check `/var/log/user-data.log` on the macOS instance for boot script output. +- CloudWatch log streams under `/runners` will contain runner agent logs if CloudWatch logging is enabled. +- Verify your dedicated host has available capacity in the EC2 console under **Dedicated Hosts**. +- Ensure the host resource group ARN and license configuration ARN match what is configured in Terraform. +- If runners fail to register, verify the GitHub App has the correct permissions and the SSM token path is accessible. 
+ +## Example + +A complete example for setting up the dedicated host infrastructure is available at: + +- [Dedicated Mac Hosts example](examples/dedicated-mac-hosts.md) diff --git a/examples/dedicated-mac-hosts/README.md b/examples/dedicated-mac-hosts/README.md new file mode 100644 index 0000000000..ad00c2533e --- /dev/null +++ b/examples/dedicated-mac-hosts/README.md @@ -0,0 +1,42 @@ + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.3.0 | +| [aws](#requirement\_aws) | >= 6.21 | + +## Providers + +| Name | Version | +|------|---------| +| [aws](#provider\_aws) | >= 6.21 | + +## Modules + +No modules. + +## Resources + +| Name | Type | +|------|------| +| [aws_ec2_host.mac_dedicated_host](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ec2_host) | resource | +| [aws_licensemanager_license_configuration.mac_dedicated_host_license_configuration](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/licensemanager_license_configuration) | resource | +| [aws_resourcegroups_group.mac_host_group](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/resourcegroups_group) | resource | +| [aws_resourcegroups_resource.mac_host_membership](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/resourcegroups_resource) | resource | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [aws\_region](#input\_aws\_region) | AWS region. | `string` | n/a | yes | +| [environment](#input\_environment) | Environment name, used as prefix. | `string` | `null` | no | +| [host\_groups](#input\_host\_groups) | Map of host groups, each with a name, host instance type, and a list of hosts (name + AZ). |
map(object({
name = string
host_instance_type = string
hosts = list(object({
name = string
availability_zone = string
}))
}))
| n/a | yes | + +## Outputs + +| Name | Description | +|------|-------------| +| [license\_specification\_arn](#output\_license\_specification\_arn) | ARN of the License Manager configuration used for Mac dedicated hosts. | +| [resource\_group\_arns](#output\_resource\_group\_arns) | Map of resource group names to their ARNs. | + diff --git a/examples/dedicated-mac-hosts/main.tf b/examples/dedicated-mac-hosts/main.tf new file mode 100644 index 0000000000..f0b1537859 --- /dev/null +++ b/examples/dedicated-mac-hosts/main.tf @@ -0,0 +1,105 @@ +locals { + + environment = var.environment != null ? var.environment : "default" + aws_region = var.aws_region + + # Flatten host_groups into a map of individual host definitions keyed by + # "groupKey-hostName" so we can create one aws_ec2_host per host. + mac_dedicated_hosts = merge([ + for group_key, group in var.host_groups : { + for host in group.hosts : + "${group_key}-${host.name}" => { + instance_type = group.host_instance_type + availability_zone = host.availability_zone + group_name = group.name + host_name = host.name + } + } + ]...) 
+} + +resource "aws_ec2_host" "mac_dedicated_host" { + for_each = local.mac_dedicated_hosts + + instance_type = each.value.instance_type + availability_zone = each.value.availability_zone + auto_placement = "on" + + tags = { + "Name" = each.value.host_name + "HostGroup" = each.value.group_name + } +} + +resource "aws_resourcegroups_group" "mac_host_group" { + for_each = { for _, group in var.host_groups : group.name => group } + + name = each.value.name + + configuration { + type = "AWS::EC2::HostManagement" + + parameters { + name = "any-host-based-license-configuration" + values = ["true"] + } + + parameters { + name = "auto-allocate-host" + values = [ + "false", + ] + } + parameters { + name = "auto-host-recovery" + values = [ + "false", + ] + } + parameters { + name = "auto-release-host" + values = [ + "false", + ] + } + } + + configuration { + type = "AWS::ResourceGroups::Generic" + parameters { + name = "allowed-resource-types" + values = [ + "AWS::EC2::Host", + ] + } + + parameters { + name = "deletion-protection" + values = [ + "UNLESS_EMPTY", + ] + } + } + + tags = { + "Name" = each.value.name + } +} + +resource "aws_resourcegroups_resource" "mac_host_membership" { + for_each = local.mac_dedicated_hosts + + group_arn = aws_resourcegroups_group.mac_host_group[each.value.group_name].arn + resource_arn = aws_ec2_host.mac_dedicated_host[each.key].arn +} + + +resource "aws_licensemanager_license_configuration" "mac_dedicated_host_license_configuration" { + name = "mac-dedicated-host-license-configuration" + description = "Mac dedicated host license configuration" + license_counting_type = "Socket" + + tags = { + "Name" = "mac-dedicated-host-license-configuration" + } +} diff --git a/examples/dedicated-mac-hosts/outputs.tf b/examples/dedicated-mac-hosts/outputs.tf new file mode 100644 index 0000000000..4aa7dda086 --- /dev/null +++ b/examples/dedicated-mac-hosts/outputs.tf @@ -0,0 +1,12 @@ +output "resource_group_arns" { + description = "Map of resource group 
names to their ARNs." + value = { + for k, rg in aws_resourcegroups_group.mac_host_group : + rg.name => rg.arn + } +} + +output "license_specification_arn" { + description = "ARN of the License Manager configuration used for Mac dedicated hosts." + value = aws_licensemanager_license_configuration.mac_dedicated_host_license_configuration.arn +} diff --git a/examples/dedicated-mac-hosts/providers.tf b/examples/dedicated-mac-hosts/providers.tf new file mode 100644 index 0000000000..eca2fe96a7 --- /dev/null +++ b/examples/dedicated-mac-hosts/providers.tf @@ -0,0 +1,9 @@ +provider "aws" { + region = local.aws_region + + default_tags { + tags = { + Example = local.environment + } + } +} diff --git a/examples/dedicated-mac-hosts/variables.tf b/examples/dedicated-mac-hosts/variables.tf new file mode 100644 index 0000000000..3efed4af38 --- /dev/null +++ b/examples/dedicated-mac-hosts/variables.tf @@ -0,0 +1,23 @@ +variable "aws_region" { + description = "AWS region." + type = string +} + +variable "environment" { + description = "Environment name, used as prefix." + + type = string + default = null +} + +variable "host_groups" { + description = "Map of host groups, each with a name, host instance type, and a list of hosts (name + AZ)." 
+ type = map(object({ + name = string + host_instance_type = string + hosts = list(object({ + name = string + availability_zone = string + })) + })) +} diff --git a/examples/dedicated-mac-hosts/versions.tf b/examples/dedicated-mac-hosts/versions.tf new file mode 100644 index 0000000000..af69406fbd --- /dev/null +++ b/examples/dedicated-mac-hosts/versions.tf @@ -0,0 +1,10 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 6.21" + } + } + + required_version = ">= 1.3.0" +} diff --git a/examples/multi-runner/templates/runner-configs/mac-arm64-26.yaml b/examples/multi-runner/templates/runner-configs/mac-arm64-26.yaml new file mode 100644 index 0000000000..acc44983ad --- /dev/null +++ b/examples/multi-runner/templates/runner-configs/mac-arm64-26.yaml @@ -0,0 +1,43 @@ +matcherConfig: + exactMatch: true + labelMatchers: + - [self-hosted, osx, arm64] +fifo: true +redrive_build_queue: + enabled: false + maxReceiveCount: null +runner_config: + # Replace and with the actual + # values for your target region. These can be obtained from the outputs of the + # examples/dedicated-mac-hosts example. + placement: + host_resource_group_arn: "" + tenancy: host + license_specifications: + - license_configuration_arn: "" + runner_os: osx + runner_architecture: arm64 + runner_user: ec2-user + runner_name_prefix: macos_26_ + enable_ssm_on_runners: true + credit_specification: unlimited + instance_types: + - mac2.metal + ami: + id_ssm_parameter_arn: ${ami_id_ssm_parameter_arn} + ami: + owners: + - amazon + filter: + name: + - amzn-ec2-macos-26* + state: + - available + runners_maximum_count: 1 + delay_webhook_event: 0 + scale_down_schedule_expression: cron(* * * * ? 
*) + runner_hook_job_started: | + echo "Running pre job hook as $(whoami)" + runner_hook_job_completed: | + echo "Running post job hook as $(whoami)" + diff --git a/examples/multi-runner/variables.tf b/examples/multi-runner/variables.tf index 009c3643db..bc490ae273 100644 --- a/examples/multi-runner/variables.tf +++ b/examples/multi-runner/variables.tf @@ -19,4 +19,4 @@ variable "aws_region" { type = string default = "eu-west-1" -} +} \ No newline at end of file diff --git a/examples/prebuilt/README.md b/examples/prebuilt/README.md index b24f47a01d..c2d66139d4 100644 --- a/examples/prebuilt/README.md +++ b/examples/prebuilt/README.md @@ -112,7 +112,7 @@ terraform output webhook_secret | [aws\_region](#input\_aws\_region) | AWS region. | `string` | `"eu-west-1"` | no | | [environment](#input\_environment) | Environment name, used as prefix. | `string` | `null` | no | | [github\_app](#input\_github\_app) | GitHub for API usages. |
object({
id = string
key_base64 = string
})
| n/a | yes | -| [runner\_os](#input\_runner\_os) | The EC2 Operating System type to use for action runner instances (linux,windows). | `string` | `"linux"` | no | +| [runner\_os](#input\_runner\_os) | The EC2 Operating System type to use for action runner instances (linux, osx, windows). | `string` | `"linux"` | no | ## Outputs diff --git a/examples/prebuilt/variables.tf b/examples/prebuilt/variables.tf index 643072a163..11670a5d2e 100644 --- a/examples/prebuilt/variables.tf +++ b/examples/prebuilt/variables.tf @@ -22,7 +22,7 @@ variable "aws_region" { } variable "runner_os" { - description = "The EC2 Operating System type to use for action runner instances (linux,windows)." + description = "The EC2 Operating System type to use for action runner instances (linux, osx, windows)." type = string default = "linux" diff --git a/lambdas/functions/control-plane/src/aws/runners.d.ts b/lambdas/functions/control-plane/src/aws/runners.d.ts index c891500f27..7fb260e4b1 100644 --- a/lambdas/functions/control-plane/src/aws/runners.d.ts +++ b/lambdas/functions/control-plane/src/aws/runners.d.ts @@ -48,4 +48,5 @@ export interface RunnerInputParameters { tracingEnabled?: boolean; onDemandFailoverOnError?: string[]; scaleErrors: string[]; + useDedicatedHost?: boolean; } diff --git a/lambdas/functions/control-plane/src/aws/runners.test.ts b/lambdas/functions/control-plane/src/aws/runners.test.ts index 4243e4b06b..71bb74b635 100644 --- a/lambdas/functions/control-plane/src/aws/runners.test.ts +++ b/lambdas/functions/control-plane/src/aws/runners.test.ts @@ -10,6 +10,7 @@ import { DescribeInstancesCommand, type DescribeInstancesResult, EC2Client, + RunInstancesCommand, SpotAllocationStrategy, TerminateInstancesCommand, } from '@aws-sdk/client-ec2'; @@ -754,6 +755,7 @@ interface RunnerConfig { tracingEnabled?: boolean; onDemandFailoverOnError?: string[]; scaleErrors: string[]; + useDedicatedHost?: boolean; source: LambdaRunnerSource; } @@ -775,6 +777,7 @@ function 
createRunnerConfig(runnerConfig: RunnerConfig): RunnerInputParameters { tracingEnabled: runnerConfig.tracingEnabled, onDemandFailoverOnError: runnerConfig.onDemandFailoverOnError, scaleErrors: runnerConfig.scaleErrors, + useDedicatedHost: runnerConfig.useDedicatedHost, source: runnerConfig.source, }; } @@ -864,3 +867,217 @@ function expectedCreateFleetRequest(expectedValues: ExpectedFleetRequestValues): return request; } + +describe('create runner with useDedicatedHost', () => { + const dedicatedHostRunnerConfig: RunnerConfig = { + allocationStrategy: SpotAllocationStrategy.CAPACITY_OPTIMIZED, + capacityType: 'on-demand', + type: 'Org', + scaleErrors: [], + useDedicatedHost: true, + }; + + beforeEach(() => { + vi.clearAllMocks(); + mockEC2Client.reset(); + mockSSMClient.reset(); + + mockEC2Client.on(RunInstancesCommand).resolves({ + Instances: [{ InstanceId: 'i-dedicated-1' }], + }); + mockSSMClient.on(GetParameterCommand).resolves({}); + }); + + it('uses RunInstances instead of CreateFleet when useDedicatedHost is true', async () => { + const result = await createRunner(createRunnerConfig(dedicatedHostRunnerConfig)); + + expect(result).toEqual(['i-dedicated-1']); + expect(mockEC2Client).toHaveReceivedCommand(RunInstancesCommand); + expect(mockEC2Client).not.toHaveReceivedCommand(CreateFleetCommand); + }); + + it('uses CreateFleet when useDedicatedHost is false', async () => { + mockEC2Client.on(CreateFleetCommand).resolves({ Instances: [{ InstanceIds: ['i-fleet-1'] }] }); + + const result = await createRunner( + createRunnerConfig({ + ...dedicatedHostRunnerConfig, + useDedicatedHost: false, + }), + ); + + expect(result).toEqual(['i-fleet-1']); + expect(mockEC2Client).toHaveReceivedCommand(CreateFleetCommand); + expect(mockEC2Client).not.toHaveReceivedCommand(RunInstancesCommand); + }); + + it('uses CreateFleet when useDedicatedHost is undefined', async () => { + mockEC2Client.on(CreateFleetCommand).resolves({ Instances: [{ InstanceIds: ['i-fleet-1'] }] }); + + 
const result = await createRunner( + createRunnerConfig({ + ...dedicatedHostRunnerConfig, + useDedicatedHost: undefined, + }), + ); + + expect(result).toEqual(['i-fleet-1']); + expect(mockEC2Client).toHaveReceivedCommand(CreateFleetCommand); + expect(mockEC2Client).not.toHaveReceivedCommand(RunInstancesCommand); + }); + + it('passes correct parameters to RunInstances', async () => { + await createRunner(createRunnerConfig(dedicatedHostRunnerConfig)); + + expect(mockEC2Client).toHaveReceivedCommandWith(RunInstancesCommand, { + LaunchTemplate: { + LaunchTemplateName: LAUNCH_TEMPLATE, + Version: '$Default', + }, + InstanceType: 'm5.large', + MinCount: 1, + MaxCount: 1, + SubnetId: 'subnet-123', + TagSpecifications: [ + { + ResourceType: 'instance', + Tags: [ + { Key: 'ghr:Application', Value: 'github-action-runner' }, + { Key: 'ghr:created_by', Value: 'scale-up-lambda' }, + { Key: 'ghr:Type', Value: 'Org' }, + { Key: 'ghr:Owner', Value: REPO_NAME }, + ], + }, + { + ResourceType: 'volume', + Tags: [ + { Key: 'ghr:Application', Value: 'github-action-runner' }, + { Key: 'ghr:created_by', Value: 'scale-up-lambda' }, + { Key: 'ghr:Type', Value: 'Org' }, + { Key: 'ghr:Owner', Value: REPO_NAME }, + ], + }, + ], + }); + }); + + it('creates multiple instances via RunInstances', async () => { + mockEC2Client.on(RunInstancesCommand).resolves({ + Instances: [{ InstanceId: 'i-dedicated-1' }, { InstanceId: 'i-dedicated-2' }], + }); + + const result = await createRunner({ + ...createRunnerConfig(dedicatedHostRunnerConfig), + numberOfRunners: 2, + }); + + expect(result).toEqual(['i-dedicated-1', 'i-dedicated-2']); + expect(mockEC2Client).toHaveReceivedCommandWith(RunInstancesCommand, { + LaunchTemplate: { + LaunchTemplateName: LAUNCH_TEMPLATE, + Version: '$Default', + }, + InstanceType: 'm5.large', + MinCount: 2, + MaxCount: 2, + SubnetId: 'subnet-123', + TagSpecifications: [ + { + ResourceType: 'instance', + Tags: [ + { Key: 'ghr:Application', Value: 'github-action-runner' }, + { 
Key: 'ghr:created_by', Value: 'pool-lambda' }, + { Key: 'ghr:Type', Value: 'Org' }, + { Key: 'ghr:Owner', Value: REPO_NAME }, + ], + }, + { + ResourceType: 'volume', + Tags: [ + { Key: 'ghr:Application', Value: 'github-action-runner' }, + { Key: 'ghr:created_by', Value: 'pool-lambda' }, + { Key: 'ghr:Type', Value: 'Org' }, + { Key: 'ghr:Owner', Value: REPO_NAME }, + ], + }, + ], + }); + }); + + it('throws error when spot is used with dedicated host', async () => { + await expect( + createRunner( + createRunnerConfig({ + ...dedicatedHostRunnerConfig, + capacityType: 'spot', + }), + ), + ).rejects.toThrow('Spot instances are not supported with RunInstances'); + expect(mockEC2Client).not.toHaveReceivedCommand(RunInstancesCommand); + }); + + it('throws error when RunInstances returns no instances', async () => { + mockEC2Client.on(RunInstancesCommand).resolves({ Instances: [] }); + + await expect(createRunner(createRunnerConfig(dedicatedHostRunnerConfig))).rejects.toThrow( + 'RunInstances returned no instances for dedicated host.', + ); + }); + + it('throws error when RunInstances fails', async () => { + mockEC2Client.on(RunInstancesCommand).rejects(new Error('EC2 error')); + + await expect(createRunner(createRunnerConfig(dedicatedHostRunnerConfig))).rejects.toThrow('EC2 error'); + }); + + it('uses ami id override from ssm parameter', async () => { + const paramValue: GetParameterResult = { + Parameter: { + Value: 'ami-dedicated', + }, + }; + mockSSMClient.on(GetParameterCommand).resolves(paramValue); + + await createRunner( + createRunnerConfig({ + ...dedicatedHostRunnerConfig, + amiIdSsmParameterName: 'my-ami-id-param', + }), + ); + + expect(mockEC2Client).toHaveReceivedCommandWith(RunInstancesCommand, { + LaunchTemplate: { + LaunchTemplateName: LAUNCH_TEMPLATE, + Version: '$Default', + }, + InstanceType: 'm5.large', + MinCount: 1, + MaxCount: 1, + SubnetId: 'subnet-123', + ImageId: 'ami-dedicated', + TagSpecifications: [ + { + ResourceType: 'instance', + Tags: [ + { 
Key: 'ghr:Application', Value: 'github-action-runner' }, + { Key: 'ghr:created_by', Value: 'scale-up-lambda' }, + { Key: 'ghr:Type', Value: 'Org' }, + { Key: 'ghr:Owner', Value: REPO_NAME }, + ], + }, + { + ResourceType: 'volume', + Tags: [ + { Key: 'ghr:Application', Value: 'github-action-runner' }, + { Key: 'ghr:created_by', Value: 'scale-up-lambda' }, + { Key: 'ghr:Type', Value: 'Org' }, + { Key: 'ghr:Owner', Value: REPO_NAME }, + ], + }, + ], + }); + expect(mockSSMClient).toHaveReceivedCommandWith(GetParameterCommand, { + Name: 'my-ami-id-param', + }); + }); +}); diff --git a/lambdas/functions/control-plane/src/aws/runners.ts b/lambdas/functions/control-plane/src/aws/runners.ts index 193c82d2e7..6200b1f669 100644 --- a/lambdas/functions/control-plane/src/aws/runners.ts +++ b/lambdas/functions/control-plane/src/aws/runners.ts @@ -5,6 +5,7 @@ import { DeleteTagsCommand, DescribeInstancesCommand, DescribeInstancesResult, + RunInstancesCommand, EC2Client, FleetLaunchTemplateOverridesRequest, Tag, @@ -152,6 +153,15 @@ export async function createRunner(runnerParameters: Runners.RunnerInputParamete const ec2Client = getTracedAWSV3Client(new EC2Client({ region: process.env.AWS_REGION })); const amiIdOverride = await getAmiIdOverride(runnerParameters); + // EC2 Fleet (CreateFleet) does not support launching instances onto dedicated hosts + // for instance types like mac*.metal. Use RunInstances directly instead. 
+ if (runnerParameters.useDedicatedHost) { + logger.info('Using RunInstances for dedicated host placement (CreateFleet does not support dedicated hosts).'); + const instances = await createInstancesWithRunInstances(runnerParameters, amiIdOverride, ec2Client); + logger.info(`Created instance(s) via RunInstances: ${instances.join(',')}`); + return instances; + } + const fleet: CreateFleetResult = await createInstances(runnerParameters, amiIdOverride, ec2Client); const instances: string[] = await processFleetResult(fleet, runnerParameters); @@ -288,6 +298,7 @@ async function createInstances( ], Type: 'instant', }); + logger.debug('CreateFleet request payload.', { payload: createFleetCommand.input }); fleet = await ec2Client.send(createFleetCommand); } catch (e) { logger.warn('Create fleet request failed.', { error: e as Error }); @@ -296,6 +307,68 @@ async function createInstances( return fleet; } +async function createInstancesWithRunInstances( + runnerParameters: Runners.RunnerInputParameters, + amiIdOverride: string | undefined, + ec2Client: EC2Client, +): Promise<string[]> { + const tags = [ + { Key: 'ghr:Application', Value: 'github-action-runner' }, + { Key: 'ghr:created_by', Value: runnerParameters.numberOfRunners === 1 ? 'scale-up-lambda' : 'pool-lambda' }, + { Key: 'ghr:Type', Value: runnerParameters.runnerType }, + { Key: 'ghr:Owner', Value: runnerParameters.runnerOwner }, + ]; + + if (runnerParameters.tracingEnabled) { + const traceId = tracer.getRootXrayTraceId(); + tags.push({ Key: 'ghr:trace_id', Value: traceId! }); + } + + try { + if (runnerParameters.ec2instanceCriteria.targetCapacityType === 'spot') { + throw new Error( + 'Spot instances are not supported with RunInstances. 
Please set targetCapacityType to on-demand for dedicated hosts.', + ); + } + + const instanceType = runnerParameters.ec2instanceCriteria.instanceTypes[0] as _InstanceType; + const runInstancesCommand = new RunInstancesCommand({ + LaunchTemplate: { + LaunchTemplateName: runnerParameters.launchTemplateName, + Version: '$Default', + }, + InstanceType: instanceType, + MinCount: runnerParameters.numberOfRunners, + MaxCount: runnerParameters.numberOfRunners, + SubnetId: runnerParameters.subnets[0], + ...(amiIdOverride ? { ImageId: amiIdOverride } : {}), + TagSpecifications: [ + { + ResourceType: 'instance', + Tags: tags, + }, + { + ResourceType: 'volume', + Tags: tags, + }, + ], + }); + + logger.debug('RunInstances request payload.', { payload: runInstancesCommand.input }); + const result = await ec2Client.send(runInstancesCommand); + const instanceIds = result.Instances?.map((i) => i.InstanceId!).filter(Boolean) || []; + + if (instanceIds.length === 0) { + throw new Error('RunInstances returned no instances for dedicated host.'); + } + + return instanceIds; + } catch (e) { + logger.warn('RunInstances request failed for dedicated host.', { error: e as Error }); + throw e; + } +} + // If launchTime is undefined, this will return false export function bootTimeExceeded(ec2Runner: { launchTime?: Date }): boolean { const runnerBootTimeInMinutes = process.env.RUNNER_BOOT_TIME_IN_MINUTES; diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts b/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts index 8ac2c14489..270233704e 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts @@ -113,6 +113,7 @@ const EXPECTED_RUNNER_PARAMS: RunnerInputParameters = { tracingEnabled: false, onDemandFailoverOnError: [], scaleErrors: ['UnfulfillableCapacity', 'MaxSpotInstanceCountExceeded', 'TargetCapacityLimitExceededException'], + useDedicatedHost: false, 
source: 'scale-up-lambda', }; let expectedRunnerParams: RunnerInputParameters; @@ -2019,6 +2020,36 @@ describe('Retry mechanism tests', () => { }); }); +describe('useDedicatedHost', () => { + beforeEach(() => { + process.env.ENABLE_ORGANIZATION_RUNNERS = 'true'; + process.env.ENABLE_EPHEMERAL_RUNNERS = 'true'; + process.env.RUNNER_NAME_PREFIX = 'unit-test-'; + process.env.RUNNER_GROUP_NAME = 'Default'; + process.env.SSM_CONFIG_PATH = '/github-action-runners/default/runners/config'; + process.env.SSM_TOKEN_PATH = '/github-action-runners/default/runners/config'; + process.env.RUNNER_LABELS = 'label1,label2'; + }); + + it('defaults to false when USE_DEDICATED_HOST env var is not set', async () => { + delete process.env.USE_DEDICATED_HOST; + await scaleUpModule.scaleUp(TEST_DATA); + expect(createRunner).toHaveBeenCalledWith(expect.objectContaining({ useDedicatedHost: false })); + }); + + it('is true when USE_DEDICATED_HOST is "true"', async () => { + process.env.USE_DEDICATED_HOST = 'true'; + await scaleUpModule.scaleUp(TEST_DATA); + expect(createRunner).toHaveBeenCalledWith(expect.objectContaining({ useDedicatedHost: true })); + }); + + it('is false when USE_DEDICATED_HOST is "false"', async () => { + process.env.USE_DEDICATED_HOST = 'false'; + await scaleUpModule.scaleUp(TEST_DATA); + expect(createRunner).toHaveBeenCalledWith(expect.objectContaining({ useDedicatedHost: false })); + }); +}); + function defaultOctokitMockImpl() { mockOctokit.actions.getJobForWorkflowRun.mockImplementation(() => ({ data: { diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-up.ts b/lambdas/functions/control-plane/src/scale-runners/scale-up.ts index 395c87e8f8..024299cde8 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-up.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-up.ts @@ -67,6 +67,7 @@ interface CreateEC2RunnerConfig { tracingEnabled?: boolean; onDemandFailoverOnError?: string[]; scaleErrors: string[]; + useDedicatedHost?: 
boolean; } function generateRunnerServiceConfig(githubRunnerConfig: CreateGitHubRunnerConfig, token: string) { @@ -321,6 +322,7 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise [logging\_retention\_in\_days](#input\_logging\_retention\_in\_days) | Specifies the number of days you want to retain log events for the lambda log group. Possible values are: 0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, and 3653. | `number` | `180` | no | | [matcher\_config\_parameter\_store\_tier](#input\_matcher\_config\_parameter\_store\_tier) | The tier of the parameter store for the matcher configuration. Valid values are `Standard`, and `Advanced`. | `string` | `"Standard"` | no | | [metrics](#input\_metrics) | Configuration for metrics created by the module, by default metrics are disabled to avoid additional costs. When metrics are enable all metrics are created unless explicit configured otherwise. |
object({
enable = optional(bool, false)
namespace = optional(string, "GitHub Runners")
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
enable_spot_termination_warning = optional(bool, true)
}), {})
})
| `{}` | no | -| [multi\_runner\_config](#input\_multi\_runner\_config) | multi\_runner\_config = {
runner\_config: {
runner\_os: "The EC2 Operating System type to use for action runner instances (linux,windows)."
runner\_architecture: "The platform architecture of the runner instance\_type."
runner\_metadata\_options: "(Optional) Metadata options for the ec2 runner instances."
ami: "(Optional) AMI configuration for the action runner instances. This object allows you to specify all AMI-related settings in one place."
create\_service\_linked\_role\_spot: (Optional) create the serviced linked role for spot instances that is required by the scale-up lambda.
credit\_specification: "(Optional) The credit specification of the runner instance\_type. Can be unset, `standard` or `unlimited`.
delay\_webhook\_event: "The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event."
disable\_runner\_autoupdate: "Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/)"
ebs\_optimized: "The EC2 EBS optimized configuration."
enable\_ephemeral\_runners: "Enable ephemeral runners, runners will only be used once."
enable\_job\_queued\_check: "Enables JIT configuration for creating runners instead of registration token based registraton. JIT configuration will only be applied for ephemeral runners. By default JIT configuration is enabled for ephemeral runners an can be disabled via this override. When running on GHES without support for JIT configuration this variable should be set to true for ephemeral runners."
enable\_on\_demand\_failover\_for\_errors: "Enable on-demand failover. For example to fall back to on demand when no spot capacity is available the variable can be set to `InsufficientInstanceCapacity`. When not defined the default behavior is to retry later."
scale\_errors: "List of aws error codes that should trigger retry during scale up. This list will replace the default errors defined in the variable `defaultScaleErrors` in https://github.com/github-aws-runners/terraform-aws-github-runner/blob/main/lambdas/functions/control-plane/src/aws/runners.ts"
enable\_organization\_runners: "Register runners to organization, instead of repo level"
enable\_runner\_binaries\_syncer: "Option to disable the lambda to sync GitHub runner distribution, useful when using a pre-build AMI."
enable\_ssm\_on\_runners: "Enable to allow access the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances."
enable\_userdata: "Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI."
instance\_allocation\_strategy: "The allocation strategy for spot instances. AWS recommends to use `capacity-optimized` however the AWS default is `lowest-price`."
instance\_max\_spot\_price: "Max price price for spot instances per hour. This variable will be passed to the create fleet as max spot price for the fleet."
instance\_target\_capacity\_type: "Default lifecycle used for runner instances, can be either `spot` or `on-demand`."
instance\_types: "List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux and Windows Server Core for win)."
job\_queue\_retention\_in\_seconds: "The number of seconds the job is held in the queue before it is purged"
minimum\_running\_time\_in\_minutes: "The time an ec2 action runner should be running at minimum before terminated if not busy."
pool\_runner\_owner: "The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported."
runner\_additional\_security\_group\_ids: "List of additional security groups IDs to apply to the runner. If added outside the multi\_runner\_config block, the additional security group(s) will be applied to all runner configs. If added inside the multi\_runner\_config, the additional security group(s) will be applied to the individual runner."
runner\_as\_root: "Run the action runner under the root user. Variable `runner_run_as` will be ignored."
runner\_boot\_time\_in\_minutes: "The minimum time for an EC2 runner to boot and register as a runner."
runner\_disable\_default\_labels: "Disable default labels for the runners (os, architecture and `self-hosted`). If enabled, the runner will only have the extra labels provided in `runner_extra_labels`. In case you on own start script is used, this configuration parameter needs to be parsed via SSM."
runner\_extra\_labels: "Extra (custom) labels for the runners (GitHub). Separate each label by a comma. Labels checks on the webhook can be enforced by setting `multi_runner_config.matcherConfig.exactMatch`. GitHub read-only labels should not be provided."
runner\_group\_name: "Name of the runner group."
runner\_name\_prefix: "Prefix for the GitHub runner name."
runner\_run\_as: "Run the GitHub actions agent as user."
runners\_maximum\_count: "The maximum number of runners that will be created. Setting the variable to `-1` desiables the maximum check."
scale\_down\_schedule\_expression: "Scheduler expression to check every x for scale down."
scale\_up\_reserved\_concurrent\_executions: "Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations."
userdata\_template: "Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored."
enable\_jit\_config "Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is available. In case you are upgrading from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI."
enable\_runner\_detailed\_monitoring: "Should detailed monitoring be enabled for the runner. Set this to true if you want to use detailed monitoring. See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html for details."
enable\_cloudwatch\_agent: "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`."
cloudwatch\_config: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
userdata\_pre\_install: "Script to be ran before the GitHub Actions runner is installed on the EC2 instances"
userdata\_post\_install: "Script to be ran after the GitHub Actions runner is installed on the EC2 instances"
runner\_hook\_job\_started: "Script to be ran in the runner environment at the beginning of every job"
runner\_hook\_job\_completed: "Script to be ran in the runner environment at the end of every job"
runner\_ec2\_tags: "Map of tags that will be added to the launch template instance tag specifications."
runner\_iam\_role\_managed\_policy\_arns: "Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role"
vpc\_id: "The VPC for security groups of the action runners. If not set uses the value of `var.vpc_id`."
subnet\_ids: "List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. If not set, uses the value of `var.subnet_ids`."
idle\_config: "List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle."
runner\_log\_files: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
block\_device\_mappings: "The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`."
job\_retry: "Experimental! Can be removed / changed without trigger a major release. Configure job retries. The configuration enables job retries (for ephemeral runners). After creating the instances a message will be published to a job retry queue. The job retry check lambda is checking after a delay if the job is queued. If not the message will be published again on the scale-up (build queue). Using this feature can impact the rate limit of the GitHub app."
pool\_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC)."
}
matcherConfig: {
labelMatchers: "The list of list of labels supported by the runner configuration. `[[self-hosted, linux, x64, example]]`"
exactMatch: "If set to true all labels in the workflow job must match the GitHub labels (os, architecture and `self-hosted`). When false if __any__ workflow label matches it will trigger the webhook."
priority: "If set it defines the priority of the matcher, the matcher with the lowest priority will be evaluated first. Default is 999, allowed values 0-999."
}
redrive\_build\_queue: "Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries."
} |
map(object({
runner_config = object({
runner_os = string
runner_architecture = string
runner_metadata_options = optional(map(any), {
instance_metadata_tags = "enabled"
http_endpoint = "enabled"
http_tokens = "required"
http_put_response_hop_limit = 1
})
ami = optional(object({
filter = optional(map(list(string)), { state = ["available"] })
owners = optional(list(string), ["amazon"])
id_ssm_parameter_arn = optional(string, null)
kms_key_arn = optional(string, null)
}), null)
create_service_linked_role_spot = optional(bool, false)
credit_specification = optional(string, null)
delay_webhook_event = optional(number, 30)
disable_runner_autoupdate = optional(bool, false)
ebs_optimized = optional(bool, false)
enable_ephemeral_runners = optional(bool, false)
enable_job_queued_check = optional(bool, null)
enable_on_demand_failover_for_errors = optional(list(string), [])
scale_errors = optional(list(string), [
"UnfulfillableCapacity",
"MaxSpotInstanceCountExceeded",
"TargetCapacityLimitExceededException",
"RequestLimitExceeded",
"ResourceLimitExceeded",
"MaxSpotInstanceCountExceeded",
"MaxSpotFleetRequestCountExceeded",
"InsufficientInstanceCapacity",
"InsufficientCapacityOnHost",
])
enable_organization_runners = optional(bool, false)
enable_runner_binaries_syncer = optional(bool, true)
enable_ssm_on_runners = optional(bool, false)
enable_userdata = optional(bool, true)
instance_allocation_strategy = optional(string, "lowest-price")
instance_max_spot_price = optional(string, null)
instance_target_capacity_type = optional(string, "spot")
instance_types = list(string)
job_queue_retention_in_seconds = optional(number, 86400)
minimum_running_time_in_minutes = optional(number, null)
pool_runner_owner = optional(string, null)
runner_as_root = optional(bool, false)
runner_boot_time_in_minutes = optional(number, 5)
runner_disable_default_labels = optional(bool, false)
runner_extra_labels = optional(list(string), [])
runner_group_name = optional(string, "Default")
runner_name_prefix = optional(string, "")
runner_run_as = optional(string, "ec2-user")
runners_maximum_count = number
runner_additional_security_group_ids = optional(list(string), [])
scale_down_schedule_expression = optional(string, "cron(*/5 * * * ? *)")
scale_up_reserved_concurrent_executions = optional(number, 1)
userdata_template = optional(string, null)
userdata_content = optional(string, null)
enable_jit_config = optional(bool, null)
enable_runner_detailed_monitoring = optional(bool, false)
enable_cloudwatch_agent = optional(bool, true)
cloudwatch_config = optional(string, null)
userdata_pre_install = optional(string, "")
userdata_post_install = optional(string, "")
runner_hook_job_started = optional(string, "")
runner_hook_job_completed = optional(string, "")
runner_ec2_tags = optional(map(string), {})
runner_iam_role_managed_policy_arns = optional(list(string), [])
vpc_id = optional(string, null)
subnet_ids = optional(list(string), null)
idle_config = optional(list(object({
cron = string
timeZone = string
idleCount = number
evictionStrategy = optional(string, "oldest_first")
})), [])
cpu_options = optional(object({
core_count = number
threads_per_core = number
}), null)
placement = optional(object({
affinity = optional(string)
availability_zone = optional(string)
group_id = optional(string)
group_name = optional(string)
host_id = optional(string)
host_resource_group_arn = optional(string)
spread_domain = optional(string)
tenancy = optional(string)
partition_number = optional(number)
}), null)
runner_log_files = optional(list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
log_class = optional(string, "STANDARD")
})), null)
block_device_mappings = optional(list(object({
delete_on_termination = optional(bool, true)
device_name = optional(string, "/dev/xvda")
encrypted = optional(bool, true)
iops = optional(number)
kms_key_id = optional(string)
snapshot_id = optional(string)
throughput = optional(number)
volume_size = number
volume_type = optional(string, "gp3")
})), [{
volume_size = 30
}])
pool_config = optional(list(object({
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
})), [])
job_retry = optional(object({
enable = optional(bool, false)
delay_in_seconds = optional(number, 300)
delay_backoff = optional(number, 2)
lambda_memory_size = optional(number, 256)
lambda_timeout = optional(number, 30)
max_attempts = optional(number, 1)
}), {})
})
matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = optional(bool, false)
priority = optional(number, 999)
})
redrive_build_queue = optional(object({
enabled = bool
maxReceiveCount = number
}), {
enabled = false
maxReceiveCount = null
})
}))
| n/a | yes | +| [multi\_runner\_config](#input\_multi\_runner\_config) | multi\_runner\_config = {
runner\_config: {
runner\_os: "The EC2 Operating System type to use for action runner instances (linux, osx, windows)."
runner\_architecture: "The platform architecture of the runner instance\_type."
runner\_metadata\_options: "(Optional) Metadata options for the ec2 runner instances."
ami: "(Optional) AMI configuration for the action runner instances. This object allows you to specify all AMI-related settings in one place."
create\_service\_linked\_role\_spot: "(Optional) Create the service-linked role for spot instances that is required by the scale-up lambda."
credit\_specification: "(Optional) The credit specification of the runner instance\_type. Can be unset, `standard` or `unlimited`."
delay\_webhook\_event: "The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event."
disable\_runner\_autoupdate: "Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/)"
ebs\_optimized: "The EC2 EBS optimized configuration."
enable\_ephemeral\_runners: "Enable ephemeral runners, runners will only be used once."
enable\_job\_queued\_check: "Enables JIT configuration for creating runners instead of registration token based registration. JIT configuration will only be applied for ephemeral runners. By default JIT configuration is enabled for ephemeral runners and can be disabled via this override. When running on GHES without support for JIT configuration this variable should be set to true for ephemeral runners."
enable\_on\_demand\_failover\_for\_errors: "Enable on-demand failover. For example to fall back to on demand when no spot capacity is available the variable can be set to `InsufficientInstanceCapacity`. When not defined the default behavior is to retry later."
scale\_errors: "List of aws error codes that should trigger retry during scale up. This list will replace the default errors defined in the variable `defaultScaleErrors` in https://github.com/github-aws-runners/terraform-aws-github-runner/blob/main/lambdas/functions/control-plane/src/aws/runners.ts"
enable\_organization\_runners: "Register runners to organization, instead of repo level"
enable\_runner\_binaries\_syncer: "Option to disable the lambda to sync GitHub runner distribution, useful when using a pre-build AMI."
enable\_ssm\_on\_runners: "Enable to allow access to the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances."
enable\_userdata: "Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI."
instance\_allocation\_strategy: "The allocation strategy for spot instances. AWS recommends to use `capacity-optimized` however the AWS default is `lowest-price`."
instance\_max\_spot\_price: "Max price for spot instances per hour. This variable will be passed to the create fleet as max spot price for the fleet."
instance\_target\_capacity\_type: "Default lifecycle used for runner instances, can be either `spot` or `on-demand`."
instance\_types: "List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux, macOS Sequoia for osx, Windows Server Core for win)."
job\_queue\_retention\_in\_seconds: "The number of seconds the job is held in the queue before it is purged"
minimum\_running\_time\_in\_minutes: "The time an ec2 action runner should be running at minimum before terminated if not busy."
pool\_runner\_owner: "The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported."
runner\_additional\_security\_group\_ids: "List of additional security groups IDs to apply to the runner. If added outside the multi\_runner\_config block, the additional security group(s) will be applied to all runner configs. If added inside the multi\_runner\_config, the additional security group(s) will be applied to the individual runner."
runner\_as\_root: "Run the action runner under the root user. Variable `runner_run_as` will be ignored."
runner\_boot\_time\_in\_minutes: "The minimum time for an EC2 runner to boot and register as a runner."
runner\_disable\_default\_labels: "Disable default labels for the runners (os, architecture and `self-hosted`). If enabled, the runner will only have the extra labels provided in `runner_extra_labels`. In case your own start script is used, this configuration parameter needs to be parsed via SSM."
runner\_extra\_labels: "Extra (custom) labels for the runners (GitHub). Separate each label by a comma. Labels checks on the webhook can be enforced by setting `multi_runner_config.matcherConfig.exactMatch`. GitHub read-only labels should not be provided."
runner\_group\_name: "Name of the runner group."
runner\_name\_prefix: "Prefix for the GitHub runner name."
runner\_run\_as: "Run the GitHub actions agent as user."
runners\_maximum\_count: "The maximum number of runners that will be created. Setting the variable to `-1` disables the maximum check."
scale\_down\_schedule\_expression: "Scheduler expression to check every x for scale down."
scale\_up\_reserved\_concurrent\_executions: "Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations."
userdata\_template: "Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored."
enable\_jit\_config: "Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is available. In case you are upgrading from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI."
enable\_runner\_detailed\_monitoring: "Should detailed monitoring be enabled for the runner. Set this to true if you want to use detailed monitoring. See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html for details."
enable\_cloudwatch\_agent: "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`."
cloudwatch\_config: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
userdata\_pre\_install: "Script to be run before the GitHub Actions runner is installed on the EC2 instances"
userdata\_post\_install: "Script to be run after the GitHub Actions runner is installed on the EC2 instances"
runner\_hook\_job\_started: "Script to be run in the runner environment at the beginning of every job"
runner\_hook\_job\_completed: "Script to be run in the runner environment at the end of every job"
runner\_ec2\_tags: "Map of tags that will be added to the launch template instance tag specifications."
runner\_iam\_role\_managed\_policy\_arns: "Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role"
vpc\_id: "The VPC for security groups of the action runners. If not set uses the value of `var.vpc_id`."
subnet\_ids: "List of subnets in which the action runners will be launched, the subnets need to be subnets in the `vpc_id`. If not set, uses the value of `var.subnet_ids`."
idle\_config: "List of time periods, defined as cron expressions, to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle."
use\_dedicated\_host: "Experimental! Can be removed / changed without triggering a major release. Whether to use EC2 dedicated hosts for the runners. Needed for macOS runners. Note that using dedicated hosts can increase cost significantly."
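runner\_log\_files: "(optional) List of logfiles to send to CloudWatch, will only be used if `enable_cloudwatch_agent` is set to true. Object description: `log_group_name`: Name of the log group, `prefix_log_group`: If true, the log group name will be prefixed with `/github-self-hosted-runners/`, `file_path`: path to the log file, `log_stream_name`: name of the log stream, `log_class`: The log class of the log group. Valid values are `STANDARD` or `INFREQUENT_ACCESS`. Defaults to `STANDARD`."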
block\_device\_mappings: "The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`."
job\_retry: "Experimental! Can be removed / changed without triggering a major release. Configure job retries. The configuration enables job retries (for ephemeral runners). After creating the instances a message will be published to a job retry queue. The job retry check lambda is checking after a delay if the job is queued. If not the message will be published again on the scale-up (build queue). Using this feature can impact the rate limit of the GitHub app."
pool\_config: "The configuration for updating the pool. The `size` is the pool size to adjust to when an event is triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC)."
}
matcherConfig: {
labelMatchers: "The list of lists of labels supported by the runner configuration. `[[self-hosted, linux, x64, example]]`"
exactMatch: "If set to true all labels in the workflow job must match the GitHub labels (os, architecture and `self-hosted`). When false if __any__ workflow label matches it will trigger the webhook."
priority: "If set it defines the priority of the matcher, the matcher with the lowest priority will be evaluated first. Default is 999, allowed values 0-999."
}
redrive\_build\_queue: "Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries."
} |
map(object({
runner_config = object({
runner_os = string
runner_architecture = string
runner_metadata_options = optional(map(any), {
instance_metadata_tags = "enabled"
http_endpoint = "enabled"
http_tokens = "required"
http_put_response_hop_limit = 1
})
ami = optional(object({
filter = optional(map(list(string)), { state = ["available"] })
owners = optional(list(string), ["amazon"])
id_ssm_parameter_arn = optional(string, null)
kms_key_arn = optional(string, null)
}), null)
create_service_linked_role_spot = optional(bool, false)
credit_specification = optional(string, null)
delay_webhook_event = optional(number, 30)
disable_runner_autoupdate = optional(bool, false)
ebs_optimized = optional(bool, false)
enable_ephemeral_runners = optional(bool, false)
enable_job_queued_check = optional(bool, null)
enable_on_demand_failover_for_errors = optional(list(string), [])
scale_errors = optional(list(string), [
"UnfulfillableCapacity",
"MaxSpotInstanceCountExceeded",
"TargetCapacityLimitExceededException",
"RequestLimitExceeded",
"ResourceLimitExceeded",
"MaxSpotInstanceCountExceeded",
"MaxSpotFleetRequestCountExceeded",
"InsufficientInstanceCapacity",
"InsufficientCapacityOnHost",
])
enable_organization_runners = optional(bool, false)
enable_runner_binaries_syncer = optional(bool, true)
enable_ssm_on_runners = optional(bool, false)
enable_userdata = optional(bool, true)
instance_allocation_strategy = optional(string, "lowest-price")
instance_max_spot_price = optional(string, null)
instance_target_capacity_type = optional(string, "spot")
instance_types = list(string)
job_queue_retention_in_seconds = optional(number, 86400)
minimum_running_time_in_minutes = optional(number, null)
pool_runner_owner = optional(string, null)
runner_as_root = optional(bool, false)
runner_boot_time_in_minutes = optional(number, 5)
runner_disable_default_labels = optional(bool, false)
runner_extra_labels = optional(list(string), [])
runner_group_name = optional(string, "Default")
runner_name_prefix = optional(string, "")
runner_run_as = optional(string, "ec2-user")
runners_maximum_count = number
runner_additional_security_group_ids = optional(list(string), [])
scale_down_schedule_expression = optional(string, "cron(*/5 * * * ? *)")
scale_up_reserved_concurrent_executions = optional(number, 1)
userdata_template = optional(string, null)
userdata_content = optional(string, null)
enable_jit_config = optional(bool, null)
enable_runner_detailed_monitoring = optional(bool, false)
enable_cloudwatch_agent = optional(bool, true)
cloudwatch_config = optional(string, null)
userdata_pre_install = optional(string, "")
userdata_post_install = optional(string, "")
runner_hook_job_started = optional(string, "")
runner_hook_job_completed = optional(string, "")
runner_ec2_tags = optional(map(string), {})
runner_iam_role_managed_policy_arns = optional(list(string), [])
vpc_id = optional(string, null)
subnet_ids = optional(list(string), null)
idle_config = optional(list(object({
cron = string
timeZone = string
idleCount = number
evictionStrategy = optional(string, "oldest_first")
})), [])
cpu_options = optional(object({
core_count = number
threads_per_core = number
}), null)
placement = optional(object({
affinity = optional(string)
availability_zone = optional(string)
group_id = optional(string)
group_name = optional(string)
host_id = optional(string)
host_resource_group_arn = optional(string)
spread_domain = optional(string)
tenancy = optional(string)
partition_number = optional(number)
}), null)
license_specifications = optional(list(object({
license_configuration_arn = string
})), [])
use_dedicated_host = optional(bool, false)
runner_log_files = optional(list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
log_class = optional(string, "STANDARD")
})), null)
block_device_mappings = optional(list(object({
delete_on_termination = optional(bool, true)
device_name = optional(string, "/dev/xvda")
encrypted = optional(bool, true)
iops = optional(number)
kms_key_id = optional(string)
snapshot_id = optional(string)
throughput = optional(number)
volume_size = number
volume_type = optional(string, "gp3")
})), [{
volume_size = 30
}])
pool_config = optional(list(object({
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
})), [])
job_retry = optional(object({
enable = optional(bool, false)
delay_in_seconds = optional(number, 300)
delay_backoff = optional(number, 2)
lambda_memory_size = optional(number, 256)
lambda_timeout = optional(number, 30)
max_attempts = optional(number, 1)
}), {})
})
matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = optional(bool, false)
priority = optional(number, 999)
})
redrive_build_queue = optional(object({
enabled = bool
maxReceiveCount = number
}), {
enabled = false
maxReceiveCount = null
})
}))
| n/a | yes | | [parameter\_store\_tags](#input\_parameter\_store\_tags) | Map of tags that will be added to all the SSM Parameter Store parameters created by the Lambda function. | `map(string)` | `{}` | no | | [pool\_lambda\_reserved\_concurrent\_executions](#input\_pool\_lambda\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [pool\_lambda\_timeout](#input\_pool\_lambda\_timeout) | Time out for the pool lambda in seconds. | `number` | `60` | no | diff --git a/modules/multi-runner/runners.tf b/modules/multi-runner/runners.tf index 59b6307aa0..38c5ffa98a 100644 --- a/modules/multi-runner/runners.tf +++ b/modules/multi-runner/runners.tf @@ -56,6 +56,8 @@ module "runners" { credit_specification = each.value.runner_config.credit_specification cpu_options = each.value.runner_config.cpu_options placement = each.value.runner_config.placement + license_specifications = each.value.runner_config.license_specifications + use_dedicated_host = each.value.runner_config.use_dedicated_host enable_runner_binaries_syncer = each.value.runner_config.enable_runner_binaries_syncer lambda_s3_bucket = var.lambda_s3_bucket diff --git a/modules/multi-runner/variables.tf b/modules/multi-runner/variables.tf index 613cf8b2ce..35ccefeb4b 100644 --- a/modules/multi-runner/variables.tf +++ b/modules/multi-runner/variables.tf @@ -147,6 +147,10 @@ variable "multi_runner_config" { tenancy = optional(string) partition_number = optional(number) }), null) + license_specifications = optional(list(object({ + license_configuration_arn = string + })), []) + use_dedicated_host = optional(bool, false) runner_log_files = optional(list(object({ log_group_name = string prefix_log_group = bool @@ -197,7 +201,7 @@ variable "multi_runner_config" { description = < [role\_path](#input\_role\_path) | The path that will be added to the 
role, if not set the environment name will be used. | `string` | `null` | no | | [role\_permissions\_boundary](#input\_role\_permissions\_boundary) | Permissions boundary that will be added to the created role for the lambda. | `string` | `null` | no | | [runner\_architecture](#input\_runner\_architecture) | The platform architecture of the runner instance\_type. | `string` | `"x64"` | no | -| [runner\_os](#input\_runner\_os) | The EC2 Operating System type to use for action runner instances (linux,windows). | `string` | `"linux"` | no | +| [runner\_os](#input\_runner\_os) | The EC2 Operating System type to use for action runner instances (linux, osx, windows). | `string` | `"linux"` | no | | [s3\_logging\_bucket](#input\_s3\_logging\_bucket) | Bucket for action runner distribution bucket access logging. | `string` | `null` | no | | [s3\_logging\_bucket\_prefix](#input\_s3\_logging\_bucket\_prefix) | Bucket prefix for action runner distribution bucket access logging. | `string` | `null` | no | | [s3\_tags](#input\_s3\_tags) | Map of tags that will be added to the S3 bucket. Note these are additional tags to the default tags. | `map(string)` | `{}` | no | diff --git a/modules/runner-binaries-syncer/main.tf b/modules/runner-binaries-syncer/main.tf index c1cbf382af..2b1e212262 100644 --- a/modules/runner-binaries-syncer/main.tf +++ b/modules/runner-binaries-syncer/main.tf @@ -1,5 +1,5 @@ locals { - action_runner_distribution_object_key = "actions-runner-${var.runner_os}.${var.runner_os == "linux" ? "tar.gz" : "zip"}" + action_runner_distribution_object_key = "actions-runner-${var.runner_os}.${var.runner_os == "windows" ? 
"zip" : "tar.gz"}" } resource "aws_s3_bucket" "action_dist" { diff --git a/modules/runner-binaries-syncer/runner-binaries-syncer.tf b/modules/runner-binaries-syncer/runner-binaries-syncer.tf index 00b6e700f5..b893c70393 100644 --- a/modules/runner-binaries-syncer/runner-binaries-syncer.tf +++ b/modules/runner-binaries-syncer/runner-binaries-syncer.tf @@ -4,6 +4,7 @@ locals { gh_binary_os_label = { windows = "win", linux = "linux" + osx = "osx" } } diff --git a/modules/runner-binaries-syncer/variables.tf b/modules/runner-binaries-syncer/variables.tf index e274f043a2..b48e6da5fd 100644 --- a/modules/runner-binaries-syncer/variables.tf +++ b/modules/runner-binaries-syncer/variables.tf @@ -99,13 +99,13 @@ variable "role_path" { } variable "runner_os" { - description = "The EC2 Operating System type to use for action runner instances (linux,windows)." + description = "The EC2 Operating System type to use for action runner instances (linux, osx, windows)." type = string default = "linux" validation { - condition = contains(["linux", "windows"], var.runner_os) - error_message = "Valid values for runner_os are (linux, windows)." + condition = contains(["linux", "osx", "windows"], var.runner_os) + error_message = "Valid values for runner_os are (linux, osx, windows)." } } diff --git a/modules/runners/README.md b/modules/runners/README.md index 6a27276624..f44a5e75a3 100644 --- a/modules/runners/README.md +++ b/modules/runners/README.md @@ -168,7 +168,7 @@ yarn run dist | [instance\_max\_spot\_price](#input\_instance\_max\_spot\_price) | Max price price for spot instances per hour. This variable will be passed to the create fleet as max spot price for the fleet. | `string` | `null` | no | | [instance\_profile\_path](#input\_instance\_profile\_path) | The path that will be added to the instance\_profile, if not set the prefix will be used. 
| `string` | `null` | no | | [instance\_target\_capacity\_type](#input\_instance\_target\_capacity\_type) | Default lifecycle used runner instances, can be either `spot` or `on-demand`. | `string` | `"spot"` | no | -| [instance\_types](#input\_instance\_types) | List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux and Windows Server Core for win). | `list(string)` | `null` | no | +| [instance\_types](#input\_instance\_types) | List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux, macOS Sequoia for osx, Windows Server Core for win). | `list(string)` | `null` | no | | [job\_retry](#input\_job\_retry) | Configure job retries. The configuration enables job retries (for ephemeral runners). After creating the instances a message will be published to a job retry queue. The job retry check lambda is checking after a delay if the job is queued. If not the message will be published again on the scale-up (build queue). Using this feature can impact the rate limit of the GitHub app.

`enable`: Enable or disable the job retry feature.
`delay_in_seconds`: The delay in seconds before the job retry check lambda will check the job status.
`delay_backoff`: The backoff factor for the delay.
`lambda_memory_size`: Memory size limit in MB for the job retry check lambda.
`lambda_reserved_concurrent_executions`: Amount of reserved concurrent executions for the job retry check lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations.
`lambda_timeout`: Time out of the job retry check lambda in seconds.
`max_attempts`: The maximum number of attempts to retry the job. |
object({
enable = optional(bool, false)
delay_in_seconds = optional(number, 300)
delay_backoff = optional(number, 2)
lambda_memory_size = optional(number, 256)
lambda_reserved_concurrent_executions = optional(number, 1)

lambda_timeout = optional(number, 30)

max_attempts = optional(number, 1)
})
| `{}` | no | | [key\_name](#input\_key\_name) | Key pair name | `string` | `null` | no | | [kms\_key\_arn](#input\_kms\_key\_arn) | Optional CMK Key ARN to be used for Parameter Store. | `string` | `null` | no | @@ -185,6 +185,7 @@ yarn run dist | [lambda\_timeout\_scale\_down](#input\_lambda\_timeout\_scale\_down) | Time out for the scale down lambda in seconds. | `number` | `60` | no | | [lambda\_timeout\_scale\_up](#input\_lambda\_timeout\_scale\_up) | Time out for the scale up lambda in seconds. | `number` | `60` | no | | [lambda\_zip](#input\_lambda\_zip) | File location of the lambda zip file. | `string` | `null` | no | +| [license\_specifications](#input\_license\_specifications) | The license specifications for the instance. See https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/launch_template#license_specification for details. |
list(object({
license_configuration_arn = string
}))
| `[]` | no | | [log\_class](#input\_log\_class) | The log class of the CloudWatch log groups for the lambda functions. Valid values are `STANDARD` or `INFREQUENT_ACCESS`. | `string` | `"STANDARD"` | no | | [log\_level](#input\_log\_level) | Logging level for lambda logging. Valid values are 'silly', 'trace', 'debug', 'info', 'warn', 'error', 'fatal'. | `string` | `"info"` | no | | [logging\_kms\_key\_id](#input\_logging\_kms\_key\_id) | Specifies the kms key id to encrypt the logs with | `string` | `null` | no | @@ -216,7 +217,7 @@ yarn run dist | [runner\_labels](#input\_runner\_labels) | All the labels for the runners (GitHub) including the default one's(e.g: self-hosted, linux, x64, label1, label2). Separate each label by a comma | `list(string)` | n/a | yes | | [runner\_log\_files](#input\_runner\_log\_files) | (optional) List of logfiles to send to CloudWatch, will only be used if `enable_cloudwatch_agent` is set to true. Object description: `log_group_name`: Name of the log group, `prefix_log_group`: If true, the log group name will be prefixed with `/github-self-hosted-runners/`, `file_path`: path to the log file, `log_stream_name`: name of the log stream, `log_class`: The log class of the log group. Valid values are `STANDARD` or `INFREQUENT_ACCESS`. Defaults to `STANDARD`. |
list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
log_class = optional(string, "STANDARD")
}))
| `null` | no | | [runner\_name\_prefix](#input\_runner\_name\_prefix) | The prefix used for the GitHub runner name. The prefix will be used in the default start script to prefix the instance name when register the runner in GitHub. The value is available via an EC2 tag 'ghr:runner\_name\_prefix'. | `string` | `""` | no | -| [runner\_os](#input\_runner\_os) | The EC2 Operating System type to use for action runner instances (linux,windows). | `string` | `"linux"` | no | +| [runner\_os](#input\_runner\_os) | The EC2 Operating System type to use for action runner instances (linux, osx, windows). | `string` | `"linux"` | no | | [runner\_run\_as](#input\_runner\_run\_as) | Run the GitHub actions agent as user. | `string` | `"ec2-user"` | no | | [runners\_lambda\_s3\_key](#input\_runners\_lambda\_s3\_key) | S3 key for runners lambda function. Required if using S3 bucket to specify lambdas. | `string` | `null` | no | | [runners\_lambda\_s3\_object\_version](#input\_runners\_lambda\_s3\_object\_version) | S3 object version for runners lambda function. Useful if S3 versioning is enabled on source bucket. | `string` | `null` | no | @@ -231,6 +232,7 @@ yarn run dist | [subnet\_ids](#input\_subnet\_ids) | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | n/a | yes | | [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name. | `map(string)` | `{}` | no | | [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
| `{}` | no | +| [use\_dedicated\_host](#input\_use\_dedicated\_host) | Experimental! Can be removed / changed without trigger a major release. Whether to use EC2 dedicated hosts for the runners. Needed for macos runners Note that using dedicated hosts can increase cost significantly. | `bool` | `false` | no | | [user\_agent](#input\_user\_agent) | User agent used for API calls. | `string` | `null` | no | | [userdata\_content](#input\_userdata\_content) | Alternative user-data content, replacing the templated one. By providing your own user\_data you have to take care of installing all required software, including the action runner and registering the runner. Be-aware configuration parameters in SSM as well as tags are treated as internals. Changes will not trigger a breaking release. | `string` | `null` | no | | [userdata\_post\_install](#input\_userdata\_post\_install) | User-data script snippet to insert after GitHub action runner install | `string` | `""` | no | diff --git a/modules/runners/main.tf b/modules/runners/main.tf index 9a85a2f2c3..3cc259a732 100644 --- a/modules/runners/main.tf +++ b/modules/runners/main.tf @@ -20,21 +20,25 @@ locals { default_ami = { "windows" = { name = ["Windows_Server-2022-English-Full-ECS_Optimized-*"] } "linux" = var.runner_architecture == "arm64" ? { name = ["al2023-ami-2023.*-kernel-6.*-arm64"] } : { name = ["al2023-ami-2023.*-kernel-6.*-x86_64"] } + "osx" = var.runner_architecture == "arm64" ? 
{ name = ["amzn-ec2-macos-15.*-arm64"] } : { name = ["amzn-ec2-macos-15.*"] } } default_userdata_template = { "windows" = "${path.module}/templates/user-data.ps1" "linux" = "${path.module}/templates/user-data.sh" + "osx" = "${path.module}/templates/user-data-osx.sh" } userdata_install_runner = { "windows" = "${path.module}/templates/install-runner.ps1" "linux" = "${path.module}/templates/install-runner.sh" + "osx" = "${path.module}/templates/install-runner-osx.sh" } userdata_start_runner = { "windows" = "${path.module}/templates/start-runner.ps1" "linux" = "${path.module}/templates/start-runner.sh" + "osx" = "${path.module}/templates/start-runner-osx.sh" } # Handle AMI configuration @@ -78,6 +82,13 @@ locals { enable_cloudwatch_agent = var.enable_cloudwatch_agent ssm_key_cloudwatch_agent_config = var.enable_cloudwatch_agent ? aws_ssm_parameter.cloudwatch_agent_config_runner[0].name : "" }) : var.userdata_content) : "" + + encoded_user_data = ( + var.runner_os == "linux" ? base64gzip(local.user_data) : + var.runner_os == "windows" ? base64encode(local.user_data) : + var.runner_os == "osx" ? base64encode(local.user_data) : + null + ) } data "aws_ami" "runner" { @@ -186,6 +197,13 @@ resource "aws_launch_template" "runner" { } } + dynamic "license_specification" { + for_each = var.license_specifications + content { + license_configuration_arn = license_specification.value.license_configuration_arn + } + } + monitoring { enabled = var.enable_runner_detailed_monitoring } @@ -267,7 +285,7 @@ resource "aws_launch_template" "runner" { ) } - user_data = var.runner_os == "windows" ? 
base64encode(local.user_data) : base64gzip(local.user_data) + user_data = local.encoded_user_data tags = local.tags diff --git a/modules/runners/pool.tf b/modules/runners/pool.tf index 53c5d1c2cd..3434829018 100644 --- a/modules/runners/pool.tf +++ b/modules/runners/pool.tf @@ -52,6 +52,7 @@ module "pool" { name_prefix = var.runner_name_prefix pool_owner = var.pool_runner_owner role = aws_iam_role.runner + use_dedicated_host = var.use_dedicated_host } subnet_ids = var.subnet_ids ssm_token_path = "${var.ssm_paths.root}/${var.ssm_paths.tokens}" diff --git a/modules/runners/pool/README.md b/modules/runners/pool/README.md index a09538aced..560ad7f763 100644 --- a/modules/runners/pool/README.md +++ b/modules/runners/pool/README.md @@ -49,7 +49,7 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [aws\_partition](#input\_aws\_partition) | (optional) partition for the arn if not 'aws' | `string` | `"aws"` | no | -| [config](#input\_config) | Lookup details in parent module. |
object({
lambda = object({
log_level = string
logging_retention_in_days = number
logging_kms_key_id = string
log_class = string
reserved_concurrent_executions = number
s3_bucket = string
s3_key = string
s3_object_version = string
security_group_ids = list(string)
runtime = string
architecture = string
memory_size = number
timeout = number
zip = string
subnet_ids = list(string)
parameter_store_tags = string
})
tags = map(string)
ghes = object({
url = string
ssl_verify = string
})
github_app_parameters = object({
key_base64 = map(string)
id = map(string)
})
subnet_ids = list(string)
runner = object({
disable_runner_autoupdate = bool
ephemeral = bool
enable_jit_config = bool
enable_on_demand_failover_for_errors = list(string)
scale_errors = list(string)
boot_time_in_minutes = number
labels = list(string)
launch_template = object({
name = string
})
group_name = string
name_prefix = string
pool_owner = string
role = object({
arn = string
})
})
instance_types = list(string)
instance_target_capacity_type = string
instance_allocation_strategy = string
instance_max_spot_price = string
prefix = string
pool = list(object({
schedule_expression = string
schedule_expression_timezone = string
size = number
}))
role_permissions_boundary = string
kms_key_arn = string
ami_kms_key_arn = string
ami_id_ssm_parameter_arn = string
role_path = string
ssm_token_path = string
ssm_config_path = string
ami_id_ssm_parameter_name = string
ami_id_ssm_parameter_read_policy_arn = string
arn_ssm_parameters_path_config = string
lambda_tags = map(string)
user_agent = string
})
| n/a | yes | +| [config](#input\_config) | Lookup details in parent module. |
object({
lambda = object({
log_level = string
logging_retention_in_days = number
logging_kms_key_id = string
log_class = string
reserved_concurrent_executions = number
s3_bucket = string
s3_key = string
s3_object_version = string
security_group_ids = list(string)
runtime = string
architecture = string
memory_size = number
timeout = number
zip = string
subnet_ids = list(string)
parameter_store_tags = string
})
tags = map(string)
ghes = object({
url = string
ssl_verify = string
})
github_app_parameters = object({
key_base64 = map(string)
id = map(string)
})
subnet_ids = list(string)
runner = object({
disable_runner_autoupdate = bool
ephemeral = bool
enable_jit_config = bool
enable_on_demand_failover_for_errors = list(string)
scale_errors = list(string)
boot_time_in_minutes = number
labels = list(string)
launch_template = object({
name = string
})
group_name = string
name_prefix = string
pool_owner = string
role = object({
arn = string
})
use_dedicated_host = bool
})
instance_types = list(string)
instance_target_capacity_type = string
instance_allocation_strategy = string
instance_max_spot_price = string
prefix = string
pool = list(object({
schedule_expression = string
schedule_expression_timezone = string
size = number
}))
role_permissions_boundary = string
kms_key_arn = string
ami_kms_key_arn = string
ami_id_ssm_parameter_arn = string
role_path = string
ssm_token_path = string
ssm_config_path = string
ami_id_ssm_parameter_name = string
ami_id_ssm_parameter_read_policy_arn = string
arn_ssm_parameters_path_config = string
lambda_tags = map(string)
user_agent = string
})
| n/a | yes | | [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
| `{}` | no | ## Outputs diff --git a/modules/runners/pool/main.tf b/modules/runners/pool/main.tf index 5363f3c3fb..8f70713f9e 100644 --- a/modules/runners/pool/main.tf +++ b/modules/runners/pool/main.tf @@ -49,6 +49,7 @@ resource "aws_lambda_function" "pool" { ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS = jsonencode(var.config.runner.enable_on_demand_failover_for_errors) SSM_PARAMETER_STORE_TAGS = var.config.lambda.parameter_store_tags SCALE_ERRORS = jsonencode(var.config.runner.scale_errors) + USE_DEDICATED_HOST = var.config.runner.use_dedicated_host } } diff --git a/modules/runners/pool/variables.tf b/modules/runners/pool/variables.tf index 4bfdd68010..a613c5563b 100644 --- a/modules/runners/pool/variables.tf +++ b/modules/runners/pool/variables.tf @@ -46,6 +46,7 @@ variable "config" { role = object({ arn = string }) + use_dedicated_host = bool }) instance_types = list(string) instance_target_capacity_type = string diff --git a/modules/runners/scale-down-state-diagram.md b/modules/runners/scale-down-state-diagram.md index b4f260eb2a..64e32bc141 100644 --- a/modules/runners/scale-down-state-diagram.md +++ b/modules/runners/scale-down-state-diagram.md @@ -117,7 +117,7 @@ stateDiagram-v2 note right of CheckMinimumTime Minimum running time in minutes - (Linux: 5min, Windows: 15min) + (Linux: 5min, Windows: 15min, OSX: 20min) end note note right of CheckBootTime @@ -145,6 +145,6 @@ stateDiagram-v2 ## Configuration Parameters - **Cron Schedule**: `cron(*/5 * * * ? 
*)` (every 5 minutes) -- **Minimum Runtime**: Linux 5min, Windows 15min +- **Minimum Runtime**: Linux 5min, Windows 15min, OSX 20min - **Boot Timeout**: Configurable via `runner_boot_time_in_minutes` - **Idle Config**: Per-environment configuration for desired idle runners diff --git a/modules/runners/scale-down.tf b/modules/runners/scale-down.tf index b304e8066e..449d1970ed 100644 --- a/modules/runners/scale-down.tf +++ b/modules/runners/scale-down.tf @@ -1,8 +1,11 @@ locals { # Windows Runners can take their sweet time to do anything + # For an AWS vended AMI with an x86 Mac instance or an Apple silicon Mac instance, + # the launch time can range from approximately 6 minutes to 20 minutes. min_runtime_defaults = { "windows" = 15 "linux" = 5 + "osx" = 20 } } resource "aws_lambda_function" "scale_down" { diff --git a/modules/runners/scale-up.tf b/modules/runners/scale-up.tf index c5503f6394..f729a2a613 100644 --- a/modules/runners/scale-up.tf +++ b/modules/runners/scale-up.tf @@ -62,6 +62,7 @@ resource "aws_lambda_function" "scale_up" { ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS = jsonencode(var.enable_on_demand_failover_for_errors) SCALE_ERRORS = jsonencode(var.scale_errors) JOB_RETRY_CONFIG = jsonencode(local.job_retry_config) + USE_DEDICATED_HOST = var.use_dedicated_host } } diff --git a/modules/runners/templates/install-runner-osx.sh b/modules/runners/templates/install-runner-osx.sh new file mode 100644 index 0000000000..73abce4500 --- /dev/null +++ b/modules/runners/templates/install-runner-osx.sh @@ -0,0 +1,59 @@ +# shellcheck shell=bash + +## install the runner (macOS) + +s3_location=${S3_LOCATION_RUNNER_DISTRIBUTION} +architecture=${RUNNER_ARCHITECTURE} + +if [ -z "$RUNNER_TARBALL_URL" ] && [ -z "$s3_location" ]; then + echo "Neither RUNNER_TARBALL_URL or s3_location are set" + exit 1 +fi + +file_name="actions-runner.tar.gz" + +echo "Setting up GH Actions runner tool cache" +mkdir -p /Users/runner/hostedtoolcache + +echo "Creating actions-runner directory for the 
GH Action installation" +sudo mkdir -p /opt/actions-runner +cd /opt/actions-runner || exit 1 + +if [[ -n "$RUNNER_TARBALL_URL" ]]; then + echo "Downloading the GH Action runner from $RUNNER_TARBALL_URL to $file_name" + curl -s -o "$file_name" -L "$RUNNER_TARBALL_URL" +else + echo "Retrieving REGION from AWS API" + token="$(curl -s -f -X PUT "http://169.254.169.254/latest/api/token" \ + -H "X-aws-ec2-metadata-token-ttl-seconds: 180")" + + region="$(curl -s -f -H "X-aws-ec2-metadata-token: $token" \ + http://169.254.169.254/latest/dynamic/instance-identity/document | jq -r .region)" + echo "Retrieved REGION from AWS API ($region)" + + echo "Downloading the GH Action runner from s3 bucket $s3_location" + aws s3 cp "$s3_location" "$file_name" --region "$region" --no-progress +fi + +echo "Un-tar action runner" +tar xzf "./$file_name" +echo "Delete tar file" +rm -rf "$file_name" + +os_name=$(sw_vers -productName 2>/dev/null || echo "macOS") +os_version=$(sw_vers -productVersion 2>/dev/null || echo "unknown") +arch_name=$(uname -m) + +echo "OS: $os_name $os_version ($arch_name)" + +if ! command -v brew >/dev/null 2>&1; then + echo "Homebrew not found; skipping dependency installation via brew" +else + echo "Homebrew detected; install any macOS-specific dependencies here if needed" + # Example: brew install jq awscli +fi + +echo "Set file ownership of action runner" +sudo chown -R "$user_name":staff /opt/actions-runner +sudo chmod 755 "/Users/runner" +sudo chown -R "$user_name":staff /Users/runner/hostedtoolcache diff --git a/modules/runners/templates/install-runner.ps1 b/modules/runners/templates/install-runner.ps1 index 4219773ad8..a13f91a65b 100644 --- a/modules/runners/templates/install-runner.ps1 +++ b/modules/runners/templates/install-runner.ps1 @@ -11,4 +11,3 @@ Expand-Archive -Path actions-runner.zip -DestinationPath . 
Write-Host "Delete zip file" Remove-Item actions-runner.zip - diff --git a/modules/runners/templates/start-runner-osx.sh b/modules/runners/templates/start-runner-osx.sh new file mode 100644 index 0000000000..c3ea08af20 --- /dev/null +++ b/modules/runners/templates/start-runner-osx.sh @@ -0,0 +1,194 @@ +#!/bin/bash + +set -euo pipefail + +# macOS variant of start-runner.sh + +tag_instance_with_runner_id() { + echo "Checking for .runner file to extract agent ID" + + if [[ ! -f "/opt/actions-runner/.runner" ]]; then + echo "Warning: .runner file not found" + return 0 + fi + + echo "Found .runner file, extracting agent ID" + local agent_id + agent_id=$(jq -r '.agentId' /opt/actions-runner/.runner 2>/dev/null || echo "") + + if [[ -z "$agent_id" || "$agent_id" == "null" ]]; then + echo "Warning: Could not extract agent ID from .runner file" + return 0 + fi + + echo "Tagging instance with GitHub runner agent ID: $agent_id" + if aws ec2 create-tags \ + --region "$region" \ + --resources "$instance_id" \ + --tags Key=ghr:github_runner_id,Value="$agent_id"; then + echo "Successfully tagged instance with agent ID: $agent_id" + return 0 + else + echo "Warning: Failed to tag instance with agent ID" + return 0 + fi +} + +cleanup() { + local exit_code="$1" + + if [ "$exit_code" -ne 0 ]; then + echo "ERROR: runner-start-failed with exit code $exit_code" + fi + + if [ "$agent_mode" = "ephemeral" ] || [ "$exit_code" -ne 0 ]; then + echo "Terminating instance" + aws ec2 terminate-instances \ + --instance-ids "$instance_id" \ + --region "$region" || true + fi +} + +trap 'cleanup $?' EXIT + +echo "Retrieving TOKEN from AWS API" +token=$(curl -f -X PUT "http://169.254.169.254/latest/api/token" \ + -H "X-aws-ec2-metadata-token-ttl-seconds: 180" || true) +if [ -z "$token" ]; then + retrycount=0 + until [ -n "$token" ]; do + echo "Failed to retrieve token. Retrying in 5 seconds." 
+ sleep 5 + token=$(curl -f -X PUT "http://169.254.169.254/latest/api/token" \ + -H "X-aws-ec2-metadata-token-ttl-seconds: 180" || true) + retrycount=$((retrycount + 1)) + if [ $retrycount -gt 40 ]; then + break + fi + done +fi + +region=$(curl -f -H "X-aws-ec2-metadata-token: $token" \ + http://169.254.169.254/latest/dynamic/instance-identity/document | jq -r .region) +echo "Retrieved REGION from AWS API ($region)" + +instance_id=$(curl -f -H "X-aws-ec2-metadata-token: $token" \ + http://169.254.169.254/latest/meta-data/instance-id) +echo "Retrieved INSTANCE_ID from AWS API ($instance_id)" + +availability_zone=$(curl -f -H "X-aws-ec2-metadata-token: $token" \ + http://169.254.169.254/latest/meta-data/placement/availability-zone) + +environment=$(curl -f -H "X-aws-ec2-metadata-token: $token" \ + http://169.254.169.254/latest/meta-data/tags/instance/ghr:environment || echo "") +ssm_config_path=$(curl -f -H "X-aws-ec2-metadata-token: $token" \ + http://169.254.169.254/latest/meta-data/tags/instance/ghr:ssm_config_path || echo "") +runner_name_prefix=$(curl -f -H "X-aws-ec2-metadata-token: $token" \ + http://169.254.169.254/latest/meta-data/tags/instance/ghr:runner_name_prefix || echo "") + +echo "Retrieved ghr:environment tag - ($environment)" +echo "Retrieved ghr:ssm_config_path tag - ($ssm_config_path)" +echo "Retrieved ghr:runner_name_prefix tag - ($runner_name_prefix)" + +parameters=$(aws ssm get-parameters-by-path \ + --path "$ssm_config_path" \ + --region "$region" \ + --query "Parameters[*].{Name:Name,Value:Value}") +echo "Retrieved parameters from AWS SSM ($parameters)" + +run_as=$(echo "$parameters" | jq -r '.[] | select(.Name == "'$ssm_config_path'/run_as") | .Value') +echo "Retrieved /$ssm_config_path/run_as parameter - ($run_as)" + +agent_mode=$(echo "$parameters" | jq -r '.[] | select(.Name == "'$ssm_config_path'/agent_mode") | .Value') +echo "Retrieved /$ssm_config_path/agent_mode parameter - ($agent_mode)" + +disable_default_labels=$(echo "$parameters" 
| jq -r '.[] | select(.Name == "'$ssm_config_path'/disable_default_labels") | .Value') +echo "Retrieved /$ssm_config_path/disable_default_labels parameter - ($disable_default_labels)" + +enable_jit_config=$(echo "$parameters" | jq -r '.[] | select(.Name == "'$ssm_config_path'/enable_jit_config") | .Value') +echo "Retrieved /$ssm_config_path/enable_jit_config parameter - ($enable_jit_config)" + +token_path=$(echo "$parameters" | jq -r '.[] | select(.Name == "'$ssm_config_path'/token_path") | .Value') +echo "Retrieved /$ssm_config_path/token_path parameter - ($token_path)" + +echo "Get GH Runner config from AWS SSM" +config=$(aws ssm get-parameter \ + --name "$token_path"/"$instance_id" \ + --with-decryption \ + --region "$region" | jq -r ".Parameter | .Value") + +while [[ -z "$config" ]]; do + echo "Waiting for GH Runner config to become available in AWS SSM" + sleep 1 + config=$(aws ssm get-parameter \ + --name "$token_path"/"$instance_id" \ + --with-decryption \ + --region "$region" | jq -r ".Parameter | .Value") +done + +echo "Delete GH Runner token from AWS SSM" +aws ssm delete-parameter --name "$token_path"/"$instance_id" --region "$region" + +if [ -z "$run_as" ]; then + echo "No user specified, using default ec2-user account" + run_as="ec2-user" +fi + +if [[ "$run_as" == "root" ]]; then + echo "run_as is set to root - export RUNNER_ALLOW_RUNASROOT=1" + export RUNNER_ALLOW_RUNASROOT=1 +fi + +sudo chown -R "$run_as" /opt/actions-runner + +info_arch=$(uname -m) +info_os=$(sw_vers -productName 2>/dev/null || echo "macOS") +info_ver=$(sw_vers -productVersion 2>/dev/null || echo "unknown") + +tee /opt/actions-runner/.setup_info </dev/null 2>&1; then + echo "Homebrew detected; you can install extra dependencies via brew if needed" +fi + +user_name=ec2-user + +${install_runner} + +${post_install} + +# Register runner job hooks +# Ref: https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/running-scripts-before-or-after-a-job +%{ if 
hook_job_started != "" } +cat > /opt/actions-runner/hook_job_started.sh <<'EOF' +${hook_job_started} +EOF +echo ACTIONS_RUNNER_HOOK_JOB_STARTED=/opt/actions-runner/hook_job_started.sh | tee -a /opt/actions-runner/.env +%{ endif } + +%{ if hook_job_completed != "" } +cat > /opt/actions-runner/hook_job_completed.sh <<'EOF' +${hook_job_completed} +EOF +echo ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/opt/actions-runner/hook_job_completed.sh | tee -a /opt/actions-runner/.env +%{ endif } + +${start_runner} diff --git a/modules/runners/variables.tf b/modules/runners/variables.tf index e2a33280b9..18e04fd884 100644 --- a/modules/runners/variables.tf +++ b/modules/runners/variables.tf @@ -119,18 +119,18 @@ variable "instance_max_spot_price" { } variable "runner_os" { - description = "The EC2 Operating System type to use for action runner instances (linux,windows)." + description = "The EC2 Operating System type to use for action runner instances (linux, osx, windows)." type = string default = "linux" validation { - condition = contains(["linux", "windows"], var.runner_os) - error_message = "Valid values for runner_os are (linux, windows)." + condition = contains(["linux", "osx", "windows"], var.runner_os) + error_message = "Valid values for runner_os are (linux, osx, windows)." } } variable "instance_types" { - description = "List of instance types for the action runner. Defaults are based on runner_os (al2023 for linux and Windows Server Core for win)." + description = "List of instance types for the action runner. Defaults are based on runner_os (al2023 for linux, macOS Sequoia for osx, Windows Server Core for win)." type = list(string) default = null } @@ -671,6 +671,14 @@ variable "placement" { default = null } +variable "license_specifications" { + description = "The license specifications for the instance. See https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/launch_template#license_specification for details." 
+ type = list(object({ + license_configuration_arn = string + })) + default = [] +} + variable "enable_jit_config" { description = "Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is available. In case you are upgrading from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI." type = bool @@ -812,3 +820,9 @@ variable "parameter_store_tags" { type = map(string) default = {} } + +variable "use_dedicated_host" { + description = "Experimental! Can be removed / changed without triggering a major release. Whether to use EC2 dedicated hosts for the runners. Needed for macOS runners. Note that using dedicated hosts can increase cost significantly." + type = bool + default = false +} diff --git a/variables.tf b/variables.tf index d739e916fb..f7d37de1b8 100644 --- a/variables.tf +++ b/variables.tf @@ -570,7 +570,7 @@ variable "instance_max_spot_price" { } variable "instance_types" { - description = "List of instance types for the action runner. Defaults are based on runner_os (al2023 for linux and Windows Server Core for win)." + description = "List of instance types for the action runner. Defaults are based on runner_os (al2023 for linux, macOS Sequoia for osx, Windows Server Core for win)." type = list(string) default = ["m5.large", "c5.large"] } @@ -686,13 +686,13 @@ variable "enable_managed_runner_security_group" { } variable "runner_os" { - description = "The EC2 Operating System type to use for action runner instances (linux,windows)." + description = "The EC2 Operating System type to use for action runner instances (linux, osx, windows)." type = string default = "linux" validation { - condition = contains(["linux", "windows"], var.runner_os) - error_message = "Valid values for runner_os are (linux, windows)."
+ condition = contains(["linux", "osx", "windows"], var.runner_os) + error_message = "Valid values for runner_os are (linux, osx, windows)." } } @@ -902,6 +902,14 @@ variable "runner_placement" { default = null } +variable "runner_license_specifications" { + description = "The license specifications for the instance. See https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/launch_template#license_specification for details." + type = list(object({ + license_configuration_arn = string + })) + default = [] +} + variable "enable_jit_config" { description = "Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is available. In case you are upgrading from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI." type = bool @@ -1058,3 +1066,9 @@ variable "parameter_store_tags" { type = map(string) default = {} } + +variable "use_dedicated_host" { + description = "Use a dedicated host for the runner instances." + type = bool + default = false +}