Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
[![CI for specs](https://github.com/WebAssembly/spec/actions/workflows/ci-spec.yml/badge.svg)](https://github.com/WebAssembly/spec/actions/workflows/ci-spec.yml)
[![CI for interpreter & tests](https://github.com/WebAssembly/spec/actions/workflows/ci-interpreter.yml/badge.svg)](https://github.com/WebAssembly/spec/actions/workflows/ci-interpreter.yml)

# JIT interface Proposal for WebAssembly

This repository is a clone of [`WebAssembly/spec`](https://github.com/WebAssembly/spec/). It is meant for discussion, prototype specification, and implementation of a proposal to add support for dynamically-generating new code as a core feature.

See the [explainer](proposals/jit-interface/Explainer.md) for a high-level summary of the proposal.

# spec

Expand Down
2 changes: 2 additions & 0 deletions interpreter/binary/encode.ml
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,8 @@ struct
| ExternConvert Internalize -> op 0xfb; op 0x1a
| ExternConvert Externalize -> op 0xfb; op 0x1b

| FuncNew (x, y, z) -> op 0xfb; op 0x27; byte 0x00; idx x; idx y; idx z

| Const {it = I32 c; _} -> op 0x41; s32 c
| Const {it = I64 c; _} -> op 0x42; s64 c
| Const {it = F32 c; _} -> op 0x43; f32 c
Expand Down
2 changes: 2 additions & 0 deletions interpreter/syntax/ast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ type localidx = idx
type labelidx = idx
type fieldidx = I32.t
type laneidx = I8.t
type scopeidx = idx

type num = Value.num Source.phrase
type vec = Value.vec Source.phrase
Expand Down Expand Up @@ -240,6 +241,7 @@ and instr' =
| ArrayInitData of typeidx * dataidx (* fill array from data segment *)
| ArrayInitElem of typeidx * elemidx (* fill array from elem segment *)
| ExternConvert of externop (* extern conversion *)
| FuncNew of memoryidx * typeidx * scopeidx (* create new function *)
| Const of num (* numeric constant *)
| Test of testop (* numeric test *)
| Compare of relop (* numeric comparison *)
Expand Down
1 change: 1 addition & 0 deletions interpreter/syntax/free.ml
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ let rec instr (e : instr) =
| ArrayInitData (x, y) -> types (idx x) ++ datas (idx y)
| ArrayInitElem (x, y) -> types (idx x) ++ elems (idx y)
| ExternConvert _ -> empty
| FuncNew (_, y, _) -> types (idx y)
| Const _ | Test _ | Compare _ | Unary _ | Binary _ | Convert _ -> empty
| VecConst _ | VecTest _
| VecUnary _ | VecBinary _ | VecTernary _ | VecCompare _
Expand Down
2 changes: 2 additions & 0 deletions interpreter/syntax/mnemonics.ml
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ let array_init_elem x y = ArrayInitElem (x, y)
let any_convert_extern = ExternConvert Internalize
let extern_convert_any = ExternConvert Externalize

let func_new x y z = FuncNew (x, y, z)

let i32_clz = Unary (I32 I32Op.Clz)
let i32_ctz = Unary (I32 I32Op.Ctz)
let i32_popcnt = Unary (I32 I32Op.Popcnt)
Expand Down
1 change: 1 addition & 0 deletions interpreter/text/arrange.ml
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,7 @@ let rec instr e =
| ArrayInitData (x, y) -> "array.init_data " ^ idx x ^ " " ^ idx y, []
| ArrayInitElem (x, y) -> "array.init_elem " ^ idx x ^ " " ^ idx y, []
| ExternConvert op -> externop op, []
| FuncNew (x, y, z) -> "func.new" ^ idx x ^ " " ^ idx y ^ " " ^ idx z, []
| Const n -> constop n.it ^ " " ^ num n, []
| Test op -> testop op, []
| Compare op -> relop op, []
Expand Down
2 changes: 2 additions & 0 deletions interpreter/text/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,8 @@ rule token = parse
| "any.convert_extern" -> EXTERN_CONVERT any_convert_extern
| "extern.convert_any" -> EXTERN_CONVERT extern_convert_any

| "func.new" -> FUNC_NEW func_new

| "i32.clz" -> UNARY i32_clz
| "i32.ctz" -> UNARY i32_ctz
| "i32.popcnt" -> UNARY i32_popcnt
Expand Down
1 change: 1 addition & 0 deletions interpreter/text/parser.mly
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ let parse_annots (m : module_) : Custom.section list =
%token<Ast.instr'> I31_GET
%token<Ast.idx -> Ast.instr'> STRUCT_NEW ARRAY_NEW ARRAY_GET
%token STRUCT_SET
%token<Ast.idx -> Ast.idx -> Ast.idx -> Ast.instr'> FUNC_NEW
%token<Ast.idx -> int32 -> Ast.instr'> STRUCT_GET
%token ARRAY_NEW_FIXED ARRAY_NEW_ELEM ARRAY_NEW_DATA
%token ARRAY_SET ARRAY_LEN
Expand Down
6 changes: 6 additions & 0 deletions interpreter/valid/valid.ml
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,12 @@ let rec check_instr (c : context) (e : instr) (s : infer_resulttype) : infer_ins
let (nul, _ht) = peek_ref 0 s e.at in
[RefT (nul, ht1)] --> [RefT (nul, ht2)], []

| FuncNew (x, y, z) -> (* TODO: validate scope index z *)
let MemoryT (at, _lim) = memory c x in
let _ft = func_type c y in
[NumT (numtype_of_addrtype at);
NumT (numtype_of_addrtype at)] --> [RefT (Null, UseHT (Def (type_ c y)))], []

| Const v ->
let t = NumT (type_num v.it) in
[] --> [t], []
Expand Down
127 changes: 127 additions & 0 deletions proposals/jit-interface/Explainer.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# JIT interface

This proposal adds safe, programmably-constrained dynamic code generation to WebAssembly.
We propose a new `func.new` instruction and a new kind of section called a "scope", which declares the elements of the surrounding module that new code may legally access.

# Problem

Core WebAssembly has no mechanism for dynamically generating code.
Yet generating new code at runtime is important in a number of important use cases, such as guest language virtual machines that otherwise must use relatively slow interpretation.

### Harvard architecture, no JIT

Core WebAssembly's separates code (functions) and data (memories, tables, and managed data) as a fundamental design principle.
This important property of Wasm allows efficient validation of all code in a module and establishes other important security properties.
Further, this restriction allows static analysis of modules; state declared in a module but not exported admits sound closed-world optimizations, which is common in tooling today.

### Guest runtimes want to JIT new code

However, some programs running on top of Wasm, such as guest language runtimes like Python or Lua, and hardware emulators, such as QEMU, can benefit tremendously by generating specialized code at runtime.
The performance benefit of dynamic code generation can be extreme; as much as *10 to 100 times* faster than running in interpreted mode. Today, these runtimes can run *only* in interpreted mode and are prohibitively slow.

### Inconsistent host support for new modules

To date, generating new Wasm code has been an optional host capability provided by the embedder.
On the web, this is done through JavaScript APIs for creating new modules directly from bytes.
The [wasm-c-api](https://github.com/WebAssembly/wasm-c-api) allows embedding a Wasm engine in a C/C++ program and creating new modules dynamically.
Other host platforms may offer the ability to create new modules from bytes.
Generally, the granularity of generation is Wasm modules. Some experiments have been done with function-at-a-time mechanisms, but no standard mechanism has been proposed and nothing is portable across platforms.

Today, guest runtimes that generate new code are few, and they make a number of compromises, such as batching generated functions, in order to workaround limitations and cost of new modules on host platforms.

# Proposing a lightweight core-Wasm mechanism

To address this problem, we propose to add a new mechanism to core Wasm. Doing so exposes the mechanism to security features/mitigations, makes behavior explicit and documented, allows sound toolchain analysis and transformations, and exposes it to engine optimizations.

## A new instruction: `func.new`

The main component of this idea is a new core Wasm bytecode, `func.new`, which creates a new function at runtime from bytecode stored in Wasm memory.

```
func.new $mt $ft $scope: [at at] -> [(ref $ft)]
where:
- $mt = memory at limits
- $ft = func [t1*] -> [t2*]
- $scope = export*

```

This bytecode has immediates:
- a memory index, indicating the memory which contains the code for the new function
- a function type, indicating the signature of the new function
- a scope, indicating a list of functions, tables, globals, tags, and memories that new code may legally access

At runtime, this instruction takes operands:
- start, an integer indicating the start offset within the memory of the code
- length, an integer indicating the length of the code

The memory index and function type are straightforward.
The `$scope` immediate is an explicit enumeration of the module contents the new code may reference.
In particular, `$scope` produces a new index space for types, globals, functions, memories, etc. For compactness of the bytecode, and to reuse the same scope for multiple different `func.new` instructions, `$scope` will be factored out to its own section rather than inline as immediates.

To execute the instruction, the engine first copies the bytes of code from the Wasm memory, then validates the bytecode under a module context corresponding to `$scope` and a function context corresponding to the expected function signature `$ft`.
Upon out of bounds memory access or validation error, the instruction traps.
If validation of the code is successful, the engine creates an internal representation of a new function and pushes a non-null reference to it onto the operand stack.
The function's *store* is derived from the store under which the instruction was executed; i.e. the new function's instance is (a subset of) the caller's instance.

## A new section: JIT scope

A scope defines what declarations are accessible to new code passed to a `func.new` instruction.
We introduce a new "scope" section to factor out the list of accessible declarations so that `func.new` may refer to a scope by index rather than listing declarations individually. A scope section allows declaring multiple scopes to allow different `func.new` instructions different accessibility.
A scope consists of a list of declarations (similar to export declarations, but without names).
The order of declarations in a scope creates new index spaces, i.e. it *renumbers* the declarations from the surrounding module, starting each respective index space over from 0.

## An example

Putting the pieces together, we can write an example that uses a memory and a scope and creates new functions at runtime.

```
(module
(type $t1 (func)) ;; the type for new functions
(func $f1 ...)
(func $f2 ...)
(func $f3 ...)
(memory $m1 1 1) ;; the memory used to temporarily store code for func.new
(memory $m2 1 1) ;; a memory accessible to new code

(scope $s1 ;; the scope a new function may use
(func $f1 $f2) ;; expose $f1 and $f2 to new code
(memory $m2)) ;; expose only $m2 to new code

(func $gen
(local $n (ref $t1)) ;; a variable to hold the new funcref
...
(local.set $n
(func.new $m1 $t1 $s1 ;; code lives in $m1, result sig is $t1, scope is $s1
(i32.const 1024) (i32.const 10))) ;; code is stored at address 1024 and is 10 bytes long
...
(call_ref $t1 (local.get $n)) ;; call the new function!!
)
)
```

### Sound static analysis

Sound static analysis of Wasm code is vital for offline transformations such as `wasm-opt`.
This proposal preserves reasoning about a module's internals and transformations of them by making its interactions with runtime-generated code explicit.
Since runtime generated code can only access the declarations explicitly provided by its scope, analysis can soundly treat declarations mentioned in scopes similarly to exported functions.
However, unlike exported functions, scopes cannot be accessed outside a module.
Thus a module's public interface is not polluted with its internal use of dynamically-generated code.

### Toolchain transformations

Sound static analysis implies that toolchains making closed-world assumptions for optimization (e.g. dead-code elimination) can account for all potential future uses by dynamically-generated code by simply considering scopes.
Reorganization of modules resulting from DCE is sound, as scopes can be rewritten for updated indices.
Since dynamically-generated code must use scoped indices, it is not affected by the renumbering of the containing module.
Thus DCE and other aggressive transformations can be made sound and not affect dynamically-generated code.

### Engine optimizations

The possibility of dynamically-generated code implies that an engine has runtime capability to parse function bodies and perform code validation. However, the `func.new` bytecode doesn't require parsing and validating sections, so the runtime system doesn't need a fully-featured Wasm module parser.

New code implies the need for at least one execution tier, such as interpreter or compiler.
This implies AOT scenarios need to either disable the feature, e.g. reject code memories, trap on `func.new`, or integrate a new tier. Note that since a scope defines a module context, the execution tier only needs to support runtime features that could be legally used under that context--basically, no memories implies no load/store instructions, no GC types implies no GC instructions.

In the future, we could consider additional restrictions, such as an explicit list of allowable bytecodes for new functions. That could allow a module to limit language features for new functions and greatly reduce the runtime system requirement. Since this is a restriction a module imposes on itself, it need not be standardized. As an example of the usefulness of this, a module could limit its `func.new` capabilities to only allow `i32` arithmetic and control flow. An AOT implementation could then provide a runtime execution tier that *only* supported that restricted set, keeping it both simple and optimized for the use case.