diff --git a/.gitignore b/.gitignore index 01b8a80..daeef79 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ __pycache__/ /out /.vscode build.ninja +objdiff.json +.ninja_* diff --git a/README.md b/README.md index 8cf5426..129b3f0 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,25 @@ # Decomp Training -This repository builds a DOL with CodeWarrior for the purpose of loading it into analysis software. It is intended to be compiled with minimal runtime, and is not intended to play back on console. +Welcome to decomp! + +This series of (currently WIP) example files is designed to teach you the basics of decompilation for the Gamecube and Wii. It is recommended to read [this](https://wiki.decomp.dev/en/resources/decomp-intro) accompanying article on the wiki before getting started. + +To get started, follow the [build](#building) steps and download [objdiff](https://github.com/encounter/objdiff). After building, set the active project in objdiff by selecting your project directory in File -> Project -> Project directory. All lessons are contained in [src/training_template](src/training_template). Avoid looking at files in `src/training_answers` to prevent accidentally getting spoiled on the solutions. + +## Technical information + +This repository builds a DOL with CodeWarrior for the purpose of loading it into analysis software. It is intended to be compiled with minimal runtime, and is not intended to play back on console. The build script is fairly simple for now, but more features could be added in the future to support things like relocatables or splitting the executable with dtk to demonstrate linking behavior. + +## Planned content: + +see [here](https://github.com/thefoxcam/decomp-training/wiki) ## Dependencies - ninja 1.3 - Python 3.6 -- [dtk](https://github.com/encounter/decomp-toolkit) -- CodeWarrior v3.0a5.2 -Extract dtk to `build`, and extract CodeWarrior to `build/compiler`. +The configure script will pull all other dependencies. 
## Building diff --git a/configure.py b/configure.py index 25545d6..01eca01 100644 --- a/configure.py +++ b/configure.py @@ -2,8 +2,41 @@ import glob import io import os -from vendor.ninja_syntax import Writer +import json +import sys +import platform +from tools.ninja_syntax import Writer, serialize_path +from pathlib import Path +from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast +binutils_tag = "2.42-1" +compilers_tag = "20250513" +dtk_tag = "v1.0.0" # currently unused? +objdiff_tag = "v2.7.1" +sjiswrap_tag = "v1.1.1" +wibo_tag = "0.6.11" + +linker_version = "GC/3.0a5.2" + +target_src_dir = "training_answers" +base_src_dir = "training_template" + +tools_dir = Path("tools") +build_dir = Path("build") +out_dir = "build" + +target_build_dir = os.path.join(build_dir, "src", "target") +target_out_dir = os.path.join(out_dir, "src", "target") +base_build_dir = os.path.join(build_dir, "src", "base") +base_out_dir = os.path.join(out_dir, "src", "base") + +def is_windows() -> bool: + return os.name == "nt" + +# On Windows, we need this to use && in commands +CHAIN = "cmd /c " if is_windows() else "" +# Native executable extension +EXE = ".exe" if is_windows() else "" # TODO: Debug? RELEASE_MWCC_FLAGS = [ @@ -13,15 +46,24 @@ "-proc gekko", "-align powerpc", "-enum int", - "-enc SJIS", "-fp hardware", "-Cpp_exceptions off", '-pragma "cats off"', - "-ipa file", "-opt all", "-inline auto", # User - "-i include", + "-i src/runtime", + "-Isrc/shared", +] + +TARGET_MWCC_FLAGS = \ + RELEASE_MWCC_FLAGS + [ + f"-Isrc/{target_src_dir}/main_content", +] + +BASE_MWCC_FLAGS = \ + RELEASE_MWCC_FLAGS + [ + f"-Isrc/{base_src_dir}/main_content", ] # TODO: Debug? 
@@ -30,75 +72,344 @@ "-nodefaults", "-mapunused", "-listclosure", - "-lcf " + os.path.join("$builddir", "ldscript.lcf"), + "-lcf " + os.path.join("$build_dir", "ldscript.lcf"), ] +class BuildObject: + def __init__(self, path: str, should_diff: bool, **options: Any) -> None: + self.name = os.path.splitext(path)[0] + self.file_path = path + self.should_diff = should_diff + if should_diff: + self.target_path = os.path.join(target_src_dir, path) + self.base_path = os.path.join(base_src_dir, path) + self.target_obj = os.path.join(target_src_dir, self.name + ".o") + self.base_obj = os.path.join(base_src_dir, self.name + ".o") + else: + self.target_path = path + self.base_path = path + self.target_obj = self.name + ".o" + self.base_obj = self.name + ".o" + self.options: Dict[str, Any] = { + "mw_version": "GC/1.2.5n", + } + +build_objects = [ + BuildObject('main_content/00_basic_assembly_and_isa.c', True), + BuildObject('main_content/01_abi_basics.c', True), + BuildObject('runtime/runtime_core.c', False), + BuildObject('runtime/runtime_exception.c', False), + BuildObject('runtime/main.cpp', False), + BuildObject('shared/stuff.c', False), + BuildObject('shared/sample_functions.c', False), +] + +def write_objdiff(build_objects: list) -> None: + + objdiff_config: Dict[str, Any] = { + "min_version": "2.0.0-beta.5", + "custom_make": "ninja", + "build_target": False, + "watch_patterns": [ + "*.c", + "*.cp", + "*.cpp", + "*.h", + "*.hpp", + "*.inc", + "*.py", + "*.yml", + "*.txt", + "*.json", + ], + "units": [], + "progress_categories": [], + } + + # decomp.me compiler name mapping + COMPILER_MAP = { + "GC/1.0": "mwcc_233_144", + "GC/1.1": "mwcc_233_159", + "GC/1.2.5": "mwcc_233_163", + "GC/1.2.5e": "mwcc_233_163e", + "GC/1.2.5n": "mwcc_233_163n", + "GC/1.3": "mwcc_242_53", + "GC/1.3.2": "mwcc_242_81", + "GC/1.3.2r": "mwcc_242_81r", + "GC/2.0": "mwcc_247_92", + "GC/2.5": "mwcc_247_105", + "GC/2.6": "mwcc_247_107", + "GC/2.7": "mwcc_247_108", + "GC/3.0a3": "mwcc_41_51213", + 
"GC/3.0a3.2": "mwcc_41_60126", + "GC/3.0a3.3": "mwcc_41_60209", + "GC/3.0a3.4": "mwcc_42_60308", + "GC/3.0a5": "mwcc_42_60422", + "GC/3.0a5.2": "mwcc_41_60831", + "GC/3.0": "mwcc_41_60831", + "Wii/1.0RC1": "mwcc_42_140", + "Wii/0x4201_127": "mwcc_42_142", + "Wii/1.0a": "mwcc_42_142", + "Wii/1.0": "mwcc_43_145", + "Wii/1.1": "mwcc_43_151", + "Wii/1.3": "mwcc_43_172", + "Wii/1.5": "mwcc_43_188", + "Wii/1.6": "mwcc_43_202", + "Wii/1.7": "mwcc_43_213", + } + + for build_object in build_objects: + if build_object.should_diff: + compiler_version = COMPILER_MAP.get(build_object.options["mw_version"]) + if compiler_version is None: + print(f"Missing scratch compiler mapping for {build_object.options['mw_version']}") + else: + unit_config = { + "name": build_object.file_path, + "target_path": os.path.join(target_build_dir, build_object.target_obj), + "base_path": os.path.join(base_build_dir, build_object.base_obj), + "scratch": { + "platform": "gc_wii", + "compiler": compiler_version, + "c_flags": " ".join(BASE_MWCC_FLAGS), + "ctx_path": os.path.join(base_build_dir, build_object.target_obj), + "build_ctx": True + }, + "metadata": { + "complete": False, + "reverse_fn_order": False, + "source_path": os.path.join("src", build_object.base_path), + "auto_generated": False + } + } + objdiff_config["units"].append(unit_config) + + # Write objdiff.json + with open("objdiff.json", "w", encoding="utf-8") as w: + + def unix_path(input: Any) -> str: + return str(input).replace(os.sep, "/") if input else "" + + json.dump(objdiff_config, w, indent=4, default=unix_path) + + out_buf = io.StringIO() n = Writer(out_buf) n.variable("ninja_required_version", "1.3") n.newline() -n.variable("builddir", "build") -n.variable("outdir", "out") +n.variable("build_dir", build_dir) +n.variable("out_dir", out_dir) +n.newline() +n.variable("target_build_dir", target_build_dir) +n.variable("target_out_dir", target_out_dir) +n.variable("base_build_dir", base_build_dir) +n.variable("base_out_dir", 
base_out_dir) n.newline() -# TODO: The non-Windows people aren't gonna be happy about this one -# NOTE: Perhaps DDD has the answer to this -n.variable("compiler", os.path.join("$builddir", "compiler", "mwcceppc.exe")) -n.variable("linker", os.path.join("$builddir", "compiler", "mwldeppc.exe")) -n.variable("dtk", os.path.join("$builddir", "dtk.exe")) +n.variable("mw_version", Path(linker_version)) + +# The command line args and the ability to pass in an executable/compiler +# folder could be added if needed. + +### +# Tooling +### +n.comment("Tooling") + +build_tools_path = build_dir / "tools" + +download_tool = tools_dir / "download_tool.py" +n.rule( + name="download_tool", + command=f"$python {download_tool} $tool $out --tag $tag", + description="TOOL $out", +) + +dtk = build_tools_path / f"dtk{EXE}" +n.build( + outputs=dtk, + rule="download_tool", + implicit=download_tool, + variables={ + "tool": "dtk", + "tag": dtk_tag, + }, +) + +objdiff = build_tools_path / f"objdiff-cli{EXE}" +n.build( + outputs=objdiff, + rule="download_tool", + implicit=download_tool, + variables={ + "tool": "objdiff-cli", + "tag": objdiff_tag, + }, +) + +sjiswrap = build_tools_path / "sjiswrap.exe" +n.build( + outputs=sjiswrap, + rule="download_tool", + implicit=download_tool, + variables={ + "tool": "sjiswrap", + "tag": sjiswrap_tag, + }, +) + +# Only add an implicit dependency on wibo if we download it +wrapper = None +wrapper_implicit: Optional[Path] = None +if ( + wibo_tag is not None + and sys.platform == "linux" + and platform.machine() in ("i386", "x86_64") +): + wrapper = build_tools_path / "wibo" + wrapper_implicit = wrapper + n.build( + outputs=wrapper, + rule="download_tool", + implicit=download_tool, + variables={ + "tool": "wibo", + "tag": wibo_tag, + }, + ) +using_wine = False +if not is_windows() and wrapper is None: + using_wine = True + wrapper = Path("wine") +wrapper_cmd = f"{wrapper} " if wrapper else "" + +compilers = build_dir / "compilers" +compilers_implicit = 
compilers +n.build( + outputs=compilers, + rule="download_tool", + implicit=download_tool, + variables={ + "tool": "compilers", + "tag": compilers_tag, + }, +) + +binutils = build_dir / "binutils" +binutils_implicit = binutils +n.build( + outputs=binutils, + rule="download_tool", + implicit=download_tool, + variables={ + "tool": "binutils", + "tag": binutils_tag, + }, +) + +n.newline() + +### +# Helper rule for downloading all tools +### +tools_inputs = [dtk, sjiswrap, compilers, binutils, objdiff] +if using_wine is False: + tools_inputs.append(wrapper) + +n.comment("Download all tools") +n.build( + outputs="tools", + rule="phony", + inputs=tools_inputs, +) +n.newline() + +### +# Build rules +### + +compiler_path = compilers / "$mw_version" + +# MWCC +mwcc = compiler_path / "mwcceppc.exe" +mwcc_cmd = f"{wrapper_cmd}{mwcc} $cflags -MMD -c $in -o $basedir" +mwcc_implicit: List[Optional[Path]] = [compilers_implicit or mwcc, wrapper_implicit] + +mwld = compiler_path / "mwldeppc.exe" +mwld_cmd = f"{wrapper_cmd}{mwld} $ldflags -o $out @$out.rsp" +mwld_implicit: List[Optional[Path]] = [compilers_implicit or mwld, wrapper_implicit] + n.newline() n.rule( "mwcc", - command="$compiler $cflags -MMD -c $in -o $basedir", + command=mwcc_cmd, description="MWCC $out", depfile="$out.d", deps="gcc", ) n.rule( "mwld", - command="$linker $ldflags -o $out @$out.rsp", + command=mwld_cmd, description="MWLD $out", rspfile="$out.rsp", rspfile_content="$in_newline", ) -n.rule("dol", command="$dtk elf2dol $in $out", description="DOL $out") -# TODO: Fix dependencies on both "src" and "cpp" -code_in_files = [ - *sorted(glob.glob("src/**/*.c", recursive=True)), - *sorted(glob.glob("src/**/*.cpp", recursive=True)), -] -code_out_files = [] +n.comment("Generate DOL") +n.rule( + name="elf2dol", + command=f"{dtk} elf2dol $in $out", + description="DOL $out", +) +n.newline() -for in_file in code_in_files: - code_out_file = os.path.join("$builddir", os.path.splitext(in_file)[0] + ".o") - 
code_out_files.append(code_out_file) +# TODO: this signature is pretty bad +def write_build_object(out_files: list, in_file: str, input_build_dir: str, mwcc_flags: list, options: Dict[str, Any]): + out_file = os.path.join(f"${input_build_dir}", os.path.splitext(in_file)[0] + ".o") + out_files.append(out_file) n.build( - outputs=code_out_file, + outputs=out_file, rule="mwcc", - inputs=in_file, + inputs=os.path.join("src", in_file), variables={ - "cflags": " ".join(RELEASE_MWCC_FLAGS), - "basedir": os.path.join("$builddir", os.path.dirname(in_file)), + "cflags": " ".join(mwcc_flags), + "basedir": os.path.join(f"${input_build_dir}", os.path.dirname(in_file)), + "mw_version": options["mw_version"] }, + implicit=mwcc_implicit, ) -n.build( - outputs=os.path.join("$outdir", "main.elf"), - rule="mwld", - inputs=code_out_files, - variables={"ldflags": " ".join(RELEASE_MWLD_FLAGS)}, -) +def write_link(out_files: list, input_out_dir: str): + n.build( + outputs=os.path.join(f"${input_out_dir}", "main.elf"), + rule="mwld", + inputs=out_files, + variables={"ldflags": " ".join(RELEASE_MWLD_FLAGS)}, + implicit=mwld_implicit, + ) + + n.build( + outputs=os.path.join(f"${input_out_dir}", "main.dol"), + rule="elf2dol", + inputs=os.path.join(f"${input_out_dir}", "main.elf"), + implicit=dtk, + ) -n.build( - outputs=os.path.join("$outdir", "main.dol"), - rule="dol", - inputs=os.path.join("$outdir", "main.elf"), -) +target_out_files = [] +base_out_files = [] + +for build_object in build_objects: + write_build_object(target_out_files, build_object.target_path, "target_build_dir", TARGET_MWCC_FLAGS, build_object.options) + write_build_object(base_out_files, build_object.base_path, "base_build_dir", BASE_MWCC_FLAGS, build_object.options) + +write_link(target_out_files, "target_out_dir") +write_link(base_out_files, "base_out_dir") + +write_objdiff(build_objects) with open("build.ninja", "w") as out_file: out_file.write(out_buf.getvalue()) diff --git a/include/Common.h b/src/runtime/Common.h 
similarity index 100% rename from include/Common.h rename to src/runtime/Common.h diff --git a/src/runtime/main.cpp b/src/runtime/main.cpp index 90ba06c..edc41df 100644 --- a/src/runtime/main.cpp +++ b/src/runtime/main.cpp @@ -1,4 +1,8 @@ #include +extern "C" +{ + #include "sample_functions.h" +} void *operator new( size_t size ) { @@ -20,5 +24,6 @@ static __Sample __sample; int main( void ) { __sample._0 = 0xf3; + sample_funcs(); return 0; } diff --git a/src/runtime/runtime_core.c b/src/runtime/runtime_core.c index 94f77fb..113df8d 100644 --- a/src/runtime/runtime_core.c +++ b/src/runtime/runtime_core.c @@ -1,5 +1,5 @@ -#include -#include +#include +#include int main( int argc, char **argv ); diff --git a/include/runtime/runtime_core.h b/src/runtime/runtime_core.h similarity index 100% rename from include/runtime/runtime_core.h rename to src/runtime/runtime_core.h diff --git a/src/runtime/runtime_exception.c b/src/runtime/runtime_exception.c index 44d1a4f..8ed54a2 100644 --- a/src/runtime/runtime_exception.c +++ b/src/runtime/runtime_exception.c @@ -1,4 +1,4 @@ -#include +#include /* ================================ * * global_destructor_chain.c diff --git a/include/runtime/runtime_exception.h b/src/runtime/runtime_exception.h similarity index 100% rename from include/runtime/runtime_exception.h rename to src/runtime/runtime_exception.h diff --git a/src/shared/sample_functions.c b/src/shared/sample_functions.c new file mode 100644 index 0000000..1a6602d --- /dev/null +++ b/src/shared/sample_functions.c @@ -0,0 +1,66 @@ +#include "00_basic_assembly_and_isa.h" +#include "01_abi_basics.h" + +#if 0 +void chapter_0() { + int a = 1; + int b = 1; + float a_f = 1.0F; + float b_f = 1.0F; + float c_f = 1.0F; + double a_d = 1.0F; + double b_d = 1.0F; + double c_d = 1.0F; + + addition(a, b); + addition_with_immediate(a); + load(); + store(&a); + store_offset(&a); + addition_float(a_f, b_f); + addition_float_load_store(&a_f, &b_f, &c_f); + addition_double(a_d, b_d); + 
addition_double_load_store(&a_d, &b_d, &c_d); + load_int(); +} + +void chapter_1() { + int b = 3; + + abi_parameters(1, 2); + abi_float_parameters(0.1F, 0.2F); + abi_volatile_nonvolatile(4); + abi_func_call(); + typical_stack_usage(2.0F); + weird_func(5); + call_weird_func(2, &b); +} + +void integers_64() { + u64 a_64 = 1; + u64 b_64 = 1; + u32 a_32 = 1; + + example_64(a_64, b_64); + aligned_64(a_32, b_64); + add_64(a_64, b_64); + add_64_downcast(a_64, b_64); + sub_64(a_64, b_64); + sub_64_downcast(a_64, b_64); + mul_64(a_64, b_64); + mul_64_downcast(a_64, b_64); + div_64(a_64, b_64); + mod_64(a_64, b_64); + shl_64(a_64, b_64); + shr_64(a_64, b_64); + and_64(a_64, b_64); + or_64(a_64, b_64); + xor_64(a_64, b_64); +} +#endif + +void sample_funcs() { + // chapter_0(); + // chapter_1(); + // integers_64(); +} diff --git a/src/shared/sample_functions.h b/src/shared/sample_functions.h new file mode 100644 index 0000000..84cea21 --- /dev/null +++ b/src/shared/sample_functions.h @@ -0,0 +1 @@ +void sample_funcs(); diff --git a/src/shared/stuff.c b/src/shared/stuff.c new file mode 100644 index 0000000..be08bcc --- /dev/null +++ b/src/shared/stuff.c @@ -0,0 +1,10 @@ +// Don't know what to call this yet, basically put stuff you want outside a +// chapter's TU in here + +#include "stuff.h" + +void some_func() { +} + +void some_func_vec3(Vec3 *vec) { +} diff --git a/src/shared/stuff.h b/src/shared/stuff.h new file mode 100644 index 0000000..da18ee2 --- /dev/null +++ b/src/shared/stuff.h @@ -0,0 +1,13 @@ +#ifndef STUFF_H +#define STUFF_H + +typedef struct { + float x; + float y; + float z; +} Vec3; + +void some_func(); +void some_func_vec3(Vec3 *); + +#endif diff --git a/src/training_answers/main_content/00_basic_assembly_and_isa.c b/src/training_answers/main_content/00_basic_assembly_and_isa.c new file mode 100644 index 0000000..e6d83a2 --- /dev/null +++ b/src/training_answers/main_content/00_basic_assembly_and_isa.c @@ -0,0 +1,55 @@ +#include "00_basic_assembly_and_isa.h" + 
+int addition(int a, int b) { + return a + b; +} + +int addition_with_immediate(int a) { + return a + 7; +} + +int subtraction(int a, int b) { + return a - b; +} + +int multiplication(int a, int b) { + return a * b; +} + +int division(int a, int b) { + return a / b; +} + +int load() { + return 7; +} + +int store(int *a) { + *a = 7; +} + +int store_offset(int *a) { + a[1] = 7; +} + +float addition_float(float a, float b) { + return a + b; +} + +void addition_float_load_store(float *a, float *b, float *c) { + *c = *a + *b; +} + +double addition_double(double a, double b) { + return a + b; +} + +void addition_double_load_store(double *a, double *b, double *c) { + *c = *a + *b; +} + +int some_int = 21; + +int load_int() { + return some_int; +} diff --git a/src/training_answers/main_content/00_basic_assembly_and_isa.h b/src/training_answers/main_content/00_basic_assembly_and_isa.h new file mode 100644 index 0000000..f096119 --- /dev/null +++ b/src/training_answers/main_content/00_basic_assembly_and_isa.h @@ -0,0 +1,17 @@ +#ifndef CHAPTER_0_H +#define CHAPTER_0_H + +int addition(int a, int b); +int addition_with_immediate(int a); +int load(); +int store(int *a); +int store_offset(int *a); +float addition_float(float a, float b); +void addition_float_load_store(float *a, float *b, float *c); +double addition_double(double a, double b); +void addition_double_load_store(double *a, double *b, double *c); +int load_int(); + +extern int some_int; + +#endif diff --git a/src/training_answers/main_content/01_abi_basics.c b/src/training_answers/main_content/01_abi_basics.c new file mode 100644 index 0000000..896b20b --- /dev/null +++ b/src/training_answers/main_content/01_abi_basics.c @@ -0,0 +1,42 @@ +#include "stuff.h" + +int abi_parameters(int a, int b) { + return a + b; +} + +float abi_float_parameters(float a, float b) { + return a + b; +} + +int abi_volatile_nonvolatile(int a) { + some_func(); + return a; +} + +void abi_func_call() { + some_func(); +} + +void 
typical_stack_usage(float a) { + Vec3 pos; + pos.x = a; + pos.y = a; + pos.z = a; + some_func_vec3(&pos); +} + +#pragma push +#pragma dont_inline on +int weird_func(int a) { + return a; +} + +void call_weird_func(int a, int *b) { + *b = weird_func(a); +} + +void call_weird_func_2(int a, int *b) { + a = a + 2; + *b = weird_func(a); +} +#pragma pop diff --git a/src/training_answers/main_content/01_abi_basics.h b/src/training_answers/main_content/01_abi_basics.h new file mode 100644 index 0000000..34ce225 --- /dev/null +++ b/src/training_answers/main_content/01_abi_basics.h @@ -0,0 +1,12 @@ +#ifndef CHAPTER_1_H +#define CHAPTER_1_H + +int abi_parameters(int a, int b); +float abi_float_parameters(float a, float b); +int abi_volatile_nonvolatile(int a); +void abi_func_call(); +void typical_stack_usage(float a); +int weird_func(int a); +void call_weird_func(int a, int *b); + +#endif diff --git a/src/training_template/main_content/00_basic_assembly_and_isa.c b/src/training_template/main_content/00_basic_assembly_and_isa.c new file mode 100644 index 0000000..5ef26fd --- /dev/null +++ b/src/training_template/main_content/00_basic_assembly_and_isa.c @@ -0,0 +1,209 @@ + +/* ================================================================ * + * + * Welcome to decomp! + * + * This series of example files is designed to teach you the basics of + * decompilation for the Gamecube and Wii. 
It is recommended to read this + * accompanying article on the wiki before getting started: + + * https://wiki.decomp.dev/en/resources/decomp-intro + * + * ================================================================ */ + +/* ================================================================ * + + * **** CHAPTER 0 - BASIC ASSEMBLY AND ISA OPERATIONS + + * The first place you should look when you encounter an instruction + * you are unfamiliar with is here: + + * https://files.decomp.dev/ppc_isa.pdf + + * This is a specification of what is known as an instruction set + * architecture (ISA), which defines various fundamental properties of + * a CPU like what instructions exist, how they behave, and what + * registers are available [1]. + + * The implementations of each function in this section are already + * filled out, as jumping straight into writing functions will be a + * bit confusing without learning about how functions are defined + * (covered in the next section). For now simply uncomment the code + * lines and observe the assembly; also consider looking up an + * instruction in the ISA document to get a feel for how the + * ISA definitions are written. + * + * [1] IBM additionally added SIMD-type instructions for + * floating-point registers on the GC/Wii's processors known as + * "paired singles" which can be viewed below, though you don't have + * to worry about them since they almost always generate from manual + * assembler usage. + + * https://wiibrew.org/wiki/Paired_single + + * ================================================================ */ + +#include "00_basic_assembly_and_isa.h" + +/* ================================================================ * + * + * The simplest instruction to consider is `add RT, RA, RB`, as all it + * does is add two registers `RA` and `RB` together and place the + * result in `RT`. 
Most instructions use general-purpose registers + * (r0-r31), which hold 32 bits of data each on 32-bit PowerPC + * processors and 64 bits on 64-bit processors. The Gamecube and Wii + * both are 32-bit. + * + * ================================================================ */ + +int addition(int a, int b) { + // return a + b; +} + +/* ================================================================ * + * + * Data can also be embedded in instructions themselves, such as + * `addi`: + * + * ================================================================ */ + +int addition_with_immediate(int a) { + // return a + 7; +} + +/* ================================================================ * + * + * Here are some other arithmetic operations that compile to + * simple assembly: + * + * ================================================================ */ + +int subtraction(int a, int b) { + // return a - b; +} + +int multiplication(int a, int b) { + // return a * b; +} + +int division(int a, int b) { + // return a / b; +} + +/* ================================================================ * + * + * If you look up `li` (or hover over it in objdiff) you'll notice it + * doesn't actually have its own instruction entry, instead being + * labeled as an "extended mnemonic" of addi. These mnemonics exist + * for developer convenience and don't change the functionality of the + * instruction, so `li r3, 7` is equivalent to `addi r3, 0, 7`. + * + * Using addi as the example for this is a bit confusing because addi + * also has a property where if you plug in 0 for the second register + * argument, it's designed to interpret that as a literal 0 instead of + * the register r0, which is why it's possible for it to double as a + * "load immediate" instruction. Note how the mnemonic table and + * objdiff write it as `addi r3, 0, 7` instead of `addi r3, r0, 7`.
+ * + * ================================================================ */ + +int load() { + // return 7; +} + +/* ================================================================ * + * + * The first thing you should do if you get disoriented when looking + * at a general-purpose register and you aren't sure what it's doing is + * to verify whether it's a pointer or not. If you're a bit shaky on + * how pointers work, now is a good time to watch a video or something + * to refresh yourself. + * + * Basically, if you see a register that appears inside the + * parentheses of a store or load instruction, like the + * 0x0(r3) and 0x4(r3) below, that register (until its data gets + * written over by something else) is guaranteed to be a pointer. The + * contents of r0, which is the number 7, are being written to the + * memory address which is derived from the contents of r3, plus an + * optional offset. + * + * ================================================================ */ + +int store(int *a) { + // *a = 7; +} + +int store_offset(int *a) { + // a[1] = 7; +} + +/* ================================================================ * + * + * Floating point operations use bespoke floating-point registers + * (f0-f31). They hold 64 bits each regardless of the processor being + * 32-bit or 64-bit, so that both processors can use double-precision + * floats. + * + * In fact, every floating point instruction always operates on FPRs + * as if they were doubles [1], as written in section 4.2.1 of the ISA + * doc. The way this is achieved is that any time a single-precision + * float has to be loaded from memory or stored to memory, it + * implicitly does a conversion to and from double precision + * respectively, which you can verify by looking up "lfs" and "stfs" + * in the ISA.
+ * + * You don't have to worry about this in decomp since there still have + * to be separate instructions for single versus double operations + * (fadds vs fadd), so you can easily identify the intended precision + * for any floating-point related instruction. Just remember FPRs are + * always 64 bits long. + * + * [1] minus the aforementioned "paired single" instructions since + * they treat and operate on an FPR register as two single-precision + * floats, which is how they gain the benefit of SIMD. + * + * ================================================================ */ + +float addition_float(float a, float b) { + // return a + b; +} + +void addition_float_load_store(float *a, float *b, float *c) { + // *c = *a + *b; +} + +double addition_double(double a, double b) { + // return a + b; +} + +void addition_double_load_store(double *a, double *b, double *c) { + // *c = *a + *b; +} + +/* ================================================================ * + * + * This is an example of the use of *symbols* for a data load, which + * is the same concept as how it was used to refer to a function in + * `bl adder` in the "Compiling and linking" section of the decomp + * intro. The finalized address for `some_int` is not decided + * until the linker is run, so the compiler inserts this symbol on the + * `lwz` instruction. + * + * If you're using objdiff, you'll also notice `some_int` appears + * below a tab labelled `.sdata`. This is the *data section* that the + * value resides in, and more specifics on the different data section + * types will be covered later. + * + * ================================================================ */ + +int some_int = 21; + +int load_int() { + // return some_int; +} + +/* ================================================================ * + * + * End of chapter 0.
+ * + * ================================================================ */ diff --git a/src/training_template/main_content/00_basic_assembly_and_isa.h b/src/training_template/main_content/00_basic_assembly_and_isa.h new file mode 100644 index 0000000..82ec68a --- /dev/null +++ b/src/training_template/main_content/00_basic_assembly_and_isa.h @@ -0,0 +1,21 @@ +// No problems require you to modify this header. + +#ifndef CHAPTER_0_H +#define CHAPTER_0_H + +int addition(int a, int b); +int addition_with_immediate(int a); +int subtraction(int a, int b); +int multiplication(int a, int b); +int division(int a, int b); +int store(int *a); +int store_offset(int *a); +float addition_float(float a, float b); +void addition_float_load_store(float *a, float *b, float *c); +double addition_double(double a, double b); +void addition_double_load_store(double *a, double *b, double *c); +int load_int(); + +extern int some_int; + +#endif diff --git a/src/training_template/main_content/01_abi_basics.c b/src/training_template/main_content/01_abi_basics.c new file mode 100644 index 0000000..f9fe781 --- /dev/null +++ b/src/training_template/main_content/01_abi_basics.c @@ -0,0 +1,245 @@ +/* ================================================================ * + * + **** CHAPTER 1 - ABI BASICS + * + * This section will likely be dense and boring, but it will clear up + * a lot of uncertainties as to what is going on when you look at a + * block of assembly. I'll start to not provide the solutions on some + * of the functions, so try to start matching the functions when + * they're empty. + * + * While the ISA describes many of the fundamental features and + * aspects of a target architecture, it doesn't prescribe how a given + * program should behave on a given target operating system.
That + * instead is delegated to a specification called the *application + * binary interface* (ABI), which theoretically allows a program to + * interoperate with other programs on the system and be able to run + * on any computer with the OS and hardware it was compiled to run on. + * The compiler ultimately gets to define the "final ABI" that the + * program compiled on it uses, often being layered on top of a more + * general OS ABI, so you could hear phrases like "the Metrowerks C++ + * ABI" or "the Linux ABI." + * + * For example, every library and function compiled with a given + * compiler for the GC/Wii "agrees" that it will use a specific + * register to return values if the function returns something (r3), + * which you'll see in the first training function. + * + * An ABI similar to what Metrowerks uses that is useful to + * reference is the System V PowerPC ABI, which can be found here: + * + * http://refspecs.linux-foundation.org/elf/elfspec_ppc.pdf + * + * In addition, since the GC/Wii is an embedded system, it also obeys + * a superset of the ABI known as an *embedded* application binary + * interface, or EABI, which can be viewed here: + * + * https://files.decomp.dev/E500ABIUG.pdf + * + * References to the general ABI will use the notation (ABI page #-#), + * and the EABI will use (EABI page #-#). + * + * ================================================================ */ + +#include "stuff.h" + +/* ================================================================ * + * + * Registers and parameters (Function Calling Sequence, ABI page 3-14) + * + * As you may be familiar with from a computer architecture or + * assembly class, the CPU has to move any data it wants to operate on + * from main memory (RAM) into its own registers to be able to access + * it.
Thus, an optimization most ABIs utilize to reduce the number of + * times a given function has to access RAM is to allow registers + * themselves to be used as both function arguments and return values. + * + * In the case of non-float values, up to eight general-purpose + * registers can be used to pass arguments to a function, starting at + * r3 and ending at r10. Additionally, r3 acts as a return register + * that a callsite of the function can read from. + * + * For example, to write a function add() that adds two integer + * arguments together and returns the result, you simply have to add + * r3 and r4 and store the result in r3. + * + * ================================================================ */ + +int abi_parameters(int a, int b) { +} + +/* ================================================================ * + * + * For floats, up to eight floating-point registers can be used, + * starting at f1 and ending at f8. f1 also acts as the return register. + * + * ================================================================ */ + +float abi_float_parameters(float a, float b) { +} + +/* ================================================================ * + * + * Volatile and non-volatile registers (Registers, ABI page 3-14) + * + * The sets of registers used for function passing (r3-r10, f1-f8) are + * also known as *volatile* registers, because whenever you branch + * into a new function, the ABI allows the values in those registers + * to get overwritten. In other words, any time you step over a "bl + * some_func", you must assume that all volatile registers have + * effectively been destroyed (or modified in the case of return + * registers). Thus the concept of "non-volatile" registers becomes + * useful, which allows us to preserve data between registers that + * cross callsites without having to read/write from main memory.
+ * + * The simplest example of this behavior can be seen in the `mr r31, + * r3` below, where r3 is moved to the non-volatile register r31, + * since r3 can get overwritten by some_func [1]. r31 is then passed back + * to r3 to be used as the return register. The other instructions, + * which are related to the stack, will be explained next. + * + * [1] some_func is defined outside this TU, and its implementation is + * not important for this discussion. + * + * ================================================================ */ + +int abi_volatile_nonvolatile(int a) { + // some_func(); + // return a; +} + +/* ================================================================ * + * + * Function prologues/epilogues, the stack, and the link register + * (Function Prologue and Epilogue, ABI page 3-34) + * (The Stack Frame, ABI page 3-17) + * + * You may have noticed a pattern among the functions listed so far in + * that they all end in the "blr" opcode, and many of them contain a + * "mflr" towards the top and a "mtlr" towards the bottom. These + * opcodes all concern a special register called the *link register*, + * which holds the memory address of the previous function that called + * the function you're currently in. 
This was also explained in the + * intro doc, but to reiterate again to help make it stick in your + * brain more: + * + * | // some_func + * | 0x80103F18 | blr + * | + * | // a bunch of code + * | + * | // abi_func_call + * | 0x802D8910 | mflr r0 + * | 0x802D8914 | stw r0, 0x4(r1) + * | 0x802D8918 | stwu r1, -0x8(r1) + * | 0x802D891C | bl 0x80103F14 // address of some_func + * | 0x802D8920 | lwz r0, 0xc(r1) + * | 0x802D8924 | addi r1, r1, 0x8 + * | 0x802D8928 | mtlr r0 + * | 0x802D892C | blr + * + * (note: addresses may be different from the actual executable) + * + * When the "bl 0x80103F14" instruction gets executed, the link + * register (LR) is automatically set with the address of the next + * instruction at 0x802D8920, which is "lwz r0, 0xc(r1)". Then once it + * hits the blr in "some_func" at 0x80103F18, it branches to the value + * at the LR (0x802D8920) and resumes where it left off in + * "abi_func_call." + * + * However you may be wondering, if the LR gets overwritten by the "bl + * 0x80103F14" in "abi_func_call," then what will happen to the LR + * that "abi_func_call" is currently holding? It won't be able to + * return to the function that called it when it executes its + * "blr" if its LR gets overwritten! Luckily, that problem is exactly + * what all of the other instructions are addressing, which are a part + * of what is known as the "prologue" and "epilogue" that are executed + * at the beginning and end of a function respectively. + * + * In the prologue, "mflr" moves the address in the LR to a register + * r0, which is then stored in what is known as the "stack" in the + * "stw". Note that r1 is a special register which holds the stack + * pointer, and its current value is required to be decremented and + * placed on the stack (in the "stwu").
Then in the epilogue, that + * address is loaded out from the stack back into r0 and moved back + * into the LR in the "lwz" and "mtlr" instructions, which allows the + * "blr" to successfully return to "abi_func_call"'s caller. [1] + * + * The reason "mflr" and "mtlr" don't show up in every function, as + * you may be able to guess, is that a function doesn't need to save + * and restore the LR unless it actually needs to (i.e. it calls a + * function), which is why some_func doesn't have them. + * + * [1] You can read more about the stack setup on section 3-34 of the + * ABI doc. + * + * ================================================================ */ + +void abi_func_call(int a) { + // some_func(); +} + +/* ================================================================ * + * + * Besides saving/restoring the LR in the prologue/epilogue, the stack + * will also get used by normal code for various reasons when + * registers aren't enough to represent the data. Normally when you + * declare variables on "the stack" in C, the compiler will try to + * avoid actually implementing a stack and simply use registers. + * However one case where the compiler won't do that is if you declare + * a struct on the stack and you pass its address to a function, in + * which case it always implements a real stack. Typically you'll see + * this with math-related structs like a Vec3 or Vec4 or Matrix struct. + * + * ================================================================ */ + +void typical_stack_usage(float a) { + // Vec3 pos; + // pos.x = a; + // pos.y = a; + // pos.z = a; + // some_func_vec3(&pos); +} + +/* ================================================================ * + * + * Here is your first "real" problem without an explanation. It's a + * bit tricky, but with the knowledge you now have, you should be + * equipped to tackle and understand what at first glance looks like a + * strange peculiarity. 
Pretend that "weird_func" is in another TU and + * figure out why "call_weird_func" matches but "call_weird_func_2" + * doesn't. Don't remove the pragma statements (it's the same trick + * from the intro article to make it not automatically get inlined by + * the compiler). + * + * View the solution here if you need hints, get stuck, or figure it + * out; it contains an important explanation as well: + * + * https://wiki.decomp.dev/en/resources/decomp-training-answers/chapter_01 + * + * ================================================================ */ + +#pragma push +#pragma dont_inline on + +int weird_func(void) { +} + +void call_weird_func(int a, int *b) { + *b = weird_func(); +} + +void call_weird_func_2(int a, int *b) { + a = a + 2; + *b = weird_func(); +} + +#pragma pop + +/* ================================================================ * + * + * End of chapter 1. + * + * ================================================================ */ + + diff --git a/src/training_template/main_content/01_abi_basics.h b/src/training_template/main_content/01_abi_basics.h new file mode 100644 index 0000000..d9f6520 --- /dev/null +++ b/src/training_template/main_content/01_abi_basics.h @@ -0,0 +1,14 @@ +// Some problems require you to modify this header. + +#ifndef CHAPTER_1_H +#define CHAPTER_1_H + +int abi_parameters(int a, int b); +float abi_float_parameters(float a, float b); +int abi_volatile_nonvolatile(int a); +void abi_func_call(); +void typical_stack_usage(float a); +// ??? weird_func(???); +// ??? call_weird_func(???); + +#endif diff --git a/tools/download_tool.py b/tools/download_tool.py new file mode 100755 index 0000000..f4512d0 --- /dev/null +++ b/tools/download_tool.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 + +### +# Downloads various tools from GitHub releases.
+# +# Usage: +# python3 tools/download_tool.py wibo build/tools/wibo --tag 1.0.0 +# +# If changes are made, please submit a PR to +# https://github.com/encounter/dtk-template +### + +import argparse +import io +import os +import platform +import shutil +import stat +import urllib.request +import zipfile +from typing import Callable, Dict +from pathlib import Path + + +def binutils_url(tag): + uname = platform.uname() + system = uname.system.lower() + arch = uname.machine.lower() + if system == "darwin": + system = "macos" + arch = "universal" + elif arch == "amd64": + arch = "x86_64" + + repo = "https://github.com/encounter/gc-wii-binutils" + return f"{repo}/releases/download/{tag}/{system}-{arch}.zip" + + +def compilers_url(tag: str) -> str: + return f"https://files.decomp.dev/compilers_{tag}.zip" + + +def dtk_url(tag: str) -> str: + uname = platform.uname() + suffix = "" + system = uname.system.lower() + if system == "darwin": + system = "macos" + elif system == "windows": + suffix = ".exe" + arch = uname.machine.lower() + if arch == "amd64": + arch = "x86_64" + + repo = "https://github.com/encounter/decomp-toolkit" + return f"{repo}/releases/download/{tag}/dtk-{system}-{arch}{suffix}" + + +def objdiff_cli_url(tag: str) -> str: + uname = platform.uname() + suffix = "" + system = uname.system.lower() + if system == "darwin": + system = "macos" + elif system == "windows": + suffix = ".exe" + arch = uname.machine.lower() + if arch == "amd64": + arch = "x86_64" + + repo = "https://github.com/encounter/objdiff" + return f"{repo}/releases/download/{tag}/objdiff-cli-{system}-{arch}{suffix}" + + +def sjiswrap_url(tag: str) -> str: + repo = "https://github.com/encounter/sjiswrap" + return f"{repo}/releases/download/{tag}/sjiswrap-windows-x86.exe" + + +def wibo_url(tag: str) -> str: + repo = "https://github.com/decompals/wibo" + return f"{repo}/releases/download/{tag}/wibo" + + +TOOLS: Dict[str, Callable[[str], str]] = { + "binutils": binutils_url, + "compilers": 
compilers_url, + "dtk": dtk_url, + "objdiff-cli": objdiff_cli_url, + "sjiswrap": sjiswrap_url, + "wibo": wibo_url, +} + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("tool", help="Tool name") + parser.add_argument("output", type=Path, help="output file path") + parser.add_argument("--tag", help="GitHub tag", required=True) + args = parser.parse_args() + + url = TOOLS[args.tool](args.tag) + output = Path(args.output) + + print(f"Downloading {url} to {output}") + req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"}) + with urllib.request.urlopen(req) as response: + if url.endswith(".zip"): + data = io.BytesIO(response.read()) + with zipfile.ZipFile(data) as f: + f.extractall(output) + # Make all files executable + for root, _, files in os.walk(output): + for name in files: + os.chmod(os.path.join(root, name), 0o755) + output.touch(mode=0o755) # Update dir modtime + else: + with open(output, "wb") as f: + shutil.copyfileobj(response, f) + st = os.stat(output) + os.chmod(output, st.st_mode | stat.S_IEXEC) + + +if __name__ == "__main__": + main() diff --git a/tools/ninja_syntax.py b/tools/ninja_syntax.py new file mode 100644 index 0000000..7306ee1 --- /dev/null +++ b/tools/ninja_syntax.py @@ -0,0 +1,254 @@ +# Copyright 2011 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python module for generating .ninja files. 
+ +Note that this is emphatically not a required piece of Ninja; it's +just a helpful utility for build-file-generation systems that already +use Python. +""" + +import re +import textwrap +import os +from io import StringIO +from pathlib import Path +from typing import Dict, List, Match, Optional, Tuple, Union + +NinjaPath = Union[str, Path] +NinjaPaths = Union[ + List[str], + List[Path], + List[NinjaPath], + List[Optional[str]], + List[Optional[Path]], + List[Optional[NinjaPath]], +] +NinjaPathOrPaths = Union[NinjaPath, NinjaPaths] + + +def escape_path(word: str) -> str: + return word.replace("$ ", "$$ ").replace(" ", "$ ").replace(":", "$:") + + +class Writer(object): + def __init__(self, output: StringIO, width: int = 78) -> None: + self.output = output + self.width = width + + def newline(self) -> None: + self.output.write("\n") + + def comment(self, text: str) -> None: + for line in textwrap.wrap( + text, self.width - 2, break_long_words=False, break_on_hyphens=False + ): + self.output.write("# " + line + "\n") + + def variable( + self, + key: str, + value: Optional[NinjaPathOrPaths], + indent: int = 0, + ) -> None: + value = " ".join(serialize_paths(value)) + self._line("%s = %s" % (key, value), indent) + + def pool(self, name: str, depth: int) -> None: + self._line("pool %s" % name) + self.variable("depth", str(depth), indent=1) + + def rule( + self, + name: str, + command: str, + description: Optional[str] = None, + depfile: Optional[NinjaPath] = None, + generator: bool = False, + pool: Optional[str] = None, + restat: bool = False, + rspfile: Optional[NinjaPath] = None, + rspfile_content: Optional[NinjaPath] = None, + deps: Optional[NinjaPathOrPaths] = None, + ) -> None: + self._line("rule %s" % name) + self.variable("command", command, indent=1) + if description: + self.variable("description", description, indent=1) + if depfile: + self.variable("depfile", depfile, indent=1) + if generator: + self.variable("generator", "1", indent=1) + if pool: + 
self.variable("pool", pool, indent=1) + if restat: + self.variable("restat", "1", indent=1) + if rspfile: + self.variable("rspfile", rspfile, indent=1) + if rspfile_content: + self.variable("rspfile_content", rspfile_content, indent=1) + if deps: + self.variable("deps", deps, indent=1) + + def build( + self, + outputs: NinjaPathOrPaths, + rule: str, + inputs: Optional[NinjaPathOrPaths] = None, + implicit: Optional[NinjaPathOrPaths] = None, + order_only: Optional[NinjaPathOrPaths] = None, + variables: Optional[ + Union[ + List[Tuple[str, Optional[NinjaPathOrPaths]]], + Dict[str, Optional[NinjaPathOrPaths]], + ] + ] = None, + implicit_outputs: Optional[NinjaPathOrPaths] = None, + pool: Optional[str] = None, + dyndep: Optional[NinjaPath] = None, + ) -> List[str]: + outputs = serialize_paths(outputs) + out_outputs = [escape_path(x) for x in outputs] + all_inputs = [escape_path(x) for x in serialize_paths(inputs)] + + if implicit: + implicit = [escape_path(x) for x in serialize_paths(implicit)] + all_inputs.append("|") + all_inputs.extend(map(str, implicit)) + if order_only: + order_only = [escape_path(x) for x in serialize_paths(order_only)] + all_inputs.append("||") + all_inputs.extend(map(str, order_only)) + if implicit_outputs: + implicit_outputs = [ + escape_path(x) for x in serialize_paths(implicit_outputs) + ] + out_outputs.append("|") + out_outputs.extend(map(str, implicit_outputs)) + + self._line( + "build %s: %s" % (" ".join(out_outputs), " ".join([rule] + all_inputs)) + ) + if pool is not None: + self._line(" pool = %s" % pool) + if dyndep is not None: + self._line(" dyndep = %s" % serialize_path(dyndep)) + + if variables: + if isinstance(variables, dict): + iterator = iter(variables.items()) + else: + iterator = iter(variables) + + for key, val in iterator: + self.variable(key, val, indent=1) + + return outputs + + def include(self, path: str) -> None: + self._line("include %s" % path) + + def subninja(self, path: str) -> None: + self._line("subninja %s" % 
path) + + def default(self, paths: NinjaPathOrPaths) -> None: + self._line("default %s" % " ".join(serialize_paths(paths))) + + def _count_dollars_before_index(self, s: str, i: int) -> int: + """Returns the number of '$' characters right in front of s[i].""" + dollar_count = 0 + dollar_index = i - 1 + while dollar_index > 0 and s[dollar_index] == "$": + dollar_count += 1 + dollar_index -= 1 + return dollar_count + + def _line(self, text: str, indent: int = 0) -> None: + """Write 'text' word-wrapped at self.width characters.""" + leading_space = " " * indent + while len(leading_space) + len(text) > self.width: + # The text is too wide; wrap if possible. + + # Find the rightmost space that would obey our width constraint and + # that's not an escaped space. + available_space = self.width - len(leading_space) - len(" $") + space = available_space + while True: + space = text.rfind(" ", 0, space) + if space < 0 or self._count_dollars_before_index(text, space) % 2 == 0: + break + + if space < 0: + # No such space; just use the first unescaped space we can find. + space = available_space - 1 + while True: + space = text.find(" ", space + 1) + if ( + space < 0 + or self._count_dollars_before_index(text, space) % 2 == 0 + ): + break + if space < 0: + # Give up on breaking. + break + + self.output.write(leading_space + text[0:space] + " $\n") + text = text[space + 1 :] + + # Subsequent lines are continuations, so indent them. 
+ leading_space = " " * (indent + 2) + + self.output.write(leading_space + text + "\n") + + def close(self) -> None: + self.output.close() + + +def serialize_path(input: Optional[NinjaPath]) -> str: + if not input: + return "" + if isinstance(input, Path): + return str(input).replace("/", os.sep) + else: + return str(input) + + +def serialize_paths(input: Optional[NinjaPathOrPaths]) -> List[str]: + if isinstance(input, list): + return [serialize_path(path) for path in input if path] + return [serialize_path(input)] if input else [] + + +def escape(string: str) -> str: + """Escape a string such that it can be embedded into a Ninja file without + further interpretation.""" + assert "\n" not in string, "Ninja syntax does not allow newlines" + # We only have one special metacharacter: '$'. + return string.replace("$", "$$") + + +def expand(string: str, vars: Dict[str, str], local_vars: Dict[str, str] = {}) -> str: + """Expand a string containing $vars as Ninja would. + + Note: doesn't handle the full Ninja variable syntax, but it's enough + to make configure.py's use of it work. 
+ """ + + def exp(m: Match[str]) -> str: + var = m.group(1) + if var == "$": + return "$" + return local_vars.get(var, vars.get(var, "")) + + return re.sub(r"\$(\$|\w*)", exp, string) diff --git a/wip/integers_64_bit.c b/wip/integers_64_bit.c new file mode 100644 index 0000000..40949fa --- /dev/null +++ b/wip/integers_64_bit.c @@ -0,0 +1,98 @@ +#include "src/runtime/platform.h" + +u64 example_64(u64 a, u64 b) { + return a + b; +} + +u64 aligned_64(u32 a, u64 b) { + return b + 5; +} + +u64 add_64(u64 a, u64 b) { + return a + b; +} + +u32 add_64_downcast(u64 a, u64 b) { + return a + b; +} + +u64 sub_64(u64 a, u64 b) { + return a - b; +} + +u32 sub_64_downcast(u64 a, u64 b) { + return a - b; +} + +u64 mul_64(u64 a, u64 b) { + return a * b; +} + +u32 mul_64_downcast(u64 a, u64 b) { + return a * b; +} + +u64 div_64(u64 a, u64 b) { + return a / b; +} + +u64 mod_64(u64 a, u64 b) { + return a % b; +} + +u64 shl_64(u64 a, u64 b) { + return a << b; +} + +u64 shr_64(u64 a, u64 b) { + return a >> b; +} + +u64 and_64(u64 a, u64 b) { + return a & b; +} + +u64 or_64(u64 a, u64 b) { + return a | b; +} + +u64 xor_64(u64 a, u64 b) { + return a ^ b; +} + +#if 0 +// TODO(fox): This should probably be combined with the branching section + +int abi_function_6(int a) { + switch (a) { + case 1: + return 1; + case 2: + return 2; + case 3: + return 3; + case 4: + return 4; + case 5: + return 5; + } + return 0; +} + +int abi_function_7(int a) { + some_func(); + switch (a) { + case 1: + return 1; + case 2: + return 2; + case 3: + return 3; + case 4: + return 4; + case 5: + return 5; + } + return 0; +} +#endif diff --git a/wip/t_integers_64_bit.c b/wip/t_integers_64_bit.c new file mode 100644 index 0000000..40afd63 --- /dev/null +++ b/wip/t_integers_64_bit.c @@ -0,0 +1,189 @@ +/* ================================================================ * + * + **** 64-BIT INTEGERS (WIP) + * + * ================================================================ */ + +#include "src/runtime/platform.h" + 
+/* ================================================================ * + * + * Register usage + * + * Integers of type "long long," aka an integer that takes up 8 bytes, + * or 64 bits, have to be implemented a bit specially since GPRs on + * the GC/Wii are only 32 bits long. + * + * Each 64-bit value occupies two general-purpose registers. When + * passed into a function, it simply occupies two continuous registers. + * Additionally, r4 can be used in conjunction with r3 as a return value + * to return a 64-bit value without needing to use the stack. + * + * ================================================================ */ + +u64 example_64(u64 a, u64 b) { +} + +/* ================================================================ * + * + * The ABI requires the compiler to "align" the two GPRs belonging to + * a 64-bit integer to an odd-numbered register (PPC 3-19), meaning + * that in the example below, the register order looks like this: + * + * r3 - a + * r4 - empty + * r5, r6 - b + * + * Presumably this is to decrease register chain dependencies; if the + * 64-bit argument was allowed to be placed in r4 and r5 instead, then + * this example would compile to this and require an extra register + * move to free up r4: + * + * addi r6, r4, 0x0 + * li r0, 0x5 + * addc r4, r5, r0 + * li r0, 0x0 + * adde r3, r6, r0 + * blr + * + * ================================================================ */ + +u64 aligned_64(u32 a, u64 b) { +} + +/* ================================================================ * + * + **** DETERMINING 64-BIT USAGE + * + * The best way to confirm whether a given variable or struct/class + * member is 64-bit or not is to inspect its operations, as most of + * them are distinct from their 32-bit or less counterparts. + * + * An important optimization the compiler can do to watch out for is + * in the case where the rvalue is 64-bit but the lvalue is 32-bit, + * which I'll refer to as a "downcast." 
Look at the examples below to + * see what I mean: + * + * ================================================================ */ + +/* ================================================================ * + * + * Addition and subtraction + * + * Addition and subtraction use a pair of carry and extend instructions + * to pass the carry bit to the higher-value register. Interestingly, on + * all versions of MWCC, the compiler will still emit the carry part of + * the instruction instead of the normal variant when downcasting to 32-bit + * even with optimizations, so this could theoretically be used to + * identify a 64-bit value. ProDG emits the normal variant which causes + * it to be indistinguishable, like multiplication. + * + * ================================================================ */ + +u64 add_64(u64 a, u64 b) { +} + +u32 add_64_downcast(u64 a, u64 b) { +} + +u64 sub_64(u64 a, u64 b) { +} + +u32 sub_64_downcast(u64 a, u64 b) { +} + +/* ================================================================ * + * + * Multiplication + * + * Multiplication has a long and recognizable 64-bit pattern, but a + * 32-bit downcast is indistinguishable from multiplying two 32-bit numbers. + * + * ================================================================ */ + +u64 mul_64(u64 a, u64 b) { +} + +u32 mul_64_downcast(u64 a, u64 b) { +} + +/* ================================================================ * + * + * Compiler intrinsics + * + * Division, modulo, and bit shifts call compiler intrinsics on both + * MWCC and ProDG, which are unaffected by downcasts.
+ * + * ================================================================ */ + +u64 div_64(u64 a, u64 b) { +} + +u64 mod_64(u64 a, u64 b) { +} + +u64 shl_64(u64 a, u64 b) { +} + +u64 shr_64(u64 a, u64 b) { +} + +/* ================================================================ * + * + * Boolean operations + * + * 64-bit AND, OR, and XOR are indistinguishable from two back-to-back + * 32-bit operations, so you can only know for sure that 64-bit is being + * used if the inputs/results are passed into other known 64-bit + * operations, like a comparison. + * + * ================================================================ */ + +u64 and_64(u64 a, u64 b) { +} + +u64 or_64(u64 a, u64 b) { +} + +u64 xor_64(u64 a, u64 b) { +} + +/* ================================================================ * + * + * Comparisons + * + * Both branched and branchless 64-bit comparisons are unambiguous + * and are unaffected by downcasting, since the result of a comparison + * is already a single bit (true/false). + * + * ================================================================ */ + +// TODO(fox): implement + + + +// MISC + +// TODO(fox): This should probably be combined with the branching +// section. This is a more complicated example than using a single +// conditional, but I think the multiple branches illustrates the +// point better. + +/* ================================================================ * + * + * Typically when a function has early returns, such as the cases in + * this switch statement in abi_function_6, they will branch to the + * epilogue. However if a function doesn't use the stack, the epilogue + * effectively turns into a single blr instruction. In this case, the + * compiler can perform an optimization where it replaces those + * epilogue branches with the epilogue itself and create multiple blrs + * in a single function. 
+ * + * ================================================================ */ + +void abi_function_6(int a) { +} + +void abi_function_7(int a) { +} +