diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..3380f3a --- /dev/null +++ b/.dockerignore @@ -0,0 +1,85 @@ +# compiled code anywhere +**/*.native +**/*.byte +**/*.o +**/*.lo +**/*.cmx +**/*.cmo +**/*.cmi +**/*.a +**/*.cmxa +**/*.dSYM/ +**/_build + +# system crap +Dockerfile +Dockerfile.test +Dockerfile.web +.git +.gitmodules +.dockerignore +**/.gitignore +**/*~ +**/.#* +**/.DS_Store +**/.DS_Store? +**/._* +**/.Spotlight* +**/.Trash* +**/*[Tt]humbs.db +**/*~ +**/*.bak +**/*.orig +**/*.rej + +################################################################################ +# libdash ignores + +libtool +# geneated by libtool +ltmain.sh + +# generated by autogen.sh +Makefile.in +aclocal.m4 +autom4te.cache/ +compile +config.h.in +configure +depcomp +install-sh +missing + +# generated by configure +Makefile +config.cache +config.h +config.log +config.status +src/.deps/ +stamp-h1 + +# generated by make +src/token_vars.h + +# generated files +ar-lib +config.* +src/libdash.a + +src/builtins.[ch] +src/builtins.def +src/dash +src/init.c +src/mkinit +src/mknodes +src/mksignames +src/mksyntax +src/nodes.[ch] +src/signames.c +src/syntax.[ch] +src/token.h + +src/.libs +src/.deps + diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..e2ba507 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,155 @@ +name: Main workflow + +on: + pull_request: + branches: + - master + push: + schedule: + - cron: '5 14 * * *' + +jobs: + check-version-numbers: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Check version numbers + run: ./version.sh + + package-python: + strategy: + fail-fast: false + matrix: + os: + - macos-latest + - ubuntu-latest + + runs-on: ${{ matrix.os }} + + steps: + - name: Install dependencies (libtool, aclocal, autoconf) + run: | + if [ "$RUNNER_OS" = "Linux" ]; then + sudo apt-get install libtool automake + elif [ "$RUNNER_OS" = 
"macOS" ]; then + brew install libtool autoconf automake + else + echo Unsupported RUNNER_OS=$RUNNER_OS + exit 1 + fi + + - name: Checkout code + uses: actions/checkout@v6 + + - name: Build wheels + uses: pypa/cibuildwheel@v3.4.1 + env: + CIBW_REPAIR_WHEEL_COMMAND_MACOS: "" + + - name: Upload binary wheel + uses: actions/upload-artifact@v4 + with: + name: ${{ format('bdist.{0}', matrix.os) }} + path: wheelhouse/libdash-*.whl + + - name: Build source distribution (Linux only) + if: contains(matrix.os, 'ubuntu') + run: python setup.py sdist + + - name: Upload source distribution (from Linux) + uses: actions/upload-artifact@v4 + if: contains(matrix.os, 'ubuntu') + with: + name: sdist + path: dist/libdash-*.tar.gz + + build-both-and-compare: + strategy: + fail-fast: true + matrix: + os: + - macos-latest + - ubuntu-latest + ocaml-compiler: + - 4.14.x + + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Use OCaml ${{ matrix.ocaml-compiler }} + uses: avsm/setup-ocaml@v3 + with: + ocaml-compiler: ${{ matrix.ocaml-compiler }} + dune-cache: true + + - name: Install and test OCaml bindings + run: opam install --with-test --working-dir . + + # we don't reuse the wheels so that all of the CI runs can happen concurrently + - name: Install Python via venv + run: | + python3 -m venv .venv + . .venv/bin/activate + python3 -m pip install . + + - name: Test Python bindings + run: | + . .venv/bin/activate + make -C python test + + - name: Compare OCaml and Python bindings + run: | + . 
.venv/bin/activate + opam exec -- make -C test test + + deploy: + needs: + - check-version-numbers + - package-python + - build-both-and-compare + runs-on: ubuntu-latest + if: github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/tags') + + steps: + - name: Download distributions + uses: actions/download-artifact@v4 + + - name: Rename distributions + run: | + mkdir dist + ls bdist.*/ + mv bdist.*/libdash-*.whl dist/ + mv sdist/libdash-*.tar.gz dist/ + echo Look on my Works, ye Mighty, and despair! + ls dist + + - name: Deploy 'latest' release on GH + uses: marvinpinto/action-automatic-releases@latest + with: + repo_token: "${{ secrets.GITHUB_TOKEN }}" + automatic_release_tag: "latest" + prerelease: true + title: "Python source and binary distributions" + files: | + dist/* + + - name: Deploy test distribution to Test PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.TEST_PYPI_API_TOKEN }} + verbose: true + repository_url: https://test.pypi.org/legacy/ + skip_existing: true + + - name: Deploy tagged release on PyPI + if: startsWith(github.ref, 'refs/tags') + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_API_TOKEN }} + verbose: true diff --git a/.github/workflows/canary.yml b/.github/workflows/canary.yml new file mode 100644 index 0000000..8522c91 --- /dev/null +++ b/.github/workflows/canary.yml @@ -0,0 +1,71 @@ +name: Distribution canary + +on: +# push: + schedule: + - cron: '20 13 * * *' + +jobs: + ocaml: + strategy: + fail-fast: false + matrix: + os: + - macos-14 + - macos-15 + - ubuntu-22.04 + - ubuntu-24.04 + ocaml-compiler: + - 4.14.x + - 5.2.x + + runs-on: ${{ matrix.os }} + + steps: + - name: Use OCaml ${{ matrix.ocaml-compiler }} + uses: avsm/setup-ocaml@v2 + with: + ocaml-compiler: ${{ matrix.ocaml-compiler }} + + - name: Install OCaml bindings from OPAM + run: | + opam update + opam depext libdash + opam install libdash + + - name: Test OPAM executables + run: test "$(echo hi 
| opam exec -- shell_to_json | opam exec -- json_to_shell)" = "hi" + + python: + strategy: + fail-fast: true + matrix: + os: + - macos-12 + - macos-11 + - ubuntu-18.04 + - ubuntu-20.04 + python-version: + - '3.7' + - '3.8' + - '3.9' + - '3.10' + repository_url: + - https://pypi.org/simple/ + - https://test.pypi.org/simple/ + + runs-on: ${{ matrix.os }} + + steps: + - name: Use Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Python bindings from ${{ matrix.repository_url }} + run: pip install -v -i "${{ matrix.repository_url }}" --extra-index-url https://pypi.org/simple/ libdash + + - name: Test Python library + run: | + RT="$(printf 'import libdash\nasts = libdash.parse("-", True)\nfor (ast, lines, linno_before, linno_after) in asts:\n print(libdash.to_string(ast))\n')" + test "$(echo hi | python -c "$RT")" = "hi" diff --git a/.gitignore b/.gitignore index e349901..6064816 100644 --- a/.gitignore +++ b/.gitignore @@ -22,8 +22,15 @@ Makefile /stamp-h1 # generated by make +/src/builtins.h +/src/nodes.h +/src/syntax.h +/src/token.h /src/token_vars.h +# generated by dune +_build + # Apple debug symbol bundles *.dSYM/ @@ -37,6 +44,20 @@ Makefile .DS_Store .DS_Store? 
._* +.\#* .Spotlight* .Trash* *[Tt]humbs.db +ar-lib +config.* +src/libdash.a +*.lo +*.dylib +m4 +libtool +ltmain.sh +ocamlprof.dump +__pycache__ +libdash.egg-info +dist +build diff --git a/.travis-ocaml.sh b/.travis-ocaml.sh new file mode 100644 index 0000000..6730871 --- /dev/null +++ b/.travis-ocaml.sh @@ -0,0 +1,327 @@ +## basic OCaml and opam installation + +full_apt_version () { + package=$1 + version=$2 + case "${version}" in + latest) echo -n "${package}" ;; + *) echo -n "${package}=" + apt-cache show "$package" \ + | sed -n "s/^Version: \(${version}\)/\1/p" \ + | head -1 + esac +} + +set -uex + +if [ "$TRAVIS_OS_NAME" = freebsd -a "${OPAM_VERSION+x}" = x ]; then + echo OPAM_VERSION not permitted for FreeBSD targets + exit 1 +fi + +OCAML_VERSION=${OCAML_VERSION:-latest} +SYS_OCAML_VERSION=4.05 +# Default opam is the latest release of opam 2 +OPAM_VERSION=${OPAM_VERSION:-2} +OPAM_INIT=${OPAM_INIT:-true} +OCAML_BETA=${OCAML_BETA:-disable} + +OPAM_LATEST_RELEASE=2.0.7 + +case ${TRAVIS_CPU_ARCH:-amd64} in + amd64|notset) OPAM_ARCH=x86_64;; + arm64) OPAM_ARCH=arm64;; + *) echo "'$TRAVIS_CPU_ARCH' architecture not currently supported"; exit 1;; +esac + +case $OPAM_VERSION in + 2|2.0) OPAM_VERSION=$OPAM_LATEST_RELEASE;; + 1.*) echo "Opam version '$OPAM_VERSION' is not supported"; exit 1;; +esac + +if [ "$TRAVIS_OS_NAME" = "osx" ] ; then + brew update &> /dev/null + BREW_OPAM_VERSION=$(brew info opam --json=v1 | sed -e 's/.*"versions":{[^}]*"stable":"//' -e 's/".*//') + if [ "$OPAM_VERSION" != "$BREW_OPAM_VERSION" ] ; then + set +x + echo -e "[\e[0;31mWARNING\e[0m] Ignored OPAM_VERSION=$OPAM_VERSION; interpreted as \"$BREW_OPAM_VERSION\"" >&2 + echo -e "[\e[0;31mWARNING\e[0m] opam 2 is installed via Homebrew" >&2 + set -x + fi + OPAM_VERSION="$BREW_OPAM_VERSION" +fi + +if [ "$OPAM_VERSION" != "$OPAM_LATEST_RELEASE" ] ; then + set +x + echo -e "[\e[0;31mWARNING\e[0m] Out-of-date opam $OPAM_VERSION requested" >&2 + echo -e "[\e[0;31mWARNING\e[0m] Latest release is 
$OPAM_LATEST_RELEASE" >&2 + set -x +fi + +if [ "${INSTALL_LOCAL+x}" = x ] ; then + if [ "$TRAVIS_OS_NAME" = osx -o "$TRAVIS_OS_NAME" = freebsd ] ; then + echo INSTALL_LOCAL not permitted for macOS and FreeBSD targets + exit 1 + fi + + if [ "${OPAM_SWITCH:=ocaml-system}" != ocaml-system ] ; then + echo "INSTALL_LOCAL requires OPAM_SWITCH=ocaml-system (or unset/null)" + exit 1 + fi +fi + +# the base opam repository to use for bootstrapping and catch-all namespace +BASE_REMOTE=${BASE_REMOTE:-git://github.com/ocaml/opam-repository} + +# whether we need a new gcc and binutils +UPDATE_GCC_BINUTILS=${UPDATE_GCC_BINUTILS:-"0"} + +# Install Xenial remotes +UBUNTU_XENIAL=${UBUNTU_XENIAL:-"0"} + +# Install XQuartz on OSX +INSTALL_XQUARTZ=${INSTALL_XQUARTZ:-"false"} + +APT_UPDATED=0 + +add_ppa () { + if [ "$TRAVIS_OS_NAME" = "linux" ] ; then + APT_UPDATED=0 + sudo add-apt-repository --yes ppa:$1 + fi +} + +apt_install () { + if [ "$TRAVIS_OS_NAME" = "linux" ] ; then + if [ "$APT_UPDATED" -eq 0 ] ; then + APT_UPDATED=1 + sudo apt-get update -qq + fi + sudo apt-get install --no-install-recommends -y "$@" + fi +} + +install_ocaml () { + apt_install \ + ocaml ocaml-base ocaml-native-compilers ocaml-compiler-libs \ + ocaml-interp ocaml-base-nox ocaml-nox +} + +install_opam2 () { + case $TRAVIS_OS_NAME in + freebsd) + # Opam does not have any ready to use binaries for FreeBSD + sudo pkg install -qy ocaml-opam ;; + linux) + case $TRAVIS_DIST in + precise|trusty|xenial) + # Required for bubblewrap (supports arm64 & amd64) + add_ppa avsm/ppa ;; + esac + if [ "${INSTALL_LOCAL:=0}" = 0 ] ; then + install_ocaml + fi + apt_install bubblewrap + sudo wget https://github.com/ocaml/opam/releases/download/$OPAM_VERSION/opam-$OPAM_VERSION-$OPAM_ARCH-linux -O /usr/local/bin/opam + sudo chmod +x /usr/local/bin/opam ;; + osx) + if [ "${INSTALL_LOCAL:=0}" = 0 ] ; then + brew install ocaml + fi + sudo curl -fsSL 
https://github.com/ocaml/opam/releases/download/$OPAM_VERSION/opam-$OPAM_VERSION-$OPAM_ARCH-macos -o /usr/local/bin/opam + sudo chmod +x /usr/local/bin/opam ;; + esac +} + +install_ppa () { + add_ppa $1 + if [ "${INSTALL_LOCAL:=0}" = 0 ] ; then + sudo apt-get -qq update + APT_UPDATED=1 + apt_install \ + "$(full_apt_version ocaml $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-base $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-native-compilers $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-compiler-libs $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-interp $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-base-nox $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-nox $SYS_OCAML_VERSION)" + fi + apt_install opam +} + +install_on_freebsd () { + case "$OCAML_VERSION" in + 3.12) OCAML_FULL_VERSION=3.12.1; install_opam2 ;; + 4.00) OCAML_FULL_VERSION=4.00.1; install_opam2 ;; + 4.01) OCAML_FULL_VERSION=4.01.0; install_opam2 ;; + 4.02) OCAML_FULL_VERSION=4.02.3; install_opam2 ;; + 4.03) OCAML_FULL_VERSION=4.03.0; install_opam2 ;; + 4.04) OCAML_FULL_VERSION=4.04.2; install_opam2 ;; + 4.05) OCAML_FULL_VERSION=4.05.0; install_opam2 ;; + 4.06) OCAML_FULL_VERSION=4.06.1; install_opam2 ;; + 4.07) OCAML_FULL_VERSION=4.07.1; install_opam2 ;; + 4.08) OCAML_FULL_VERSION=4.08.1; install_opam2 ;; + 4.09) OCAML_FULL_VERSION=4.09.1; install_opam2 ;; + 4.10) OCAML_FULL_VERSION=4.10.1; install_opam2 ;; + 4.11) OCAML_FULL_VERSION=4.11.0; install_opam2 ;; + 4.12) OCAML_FULL_VERSION=4.12.0+trunk; OCAML_BETA=enable; install_opam2 ;; + *) + if [ "$OCAML_BETA" != "enable" ]; then + echo "Unknown OCAML_VERSION=$OCAML_VERSION" + echo "(An unset OCAML_VERSION used to default to \"latest\", but you must now specify it." 
+ echo "Try something like \"OCAML_VERSION=3.12\", \"OCAML_VERSION=4.10\", or see README-travis.md at https://github.com/ocaml/ocaml-ci-scripts )" + exit 1 + fi + OCAML_FULL_VERSION="${OCAML_VERSION}" + install_opam2 ;; + esac +} + +install_on_linux () { + case "$OCAML_VERSION" in + 3.12) OCAML_FULL_VERSION=3.12.1; install_opam2 ;; + 4.00) OCAML_FULL_VERSION=4.00.1; install_opam2 ;; + 4.01) OCAML_FULL_VERSION=4.01.0; install_opam2 ;; + 4.02) OCAML_FULL_VERSION=4.02.3; install_opam2 ;; + 4.03) OCAML_FULL_VERSION=4.03.0; install_opam2 ;; + 4.04) OCAML_FULL_VERSION=4.04.2; install_opam2 ;; + 4.05) OCAML_FULL_VERSION=4.05.0; install_opam2 ;; + 4.06) OCAML_FULL_VERSION=4.06.1; install_opam2 ;; + 4.07) OCAML_FULL_VERSION=4.07.1; install_opam2 ;; + 4.08) OCAML_FULL_VERSION=4.08.1; install_opam2 ;; + 4.09) OCAML_FULL_VERSION=4.09.1; install_opam2 ;; + 4.10) OCAML_FULL_VERSION=4.10.1; install_opam2 ;; + 4.11) OCAML_FULL_VERSION=4.11.0; install_opam2 ;; + 4.12) OCAML_FULL_VERSION=4.12.0+trunk; OCAML_BETA=enable; install_opam2 ;; + *) + if [ "$OCAML_BETA" != "enable" ]; then + echo "Unknown OCAML_VERSION=$OCAML_VERSION" + echo "(An unset OCAML_VERSION used to default to \"latest\", but you must now specify it." 
+ echo "Try something like \"OCAML_VERSION=3.12\", \"OCAML_VERSION=4.10\", or see README-travis.md at https://github.com/ocaml/ocaml-ci-scripts )" + exit 1 + fi + OCAML_FULL_VERSION="${OCAML_VERSION}" + install_opam2 ;; + esac + + XENIAL="deb mirror://mirrors.ubuntu.com/mirrors.txt xenial main restricted universe" + + if [ "$UPDATE_GCC_BINUTILS" != "0" ] ; then + echo "installing a recent gcc and binutils (mainly to get mirage-entropy-xen working!)" + sudo add-apt-repository "${XENIAL}" + sudo add-apt-repository --yes ppa:ubuntu-toolchain-r/test + sudo apt-get -qq update + sudo apt-get install -y gcc-5 + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 90 + sudo add-apt-repository -r "${XENIAL}" + fi + + if [ "$UBUNTU_XENIAL" != "0" ] ; then + echo "Adding Ubuntu Xenial mirrors" + sudo add-apt-repository "${XENIAL}" + sudo apt-get -qq update + APT_UPDATED=1 + fi + + if [ "${INSTALL_LOCAL:=0}" != 0 ] ; then + ( set +x; echo -en "travis_fold:start:build.ocaml\r" ) 2>/dev/null + echo "Building a local OCaml; this may take a few minutes..." + wget "http://caml.inria.fr/pub/distrib/ocaml-${OCAML_FULL_VERSION%.*}/ocaml-$OCAML_FULL_VERSION.tar.gz" + tar -xzf "ocaml-$OCAML_FULL_VERSION.tar.gz" + cd "ocaml-$OCAML_FULL_VERSION" + ./configure -prefix /usr/local ${OCAML_CONFIGURE_ARGS:=--with-debug-runtime} + make world.opt + sudo make install + cd .. 
+ rm -rf "ocaml-$OCAML_FULL_VERSION" + ( set +x; echo -en "travis_fold:end:build.ocaml\r" ) 2>/dev/null + fi +} + +install_on_osx () { + case $INSTALL_XQUARTZ in + true) + curl -OL "http://xquartz.macosforge.org/downloads/SL/XQuartz-2.7.6.dmg" + sudo hdiutil attach XQuartz-2.7.6.dmg + sudo installer -verbose -pkg /Volumes/XQuartz-2.7.6/XQuartz.pkg -target / + ;; + esac + case "$OCAML_VERSION" in + 3.12) OCAML_FULL_VERSION=3.12.1; install_opam2 ;; + 4.00) OCAML_FULL_VERSION=4.00.1; install_opam2 ;; + 4.01) OCAML_FULL_VERSION=4.01.0; install_opam2 ;; + 4.02) OCAML_FULL_VERSION=4.02.3; install_opam2 ;; + 4.03) OCAML_FULL_VERSION=4.03.0; install_opam2 ;; + 4.04) OCAML_FULL_VERSION=4.04.2; install_opam2 ;; + 4.05) OCAML_FULL_VERSION=4.05.0; install_opam2 ;; + 4.06) OCAML_FULL_VERSION=4.06.1; install_opam2 ;; + 4.07) OCAML_FULL_VERSION=4.07.1; install_opam2 ;; + 4.08) OCAML_FULL_VERSION=4.08.1; install_opam2 ;; + 4.09) OCAML_FULL_VERSION=4.09.0; + OPAM_SWITCH=${OPAM_SWITCH:-ocaml-system}; + brew install ocaml; + install_opam2 ;; + 4.10) OCAML_FULL_VERSION=4.10.1; install_opam2 ;; + 4.11) OCAML_FULL_VERSION=4.11.0; install_opam2 ;; + 4.12) OCAML_FULL_VERSION=4.12.0+trunk; OCAML_BETA=enable; install_opam2 ;; + *) + if [ "$OCAML_BETA" != "enable" ]; then + echo "Unknown OCAML_VERSION=$OCAML_VERSION" + exit 1 + fi + OCAML_FULL_VERSION="${OCAML_VERSION}" + install_opam2 ;; + esac +} + +case $TRAVIS_OS_NAME in + freebsd) install_on_freebsd ;; + osx) install_on_osx ;; + linux) install_on_linux ;; +esac + +ocaml_package=ocaml-base-compiler +if [ "$OCAML_BETA" = "enable" ]; then + ocaml_package=ocaml-variants +fi + +OPAM_SWITCH=${OPAM_SWITCH:-$ocaml_package.$OCAML_FULL_VERSION} + +PACKAGES="$OPAM_SWITCH" +case "$OCAML_VERSION" in + 3.12|4.00|4.01|4.02|4.03|4.04|4.05|4.06) + PACKAGES="$PACKAGES,ocaml-secondary-compiler";; +esac + +export OPAMYES=1 + +case $OPAM_INIT in + true) + opam init -a --bare "$BASE_REMOTE" + opam_repo_selection= + if [ "$OCAML_BETA" = "enable" ]; then + 
opam repo add --dont-select beta git://github.com/ocaml/ocaml-beta-repository.git + opam_repo_selection="--repo=default,beta" + fi + opam switch "$OPAM_SWITCH" || opam switch create $opam_repo_selection "$OPAM_SWITCH" --packages="$PACKAGES" + eval $(opam config env) + ;; +esac + +echo OCAML_VERSION=$OCAML_VERSION > .travis-ocaml.env +echo OPAM_SWITCH=$OPAM_SWITCH >> .travis-ocaml.env + +# Temporary fix an issue with opam-depext < 1.1.3 on FreeBSD. +# See https://github.com/ocaml/opam-depext/pull/123 +echo export ASSUME_ALWAYS_YES=YES >> .travis-ocaml.env + +if [ -x "$(command -v ocaml)" ]; then + # "|| true" is a temp fix for OCaml 4.12: https://github.com/ocaml/ocaml/pull/9798 + ocaml -version || true +else + echo "OCaml is not yet installed" +fi + +opam --version +opam --git-version diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..b24e3d6 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,50 @@ +dist: xenial + +language: generic + +env: + - OCAML_VERSION=4.09 + - OCAML_VERSION=4.10 + - OCAML_VERSION=4.11 + +os: + - freebsd + - linux + - osx + +jobs: + include: + - os: linux + env: OCAML_VERSION=4.11 INSTALL_LOCAL=1 + +cache: + directories: + - $HOME/.opam + +addons: + apt: + packages: + - autoconf + - autotools-dev + - libtool + - pkg-config + - libffi-dev + homebrew: + packages: + - autoconf + - automake + - libtool + - pkg-config + - libffi + +install: + - test -e .travis-ocaml.sh || wget https://raw.githubusercontent.com/ocaml/ocaml-ci-scripts/master/.travis-ocaml.sh + - bash -ex .travis-ocaml.sh + +script: + - opam pin -y -v -t add . 
+ - ls -l $(opam var libdash:lib) + - cat $(opam var libdash:lib)/META + - opam exec -- make -C test test + - opam uninstall libdash # clear the cache + diff --git a/GUIDE.md b/GUIDE.md new file mode 100644 index 0000000..6723ef8 --- /dev/null +++ b/GUIDE.md @@ -0,0 +1,72 @@ +# The dash AST + +The dash AST itself is defined in `src/nodes.h` and `src/parser.h` + +- `union node` in `src/nodes.h` on line 146 for commands/statements + node.type type tag + + node.nbinary (AND, OR, SEMI) + +- special characters and codes in `parser.h` on lines 40-64 + + CTL* for control codes in words + breaks multibyte characters/UTF-8 :( + + VS* for variable format metadata + +The parser in `src/parser.c` is not easy to read, but is a good place +to see dash ASTs being constructed. + +Input sources come in a stack (to support, e.g., the `source`/`.` +command). Dash has some subtle invariants around its own string +allocation stack... it took quite some time to get it right in Smoosh, +so don't try to "optimize" things! (See +https://github.com/mgree/smoosh/pull/18 for test cases.) + +To get a gist for how they're used, look at `evaltree` at line 200 in +`src/eval.c`. To see how the special characters and codes are used, +see `argstr` at line 23 in `src/expand.c`. + +# OCaml bindings + +The core OCaml bindings are in `ocaml/dash.ml`. The bindings are +dynamically loaded by ctypes. (It's a longstanding TODO to make these +bindings static, as it would significantly simplify the build process.) + +Lines 69 through 233 are just copying the definitions from +`src/nodes.h`. + +The primary API entry point is `parse_next`, which returns one of a +few results: + + - `Done` when EOF (dash returns the special node `neof`, not `NULL`!) has been + reached for the current input. + + - `Error` when parsing failed (dash returns the special node `nerr`, + not `NULL`!). + + - `Null` when there was no command, e.g., a blank line (dash returns + `NULL` here). + + - `Parsed n` for some `node`, `n`. 
Note that `n` is a dash AST, + i.e., a ctypes structure. + +These nodes are dash AST nodes, not yet a usable OCaml structure. + +# AST translation + +See `ocaml/ast.ml` (or Smoosh's `src/shim.ml` for a more +battle-hardened, nearly but not quite identical version of the same +code) for the `of_node` entry point that converts dash AST nodes to +OCaml structures. + +`parse_arg` is a funny stack machine, best thought of as a for loop +with an explicit stack. There are some tricky extra bits of +information to track (i.e., when tildes are possible, whether we're in +an assignment). + +# General approach + +Call `Dash.initialize`, then `Dash.parse` with the string you +have. Call `Ast.of_node` on the resulting dash AST to get a nice OCaml +structure. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..04c5720 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,23 @@ +include COPYING Makefile.am autogen.sh configure.ac +graft src +exclude src/builtins.c src/builtins.h src/builtins.def +exclude src/dash +exclude src/init.c src/mkinit +exclude src/mknodes src/mksignames src/mksyntax +exclude src/nodes.c src/nodes.h +exclude src/signames.c +exclude src/syntax.c src/syntax.h +exclude src/token.h +prune src/.deps +prune src/bltin/.deps +prune src/mkinit.dSYM +prune src/mknodes.dSYM +prune src/mksignames.dSYM +prune src/mksyntax.dSYM +global-exclude *.dSYM *.o *.lo *.la *.py[cod] __pycache__ Makefile *.log .gitignore +prune src/.libs +prune ocaml/ +prune test/ +prune build/ +exclude python/rt.py +include libdash/libdash.so libdash/libdash.dylib \ No newline at end of file diff --git a/Makefile.am b/Makefile.am index af437a6..cb1807d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1 +1,4 @@ +ACLOCAL_AMFLAGS=-I m4 + SUBDIRS = src + diff --git a/README.md b/README.md new file mode 100644 index 0000000..a984246 --- /dev/null +++ b/README.md @@ -0,0 +1,73 @@ +[![Main
workflow](https://github.com/mgree/libdash/actions/workflows/build.yml/badge.svg)](https://github.com/mgree/libdash/actions/workflows/build.yml) + +*libdash* is a fork of the Linux Kernel's `dash` shell that builds a linkable library with extra exposed interfaces. The primary use of libdash is to parse shell scripts, but it could be used for more. + +The Python bindings are packaged as the [`libdash` PyPi package](https://pypi.org/project/libdash/). + +The OCaml bindings---packaged as the [`libdash` OPAM package](https://opam.ocaml.org/packages/libdash/)---include two executables, `shell_to_json` and `json_to_shell`, which let you conveniently parse POSIX shell scripts into a JSON AST. + +# What are the dependencies? + +The C code for dash should build on a wide variety of systems; it requires `libtool` and `autotools` (`aclocal`, `autoheader`, `automake`, `autoconf`). The library may not build on platforms with esoteric linkers; it's been tested on macOS and Linux. + +The Python and OCaml bindings depend on being able to build the C code. See `libdash.opam` for details on the OCaml code's dependencies, which includes the build-time external dependencies. Python wheels have no need for these build-time dependencies, but building from a Python source distribution will only succeed when `libtool` and `autotools` are present. + +The CI scripts (in `.github/workflows/build.yml`) give build details. + +## How to build `libdash` from source + +### Python + +Run `python3 setup.py install`. On macOS, you must first install the build dependencies via `brew install libtool autoconf automake`. + +You can test the Python bindings by running: + +``` +cd python; make test +``` + +### OCaml + +Install the OPAM file: `opam pin add .` or `opam install .`. This will build the OCaml library and install it in your OPAM repository. There are tests in another directory; they will only build when libdash is actually installed. 
+ +You can test the OCaml bindings by running: + +``` +cd ocaml; make test +``` + +### Testing + +The tests use `test/round_trip.sh` to ensure that every tester file in `test/tests` round-trips correctly through parsing and pretty printing. + +Additionally, you can run tests that compare the OCaml and Python implementations (after you've installed them both): + +``` +cd test; make +``` + +# How to use the parser + +For Python, see [`python/rt.py`](https://github.com/mgree/libdash/blob/master/python/rt.py), an example tool that does a round-trip: shell syntax to AST back to shell syntax. + +For OCaml, see [`ocaml/shell_to_json.ml`](https://github.com/mgree/libdash/blob/master/ocaml/shell_to_json.ml), a tool that parses shell syntax and produces JSON (using the [atdgen](https://opam.ocaml.org/packages/atdgen/) bindings). + +The ideal low-level interface to use is `parsecmd_safe` in `parser.c`; you'll need to ensure that dash's initialization routines have been called and that the stack marks are managed correctly. Parsing the POSIX shell is a complicated affair: beyond the usual locale issues, aliases affect the lexer, so one must use `setalias` and `unalias` to manage any aliases that ought to exist. + +# How to work with the parsed nodes + +The general AST is described in `nodes.h`. There are some tricky invariants around the precise formatting of control codes; the OCaml code shows some examples of working with the `args` fields in `ocaml/ast.ml`, which converts the C AST to an OCaml AST. + +The OCaml tools `shell_to_json` and `json_to_shell` will produce JSON ASTs, allowing you to work with these ASTs in any language. + +# Pretty printing + +The pretty printer does its best to produce valid shell scripts, but it's possible to manually construct AST nodes that don't directly correspond to valid scripts. + +For example, the Python AST `[[['Q', [['C', 34]]]]]` represents a quoted field containing a double quote character.
Translated literally, this would yield the string `"""`, which is not a valid shell script. The pretty printer will instead automatically escape the inner quote, rendering `"\""`. + +While the printer tries to get things right either way, you should use escapes to signal to the printer when to escape: you should use the Python AST `[[['Q', [['E', 34]]]]]` to mark the inner double quote as escaped. + +# Known issues + +We currently do not escape the character `!` (exclamation point). In an interactive shell, `!` is likely treated as a history substitution (and so should be escaped), but in a non-interactive shell, `!` is treated normally. We currently cater to non-interactive shells; eventually this behavior will be controllable. diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..59d8c47 --- /dev/null +++ b/TODO.md @@ -0,0 +1,8 @@ +- [x] clear out old C stuff +- [x] get roundtrips correct (fix OCaml pretty printing) +- [x] correct libdash.so installation (locally) +- [x] pip setup +- [x] testpypi setup +- [ ] version bump, fix CI +- [ ] pash pull request +- [ ] smoosh pull request diff --git a/autogen.sh b/autogen.sh index 9879c53..bbc5667 100755 --- a/autogen.sh +++ b/autogen.sh @@ -1,6 +1,7 @@ #!/bin/sh -aclocal \ +libtoolize \ +&& aclocal \ && autoheader \ && automake --add-missing \ && autoconf diff --git a/configure.ac b/configure.ac index 036730d..e456c7f 100644 --- a/configure.ac +++ b/configure.ac @@ -1,15 +1,20 @@ -AC_INIT([dash],[0.5.10.2]) +AC_INIT([dash],[0.5.13.3]) AM_INIT_AUTOMAKE([foreign subdir-objects]) AC_CONFIG_SRCDIR([src/main.c]) - AC_CONFIG_HEADERS(config.h) +AC_CONFIG_MACRO_DIRS([m4]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)]) dnl Checks for programs. 
AC_PROG_CC AC_USE_SYSTEM_EXTENSIONS + +dnl AC_GNU_SOURCE AC_PROG_YACC +dnl MMG 2018-09-26 support building the library +AM_PROG_AR + AC_MSG_CHECKING([for build system compiler]) if test "$cross_compiling" = yes; then @@ -35,17 +40,20 @@ fi AC_ARG_ENABLE(static, AS_HELP_STRING(--enable-static, \ [Build statical linked program])) if test "$enable_static" = "yes"; then - export LDFLAGS="-static -Wl,--fatal-warnings" + export LDFLAGS="-static" fi AC_ARG_ENABLE(fnmatch, AS_HELP_STRING(--enable-fnmatch, \ [Use fnmatch(3) from libc])) AC_ARG_ENABLE(glob, AS_HELP_STRING(--enable-glob, [Use glob(3) from libc])) +AC_ARG_ENABLE(tee, AS_HELP_STRING(--disable-tee, [Do not use tee(2)])) +AC_ARG_ENABLE(memfd_create, AS_HELP_STRING(--disable-memfd-create, + [Do not use memfd_create(3)])) dnl Checks for libraries. dnl Checks for header files. -AC_CHECK_HEADERS(alloca.h paths.h) +AC_CHECK_HEADERS(alloca.h paths.h sys/wait.h) dnl Check for declarations AC_CHECK_DECL([_PATH_BSHELL],,AC_DEFINE_UNQUOTED([_PATH_BSHELL], "/bin/sh", [Define to system shell path]),[ @@ -88,9 +96,9 @@ AC_CHECK_DECL([PRIdMAX],, dnl Checks for library functions. AC_CHECK_FUNCS(bsearch faccessat getpwnam getrlimit isalpha killpg \ - mempcpy \ + memfd_create memrchr mempcpy \ sigsetmask stpcpy strchrnul strsignal strtod strtoimax \ - strtoumax sysconf) + strtoumax sysconf tee wait3) dnl Check whether it's worth working around FreeBSD PR kern/125009. 
dnl The traditional behavior of access/faccessat is crazy, but @@ -132,6 +140,24 @@ if test "$use_fnmatch" = yes && test "$enable_glob" = yes; then AC_CHECK_FUNCS(glob) fi +if test "$enable_tee" != no; then + AC_CHECK_FUNCS(tee, use_tee=yes) +fi + +if test "$use_tee" = yes; then + AC_DEFINE([USE_TEE], [1], [Non-zero if tee(2) should be used]) +else + AC_DEFINE([USE_TEE], [0], [Non-zero if tee(2) should be used]) +fi + +if test "$enable_memfd_create" != no; then + AC_DEFINE([USE_MEMFD_CREATE], [1], + [Non-zero if memfd_create(3) should be used]) +else + AC_DEFINE([USE_MEMFD_CREATE], [0], + [Non-zero if memfd_create(3) should be used]) +fi + dnl Check for klibc signal. AC_CHECK_FUNC(signal) if test "$ac_cv_func_signal" != yes; then @@ -141,14 +167,27 @@ if test "$ac_cv_func_signal" != yes; then fi dnl Check for stat64 (dietlibc/klibc). -AC_CHECK_FUNC(stat64,, [ +AC_CHECK_DECL(stat64, AC_CHECK_FUNC(stat64)) +if test "$ac_cv_func_stat64" != yes; then AC_DEFINE(fstat64, fstat, [64-bit operations are the same as 32-bit]) AC_DEFINE(lstat64, lstat, [64-bit operations are the same as 32-bit]) AC_DEFINE(stat64, stat, [64-bit operations are the same as 32-bit]) +fi + +AC_CHECK_FUNC(glob64,, [ + AC_DEFINE(glob64_t, glob_t, [64-bit operations are the same as 32-bit]) + AC_DEFINE(glob64, glob, [64-bit operations are the same as 32-bit]) + AC_DEFINE(globfree64, globfree, + [64-bit operations are the same as 32-bit]) ]) +dnl OS X apparently has stat64 but not open64. AC_CHECK_FUNC(open64,, [ AC_DEFINE(open64, open, [64-bit operations are the same as 32-bit]) + AC_DEFINE(readdir64, readdir, + [64-bit operations are the same as 32-bit]) + AC_DEFINE(dirent64, dirent, + [64-bit operations are the same as 32-bit]) ]) dnl Check if struct stat has st_mtim. 
@@ -165,13 +204,25 @@ if test "$have_st_mtim" = "yes"; then [Define if your `struct stat' has `st_mtim']) fi +dnl F_DUPFD_CLOEXEC is a mandatory part of POSIX since Issue 7 +AC_MSG_CHECKING(for F_DUPFD_CLOEXEC) +AC_COMPILE_IFELSE( +[AC_LANG_PROGRAM([#include +#include ], +[return fcntl(0, F_DUPFD_CLOEXEC, 0)])], +have_dupfd_cloexec=1, have_dupfd_cloexec=0) +AC_MSG_RESULT($(expr yes \& $have_dupfd_cloexec \| no)) +AC_DEFINE_UNQUOTED([HAVE_F_DUPFD_CLOEXEC], [$have_dupfd_cloexec], + [Define to 1 your system supports F_DUPFD_CLOEXEC]) + AC_ARG_WITH(libedit, AS_HELP_STRING(--with-libedit, [Compile with libedit support])) use_libedit= if test "$with_libedit" = "yes"; then AC_CHECK_LIB(edit, history_init, [ AC_CHECK_HEADER([histedit.h], [use_libedit="yes"], AC_MSG_ERROR( - [Can't find required header files.]))]) + [Can't find required header files.]))], [ + AC_MSG_ERROR([Can't find libedit.])]) fi if test "$use_libedit" != "yes"; then AC_DEFINE([SMALL], 1, [Define if you build with -DSMALL]) @@ -183,5 +234,10 @@ AC_ARG_ENABLE(lineno, AS_HELP_STRING(--disable-lineno, \ if test "$enable_lineno" != "no"; then AC_DEFINE([WITH_LINENO], 1, [Define if you build with -DWITH_LINENO]) fi + +dnl MMG 2018-09-27 support building the shared library +AC_PROG_LIBTOOL +LT_INIT + AC_CONFIG_FILES([Makefile src/Makefile]) AC_OUTPUT diff --git a/dune b/dune new file mode 100644 index 0000000..ccc9ae6 --- /dev/null +++ b/dune @@ -0,0 +1,37 @@ +(data_only_dirs src) + +(rule + (deps (source_tree src) configure.ac Makefile.am) + (targets libdash.a dlldash.so + builtins.h nodes.h syntax.h token.h token_vars.h + ) + (action + (setenv CC "%{cc}" + (bash + "\ + \n set -e\ + \n if [ \"$(uname -s)\" = \"Darwin\" ]; then glibtoolize; else libtoolize; fi\ + \n aclocal && autoheader && automake --add-missing && autoconf\ + \n ./configure --prefix=\"$(pwd)\"\ + \n %{make}\ + \n %{make} install\ + \n cp lib/libdash.a libdash.a\ + \n cp lib/dlldash.so dlldash.so\ + \n cp 
src/{builtins,nodes,syntax,token,token_vars}.h .\ + \n")))) + +(subdir src + (rule + (deps ../builtins.h ../nodes.h ../syntax.h ../token.h ../token_vars.h) + (targets builtins.h nodes.h syntax.h token.h token_vars.h) + (action + (progn + (copy ../builtins.h builtins.h) + (copy ../nodes.h nodes.h) + (copy ../syntax.h syntax.h) + (copy ../token.h token.h) + (copy ../token_vars.h token_vars.h))))) + +(library + (name dash) + (foreign_archives dash)) diff --git a/dune-project b/dune-project new file mode 100644 index 0000000..b100776 --- /dev/null +++ b/dune-project @@ -0,0 +1,23 @@ +(lang dune 3.12) +(name libdash) +(using ctypes 0.3) + +(source (github mgree/libdash)) +(license BSD-3-Clause) +(authors "Michael Greenberg") +(maintainers "michael@greenberg.science") + +(package + (name libdash) + (synopsis "Bindings to the dash shell's parser") + (depends + ("ctypes" (>= "0.21.1")) + ("ctypes-foreign" (>= "0.21.1")) + ("atdgen" (>= "2.15.0")) + ("atdgen-runtime" (>= "2.15.0")) + ("conf-autoconf" (>= 0.1)) + ("conf-aclocal" (>= 2)) + ("conf-automake" (>= 1)) + ("conf-libtool" (>= 1)))) + +(generate_opam_files) \ No newline at end of file diff --git a/dune-workspace b/dune-workspace new file mode 100644 index 0000000..42ee224 --- /dev/null +++ b/dune-workspace @@ -0,0 +1,4 @@ +(lang dune 3.12) +(env + (dev + (flags (:standard -warn-error -27)))) \ No newline at end of file diff --git a/ldconfig.sh b/ldconfig.sh new file mode 100755 index 0000000..334ac7b --- /dev/null +++ b/ldconfig.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +set -e + +cd _build/lib + +trylink() { + [ -f "$2" ] || ln -sf $1 $2 +} + +trylink dlldash.so.0.0.0 dlldash.so +trylink dlldash.so.0.0.0 dlldash.so.0 + +trylink libdash.so.0.0.0 libdash.so +trylink libdash.so.0.0.0 libdash.so.0 + diff --git a/libdash.opam b/libdash.opam new file mode 100644 index 0000000..9278923 --- /dev/null +++ b/libdash.opam @@ -0,0 +1,34 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: 
"Bindings to the dash shell's parser" +maintainer: ["michael@greenberg.science"] +authors: ["Michael Greenberg"] +license: "BSD-3-Clause" +homepage: "https://github.com/mgree/libdash" +bug-reports: "https://github.com/mgree/libdash/issues" +depends: [ + "dune" {>= "3.12"} + "ctypes" {>= "0.21.1"} + "ctypes-foreign" {>= "0.21.1"} + "atdgen" {>= "2.15.0"} + "conf-autoconf" {>= "0.1"} + "conf-aclocal" {>= "2"} + "conf-automake" {>= "1"} + "conf-libtool" {>= "1"} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/mgree/libdash.git" diff --git a/libdash/.gitignore b/libdash/.gitignore new file mode 100644 index 0000000..8749261 --- /dev/null +++ b/libdash/.gitignore @@ -0,0 +1,2 @@ +libdash.dylib +libdash.so diff --git a/libdash/__init__.py b/libdash/__init__.py new file mode 100644 index 0000000..19850a4 --- /dev/null +++ b/libdash/__init__.py @@ -0,0 +1,2 @@ +from .parser import parse +from .printer import to_string diff --git a/libdash/_dash.py b/libdash/_dash.py new file mode 100644 index 0000000..bb15190 --- /dev/null +++ b/libdash/_dash.py @@ -0,0 +1,335 @@ +from ctypes import * + + +# nodes.h +NCMD = 0; +NPIPE = 1; +NREDIR = 2; +NBACKGND = 3; +NSUBSHELL = 4; +NAND = 5; +NOR = 6; +NSEMI = 7; +NIF = 8; +NWHILE = 9; +NUNTIL = 10; +NFOR = 11; +NCASE = 12; +NCLIST = 13; +NDEFUN = 14; +NARG = 15; +NTO = 16; +NCLOBBER = 17; +NFROM = 18; +NFROMTO = 19; +NAPPEND = 20; +NTOFD = 21; +NFROMFD = 22; +NHERE = 23; +NXHERE = 24; +NNOT = 25; + + +# struct stackmark { +# struct stack_block *stackp; +# char *stacknxt; +# size_t stacknleft; +# }; +# +# We only care about getting the struct size correct, not the contents. 
class stackmark(Structure):
    """Size-compatible stand-in for dash's ``struct stackmark``.

    Only ``sizeof(stackmark)`` is used (by ``init_stack``); the fields are
    never read or written from Python.
    """
    _fields_ = [("stackp", c_void_p),
                ("nxt", c_void_p),
                ("size", c_size_t)]


def init_stack(libdash):
    """Allocate a stack-mark buffer and pass it to ``libdash.setstackmark``.

    Returns the ctypes buffer so the caller can hand it to ``pop_stack``.
    """
    stack = create_string_buffer(sizeof(stackmark))

    # The mark is passed as an opaque pointer; Python never looks inside it.
    libdash.setstackmark.argtypes = [c_void_p]
    # ctypes only honours `restype`. The former `restypes` spelling merely
    # created an unused attribute and left the default int return type.
    libdash.setstackmark.restype = None
    libdash.setstackmark(stack)

    return stack


def pop_stack(libdash, smark):
    """Call ``libdash.popstackmark`` on a mark obtained from ``init_stack``."""
    # Inefficient: the prototype is re-declared on every call.
    libdash.popstackmark.argtypes = [c_void_p]  # again, hide the contents
    libdash.popstackmark.restype = None

    return libdash.popstackmark(smark)


def dash_init(libdash):
    """Call the library's ``init()`` entry point (no arguments, no result)."""
    libdash.init.argtypes = []
    libdash.init.restype = None

    libdash.init()


def initialize_dash_errno(libdash):
    """Call the library's ``initialize_dash_errno()`` (no arguments, no result)."""
    libdash.initialize_dash_errno.argtypes = []
    libdash.initialize_dash_errno.restype = None

    libdash.initialize_dash_errno()


def initialize(libdash):
    """Run ``initialize_dash_errno`` and then ``dash_init`` on ``libdash``."""
    initialize_dash_errno(libdash)
    dash_init(libdash)


def setinputtostdin(libdash):
    """Point the parser at file descriptor 0 via ``setinputfd(0, 0)``."""
    libdash.setinputfd.argtypes = [c_int, c_int]
    libdash.setinputfd.restype = None

    libdash.setinputfd(0, 0)


# TODO: allow push parameter
def setinputfile(libdash, filename):
    """Call ``setinputfile(filename, 0)``; ``filename`` is UTF-8 encoded first."""
    libdash.setinputfile.argtypes = [c_char_p, c_int]
    libdash.setinputfile.restype = c_int  # was misspelled `restypes`
    libdash.setinputfile(filename.encode('utf-8'), 0)


def parsecmd_safe(libdash, interactive):
    """Call ``parsecmd_safe(int(interactive))`` and return its raw pointer value.

    The result type is ``c_void_p``, so ctypes hands back an ``int`` address,
    or ``None`` for a NULL pointer.
    """
    libdash.parsecmd_safe.argtypes = [c_int]
    libdash.parsecmd_safe.restype = c_void_p

    return libdash.parsecmd_safe(int(interactive))


# Forward declarations to break recursive dependencies: the node structs
# below point at these two types before their fields can be listed.
class union_node(Union):
    pass


class nodelist(Structure):
    pass


class ncmd(Structure):
    _fields_ = [("type", c_int),
                ("linno", c_int),
                ("assign", POINTER(union_node)),
                ("args", POINTER(union_node)),
                ("redirect", POINTER(union_node))]


class npipe(Structure):
    _fields_ = [("type", c_int),
                ("backgnd", c_int),
                ("cmdlist", POINTER(nodelist))]


class nredir(Structure):
    _fields_ = [("type", c_int),
                ("linno", c_int),
                ("n", POINTER(union_node)),
                ("redirect", POINTER(union_node))]


class nbinary(Structure):
    _fields_ = [("type", c_int),
                ("ch1", POINTER(union_node)),
                ("ch2", POINTER(union_node))]


class nif(Structure):
    _fields_ = [("type", c_int),
                ("test", POINTER(union_node)),
                ("ifpart", POINTER(union_node)),
                ("elsepart", POINTER(union_node))]


class nfor(Structure):
    _fields_ = [("type", c_int),
                ("linno", c_int),
                ("args", POINTER(union_node)),
                ("body", POINTER(union_node)),
                ("var", c_char_p)]


class ncase(Structure):
    _fields_ = [("type", c_int),
                ("linno", c_int),
                ("expr", POINTER(union_node)),
                ("cases", POINTER(union_node))]


class nclist(Structure):
    _fields_ = [("type", c_int),
                ("next", POINTER(union_node)),
                ("pattern", POINTER(union_node)),
                ("body", POINTER(union_node))]


class ndefun(Structure):
    _fields_ = [("type", c_int),
                ("linno", c_int),
                ("text", c_char_p),
                ("body", POINTER(union_node))]


class narg(Structure):
    _fields_ = [("type", c_int),
                ("next", POINTER(union_node)),
                ("text", c_char_p),
                ("backquote", POINTER(nodelist))]


class nfile(Structure):
    _fields_ = [("type", c_int),
                ("next", POINTER(union_node)),
                ("fd", c_int),
                ("fname", POINTER(union_node)),
                ("expfname", c_char_p)]


class ndup(Structure):
    _fields_ = [("type", c_int),
                ("next", POINTER(union_node)),
                ("fd", c_int),
                ("dupfd", c_int),
                ("vname", POINTER(union_node))]


class nhere(Structure):
    _fields_ = [("type", c_int),
                ("next", POINTER(union_node)),
                ("fd", c_int),
                ("doc", POINTER(union_node))]


class nnot(Structure):
    _fields_ = [("type", c_int),
                ("com", POINTER(union_node))]


nodelist._fields_ = [("next", POINTER(nodelist)),
                     ("n", POINTER(union_node))]

# Every member struct starts with an int `type`, so the bare `type` member
# reads the tag regardless of which variant is stored.
union_node._fields_ = [("type", c_int),
                       ("ncmd", ncmd),
                       ("npipe", npipe),
                       ("nredir", nredir),
                       ("nbinary", nbinary),
                       ("nif", nif),
                       ("nfor", nfor),
                       ("ncase", ncase),
                       ("nclist", nclist),
                       ("ndefun", ndefun),
                       ("narg", narg),
                       ("nfile", nfile),
                       ("ndup", ndup),
                       ("nhere", nhere),
                       ("nnot", nnot)]


class strpush(Structure):
    pass

# struct strpush {
#     struct strpush *prev;   /* preceding string on stack */
#     char *prevstring;
#     int prevnleft;
#     struct alias *ap;       /* if push was associated with an alias */
#     char *string;           /* remember the string since it may change */
#
#     /* Remember last two characters for pungetc. */
#     int lastc[2];
#
#     /* Number of outstanding calls to pungetc. */
#     int unget;
# };
strpush._fields_ = [("prev", POINTER(strpush)),
                    ("prevstring", c_char_p),
                    ("prevnleft", c_int),
                    ("ap", c_void_p),
                    ("string", c_char_p),
                    ("lastc", 2 * c_int),
                    ("unget", c_int)]


class parsefile(Structure):
    pass

# struct parsefile {
#     struct parsefile *prev;      /* preceding file on stack */
#     int linno;                   /* current line */
#     int fd;                      /* file descriptor (or -1 if string) */
#     int nleft;                   /* number of chars left in this line */
#     int lleft;                   /* number of chars left in this buffer */
#     char *nextc;                 /* next char in buffer */
#     char *buf;                   /* input buffer */
#     struct strpush *strpush;     /* for pushing strings at this level */
#     struct strpush basestrpush;  /* so pushing one is fast */
#
#     /* Remember last two characters for pungetc. */
#     int lastc[2];
#
#     /* Number of outstanding calls to pungetc. */
#     int unget;
# };
parsefile._fields_ = [("prev", POINTER(parsefile)),
                      ("linno", c_int),
                      ("fd", c_int),
                      ("nleft", c_int),
                      ("lleft", c_int),
                      ("nextc", POINTER(c_char)),  # NOT c_char_p!
                      ("buf", c_char_p),
                      ("strpush", POINTER(strpush)),
                      ("basestrpush", strpush),
                      ("lastc", 2 * c_int),
                      ("unget", c_int)]
# dash.ast
# let rec nodelist (n : nodelist structure ptr) : (node union ptr) list =
#   if nullptr n
#   then []
#   else (n @-> nodelist_n)::nodelist (n @-> nodelist_next)
def nodelist(nl):
    """Return the ``n`` member of every cell reachable from ``nl`` via ``next``.

    NOTE(review): this function reuses the name of the ``nodelist`` structure
    declared earlier in the file, so the name refers to the function from
    here on.
    """
    snek = []

    # ctypes has different semantics for POINTER vs. c_void_p
    # See https://groups.google.com/g/nzpug/c/5CJxaWjuQro
    # A NULL POINTER instance is falsy, which ends the walk.
    while nl:
        cell = nl.contents
        snek.append(cell.n)
        nl = cell.next

    return snek


def caselist(n):
    """Walk a chain of case-list nodes and return ``(pattern, body)`` pairs.

    Both elements of each pair are the raw node pointers, left unconverted.
    """
    cases = []

    while n:
        nclist = n.contents.nclist

        assert nclist.type == 13  # every cell must carry the type tag 13

        cases.append((nclist.pattern, nclist.body))

        n = nclist.next

    return cases


def explode_rev(bytes):
    """Return the byte values of ``bytes`` as a list, last byte first."""
    charlist = explode(bytes)
    charlist.reverse()

    return charlist


def explode(bytes):
    """Return the byte values of ``bytes`` as a list of ints.

    Iterating a bytes object already yields ints, which is what the earlier
    decode("charmap") followed by ord() produced one character at a time.
    """
    return list(bytes)


def implode_rev(l):
    """Inverse of ``explode_rev``: join the code points of ``l`` in reverse order."""
    return implode(reversed(l))


def implode(l):
    """Build a string from an iterable of code points."""
    # join is linear; repeated `s = s + chr(c)` copies the string each time.
    return "".join(map(chr, l))
# parser.h
CTLESC = 129
CTLVAR = 130
CTLENDVAR = 131
CTLBACKQ = 132
CTLARI = 134
CTLENDARI = 135
CTLQUOTEMARK = 136

# Internal use only
STACK_CTLVAR = 100
STACK_CTLARI = 101
STACK_CTLQUO = 102


VAR_TYPES = [
    "Normal",    # 0x0
    "UNUSED",
    "Minus",     # 0x2
    "Plus",      # 0x3
    "Question",  # 0x4
    "Assign",    # 0x5
    "TrimR",     # 0x6
    "TrimRMax",  # 0x7
    "TrimL",     # 0x8
    "TrimLMax",  # 0x9
    "Length",    # 0xa
]


SKIP_COMMAND = ["Command", [-1, [], [], []]]

ORD_TILDE = ord('~')
ORD_EQUALS = ord('=')
ORD_MINUS = ord('-')
ORD_COLON = ord(':')
ORD_SLASH = ord('/')


def var_type(i):
    """Look up the variable-substitution name stored at index ``i`` of VAR_TYPES."""
    return VAR_TYPES[i]


# Inline 'list (map (of_node, nodelist (nl)))'
def map_ofnode_nodelist(nl):
    """Convert every node of a ``nodelist`` chain with ``of_node``."""
    converted = []

    # ctypes has different semantics for POINTER vs. c_void_p
    # See https://groups.google.com/g/nzpug/c/5CJxaWjuQro
    while nl:
        cell = nl.contents
        converted.append(of_node(cell.n))
        nl = cell.next

    return converted


def of_node(n_ptr):
    """Translate a node pointer into the nested-list AST form.

    A NULL pointer becomes the shared ``SKIP_COMMAND`` placeholder. A node
    whose type tag is not handled prints a message and aborts the process.
    """
    if not n_ptr:
        return SKIP_COMMAND

    n = n_ptr.contents
    kind = n.type

    if kind == NCMD:
        cmd = n.ncmd
        return ["Command",
                [cmd.linno,
                 to_assigns(cmd.assign),
                 to_args(cmd.args),
                 redirs(cmd.redirect)]]

    if kind == NSEMI:
        return ["Semi", of_binary(n)]

    if kind == NIF:
        cond = n.nif
        return ["If",
                [of_node(cond.test),
                 of_node(cond.ifpart),
                 of_node(cond.elsepart)]]

    if kind == NCASE:
        case_node = n.ncase
        # One dict per clause, built before the scrutinee is converted.
        cases_hashes = [{'cpattern': to_args(pattern), 'cbody': of_node(body)}
                        for (pattern, body) in caselist(case_node.cases)]
        return ["Case",
                [case_node.linno,
                 to_arg(case_node.expr.contents.narg),
                 cases_hashes]]

    if kind == NAND:
        return ["And", of_binary(n)]

    if kind == NOR:
        return ["Or", of_binary(n)]

    if kind == NFOR:
        loop = n.nfor
        return ["For",
                [loop.linno,
                 to_args(loop.args),
                 of_node(loop.body),
                 loop.var.decode("charmap")]]

    if kind == NDEFUN:
        fun = n.ndefun
        return ["Defun",
                [fun.linno,
                 fun.text.decode("charmap"),
                 of_node(fun.body)]]

    if kind == NPIPE:
        pipe = n.npipe
        return ["Pipe",
                [pipe.backgnd != 0,
                 map_ofnode_nodelist(pipe.cmdlist)]]

    if kind == NSUBSHELL:
        return ["Subshell", of_nredir(n)]

    if kind == NWHILE:
        return ["While", of_binary(n)]

    if kind == NREDIR:
        return ["Redir", of_nredir(n)]

    if kind == NUNTIL:
        # `until t` is represented as `while !t`.
        (test, body) = of_binary(n)
        return ["While", [["Not", test], body]]

    if kind == NBACKGND:
        return ["Background", of_nredir(n)]

    if kind == NNOT:
        return ["Not", of_node(n.nnot.com)]

    print("Unexpected type")
    sys.stdout.flush()
    os.abort()


def of_nredir(n):
    """Return ``[linno, converted child, redirections]`` for an ``nredir``-shaped node."""
    wrapped = n.nredir
    return [wrapped.linno, of_node(wrapped.n), redirs(wrapped.redirect)]


def mk_file(ty, n):
    """Build a ``File`` redirection of flavour ``ty`` from a node's ``nfile`` view."""
    target = to_arg(n.nfile.fname.contents.narg)

    return ["File", [ty, n.nfile.fd, target]]


def mk_dup(ty, n):
    """Build a ``Dup`` redirection of flavour ``ty`` from a node's ``ndup`` view.

    Without a ``vname`` the target is spelled out from ``dupfd``: ``-`` when
    it is -1, its decimal digits otherwise.
    """
    ndup = n.ndup
    vname = ndup.vname

    if vname:
        tgt = to_arg(vname.contents.narg)
    elif ndup.dupfd == -1:
        tgt = [["C", ORD_MINUS]]
    else:
        tgt = [["C", ord(digit)] for digit in str(ndup.dupfd)]

    return ["Dup", [ty, ndup.fd, tgt]]


def mk_here(ty, n):
    """Build a ``Heredoc`` redirection of flavour ``ty`` from a node's ``nhere`` view."""
    return ["Heredoc", [ty, n.nhere.fd, to_arg(n.nhere.doc.contents.narg)]]


def redirs(n_ptr):
    """Convert a chain of redirection nodes into a list of redirection ASTs."""
    file_kinds = {NTO: "To", NCLOBBER: "Clobber", NFROM: "From",
                  NFROMTO: "FromTo", NAPPEND: "Append"}
    dup_kinds = {NTOFD: "ToFD", NFROMFD: "FromFD"}
    here_kinds = {NHERE: "Here", NXHERE: "XHere"}

    rlist = []

    while n_ptr:
        n = n_ptr.contents
        kind = n.type

        if kind in file_kinds:
            h = mk_file(file_kinds[kind], n)
        elif kind in dup_kinds:
            h = mk_dup(dup_kinds[kind], n)
        elif kind in here_kinds:
            h = mk_here(here_kinds[kind], n)
        else:
            print("unexpected node_type in redirlist")
            os.abort()

        rlist.append(h)

        # The link is always read through the `nfile` view, whatever the kind.
        n_ptr = n.nfile.next

    return rlist


def of_binary(n):
    """Convert both children of an ``nbinary``-shaped node, left one first."""
    pair = n.nbinary
    return [of_node(pair.ch1), of_node(pair.ch2)]
def to_arg(narg):
    """Convert one ``narg`` structure into a list of argument characters.

    The text is exploded in reverse so the next byte to consume is always
    ``s[-1]``. Both the byte list and the scope stack must be empty once
    parsing finishes.
    """
    s = explode_rev(narg.text)
    stack = []

    a = parse_arg(s, narg.backquote, stack)

    assert len(s) == 0
    assert len(stack) == 0

    return a


def parse_arg(s, bqlist, stack):
    """Parse argument characters from ``s`` until the current scope closes.

    ``s`` is a reversed list of byte values and is consumed in place.
    ``bqlist`` is the chain of command-substitution nodes; each CTLBACKQ
    marker takes the next one. ``stack`` records the open CTLVAR / CTLARI /
    CTLQUOTEMARK scopes. Returns the list of parsed characters; malformed
    input prints a diagnostic and aborts the process.

    The back-quote position is shared with every nested scope. Previously
    each nested call advanced only its own local copy, so after a
    substitution inside quotes, ``${...}`` or ``$((...))`` the enclosing
    level handed out the same node again.
    """
    return _parse_arg(s, [bqlist], stack)


def _abort_parse(message, *dump):
    """Print optional debugging values, then ``message``, and abort the process."""
    for item in dump:
        print(item)
    # Flush so the diagnostic is not lost when the process dies.
    print(message, flush=True)
    os.abort()


def _parse_var(s, bq, stack):
    """Parse the part of a CTLVAR sequence that follows the marker byte."""
    t = s.pop()

    # let var_name,s = split_at (fun c -> c = '=') s in
    name_chars = []
    while s and s[-1] != ORD_EQUALS:
        name_chars.append(chr(s.pop()))
    var_name = "".join(name_chars)

    vstype = t & 0xf
    at_equals = bool(s) and s[-1] == ORD_EQUALS

    if vstype == 0x1 and at_equals:
        s.pop()
        return ["V", ["Normal", False, var_name, []]]

    if vstype == 0xa and len(s) >= 2 and at_equals and s[-2] == CTLENDVAR:
        s.pop()
        s.pop()
        return ["V", ["Length", False, var_name, []]]

    if vstype in (0x1, 0xa) and s:
        _abort_parse("Missing CTLENDVAR for VSNORMAL/VSLENGTH")

    if at_equals:
        s.pop()
        stack.append(STACK_CTLVAR)
        a = _parse_arg(s, bq, stack)
        return ["V", [var_type(vstype), (t & 0x10) == 0x10, var_name, a]]

    if s:
        _abort_parse("Expected '=' terminating variable name", s, stack)

    _abort_parse("Expected '=' terminating variable name, found EOF")


def _parse_arg(s, bq, stack):
    """Worker for ``parse_arg``; ``bq`` is a one-element list holding the
    current back-quote node so that nested scopes advance the same cursor."""
    acc = []

    while True:
        if not s:
            # | [],[] -> [],[],bqlist,[]
            if not stack:
                return acc

            top = stack[-1]
            if top == STACK_CTLVAR:
                _abort_parse("End of string before CTLENDVAR")
            elif top == STACK_CTLARI:
                _abort_parse("End of string before CTLENDARI", s, stack)
            elif top == STACK_CTLQUO:
                _abort_parse("End of string before CTLENDQUOTEMARK", s, stack)
            else:
                _abort_parse("Invalid stack")

        c = s[-1]
        has_operand = len(s) >= 2
        top = stack[-1] if stack else None

        # (* CTLESC *)
        if has_operand and c == CTLESC:
            s.pop()
            acc.append(["E", s.pop()])

        # (* CTLVAR *)
        elif has_operand and c == CTLVAR:
            s.pop()
            acc.append(_parse_var(s, bq, stack))

        # (* CTLENDVAR *)
        elif c == CTLENDVAR:
            if top == STACK_CTLVAR:
                s.pop()
                stack.pop()
                return acc
            elif top == STACK_CTLARI:
                _abort_parse("Saw CTLENDVAR before CTLENDARI")
            elif top == STACK_CTLQUO:
                _abort_parse("Saw CTLENDVAR before CTLQUOTEMARK")
            else:
                _abort_parse("Saw CTLENDVAR outside of CTLVAR")

        # (* CTLBACKQ *)
        elif c == CTLBACKQ:
            s.pop()

            bqlist = bq[0]
            if not bqlist:
                _abort_parse("Saw CTLBACKQ but bqlist was null", bqlist)

            cell = bqlist.contents
            acc.append(["B", of_node(cell.n)])
            bq[0] = cell.next  # visible to enclosing scopes as well

        # (* CTLARI *)
        elif c == CTLARI:
            s.pop()
            stack.append(STACK_CTLARI)
            acc.append(["A", _parse_arg(s, bq, stack)])

        # (* CTLENDARI *)
        elif c == CTLENDARI:
            if top == STACK_CTLARI:
                s.pop()
                stack.pop()
                return acc
            elif top == STACK_CTLVAR:
                _abort_parse("Saw CTLENDARI before CTLENDVAR")
            elif top == STACK_CTLQUO:
                _abort_parse("Saw CTLENDARI before CTLQUOTEMARK")
            else:
                _abort_parse("Saw CTLENDARI outside of CTLARI")

        # (* CTLQUOTEMARK *) -- closes a quote if one is open, else opens one
        elif c == CTLQUOTEMARK:
            s.pop()
            if top == STACK_CTLQUO:
                stack.pop()
                return acc
            stack.append(STACK_CTLQUO)
            acc.append(["Q", _parse_arg(s, bq, stack)])

        # (* tildes *)
        elif c == ORD_TILDE:
            s.pop()
            if (STACK_CTLQUO in stack) or (STACK_CTLARI in stack):
                acc.append(["C", ORD_TILDE])
            else:
                acc.append(["T", parse_tilde(s)])

        # (* ordinary character *)
        else:
            acc.append(["C", s.pop()])


def stringOrNull(acc_str):
    """Return the string ``"None"`` for an empty string, else ``["Some", acc_str]``."""
    if acc_str == "":
        return "None"
    else:
        return ["Some", acc_str]


def parse_tilde(s):
    """Consume the user-name part of a tilde prefix from ``s``.

    Stops without consuming at CTLENDVAR, CTLENDARI, ``:`` or ``/`` and at
    end of input. Returns ``"None"`` outright if one of the other control
    markers is met.
    """
    name_chars = []

    while True:
        if s == []:
            return stringOrNull("".join(name_chars))

        s_last = s[-1]

        if s_last in [CTLESC, CTLVAR, CTLQUOTEMARK, CTLBACKQ, CTLARI]:
            return "None"
        elif s_last in [CTLENDVAR, CTLENDARI, ORD_COLON, ORD_SLASH]:
            return stringOrNull("".join(name_chars))
        else:
            name_chars.append(chr(s.pop()))
def to_assign(a_rev):
    """Split a reversed argument at its first ``=`` into ``(name, value)``.

    ``a_rev`` holds the argument's characters last-first and is consumed in
    place. Everything before the ``=`` must be a plain ``["C", code]``
    character. The returned value is the rest of the list, back in forward
    order. A special character in the name, or no ``=`` at all, prints a
    message and aborts the process.
    """
    name_chars = []

    while len(a_rev) > 0:
        head = a_rev[-1]

        if head[0] != 'C':
            print("Unexpected special character in assignment")
            sys.stdout.flush()
            os.abort()

        a_rev.pop()

        if head[1] == ORD_EQUALS:
            a_rev.reverse()
            return ("".join(name_chars), a_rev)

        name_chars.append(chr(head[1]))

    print("Never found an '=' sign in assignment")
    os.abort()


def _require_narg(node):
    """Abort with a diagnostic unless ``node`` is tagged ``NARG``."""
    if node.type != NARG:
        # str() is required: the tag is an int, and concatenating it
        # directly raised TypeError before the message could be printed.
        print("Unexpected type: " + str(node.type))
        sys.stdout.flush()
        os.abort()


# Inlined to_args
# to_assigns n = List.map (to_assign []) (to_args n)
def to_assigns(n):
    """Convert a chain of ``narg`` nodes into ``(name, value)`` assignments."""
    assigns = []

    while n:
        node = n.contents
        _require_narg(node)

        arg = to_arg(node.narg)

        arg.reverse()  # to_assign consumes from the end
        assigns.append(to_assign(arg))

        n = node.narg.next

    return assigns


# to_assigns n = List.map (to_assign []) (to_args n)
def to_assigns_classic(n):
    """Same result as ``to_assigns``, built on top of ``to_args``."""
    assigns = []

    for a in to_args(n):
        a.reverse()
        assigns.append(to_assign(a))

    return assigns


def to_args(n):
    """Convert a chain of ``narg`` nodes into a list of parsed arguments."""
    snek = []

    # ctypes has different semantics for POINTER vs. c_void_p
    # See https://groups.google.com/g/nzpug/c/5CJxaWjuQro
    while n:
        node = n.contents
        _require_narg(node)

        snek.append(to_arg(node.narg))

        n = node.narg.next

    return snek
LIBDASH_LIBRARY_PATH = None


def libdash_library_path():
    """Return the path of ``libdash.so`` next to this module.

    The path is computed on the first call and cached in
    ``LIBDASH_LIBRARY_PATH``.
    """
    global LIBDASH_LIBRARY_PATH

    if LIBDASH_LIBRARY_PATH is None:
        here = os.path.dirname(os.path.realpath(os.path.abspath(__file__)))
        LIBDASH_LIBRARY_PATH = os.path.join(here, "libdash.so")

    return LIBDASH_LIBRARY_PATH


EOF_NLEFT = -99  # libdash/src/input.c


class ParsingException(Exception):
    """Raised by ``parse`` when the parser returns its error sentinel."""

    def __init__(self, message='ParseError'):
        # Call the base class constructor with the parameters it needs
        super(ParsingException, self).__init__(message)


# This is a mix of dash.ml:parse_next and parse_to_json.ml.
def parse(inputPath, init=True):
    """
    Parses the file at `inputPath` to an AST.

    `init` determines whether libdash should be initialized; set it to `False` after the first call.

    This is a generator. Each item is a tuple
    `(ast, text, linno_before, linno_after)`, where `text` is the source
    text of the command (`None` when reading from stdin, i.e. `inputPath`
    is "-") and the line numbers are 0-based. Raises `ParsingException`
    when the parser reports an error.
    """
    lines = []

    libdash = CDLL(libdash_library_path())

    if init:
        initialize(libdash)

    if inputPath == "-":
        setinputtostdin(libdash)
    else:
        setinputfile(libdash, inputPath)

        # Keep our own copy of the text so each command can be paired with
        # its source lines. `with` closes the file even if reading fails.
        with open(inputPath, 'r') as fp:
            lines = fp.readlines()

    # struct parsefile *parsefile = &basepf;  /* current input file */
    # Get the value of parsefile (not &parsefile)!
    parsefile_ptr_ptr = addressof(parsefile.in_dll(libdash, "parsefile"))
    parsefile_ptr = cast(parsefile_ptr_ptr, POINTER(POINTER(parsefile)))
    parsefile_var = parsefile_ptr.contents

    smark = init_stack(libdash)

    # The addresses of these two library variables are the values that
    # parsecmd_safe's result is compared against for end-of-input and error.
    NEOF = addressof(c_int.in_dll(libdash, "tokpushback"))
    NERR = addressof(c_int.in_dll(libdash, "lasttoken"))

    while True:
        linno_before = parsefile_var.contents.linno - 1  # libdash is 1-indexed

        n_ptr_C = parsecmd_safe(libdash, False)

        linno_after = parsefile_var.contents.linno - 1  # libdash is 1-indexed
        nleft_after = parsefile_var.contents.nleft

        if n_ptr_C is None:  # Dash.Null
            continue
        if n_ptr_C == NEOF:  # Dash.Done
            break
        if n_ptr_C == NERR:  # Dash.Error
            raise ParsingException()

        if nleft_after == EOF_NLEFT:
            linno_after = linno_after + 1  # The last line wasn't counted

            if inputPath != "-":
                ## Both of these assertions check "our" assumption with respect to the final parser state
                ## and are therefore not necessary if they become an issue.
                assert (linno_after == len(lines)) or (linno_after == len(lines) + 1)

                # Last line did not have a newline
                assert len(lines[-1]) > 0 and (lines[-1][-1] != '\n')
        elif nleft_after != 0:
            # we formerly asserted that `nleft_after != 0`, but this no longer holds
            linno_after = linno_after + 1  # The last line wasn't counted

        n_ptr = cast(n_ptr_C, POINTER(union_node))
        new_ast = of_node(n_ptr)

        if inputPath != "-":
            parsedLines = "".join(lines[linno_before:linno_after])
        else:
            ## When parsing from stdin there is no way to save the lines
            parsedLines = None

        yield (new_ast, parsedLines, linno_before, linno_after)

    pop_stack(libdash, smark)
STRING_OF_VAR_TYPE_DICT = {
    "Normal": "",
    "Minus": "-",
    "Plus": "+",
    "Question": "?",
    "Assign": "=",
    "TrimR": "%",
    "TrimRMax": "%%",
    "TrimL": "#",
    "TrimLMax": "##",
    "Length": "#"
}

UNQUOTED = 0  # everything escaped
QUOTED = 1    # only escape special characters
HEREDOC = 2   # like QUOTED, but _don't_ escape double quotes
QUOTE_MODES = [UNQUOTED, QUOTED, HEREDOC]


# dash.ml
#
# let rec intercalate p ss =
#   match ss with
#   | [] -> ""
#   | [s] -> s
#   | s::ss -> s ^ p ^ intercalate p ss
def intercalate(p, ss):
    """Join the strings in ``ss`` with separator ``p``."""
    return p.join(ss)


# let braces s = "{ " ^ s ^ " ; }"
def braces(s):
    """Wrap ``s`` as a brace group: ``{ s ; }``."""
    return "{ " + s + " ; }"


# let parens s = "( " ^ s ^ " )"
def parens(s):
    """Wrap ``s`` in parentheses: ``( s )``."""
    return "( " + s + " )"


# let string_of_var_type = function
#   | Normal -> ""    | Minus -> "-"     | Plus -> "+"  | Question -> "?"
#   | Assign -> "="   | TrimR -> "%"     | TrimRMax -> "%%"
#   | TrimL -> "#"    | TrimLMax -> "##" | Length -> "#"
def string_of_var_type(var_type):
    """Return the operator text for a variable-substitution type name.

    An unknown name terminates the interpreter with exit status 1.
    """
    if var_type in STRING_OF_VAR_TYPE_DICT:
        return STRING_OF_VAR_TYPE_DICT[var_type]

    exit(1)


# let separated f l = intercalate " " (List.map f l)
def separated(f, l):
    """Apply ``f`` to each element of ``l`` and join the results with spaces."""
    return " ".join(map(f, l))


# let show_unless expected actual =
#   if expected = actual then "" else string_of_int actual
def show_unless(expected, actual):
    """Return ``str(actual)``, or ``""`` when it equals ``expected``."""
    if expected == actual:
        return ""
    else:
        return str(actual)


# let background s = "{ " ^ s ^ " & }"
def background(s):
    """Wrap ``s`` as a backgrounded brace group: ``{ s & }``."""
    return "{ " + s + " & }"


def to_string(ast):
    """
    Renders an AST back in shell syntax.

    ``ast`` is a ``[type, params]`` pair. An empty ``ast`` yields ``None``.
    And, Or and Semi wrap both operands in brace groups, and Semi separates
    them with a newline. An unknown node type prints a diagnostic and
    aborts the process.
    """
    if len(ast) == 0:
        return None

    (kind, params) = ast

    if kind == "Command":
        (_, assigns, cmds, redirs) = params

        text = separated(string_of_assign, assigns)
        # Only separate assignments from words when both are present.
        if len(assigns) != 0 and len(cmds) != 0:
            text += " "
        text += separated(string_of_arg, cmds) + string_of_redirs(redirs)

        return text

    elif kind == "Pipe":
        (bg, ps) = params
        p = intercalate(" | ", map(to_string, ps))

        return background(p) if bg else p

    elif kind == "Redir":
        (_, a, redirs) = params

        return to_string(a) + string_of_redirs(redirs)

    elif kind == "Background":
        # `cmds &` is printed as `{ cmds & }`.
        (_, a, redirs) = params

        return background(to_string(a) + string_of_redirs(redirs))

    elif kind == "Subshell":
        (_, a, redirs) = params

        return parens(to_string(a) + string_of_redirs(redirs))

    elif kind == "And":
        (a1, a2) = params

        return braces(to_string(a1)) + " && " + braces(to_string(a2))

    elif kind == "Or":
        (a1, a2) = params

        return braces(to_string(a1)) + " || " + braces(to_string(a2))

    elif kind == "Not":
        return "! " + braces(to_string(params))

    elif kind == "Semi":
        (a1, a2) = params

        return braces(to_string(a1)) + " \n " + braces(to_string(a2))

    elif kind == "If":
        (c, t, e) = params

        return string_of_if(c, t, e)

    elif kind == "While":
        (first, b) = params

        # A negated condition is printed as `until`.
        if first[0] == "Not":
            (_, t) = first

            return "until " + to_string(t) + "; do " + to_string(b) + "; done "
        else:
            return "while " + to_string(first) + "; do " + to_string(b) + "; done "

    elif kind == "For":
        (_, a, body, var) = params

        return "for " + var + " in " + separated(string_of_arg, a) + "; do " + \
               to_string(body) + "; done"

    elif kind == "Case":
        (_, a, cs) = params

        return "case " + string_of_arg(a) + " in " + \
               separated(string_of_case, cs) + " esac"

    elif kind == "Defun":
        (_, name, body) = params

        return name + "() {\n" + to_string(body) + "\n}"

    else:
        # `abort` alone is not defined in this module; only `os` is imported.
        print("Invalid type: %s" % kind, flush=True)
        os.abort()


# and string_of_if c t e =
#   "if " ^ to_string c ^
#   "; then " ^ to_string t ^
#   (match e with
#    | Command (-1,[],[],[]) -> "; fi" (* one-armed if *)
#    | If (c,t,e) -> "; el" ^ string_of_if c t e
#    | _ -> "; else " ^ to_string e ^ "; fi")
def string_of_if(c, t, e):
    """Render an ``if`` with condition ``c``, then-branch ``t`` and else-branch ``e``.

    The placeholder ``['Command', [-1, [], [], []]]`` as ``e`` means there
    is no else branch; a nested ``If`` is printed as ``elif``.
    """
    text = "if " + to_string(c) + "; then " + to_string(t)

    no_else = (len(e) == 2
               and e[0] == "Command"
               and len(e[1]) == 4
               and e[1][0] == -1
               and len(e[1][1]) == 0
               and len(e[1][2]) == 0
               and len(e[1][3]) == 0)

    if no_else:
        text += "; fi"
    elif e[0] == "If" and len(e[1]) == 3:
        (c2, t2, e2) = e[1]

        text += "; el" + string_of_if(c2, t2, e2)
    else:
        text += "; else " + to_string(e) + "; fi"

    return text
'~' as c -> +# let s = bytes_create 1 in +# bytes_unsafe_set s 0 c; +# unsafe_to_string s +# | c -> +# let n = code c in +# let s = bytes_create 4 in +# bytes_unsafe_set s 0 '\\'; +# bytes_unsafe_set s 1 (unsafe_chr (48 + n / 100)); +# bytes_unsafe_set s 2 (unsafe_chr (48 + (n / 10) mod 10)); +# bytes_unsafe_set s 3 (unsafe_chr (48 + n mod 10)); +# unsafe_to_string s +def escaped (param): + char = chr (param) + + if (char == "'"): + return "\\'"; + elif (char == "\\"): + return "\\\\"; + elif (char == "\n"): + return "\\n"; + elif (char == "\t"): + return "\\t"; + elif (char == "\r"): + return "\\r"; + elif (char == "\b"): + return "\\b"; + elif ((param >= ord (' ')) and (param <= ord ('~'))): + return char; + else: +# str1 = "\\" \ +# + chr (48 + int (param / 100)) \ +# + chr (48 + ((int (param / 10)) % 10)) \ +# + chr (48 + (param % 10)); + return ("\\" + str (param)); + + +# and string_of_arg_char = function +# | E '\'' -> "\\'" +# | E '\"' -> "\\\"" +# | E '(' -> "\\(" +# | E ')' -> "\\)" +# | E '{' -> "\\{" +# | E '}' -> "\\}" +# | E '$' -> "\\$" +# | E '!' -> "\\!" +# | E '&' -> "\\&" +# | E '|' -> "\\|" +# | E ';' -> "\\;" +# | C c -> String.make 1 c +# | E c -> Char.escaped c +# | T None -> "~" +# | T (Some u) -> "~" ^ u +# | A a -> "$((" ^ string_of_arg a ^ "))" +# | V (Length,_,name,_) -> "${#" ^ name ^ "}" +# | V (vt,nul,name,a) -> +# "${" ^ name ^ (if nul then ":" else "") ^ string_of_var_type vt ^ string_of_arg a ^ "}" +# | Q a -> "\"" ^ string_of_arg a ^ "\"" +# | B t -> "$(" ^ to_string t ^ ")" +def string_of_arg_char (c, quote_mode=UNQUOTED): + (type, param) = c; + + if (type == "E"): + char = chr (param); + + ## MMG 2021-09-20 It might be safe to move everything except for " in the second list, but no need to do it if the tests pass + ## '!' 
dropped for bash non-interactive bash compatibility + ## Chars to escape unconditionally + chars_to_escape = ["'", '"', '`', '(', ')', '{', '}', '$', '&', '|', ';'] + ## Chars to escape only when not quoted + chars_to_escape_when_no_quotes = ['*', '?', '[', ']', '#', '<', '>', '~', ' '] + if char in chars_to_escape: + return '\\' + char + elif char in chars_to_escape_when_no_quotes and quote_mode==UNQUOTED: + return '\\' + char + else: + return escaped (param) + elif (type == "C"): + # HEREDOC should never escape double quotes per POSIX 2.7.4 + if quote_mode==QUOTED and chr(param) == '"': + return '\\"' + else: + return chr (param); + elif (type == "T"): + if (param == "None"): + return "~"; + elif (len (param) == 2): + if (param [0] == "Some"): + (_, u) = param; + + return "~" + u; + else: + abort (); + else: + print ("Unexpected param for T: %s" % param); + abort (); + elif (type == "A"): + return "$((" + string_of_arg (param, quote_mode) + "))"; + elif (type == "V"): + assert (len (param) == 4); + if (param [0] == "Length"): + (_, _, name, _) = param; + return "${#" + name + "}"; + else: + (vt, nul, name, a) = param; + + stri = "${" + name; + + # Depending on who generated the JSON, nul may be + # a string or a boolean! In Python, non-empty strings + # to True. 
+ if (str (nul).lower () == "true"): + stri += ":"; + elif (str (nul).lower () == "false"): + pass; + else: + os.abort (); # For my own sanity + + stri += string_of_var_type (vt) + string_of_arg (a, quote_mode) + "}"; + + return stri; + elif (type == "Q"): + return "\"" + string_of_arg (param, quote_mode=QUOTED) + "\""; + elif (type == "B"): + body = to_string (param) + # to handle $( () ) + try: + if body[0] == "(" and body[-1] == ")": + body = f" {body} " + except IndexError: + pass + return "$(" + body + ")" + else: + abort (); + + +# and string_of_arg = function +# | [] -> "" +# | c :: a -> string_of_arg_char c ^ string_of_arg a +def string_of_arg (args, quote_mode=UNQUOTED): + # print (args); + + i = 0 + text = [] + while i < len(args): + c = string_of_arg_char(args[i], quote_mode=quote_mode) + if c == "$" and (i+1 < len(args)): + c = "\\$" + text.append(c) + + i = i+1 + + text = "".join(text) + + return (text); + + +# and string_of_assign (v,a) = v ^ "=" ^ string_of_arg a +def string_of_assign (both): + (v, a) = both; + return v + "=" + string_of_arg (a); + + +# and string_of_case c = +# let pats = List.map string_of_arg c.cpattern in +# intercalate "|" pats ^ ") " ^ to_string c.cbody ^ ";;" +def string_of_case (c): + pats = map (string_of_arg, c ['cpattern']); + + return intercalate ("|", pats) + ") " + to_string (c ['cbody']) + ";;"; + + + +# let rec fresh_marker ls s = +# if List.mem s ls +# then fresh_marker ls (s ^ (String.sub s (String.length s - 1) 1)) +# else s +# +# OCaml implementation above is O(n^1.5). Algorithm below is linear. 
+def fresh_marker (heredoc): + respectsFound = set(); + + for line in heredoc.split ('\n'): + respects = 0; + + if ((len (line) > 2) and (line [0] == 'E') and (line [1] == 'O')): + for i in range (2, len (line)): + if (line [i] == 'F'): + respects = i - 2; + + respectsFound.add(respects); + + i = 0; + while (True): + if (not (i in respectsFound)): + return "EOF" + ("F" * i); + + i = i + 1; + + +# This version may give an unnecessarily long EOFFFF... (and therefore won't +# match the OCaml output but it is still correct w.r.t. giving a fresh +# marker, and uses less memory than fresh_marker above). +def fresh_marker0 (heredoc): + maxRespects = 0; + + for line in heredoc.split ('\n'): + respects = 0; + + if ((len (line) > 2) and (line [0] == 'E') and (line [1] == 'O')): + for i in range (2, len (line)): + if (line [i] == 'F'): + respects = i - 1; + + maxRespects = max (maxRespects, respects); + + return "EOF" + ("F" * maxRespects); + + +# and string_of_redir = function +# | File (To,fd,a) -> show_unless 1 fd ^ ">" ^ string_of_arg a +# | File (Clobber,fd,a) -> show_unless 1 fd ^ ">|" ^ string_of_arg a +# | File (From,fd,a) -> show_unless 0 fd ^ "<" ^ string_of_arg a +# | File (FromTo,fd,a) -> show_unless 0 fd ^ "<>" ^ string_of_arg a +# | File (Append,fd,a) -> show_unless 1 fd ^ ">>" ^ string_of_arg a + +# | Dup (ToFD,fd,tgt) -> show_unless 1 fd ^ ">&" ^ string_of_arg tgt +# | Dup (FromFD,fd,tgt) -> show_unless 0 fd ^ "<&" ^ string_of_arg tgt +# | Heredoc (t,fd,a) -> +# let heredoc = string_of_arg a in +# let marker = fresh_marker (lines heredoc) "EOF" in +# show_unless 0 fd ^ "<<" ^ +# (if t = XHere then marker else "'" ^ marker ^ "'") ^ "\n" ^ heredoc ^ marker ^ "\n" +def string_of_redir (redir): + assert (len (redir) == 2); + + (type, params) = redir; + if (type == "File"): + (subtype, fd, a) = params; + if (subtype == "To"): + return (show_unless (1, fd) + ">" + string_of_arg (a)); + elif (subtype == "Clobber"): + return (show_unless (1, fd) + ">|" + string_of_arg 
(a)); + elif (subtype == "From"): + return (show_unless (0, fd) + "<" + string_of_arg (a)); + elif (subtype == "FromTo"): + return (show_unless (0, fd) + "<>" + string_of_arg (a)); + elif (subtype == "Append"): + return (show_unless (1, fd) + ">>" + string_of_arg (a)); + else: + abort (); + elif (type == "Dup"): + (subtype, fd, tgt) = params; + + if (subtype == "ToFD"): + return (show_unless (1, fd) + ">&" + string_of_arg (tgt)); + elif (subtype == "FromFD"): + return (show_unless (0, fd) + "<&" + string_of_arg (tgt)); + else: + abort (); + elif (type == "Heredoc"): + (t, fd, a) = params; + + # MMG 2022-08-23 not quite correct + heredoc = string_of_arg (a, quote_mode=HEREDOC); + marker = fresh_marker0 (heredoc); + + stri = show_unless (0, fd) + "<<"; + if (t == "XHere"): + stri += marker; + else: + stri += "'" + marker + "'"; + + stri += "\n" + heredoc + marker + "\n"; + + return (stri); + else: + print ("Invalid type: %s" % type); + abort (); + + +# and string_of_redirs rs = +# let ss = List.map string_of_redir rs in +# (if List.length ss > 0 then " " else "") ^ intercalate " " ss +def string_of_redirs (rs): +# if (rs == []): +# return ""; +# +# ss = map (string_of_redir, rs); +# +# return intercalate (" ", ss); + + str = ""; + + for redir in rs: + str = str + " " + string_of_redir (redir); + + return (str); diff --git a/mk_dot_install.sh b/mk_dot_install.sh new file mode 100755 index 0000000..d76851d --- /dev/null +++ b/mk_dot_install.sh @@ -0,0 +1,25 @@ +#!/bin/sh + +set -e + +libdash_files=$(ls _build/lib) +bindings_files="META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx" + +lib_files= +for f in ${libdash_files} +do + lib_files="${lib_files} \"_build/lib/${f}\"" +done + +for f in ${bindings_files} +do + lib_files="${lib_files} \"ocaml/${f}\"" +done + +bin_files="\"ocaml/shell_to_json\" \"ocaml/json_to_shell\"" + +cat >libdash.install </dev/null 2>&1 && type shell_to_json >/dev/null 2>&1 || $(MAKE) json_to_shell 
shell_to_json + @find ../test/tests ../test/pash_tests -type f | while read f; do ../test/round_trip.sh ./rt.sh "$$f"; done | tee ocaml.log + @cat ocaml.log | egrep '^[A-Z0-9_]+:' | cut -d ':' -f 1 | sort | uniq -c + @grep ':' ocaml.log && echo "FAILED" && exit 1 || exit 0 + +clean : + rm -f *.o *.cmo *.cmi *.cmx dash.a dash.cma dash.cmxa diff --git a/ocaml/ast.ml b/ocaml/ast.ml new file mode 100644 index 0000000..3abdfb2 --- /dev/null +++ b/ocaml/ast.ml @@ -0,0 +1,505 @@ +type linno = int + +exception ParseException of string + +type t = + | Command of (linno * assign list * args * redirection list) (* assign, args, redir *) + | Pipe of (bool * t list) (* background?, commands *) + | Redir of (linno * t * redirection list) + | Background of (linno * t * redirection list) + | Subshell of (linno * t * redirection list) + | And of (t * t) + | Or of (t * t) + | Not of (t) + | Semi of (t * t) + | If of (t * t * t) (* cond, then, else *) + | While of (t * t) (* test, body *) (* until encoded as a While . Not *) + | For of (linno * arg list * t * string) (* args, body, var *) + | Case of (linno * arg * case list) + | Defun of (linno * string * t) (* name, body *) + and assign = string * arg + and redirection = + | File of (redir_type * int * arg) + | Dup of (dup_type * int * arg) + | Heredoc of (heredoc_type * int * arg) + and redir_type = To | Clobber | From | FromTo | Append + and dup_type = ToFD | FromFD + and heredoc_type = Here | XHere (* for when in a quote... not sure when this comes up *) + and args = arg list + and arg = arg_char list + and arg_char = + | C of char + | E of char (* escape... necessary for expansion *) + | T of string option (* tilde *) + | A of arg (* arith *) + | V of (var_type * bool (* VSNUL? 
*) * string * arg) + | Q of arg (* quoted *) + | B of t (* backquote *) + and var_type = + | Normal + | Minus + | Plus + | Question + | Assign + | TrimR + | TrimRMax + | TrimL + | TrimLMax + | Length + and case = { cpattern : arg list; cbody : t } + +let var_type = function + | 0x0 -> (* VSNORMAL ${var} *) Normal + | 0x2 -> (* VSMINUS ${var-text} *) Minus + | 0x3 -> (* VSPLUS ${var+text} *) Plus + | 0x4 -> (* VSQUESTION ${var?message} *) Question + | 0x5 -> (* VSASSIGN ${var=text} *) Assign + | 0x6 -> (* VSTRIMRIGHT ${var%pattern} *) TrimR + | 0x7 -> (* VSTRIMRIGHTMAX ${var%%pattern} *) TrimRMax + | 0x8 -> (* VSTRIMLEFT ${var#pattern} *) TrimL + | 0x9 -> (* VSTRIMLEFTMAX ${var##pattern} *) TrimLMax + | 0xa -> (* VSLENGTH ${#var}) *) Length + | vs -> failwith ("Unknown VSTYPE: " ^ string_of_int vs) + +let string_of_var_type = function + | Normal -> "" + | Minus -> "-" + | Plus -> "+" + | Question -> "?" + | Assign -> "=" + | TrimR -> "%" + | TrimRMax -> "%%" + | TrimL -> "#" + | TrimLMax -> "##" + | Length -> "#" + +(* Some possible further simplifications: + + * Drop bool from pipe + dash *always* forks for a pipe, but sometimes it waits + * Drop redirection from Command, etc. + Just use Redir... though this may affect subshell behavior. 
+ NCMD: expredir, pushredir, redirectsafe REDIR_PUSH|REDIR_SAVEFD2 + NREDIR: expredir, pushredir, redirectsafe REDIR_PUSH + NBACKGND: expredir, redirect 0 +*) + + +open Ctypes +open Dash + +let rec last = function + | [] -> None + | [x] -> Some x + | _::xs -> last xs + +let skip = Command (-1,[],[],[]) + +type quote_mode = + QUnquoted + | QQuoted + | QHeredoc + +let rec of_node (n : node union ptr) : t = + if nullptr n + then skip + else + match (n @-> node_type) with + (* NCMD *) + | 0 -> + let n = n @-> node_ncmd in + Command (getf n ncmd_linno, + to_assigns (getf n ncmd_assign), + to_args (getf n ncmd_args), + redirs (getf n ncmd_redirect)) + (* NPIPE *) + | 1 -> + let n = n @-> node_npipe in + Pipe (getf n npipe_backgnd <> 0, + List.map of_node (nodelist (getf n npipe_cmdlist))) + (* NREDIR *) + | 2 -> let (ty,fd,arg) = of_nredir n in Redir (ty,fd,arg) + (* NBACKGND *) + | 3 -> let (ty,fd,arg) = of_nredir n in Background (ty,fd,arg) + (* NSUBSHELL *) + | 4 -> let (ty,fd,arg) = of_nredir n in Subshell (ty,fd,arg) + (* NAND *) + | 5 -> let (l,r) = of_binary n in And (l,r) + (* NOR *) + | 6 -> let (l,r) = of_binary n in Or (l,r) + (* NSEMI *) + | 7 -> let (l,r) = of_binary n in Semi (l,r) + (* NIF *) + | 8 -> + let n = n @-> node_nif in + If (of_node (getf n nif_test), + of_node (getf n nif_ifpart), + of_node (getf n nif_elsepart)) + (* NWHILE *) + | 9 -> let (t,b) = of_binary n in While (t,b) + (* NUNTIL *) + | 10 -> let (t,b) = of_binary n in While (Not t,b) + (* NFOR *) + | 11 -> + let n = n @-> node_nfor in + For (getf n nfor_linno, + to_args (getf n nfor_args), + of_node (getf n nfor_body), + getf n nfor_var) + (* NCASE *) + | 12 -> + let n = n @-> node_ncase in + Case (getf n ncase_linno, + to_arg (getf n ncase_expr @-> node_narg), + List.map + (fun (pattern,body) -> + { cpattern = to_args pattern; + cbody = of_node body}) + (caselist (getf n ncase_cases))) + (* NDEFUN *) + | 14 -> + let n = n @-> node_ndefun in + Defun (getf n ndefun_linno, + getf n 
ndefun_text, + of_node (getf n ndefun_body)) + (* NNOT *) + | 25 -> Not (of_node (getf (n @-> node_nnot) nnot_com)) + | nt -> failwith ("Unexpected top level node_type " ^ string_of_int nt) + +and of_nredir (n : node union ptr) = + let n = n @-> node_nredir in + (getf n nredir_linno, of_node (getf n nredir_n), redirs (getf n nredir_redirect)) + +and redirs (n : node union ptr) = + if nullptr n + then [] + else + let mk_file ty = + let n = n @-> node_nfile in + File (ty,getf n nfile_fd,to_arg (getf n nfile_fname @-> node_narg)) in + let mk_dup ty = + let n = n @-> node_ndup in + let vname = getf n ndup_vname in + let tgt = + if nullptr vname + then let dupfd = getf n ndup_dupfd in + if dupfd = -1 + then [C '-'] + else List.map (fun c -> C c) (explode (string_of_int dupfd)) + else to_arg (vname @-> node_narg) + in + Dup (ty,getf n ndup_fd,tgt) in + let mk_here ty = + let n = n @-> node_nhere in + Heredoc (ty,getf n nhere_fd,to_arg (getf n nhere_doc @-> node_narg)) in + let h = match n @-> node_type with + (* NTO *) + | 16 -> mk_file To + (* NCLOBBER *) + | 17 -> mk_file Clobber + (* NFROM *) + | 18 -> mk_file From + (* NFROMTO *) + | 19 -> mk_file FromTo + (* NAPPEND *) + | 20 -> mk_file Append + (* NTOFD *) + | 21 -> mk_dup ToFD + (* NFROMFD *) + | 22 -> mk_dup FromFD + (* NHERE quoted heredoc---no expansion)*) + | 23 -> mk_here Here + (* NXHERE unquoted heredoc (param/command/arith expansion) *) + | 24 -> mk_here XHere + | nt -> failwith ("unexpected node_type in redirlist: " ^ string_of_int nt) + in + h :: redirs (getf (n @-> node_nfile) nfile_next) + +and of_binary (n : node union ptr) = + let n = n @-> node_nbinary in + (of_node (getf n nbinary_ch1), of_node (getf n nbinary_ch2)) + +and to_arg (n : narg structure) : arg = + let a,s,bqlist,stack = parse_arg ~assign:false (explode (getf n narg_text)) (getf n narg_backquote) [] in + (* we should have used up the string and have no backquotes left in our list *) + assert (s = []); + assert (nullptr bqlist); + assert 
(stack = []); + a + +and parse_arg ?tilde_ok:(tilde_ok=false) ~assign:(assign:bool) (s : char list) (bqlist : nodelist structure ptr) stack = + match s,stack with + | [],[] -> [],[],bqlist,[] + | [],`CTLVar::_ -> failwith "End of string before CTLENDVAR" + | [],`CTLAri::_ -> failwith "End of string before CTLENDARI" + | [],`CTLQuo::_ -> failwith "End of string before CTLQUOTEMARK" + (* CTLESC *) + | '\129'::c::s,_ -> arg_char assign (E c) s bqlist stack + (* CTLVAR *) + | '\130'::t::s,_ -> + let var_name,s = split_at (fun c -> c = '=') s in + let t = int_of_char t in + let v,s,bqlist,stack = match t land 0x0f, s with + (* VSNORMAL and VSLENGTH get special treatment + + neither ever gets VSNUL + VSNORMAL is terminated just with the =, without a CTLENDVAR *) + (* VSNORMAL *) + | 0x1,'='::s -> + V (Normal,false,implode var_name,[]),s,bqlist,stack + (* VSLENGTH *) + | 0xa,'='::'\131'::s -> + V (Length,false,implode var_name,[]),s,bqlist,stack + | 0x1,c::_ | 0xa,c::_ -> + failwith ("Missing CTLENDVAR for VSNORMAL/VSLENGTH, found " ^ Char.escaped c) + (* every other VSTYPE takes mods before CTLENDVAR *) + | vstype,'='::s -> + let a,s,bqlist,stack' = parse_arg ~tilde_ok:true ~assign s bqlist (`CTLVar::stack) in + V (var_type vstype,t land 0x10 = 0x10,implode var_name,a), s, bqlist, stack' + | _,c::_ -> failwith ("Expected '=' terminating variable name, found " ^ Char.escaped c) + | _,[] -> failwith "Expected '=' terminating variable name, found EOF" + in + arg_char assign v s bqlist stack + | '\130'::s, _ -> + (* original behavior *) + (* raise (ParseException "bad substitution (missing variable name in ${}?") *) + (* ignoring malformed stuff (e.g., from arrays) to behave the same as pash's python bindings *) + let a,s,bqlist,stack = parse_arg ~assign s bqlist stack in + (C '\194'::C '\130'::a,s,bqlist,stack) + + (* CTLENDVAR *) + | '\131'::s,`CTLVar::stack' -> [],s,bqlist,stack' + | '\131'::_,`CTLAri::_ -> failwith "Saw CTLENDVAR before CTLENDARI" + | 
'\131'::_,`CTLQuo::_ -> failwith "Saw CTLENDVAR before CTLQUOTEMARK" + | '\131'::_,[] -> failwith "Saw CTLENDVAR outside of CTLVAR" + (* CTLBACKQ *) + | '\132'::s,_ -> + if nullptr bqlist + then failwith "Saw CTLBACKQ but bqlist was null" + else arg_char assign (B (of_node (bqlist @-> nodelist_n))) s (bqlist @-> nodelist_next) stack + (* CTLARI *) + | '\134'::s,_ -> + let a,s,bqlist,stack' = parse_arg ~assign s bqlist (`CTLAri::stack) in + assert (stack = stack'); + arg_char assign (A a) s bqlist stack' + (* CTLENDARI *) + | '\135'::s,`CTLAri::stack' -> [],s,bqlist,stack' + | '\135'::_,`CTLVar::_' -> failwith "Saw CTLENDARI before CTLENDVAR" + | '\135'::_,`CTLQuo::_' -> failwith "Saw CTLENDARI before CTLQUOTEMARK" + | '\135'::_,[] -> failwith "Saw CTLENDARI outside of CTLARI" + (* CTLQUOTEMARK *) + | '\136'::s,`CTLQuo::stack' -> [],s,bqlist,stack' + | '\136'::s,_ -> + let a,s,bqlist,stack' = parse_arg ~assign s bqlist (`CTLQuo::stack) in + assert (stack' = stack); + arg_char assign (Q a) s bqlist stack' + (* tildes *) + | '~'::s,stack -> + if List.exists (fun m -> m = `CTLQuo || m = `CTLAri) stack + then (* we're in arithmetic or double quotes, so tilde is ignored *) + arg_char assign (C '~') s bqlist stack + else + let _ = tilde_ok in (* unused? 
*) + let uname,s' = parse_tilde [] s in + arg_char assign (T uname) s' bqlist stack + (* ordinary character *) + | c::s,_ -> + arg_char assign (C c) s bqlist stack + +and parse_tilde acc s = + match s with + (* CTLESC, CTLVAR, CTLQUOTEMARK, CTLBACKQ, CTLARI: no tilde prefix *) + | '\129'::_ | '\130'::_ | '\132'::_ | '\134'::_ | '\136'::_ -> None, s + (* CTLENDVAR, CTLENDARI, /, :, EOF: terminate tilde prefix *) + | '\131'::_ | '\135'::_ + | ':'::_ | '/'::_ | [] -> + if acc = [] then (None, s) else (Some (implode acc), s) + (* ordinary char *) + (* TODO 2019-01-03 only characters from the portable character set *) + | c::s' -> parse_tilde (acc @ [c]) s' + +and arg_char assign c s bqlist stack = + let tilde_ok = + match c with + | C _ -> assign && (match last s with + | Some ':' -> true + | _ -> false) + | _ -> false + in + let a,s,bqlist,stack = parse_arg ~tilde_ok ~assign s bqlist stack in + (c::a,s,bqlist,stack) + +and extract_assign v = function + | [] -> failwith ("Never found an '=' sign in assignment, got " ^ implode (List.rev v)) + | '=' :: a -> (implode (List.rev v),a) + | '\129'::_ -> failwith "Unexpected CTLESC in variable name" + | '\130'::_ -> failwith "Unexpected CTLVAR in variable name" + | '\131'::_ -> failwith "Unexpected CTLENDVAR in variable name" + | '\132'::_ -> failwith "Unexpected CTLBACKQ in variable name" + | '\133'::_ -> failwith "Unexpected CTL??? 
in variable name" + | '\134'::_ -> failwith "Unexpected CTLARI in variable name" + | '\135'::_ -> failwith "Unexpected CTLENDARI in variable name" + | '\136'::_ -> failwith "Unexpected CTLQUOTEMARK in variable name" + | c :: a -> + extract_assign (c::v) a + +and to_assign (n : narg structure) : (string * arg) = + let (v,t) = extract_assign [] (explode (getf n narg_text)) in + let a,s,bqlist,stack = parse_arg ~tilde_ok:true ~assign:true t (getf n narg_backquote) [] in + (* we should have used up the string and have no backquotes left in our list *) + assert (s = []); + assert (nullptr bqlist); + assert (stack = []); + (v,a) + +and to_assigns n = + if nullptr n + then [] + else (assert (n @-> node_type = 15); + let n = n @-> node_narg in + to_assign n::to_assigns (getf n narg_next)) + +and to_args (n : node union ptr) : args = + if nullptr n + then [] + else (assert (n @-> node_type = 15); + let n = n @-> node_narg in + to_arg n::to_args (getf n narg_next)) + +let separated f l = intercalate " " (List.map f l) + +let show_unless expected actual = + if expected = actual + then "" + else string_of_int actual + +let background s = "{ " ^ s ^ " & }" + +let lines = Str.split (Str.regexp "[\n]+") + +let fresh_marker heredoc = + let eofs_in_line line = + if String.length line > 2 && String.get line 0 = 'E' && String.get line 1 == 'O' + then + try String.rindex line 'F' - 1 + with Not_found -> 0 + else 0 + in + let rec find_eofs lines max_fs = + match lines with + | [] -> max_fs + | line::lines -> find_eofs lines (max max_fs (eofs_in_line line)) + in + "EOF" ^ String.make (find_eofs heredoc 0) 'F' + +let rec to_string = function + | Command (_,assigns,cmds,redirs) -> + separated string_of_assign assigns ^ + (if List.length assigns = 0 || List.length cmds = 0 then "" else " ") ^ + separated string_of_arg cmds ^ string_of_redirs redirs + | Pipe (bg,ps) -> + let p = intercalate " | " (List.map to_string ps) in + if bg then background p else p + | Redir (_,a,redirs) -> + 
to_string a ^ string_of_redirs redirs + | Background (_,a,redirs) -> + (* we translate + cmds... & + to + { cmds & } + this avoids issues with parsing; in particular, + cmd1 & ; cmd2 & ; cmd3 + doesn't parse; it must be: + cmd1 & cmd2 & cmd3 + it's a little too annoying to track "was the last thing + backgrounded?" so the braces resolve the issue. testing + indicates that they're semantically equivalent. + *) + background (to_string a ^ string_of_redirs redirs) + | Subshell (_,a,redirs) -> + parens (to_string a ^ string_of_redirs redirs) + | And (a1,a2) -> braces (to_string a1) ^ " && " ^ braces (to_string a2) + | Or (a1,a2) -> braces (to_string a1) ^ " || " ^ braces (to_string a2) + | Not a -> "! " ^ braces (to_string a) + | Semi (a1,a2) -> braces (to_string a1) ^ " \n " ^ braces (to_string a2) + | If (c,t,e) -> string_of_if c t e + | While (Not t,b) -> + "until " ^ to_string t ^ "; do " ^ to_string b ^ "; done " + | While (t,b) -> + "while " ^ to_string t ^ "; do " ^ to_string b ^ "; done " + | For (_,a,body,var) -> + "for " ^ var ^ " in " ^ separated string_of_arg a ^ "; do " ^ + to_string body ^ "; done" + | Case (_,a,cs) -> + "case " ^ string_of_arg a ^ " in " ^ + separated string_of_case cs ^ " esac" + | Defun (_,name,body) -> name ^ "() {\n" ^ to_string body ^ "\n}" + +and string_of_if c t e = + "if " ^ to_string c ^ + "; then " ^ to_string t ^ + (match e with + | Command (-1,[],[],[]) -> "; fi" (* one-armed if *) + | If (c,t,e) -> "; el" ^ string_of_if c t e + | _ -> "; else " ^ to_string e ^ "; fi") + +and string_of_arg_char ?quote_mode:(quote_mode=QUnquoted) = function + | E c -> + (* removed ! 
from chars_to_escape to have the right behavior in non-interactive shells *) + let chars_to_escape = "'\"`(){}$&|;" in + let chars_to_escape_when_no_quotes = "*?[]#<>~ " in + if String.contains chars_to_escape c + then "\\" ^ String.make 1 c + else if String.contains chars_to_escape_when_no_quotes c && quote_mode=QUnquoted + then "\\" ^ String.make 1 c + else Char.escaped c + | C '"' when quote_mode=QQuoted -> "\\\"" + | C c -> String.make 1 c + | T None -> "~" + | T (Some u) -> "~" ^ u + | A a -> "$((" ^ string_of_arg ~quote_mode a ^ "))" + | V (Length,_,name,_) -> "${#" ^ name ^ "}" + | V (vt,nul,name,a) -> + "${" ^ name ^ (if nul then ":" else "") ^ string_of_var_type vt ^ string_of_arg ~quote_mode a ^ "}" + | Q a -> "\"" ^ string_of_arg ~quote_mode:QQuoted a ^ "\"" + | B t -> + let s = to_string t in + if String.length s >= 2 && s.[0] = '(' && s.[String.length s - 1] = ')' then + "$( " ^ s ^ " )" + else + "$(" ^ s ^ ")" +and string_of_arg ?quote_mode:(quote_mode=QUnquoted) = function + | [] -> "" + | c :: a -> + let char = string_of_arg_char ~quote_mode c in + if char = "$" && a <> [] + then "\\$" ^ string_of_arg ~quote_mode a + else char ^ string_of_arg ~quote_mode a + +and next_is_escaped = function + | E _ :: _ -> true + | _ -> false + +and string_of_assign (v,a) = v ^ "=" ^ string_of_arg a + +and string_of_case c = + let pats = List.map string_of_arg c.cpattern in + intercalate "|" pats ^ ") " ^ to_string c.cbody ^ ";;" + +and string_of_redir = function + | File (To,fd,a) -> show_unless 1 fd ^ ">" ^ string_of_arg a + | File (Clobber,fd,a) -> show_unless 1 fd ^ ">|" ^ string_of_arg a + | File (From,fd,a) -> show_unless 0 fd ^ "<" ^ string_of_arg a + | File (FromTo,fd,a) -> show_unless 0 fd ^ "<>" ^ string_of_arg a + | File (Append,fd,a) -> show_unless 1 fd ^ ">>" ^ string_of_arg a + | Dup (ToFD,fd,tgt) -> show_unless 1 fd ^ ">&" ^ string_of_arg tgt + | Dup (FromFD,fd,tgt) -> show_unless 0 fd ^ "<&" ^ string_of_arg tgt + | Heredoc (t,fd,a) -> + let heredoc = 
string_of_arg ~quote_mode:QHeredoc a in + let marker = fresh_marker (lines heredoc) in + show_unless 0 fd ^ "<<" ^ + (if t = XHere then marker else "'" ^ marker ^ "'") ^ "\n" ^ heredoc ^ marker ^ "\n" + +and string_of_redirs rs = + let ss = List.map string_of_redir rs in + (if List.length ss > 0 then " " else "") ^ intercalate " " ss diff --git a/ocaml/ast.mli b/ocaml/ast.mli new file mode 100644 index 0000000..bac58ff --- /dev/null +++ b/ocaml/ast.mli @@ -0,0 +1,57 @@ +type linno = int + +exception ParseException of string + +type t = + Command of (linno * assign list * args * redirection list) + | Pipe of (bool * t list) + | Redir of (linno * t * redirection list) + | Background of (linno * t * redirection list) + | Subshell of (linno * t * redirection list) + | And of (t * t) + | Or of (t * t) + | Not of (t) + | Semi of (t * t) + | If of (t * t * t) + | While of (t * t) + | For of (linno * arg list * t * string) + | Case of (linno * arg * case list) + | Defun of (linno * string * t) +and assign = string * arg +and redirection = + File of (redir_type * int * arg) + | Dup of (dup_type * int * arg) + | Heredoc of (heredoc_type * int * arg) +and redir_type = To | Clobber | From | FromTo | Append +and dup_type = ToFD | FromFD +and heredoc_type = Here | XHere +and args = arg list +and arg = arg_char list +and arg_char = + C of char + | E of char + | T of string option + | A of arg + | V of (var_type * bool * string * arg) + | Q of arg + | B of t +and var_type = + Normal + | Minus + | Plus + | Question + | Assign + | TrimR + | TrimRMax + | TrimL + | TrimLMax + | Length +and case = { cpattern : arg list; cbody : t; } + +val of_node : Dash.node Ctypes.union Ctypes.ptr -> t + +(* command that does nothing *) +val skip : t + +(* render to string *) +val to_string : t -> string diff --git a/ocaml/ast_atd.atd b/ocaml/ast_atd.atd new file mode 100644 index 0000000..989789b --- /dev/null +++ b/ocaml/ast_atd.atd @@ -0,0 +1,78 @@ +type char = int + +type linno = int + +type t = 
[ + Command of (linno * assign list * args * redirection list) (* assign, args, redir *) + | Pipe of (bool * t list) (* background?, commands *) + | Redir of (linno * t * redirection list) + | Background of (linno * t * redirection list) + | Subshell of (linno * t * redirection list) + | And of (t * t) + | Or of (t * t) + | Not of t + | Semi of (t * t) + | If of (t * t * t) (* cond, then, else *) + | While of (t * t) (* test, body *) (* until encoded as a While . Not *) + | For of (linno * arg list * t * string) (* args, body, var *) + | Case of (linno * arg * case list) + | Defun of (linno * string * t) (* name, body *) +] + +type assign = (string * arg) + +type redirection = [ + File of (redir_type * int * arg) + | Dup of (dup_type * int * arg) + | Heredoc of (heredoc_type * int * arg) +] + +type redir_type = [ + To + | Clobber + | From + | FromTo + | Append +] + +type dup_type = [ + ToFD + | FromFD +] + +type heredoc_type = [ + Here + | XHere (* for when in a quote... not sure when this comes up *) +] + +type args = arg list + +type arg = arg_char list + +type arg_char = [ + C of char + | E of char (* escape... necessary for expansion *) + | T of string option (* tilde *) + | A of arg (* arith *) + | V of (var_type * bool (* VSNUL? *) * string * arg) + | Q of arg (* quoted *) + | B of t (* backquote *) +] + +type var_type = [ + Normal + | Minus + | Plus + | Question + | Assign + | TrimR + | TrimRMax + | TrimL + | TrimLMax + | Length +] + +type case = { + cpattern : arg list; + cbody : t +} \ No newline at end of file diff --git a/ocaml/dash.ml b/ocaml/dash.ml new file mode 100644 index 0000000..65215e9 --- /dev/null +++ b/ocaml/dash.ml @@ -0,0 +1,346 @@ +open Ctypes +include Cdash.Functions +include Cdash.Types + +(* First, some dash trivia. 
*) + +type stackmark_t = Stackmark.stackmark + +let init_stack () : stackmark = + let stack = Ctypes.make stackmark in + setstackmark (addr stack); + stack + +let pop_stack stack : unit = + popstackmark (addr stack) + +let initialize () : unit = + initialize_dash_errno (); + dash_init () + +let setinputtostdin () : unit = + setinputfd 0 0 (* don't bother pushing the file *) + +let setinputfile ?push:(push=false) (s : string) : unit = + let _ = raw_setinputfile s (if push then 1 else 0) in + () + +let setvar (x : string) (v : string) : unit = + let _ = raw_setvar x v 0 in + () + +let addrof p = raw_address_of_ptr (to_voidp p) + +let eqptr p1 p2 = addrof p1 = addrof p2 + +let nullptr (p : 'a ptr) = addrof p = Nativeint.zero + +type parse_result = Done | Error | Null | Parsed of (node union ptr) + +let parse_next ?interactive:(i=false) () = + let n = parsecmd_safe (if i then 1 else 0) in + if eqptr n neof + then Done + else if eqptr n nerr + then Error + else if nullptr n + then Null (* comment or blank line or error ... 
*) + else Parsed n + +let (@->) (s : ('b, 'c) structured ptr) (f : ('a, ('b, 'c) structured) field) = + getf (!@ s) f + +let rec arglist (n : narg structure) : (narg structure) list = + let next = getf n narg_next in + if nullptr next + then [n] + else + (assert (next @-> node_type = 15); + n::arglist (next @-> node_narg)) + +let rec nodelist (n : nodelist structure ptr) : (node union ptr) list = + if nullptr n + then [] + else (n @-> nodelist_n)::nodelist (n @-> nodelist_next) + +let rec redirlist (n : node union ptr) = + if nullptr n + then [] + else + let h = match n @-> node_type with + (* NTO *) + | 16 -> `File (1,">",n @-> node_nfile) + (* NCLOBBER *) + | 17 -> `File (1,">|",n @-> node_nfile) + (* NFROM *) + | 18 -> `File (0,"<",n @-> node_nfile) + (* NFROMTO *) + | 19 -> `File (0,"<>",n @-> node_nfile) + (* NAPPEND *) + | 20 -> `File (1,">>",n @-> node_nfile) + (* NTOFD *) + | 21 -> `Dup (1,">&",n @-> node_ndup) + (* NFROMFD *) + | 22 -> `Dup (0,"<&",n @-> node_ndup) + (* NHERE quoted heredoc---no expansion)*) + | 23 -> `Here (0,"<<",false,n @-> node_nhere) + (* NXHERE unquoted heredoc (param/command/arith expansion) *) + | 24 -> `Here (0,"<<",true,n @-> node_nhere) + | nt -> failwith ("unexpected node_type in redirlist: " ^ string_of_int nt) + in + h :: redirlist (getf (n @-> node_nfile) nfile_next) + +let rec caselist (n : node union ptr) = + if nullptr n + then [] + else + let n = n @-> node_nclist in + assert (getf n nclist_type = 13); (* NCLIST *) + (getf n nclist_pattern, getf n nclist_body)::caselist (getf n nclist_next) + +let explode s = + let rec exp i l = + if i < 0 then l else exp (i - 1) (s.[i] :: l) in + exp (String.length s - 1) [] + +let implode l = + let s = Bytes.create (List.length l) in + let rec imp i l = + match l with + | [] -> () + | (c::l) -> (Bytes.set s i c; imp (i+1) l) + in + imp 0 l; + Bytes.unsafe_to_string s + +let rec intercalate p ss = + match ss with + | [] -> "" + | [s] -> s + | s::ss -> s ^ p ^ intercalate p ss + +let 
lines = Str.split (Str.regexp "[\n\r]+") + +let rec fresh_marker ls s = + if List.mem s ls + then fresh_marker ls (s ^ (String.sub s (String.length s - 1) 1)) + else s + +let rec split_at p xs = + match xs with + | [] -> ([],[]) + | x::xs -> + if p x + then ([],x::xs) + else let (xs,ys) = split_at p xs in + (x::xs, ys) + +let string_of_vs = function + | 0x1 -> (* VSNORMAL ${var} *) [] + | 0x2 -> (* VSMINUS ${var-text} *) ['-'] + | 0x3 -> (* VSPLUS ${var+text} *) ['+'] + | 0x4 -> (* VSQUESTION ${var?message} *) ['?'] + | 0x5 -> (* VSASSIGN ${var=text} *) ['='] + | 0x6 -> (* VSTRIMRIGHT ${var%pattern} *) ['%'] + | 0x7 -> (* VSTRIMRIGHTMAX ${var%%pattern} *) ['%';'%'] + | 0x8 -> (* VSTRIMLEFT ${var#pattern} *) ['#'] + | 0x9 -> (* VSTRIMLEFTMAX ${var##pattern} *) ['#';'#'] + | vs -> failwith ("Unknown VSTYPE: " ^ string_of_int vs) + +let braces s = "{ " ^ s ^ " ; }" +let parens s = "( " ^ s ^ " )" + +let rec show (n : node union ptr) : string = + match (n @-> node_type) with + (* NCMD *) + | 0 -> + let n = n @-> node_ncmd in + let raw_cmd = intercalate " " (List.map sharg (arglist (getf n ncmd_args @-> node_narg))) in + let vars = if nullptr (getf n ncmd_assign) then "" else intercalate " " (List.map sharg (arglist (getf n ncmd_assign @-> node_narg))) ^ " " in + vars ^ raw_cmd ^ shredir (getf n ncmd_redirect) + (* NPIPE *) + | 1 -> + let n = n @-> node_npipe in + let cmds = nodelist (getf n npipe_cmdlist) in + intercalate " | " (List.map show cmds) ^ if (getf n npipe_backgnd) = 0 then "" else " &" + (* NREDIR *) + | 2 -> shnredir braces n + (* NBACKGND *) + | 3 -> shnredir braces n ^ " &" + (* NSUBSHELL *) + | 4 -> shnredir parens n + (* NAND *) + | 5 -> shbinary "&&" (n @-> node_nbinary) + (* NOR *) + | 6 -> shbinary "||" (n @-> node_nbinary) + (* NSEMI *) + | 7 -> shbinary ";" (n @-> node_nbinary) + (* NIF *) + | 8 -> shif (n @-> node_nif) + (* NWHILE *) + | 9 -> + let n = n @-> node_nbinary in + "while " ^ show (getf n nbinary_ch1) ^ "; do " ^ show (getf n 
nbinary_ch2) ^ "; done" + (* NUNTIL *) + | 10 -> + let n = n @-> node_nbinary in + "until " ^ show (getf n nbinary_ch1) ^ "; do " ^ show (getf n nbinary_ch2) ^ "; done" + (* NFOR *) + | 11 -> + let n = n @-> node_nfor in + "for " ^ (getf n nfor_var) ^ " in " ^ intercalate " " (List.map sharg (arglist (getf n nfor_args @-> node_narg))) ^ "; do " ^ show (getf n nfor_body) ^ "; done" + (* NCASE *) + | 12 -> + let n = n @-> node_ncase in + "case " ^ sharg (getf n ncase_expr @-> node_narg) ^ " in " ^ shclist (getf n ncase_cases) ^ " esac" + (* NDEFUN *) + | 14 -> + let n = n @-> node_ndefun in + (getf n ndefun_text) ^ "() " ^ braces (show (getf n ndefun_body)) + (* NARG *) + | 15 -> failwith "Didn't expect narg at the top-level" + (* NNOT *) + | 25 -> "! { " ^ show (getf (n @-> node_nnot) nnot_com) ^ " }" + | nt -> failwith ("unexpected node_type " ^ string_of_int nt) + +and shbinary (op : string) (n : nbinary structure) : string = + show (getf n nbinary_ch1) ^ " " ^ op ^ " " ^ show (getf n nbinary_ch2) + +and shnredir parenthesize n = + let nr = n @-> node_nredir in + parenthesize (show (getf nr nredir_n)) ^ shredir (getf nr nredir_redirect) + +and shif n = + "if " ^ show (getf n nif_test) ^ + "; then " ^ show (getf n nif_ifpart) ^ + (let else_part = getf n nif_elsepart in + if nullptr else_part + then "; fi" + else if (else_part @-> node_type = 8) + then "; el" ^ shif (else_part @-> node_nif) + else "; else " ^ show else_part ^ "; fi") + +and shclist clist = intercalate " " (List.map shcase (caselist clist)) (* handles NCLIST = 13 *) + +and shcase (pat,body) = + assert (pat @-> node_type = 15); + sharg (pat @-> node_narg) ^ ") " ^ show body ^ ";;" + +and shredir (n : node union ptr) : string = + let redirs = redirlist n in + if redirs = [] + then "" + else " " ^ intercalate " " (List.map show_redir redirs) +and show_redir n : string = + match n with + | `File (src,sym,f) -> show_redir_src (getf f nfile_fd) src ^ sym ^ sharg ((getf f nfile_fname) @-> node_narg) + | 
`Dup (src,sym,d) -> + let vname = getf d ndup_vname in + let tgt = + if nullptr vname + then string_of_int (getf d ndup_dupfd) + else sharg (vname @-> node_narg) + in + show_redir_src (getf d ndup_fd) src ^ sym ^ tgt + | `Here (src,sym,exp,h) -> + let heredoc = sharg ((getf h nhere_doc) @-> node_narg) in + let marker = fresh_marker (lines heredoc) "EOF" in + show_redir_src (getf h nhere_fd) src ^ sym ^ (if exp then marker else "'" ^ marker ^ "'") ^ "\n" ^ heredoc ^ marker +and show_redir_src actual expected = + if actual = expected + then "" + else string_of_int actual + +and sharg (n : narg structure) : string = + let str,s',bqlist,stack = show_arg (explode (getf n narg_text)) (getf n narg_backquote) [] in + (* we should have used up the string and have no backquotes left in our list *) + assert (s' = []); + assert (nullptr bqlist); + assert (stack = []); + str +and show_arg (s : char list) (bqlist : nodelist structure ptr) stack = + (* we have to look at the string and interpret control characters... 
*) + match s,stack with + | [],[] -> "",[],bqlist,[] + | [],`CTLVar::stack' -> failwith "End of string before CTLENDVAR" + | [],`CTLAri::stack' -> failwith "End of string before CTLENDARI" + | [],`CTLQuo::stack' -> failwith "End of string before CTLQUOTEMARK" + (* CTLESC *) + | '\129'::c::s',_ -> + let str,s'',bqlist',stack' = show_arg s' bqlist stack in + let c' = match c with + | '\'' -> "\\'" + | '\"' -> "\\\"" + | _ -> String.make 1 c + in + c' ^ str,s'',bqlist',stack' + (* CTLVAR *) + | '\130'::t::s',_ -> + let v,s'',bqlist',stack' = show_var (int_of_char t) s' bqlist stack in + assert (stack = stack'); + let str,s''',bqlist'',stack'' = show_arg s'' bqlist' stack' in + "${" ^ v ^ "}" ^ str, s''', bqlist'', stack'' + (* CTLENDVAR *) + | '\131'::s',`CTLVar::stack' -> "",s',bqlist,stack' (* pop CTLVar and return the unconsumed input s' so the CTLVAR case keeps rendering what follows the closing brace (same shape as CTLENDARI / CTLQUOTEMARK) *) + | '\131'::s',`CTLAri::stack' -> failwith "Saw CTLENDVAR before CTLENDARI" + | '\131'::s',`CTLQuo::stack' -> failwith "Saw CTLENDVAR before CTLQUOTEMARK" + | '\131'::s',[] -> failwith "Saw CTLENDVAR outside of CTLVAR" + (* CTLBACKQ *) + | '\132'::s',_ -> + if nullptr bqlist + then failwith "Saw CTLBACKQ but bqlist was null" + else + let n = bqlist @-> nodelist_n in + (* MMG: !!! dash has a bug in its sharg function... it doesn't advance the list! 
*) + let bqlist' = bqlist @-> nodelist_next in + let str,s'',bqlist'',stack' = show_arg s' bqlist' stack in + "$(" ^ show n ^ ")" ^ str,s'',bqlist'',stack' + (* CTLARI *) + | '\134'::s',_ -> + let ari,s'',bqlist',stack' = show_arg s' bqlist (`CTLAri::stack) in + assert (stack = stack'); + let str,s''',bqlist'',stack'' = show_arg s'' bqlist' stack' in + "$((" ^ ari ^ "))" ^ str, s''', bqlist'', stack'' + (* CTLENDARI *) + | '\135'::s',`CTLAri::stack' -> "",s',bqlist,stack' + | '\135'::s',`CTLVar::stack' -> failwith "Saw CTLENDARI before CTLENDVAR" + | '\135'::s',`CTLQuo::stack' -> failwith "Saw CTLENDARI before CTLQUOTEMARK" + | '\135'::s',[] -> failwith "Saw CTLENDARI outside of CTLARI" + (* CTLQUOTEMARK *) + | '\136'::s',[`CTLQuo] -> "",s',bqlist,[] + | '\136'::s',_ -> + let quoted,s'',bqlist',stack' = show_arg s' bqlist [`CTLQuo] in + assert (stack' = []); + let str,s''',bqlist'',stack'' = show_arg s'' bqlist' stack in + "\"" ^ quoted ^ "\"" ^ str, s''', bqlist'', stack'' + (* ordinary character *) + | c::s',_ -> + let str,s',bqlist',stack' = show_arg s' bqlist stack in + let c' = match c with + | '\'' -> "\\'" + | '\"' -> "\\\"" + | _ -> String.make 1 c + in + c' ^ str,s',bqlist',stack' +and show_var (t : int) (s : char list) (bqlist : nodelist structure ptr) stack = + let var_name,s' = split_at (fun c -> c = '=') s in + (* mask out VSNUL, check VSTYPE *) + match t land 0x0f, s' with + (* VSNORMAL and VSLENGTH get special treatment + + neither ever gets VSNUL + VSNORMAL is terminated just with the =, without a CTLENDVAR *) + (* VSNORMAL *) + | 0x1,'='::s'' -> implode var_name, s'', bqlist, stack + (* VSLENGTH *) + | 0xa,'='::'\131'::s'' -> implode (['#'] @ var_name), s'', bqlist, stack + | 0x1,c::_ | 0xa,c::_ -> failwith ("Missing CTLENDVAR for VSNORMAL/VSLENGTH, found " ^ Char.escaped c) + (* every other VSTYPE takes mods before CTLENDVAR *) + | vstype,'='::s' -> + (* check VSNUL: the 0x10 bit is set for the colon forms (e.g. ${var:-text}), so print ':' only when it is set; the masked value is 0 or 0x10, hence the comparison against 0 *) + let vsnul = if t land 0x10 <> 0 then [':'] else [] in + let 
mods,s'',bqlist',stack' = show_arg s' bqlist (`CTLVar::stack) in + implode (var_name @ vsnul @ string_of_vs vstype) ^ mods, s'', bqlist', stack' + | _,c::s' -> failwith ("Expected '=' terminating variable name, found " ^ Char.escaped c) + | _,[] -> failwith "Expected '=' terminating variable name, found EOF" + diff --git a/ocaml/dash.mli b/ocaml/dash.mli new file mode 100644 index 0000000..a7bf212 --- /dev/null +++ b/ocaml/dash.mli @@ -0,0 +1,233 @@ +(* dash internals + + call initialize before doing anything! +*) + +val initialize : unit -> unit + +(* stackmark discipline: + + (init_stack parse_next [process AST] pop_stack[deallocates dash AST])* + + see libdash/test/test.ml for an example usage in parse_all +*) +type stackmark_t +val init_stack : unit -> stackmark_t Ctypes.structure +val pop_stack : stackmark_t Ctypes.structure -> unit + +val alloc_stack_string : string -> (char Ctypes.ptr) +val free_stack_string : (char Ctypes.ptr) -> unit + +val popfile : unit -> unit +val setinputstring : (char Ctypes.ptr) -> unit +val setinputtostdin : unit -> unit +val setinputfile : ?push:bool -> string -> unit + +val setvar : string -> string -> unit +val setalias : string -> string -> unit +val unalias : string -> unit + +(* returns -1 when fd was closed; -2 on other errors *) +val freshfd_ge10 : int -> int + +(* Ctypes mappings of the node types *) +type node +val node : node Ctypes.union Ctypes.typ +val node_type : (int, node Ctypes.union) Ctypes.field + +type nodelist +val nodelist_next : + (nodelist Ctypes.structure Ctypes_static.ptr, nodelist Ctypes.structure) + Ctypes.field +val nodelist_n : + (node Ctypes.union Ctypes_static.ptr, nodelist Ctypes.structure) + Ctypes.field + +type ncmd +val ncmd : ncmd Ctypes.structure Ctypes.typ +val ncmd_type : (int, ncmd Ctypes.structure) Ctypes.field +val ncmd_linno : (int, ncmd Ctypes.structure) Ctypes.field +val ncmd_assign : + (node Ctypes.union Ctypes_static.ptr, ncmd Ctypes.structure) Ctypes.field +val ncmd_args : + (node 
Ctypes.union Ctypes_static.ptr, ncmd Ctypes.structure) Ctypes.field +val ncmd_redirect : + (node Ctypes.union Ctypes_static.ptr, ncmd Ctypes.structure) Ctypes.field +val node_ncmd : (ncmd Ctypes.structure, node Ctypes.union) Ctypes.field + +type npipe +val npipe : npipe Ctypes.structure Ctypes.typ +val npipe_type : (int, npipe Ctypes.structure) Ctypes.field +val npipe_backgnd : (int, npipe Ctypes.structure) Ctypes.field +val npipe_cmdlist : + (nodelist Ctypes.structure Ctypes_static.ptr, npipe Ctypes.structure) + Ctypes.field +val node_npipe : (npipe Ctypes.structure, node Ctypes.union) Ctypes.field + +type nredir +val nredir : nredir Ctypes.structure Ctypes.typ +val nredir_type : (int, nredir Ctypes.structure) Ctypes.field +val nredir_linno : (int, nredir Ctypes.structure) Ctypes.field +val nredir_n : + (node Ctypes.union Ctypes_static.ptr, nredir Ctypes.structure) Ctypes.field +val nredir_redirect : + (node Ctypes.union Ctypes_static.ptr, nredir Ctypes.structure) Ctypes.field +val node_nredir : (nredir Ctypes.structure, node Ctypes.union) Ctypes.field + +type nbinary +val nbinary : nbinary Ctypes.structure Ctypes.typ +val nbinary_type : (int, nbinary Ctypes.structure) Ctypes.field +val nbinary_ch1 : + (node Ctypes.union Ctypes_static.ptr, nbinary Ctypes.structure) + Ctypes.field +val nbinary_ch2 : + (node Ctypes.union Ctypes_static.ptr, nbinary Ctypes.structure) + Ctypes.field +val node_nbinary : (nbinary Ctypes.structure, node Ctypes.union) Ctypes.field + +type nif +val nif : nif Ctypes.structure Ctypes.typ +val nif_type : (int, nif Ctypes.structure) Ctypes.field +val nif_test : + (node Ctypes.union Ctypes_static.ptr, nif Ctypes.structure) Ctypes.field +val nif_ifpart : + (node Ctypes.union Ctypes_static.ptr, nif Ctypes.structure) Ctypes.field +val nif_elsepart : + (node Ctypes.union Ctypes_static.ptr, nif Ctypes.structure) Ctypes.field +val node_nif : (nif Ctypes.structure, node Ctypes.union) Ctypes.field + +type nfor +val nfor : nfor Ctypes.structure 
Ctypes.typ +val nfor_type : (int, nfor Ctypes.structure) Ctypes.field +val nfor_linno : (int, nfor Ctypes.structure) Ctypes.field +val nfor_args : + (node Ctypes.union Ctypes_static.ptr, nfor Ctypes.structure) Ctypes.field +val nfor_body : + (node Ctypes.union Ctypes_static.ptr, nfor Ctypes.structure) Ctypes.field +val nfor_var : (string, nfor Ctypes.structure) Ctypes.field +val node_nfor : (nfor Ctypes.structure, node Ctypes.union) Ctypes.field + +type ncase +val ncase : ncase Ctypes.structure Ctypes.typ +val ncase_type : (int, ncase Ctypes.structure) Ctypes.field +val ncase_linno : (int, ncase Ctypes.structure) Ctypes.field +val ncase_expr : + (node Ctypes.union Ctypes_static.ptr, ncase Ctypes.structure) Ctypes.field +val ncase_cases : + (node Ctypes.union Ctypes_static.ptr, ncase Ctypes.structure) Ctypes.field +val node_ncase : (ncase Ctypes.structure, node Ctypes.union) Ctypes.field + +type nclist +val nclist : nclist Ctypes.structure Ctypes.typ +val nclist_type : (int, nclist Ctypes.structure) Ctypes.field +val nclist_next : + (node Ctypes.union Ctypes_static.ptr, nclist Ctypes.structure) Ctypes.field +val nclist_pattern : + (node Ctypes.union Ctypes_static.ptr, nclist Ctypes.structure) Ctypes.field +val nclist_body : + (node Ctypes.union Ctypes_static.ptr, nclist Ctypes.structure) Ctypes.field +val node_nclist : (nclist Ctypes.structure, node Ctypes.union) Ctypes.field + +type ndefun +val ndefun : ndefun Ctypes.structure Ctypes.typ +val ndefun_type : (int, ndefun Ctypes.structure) Ctypes.field +val ndefun_linno : (int, ndefun Ctypes.structure) Ctypes.field +val ndefun_text : (string, ndefun Ctypes.structure) Ctypes.field +val ndefun_body : + (node Ctypes.union Ctypes_static.ptr, ndefun Ctypes.structure) Ctypes.field +val node_ndefun : (ndefun Ctypes.structure, node Ctypes.union) Ctypes.field + +type narg +val narg : narg Ctypes.structure Ctypes.typ +val narg_type : (int, narg Ctypes.structure) Ctypes.field +val narg_next : + (node Ctypes.union 
Ctypes_static.ptr, narg Ctypes.structure) Ctypes.field +val narg_text : (string, narg Ctypes.structure) Ctypes.field +val narg_backquote : + (nodelist Ctypes.structure Ctypes_static.ptr, narg Ctypes.structure) + Ctypes.field +val node_narg : (narg Ctypes.structure, node Ctypes.union) Ctypes.field + +type nfile +val nfile : nfile Ctypes.structure Ctypes.typ +val nfile_type : (int, nfile Ctypes.structure) Ctypes.field +val nfile_next : + (node Ctypes.union Ctypes_static.ptr, nfile Ctypes.structure) Ctypes.field +val nfile_fd : (int, nfile Ctypes.structure) Ctypes.field +val nfile_fname : + (node Ctypes.union Ctypes_static.ptr, nfile Ctypes.structure) Ctypes.field +val nfile_expfname : (string, nfile Ctypes.structure) Ctypes.field +val node_nfile : (nfile Ctypes.structure, node Ctypes.union) Ctypes.field + +type ndup +val ndup : ndup Ctypes.structure Ctypes.typ +val ndup_type : (int, ndup Ctypes.structure) Ctypes.field +val ndup_next : + (node Ctypes.union Ctypes_static.ptr, ndup Ctypes.structure) Ctypes.field +val ndup_fd : (int, ndup Ctypes.structure) Ctypes.field +val ndup_dupfd : (int, ndup Ctypes.structure) Ctypes.field +val ndup_vname : + (node Ctypes.union Ctypes_static.ptr, ndup Ctypes.structure) Ctypes.field +val node_ndup : (ndup Ctypes.structure, node Ctypes.union) Ctypes.field + +type nhere +val nhere : nhere Ctypes.structure Ctypes.typ +val nhere_type : (int, nhere Ctypes.structure) Ctypes.field +val nhere_next : + (node Ctypes.union Ctypes_static.ptr, nhere Ctypes.structure) Ctypes.field +val nhere_fd : (int, nhere Ctypes.structure) Ctypes.field +val nhere_doc : + (node Ctypes.union Ctypes_static.ptr, nhere Ctypes.structure) Ctypes.field +val node_nhere : (nhere Ctypes.structure, node Ctypes.union) Ctypes.field + +type nnot +val nnot : nnot Ctypes.structure Ctypes.typ +val nnot_type : (int, nnot Ctypes.structure) Ctypes.field +val nnot_com : + (node Ctypes.union Ctypes_static.ptr, nnot Ctypes.structure) Ctypes.field +val node_nnot : (nnot 
Ctypes.structure, node Ctypes.union) Ctypes.field + +val ( @-> ) : + ('b, 'c) Ctypes.structured Ctypes.ptr -> + ('a, ('b, 'c) Ctypes.structured) Ctypes.field -> 'a +val arglist : narg Ctypes.structure -> narg Ctypes.structure list +val nodelist : + nodelist Ctypes.structure Ctypes.ptr -> node Ctypes.union Ctypes.ptr list +val redirlist : + node Ctypes.union Ctypes.ptr -> + [> `Dup of int * string * ndup Ctypes.structure + | `File of int * string * nfile Ctypes.structure + | `Here of int * string * bool * nhere Ctypes.structure ] + list +val caselist : + node Ctypes.union Ctypes.ptr -> + (node Ctypes.union Ctypes_static.ptr * node Ctypes.union Ctypes_static.ptr) + list + +(* useful functions for working with the Ctypes AST *) +val addrof : 'a Ctypes.ptr -> nativeint +val eqptr : 'a Ctypes.ptr -> 'b Ctypes.ptr -> bool +val nullptr : 'a Ctypes.ptr -> bool + +(* useful functions for pretty printing *) +val explode : string -> char list +val implode : char list -> string +val intercalate : string -> string list -> string +val lines : string -> string list +val split_at : ('a -> bool) -> 'a list -> 'a list * 'a list + +(* shell-specific functions for pretty printing *) +val braces : string -> string +val parens : string -> string +val fresh_marker : string list -> string -> string + +(* parser *) +type parse_result = + Done + | Error + | Null + | Parsed of node Ctypes.union Ctypes.ptr +val parse_next : ?interactive:bool -> unit -> parse_result + +(* native pretty printer *) +val show : node Ctypes.union Ctypes.ptr -> string diff --git a/ocaml/dune b/ocaml/dune new file mode 100644 index 0000000..a10e513 --- /dev/null +++ b/ocaml/dune @@ -0,0 +1,65 @@ +(executables + (names shell_to_json json_to_shell) + (public_names shell_to_json json_to_shell) + (modules shell_to_json json_to_shell ast_json) + (modes (native exe)) + (libraries libdash yojson atdgen-runtime)) + +(rule (copy ../dlldash.so dlldash_native.so)) +(rule (copy ../libdash.a libdash_native.a)) + +(library + 
(name libdash) + (public_name libdash) + (modes native) + (modules (:standard \ json_to_shell shell_to_json ast_json)) + (libraries ctypes ctypes.foreign) + (foreign_archives dash_native) + (ctypes + (external_library_name dash) + (build_flags_resolver vendored) + (deps (glob_files ../src/*.h) ../src/builtins.h ../src/nodes.h ../src/syntax.h ../src/token.h ../src/token_vars.h) + (headers (preamble + "\ + \n#include \"../src/shell.h\"\ + \n#include \"../src/memalloc.h\"\ + \n#include \"../src/mystring.h\"\ + \n#include \"../src/init.h\"\ + \n#include \"../src/main.h\"\ + \n#include \"../src/input.h\"\ + \n#include \"../src/var.h\"\ + \n#include \"../src/alias.h\"\ + \n#include \"../src/redir.h\"\ + \n#include \"../src/parser.h\"\ + \n#include \"../src/nodes.h\"\ + \n")) + (type_description + (instance Types) + (functor Type_description)) + (function_description + (instance Functions) + (functor Function_description)) + (generated_types Types_generated) + (generated_entry_point Cdash))) + +(rule + (targets ast_json.mli ast_json.ml) + (deps ast_atd.atd) + (action + (progn + (run atdgen -j -j-std ast_atd.atd) + (run sed -i -e "/type char = Libdash.Ast.char/d" ast_atd_j.ml) + (run sed -i -e "/type char = Libdash.Ast.char/d" ast_atd_j.mli) + (run mv ast_atd_j.ml ast_json.ml) + (run mv ast_atd_j.mli ast_json.mli)))) + +(rule + (alias runtest) + (deps (glob_files ../test/tests/*) (glob_files ../test/pash_tests/*) + ../test/round_trip.sh rt.sh %{bin:json_to_shell} %{bin:shell_to_json}) + (action + (setenv + JSON_TO_SHELL %{bin:json_to_shell} + (setenv + SHELL_TO_JSON %{bin:shell_to_json} + (bash "{ find ../test/tests ../test/pash_tests -type f | while read f; do ../test/round_trip.sh ./rt.sh \"$f\"; done | egrep '^[A-Z0-9_]+:' | cut -d ':' -f 1 | sort | uniq -c | grep ':' ; } && echo FAILED && exit 1 || { echo OK; exit 0; }"))))) diff --git a/ocaml/function_description.ml b/ocaml/function_description.ml new file mode 100644 index 0000000..cf65d95 --- /dev/null +++ 
b/ocaml/function_description.ml @@ -0,0 +1,36 @@ +open Ctypes + +module Types = Types_generated +open Types + +module Functions (F : Ctypes.FOREIGN) = struct + open F + + let setstackmark = foreign "setstackmark" (ptr stackmark @-> returning void) + let popstackmark = foreign "popstackmark" (ptr stackmark @-> returning void) + + let alloc_stack_string = foreign "sstrdup" (string @-> returning (ptr char)) + let free_stack_string = foreign "stunalloc" (ptr char @-> returning void) + + let dash_init = foreign "init" (void @-> returning void) + let initialize_dash_errno = foreign "initialize_dash_errno" (void @-> returning void) + + let popfile = foreign "popfile" (void @-> returning void) + let setinputstring = foreign "setinputstring" (ptr char @-> returning void) + let setinputfd = foreign "setinputfd" (int @-> int @-> returning void) + let raw_setinputfile = foreign "setinputfile" (string @-> int @-> returning int) + + let raw_setvar = foreign "setvar" (string @-> string @-> int @-> returning (ptr void)) + + let setalias = foreign "setalias" (string @-> string @-> returning void) + let unalias = foreign "unalias" (string @-> returning void) + + (* Unix/ExtUnix don't let you renumber things the way you want *) + let freshfd_ge10 = foreign "freshfd_ge10" (int @-> returning int) + + let parsecmd_safe = foreign "parsecmd_safe" (int @-> returning (ptr node)) + let neof = foreign_value "tokpushback" node + let nerr = foreign_value "lasttoken" node +end + + diff --git a/ocaml/json_to_shell.ml b/ocaml/json_to_shell.ml new file mode 100644 index 0000000..2474e8c --- /dev/null +++ b/ocaml/json_to_shell.ml @@ -0,0 +1,39 @@ +(* This is straight-up copied from the libdash tests *) +open Libdash + +let verbose = ref false +let input_src : string option ref = ref None + +let parse_args () = + Arg.parse + [("-v",Arg.Set verbose,"verbose mode")] + (function | "-" -> input_src := None | f -> input_src := Some f) + "Final argument should be either a filename or empty (for STDIN); 
only the last such argument is used" + +let read_channel chan = +let lines = ref [] in +try + while true; do + lines := input_line chan :: !lines + done; !lines +with End_of_file -> + close_in chan; + List.rev !lines + +let read_lines () = + match !input_src with + | None -> read_channel stdin + | Some filename -> read_channel (open_in filename) + +let parse_lines () : Ast.t list = + let lines = read_lines () in + List.map (fun line -> Ast_json.t_of_string line) lines + + +let main () = + parse_args (); + let cs = parse_lines () in + List.map (fun c -> print_endline (Ast.to_string c)) cs +;; + +main () diff --git a/ocaml/mk_meta.sh b/ocaml/mk_meta.sh new file mode 100755 index 0000000..22d1d18 --- /dev/null +++ b/ocaml/mk_meta.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +set -e + +LIB="$1" +: ${LIB:=$(opam var lib)/libdash} + +cat >META </dev/null 2>&1 +then + SHELL_TO_JSON=$(dirname $0)/$SHELL_TO_JSON +fi + +: ${JSON_TO_SHELL=json_to_shell} +if ! type json_to_shell >/dev/null 2>&1 +then + JSON_TO_SHELL=$(dirname $0)/json_to_shell +fi + +if [ $# -ne 1 ] +then + echo "Usage: $0 testFile" >&2 + exit 1 +fi + +testFile="$1" + +if [ ! -f "$testFile" ] +then + echo "Error: cannot read '$testFile'!" >&2 + exit 1 +fi + +json=$(mktemp) + +"$SHELL_TO_JSON" "$testFile" >"$json" +if [ $? -ne 0 ] +then + echo "OCAML_PARSE_ABORT: '$testFile'" >&2 + exit 1 +fi + +rt=$(mktemp) + +"$JSON_TO_SHELL" "$json" >"$rt" +if [ $? 
-ne 0 ] +then + echo "OCAML_UNPARSE_ABORT: '$testFile' -> '$json'" >&2 + exit 1 +fi + +cat "$rt" diff --git a/ocaml/shell_to_json.ml b/ocaml/shell_to_json.ml new file mode 100644 index 0000000..29f32ac --- /dev/null +++ b/ocaml/shell_to_json.ml @@ -0,0 +1,45 @@ +(* This is straight-up copied from the libdash tests *) + +open Libdash + +let verbose = ref false +let input_src : string option ref = ref None + +let set_input_src () = + match !input_src with + | None -> Dash.setinputtostdin () + | Some f -> Dash.setinputfile f + +let parse_args () = + Arg.parse + [("-v",Arg.Set verbose,"verbose mode")] + (function | "-" -> input_src := None | f -> input_src := Some f) + "Final argument should be either a filename or - (for STDIN); only the last such argument is used" + +exception Parse_error + +let rec parse_all () : Ast.t list = + let stackmark = Dash.init_stack () in + match Dash.parse_next ~interactive:false () with + | Dash.Done -> Dash.pop_stack stackmark; [] + | Dash.Error -> Dash.pop_stack stackmark; raise Parse_error + | Dash.Null -> Dash.pop_stack stackmark; parse_all () + | Dash.Parsed n -> + (* translate to our AST *) + let c = Ast.of_node n in + (* deallocate *) + Dash.pop_stack stackmark; + (* keep calm and carry on *) + c::parse_all () + +let print_ast c = print_endline (Ast_json.string_of_t c) + +let main () = + Dash.initialize (); + parse_args (); + set_input_src (); + let cs = parse_all () in + List.map print_ast cs +;; + +main () diff --git a/ocaml/type_description.ml b/ocaml/type_description.ml new file mode 100644 index 0000000..ef6a134 --- /dev/null +++ b/ocaml/type_description.ml @@ -0,0 +1,191 @@ +open Ctypes + +module Types (F : Ctypes.TYPE) = struct + open F + + (* stackmarks [used for string allocation in dash] *) + module Stackmark = struct + + type stackmark + type t = stackmark Ctypes.structure + + let t : stackmark structure typ = structure "stackmark" + let stackp = field t "stackp" (ptr void) + let nxt = field t "stacknxt" string + let 
size = field t "stacknleft" F.size_t + let () = seal t + end + + type stackmark = Stackmark.t + let stackmark = Stackmark.t + + (* AST nodes *) + + (* define the node type... *) + type node + let node : node union typ = union "node" + let node_type = field node "type" int + (* ...but don't seal it yet! *) + + type nodelist + let nodelist : nodelist structure typ = structure "nodelist" + let nodelist_next = field nodelist "next" (ptr nodelist) + let nodelist_n = field nodelist "n" (ptr node) + let () = seal nodelist + + type ncmd + + let ncmd : ncmd structure typ = structure "ncmd" + let ncmd_type = field ncmd "type" int + let ncmd_linno = field ncmd "linno" int + let ncmd_assign = field ncmd "assign" (ptr node) + let ncmd_args = field ncmd "args" (ptr node) + let ncmd_redirect = field ncmd "redirect" (ptr node) + let () = seal ncmd + + let node_ncmd = field node "ncmd" ncmd + + type npipe + + let npipe : npipe structure typ = structure "npipe" + let npipe_type = field npipe "type" int + let npipe_backgnd = field npipe "backgnd" int + let npipe_cmdlist = field npipe "cmdlist" (ptr nodelist) + let () = seal npipe + + let node_npipe = field node "npipe" npipe + + type nredir + + let nredir : nredir structure typ = structure "nredir" + let nredir_type = field nredir "type" int + let nredir_linno = field nredir "linno" int + let nredir_n = field nredir "n" (ptr node) + let nredir_redirect = field nredir "redirect" (ptr node) + let () = seal nredir + + let node_nredir = field node "nredir" nredir + + type nbinary + + let nbinary : nbinary structure typ = structure "nbinary" + let nbinary_type = field nbinary "type" int + let nbinary_ch1 = field nbinary "ch1" (ptr node) + let nbinary_ch2 = field nbinary "ch2" (ptr node) + let () = seal nbinary + + let node_nbinary = field node "nbinary" nbinary + + type nif + + let nif : nif structure typ = structure "nif" + let nif_type = field nif "type" int + let nif_test = field nif "test" (ptr node) + let nif_ifpart = field nif 
"ifpart" (ptr node) + let nif_elsepart = field nif "elsepart" (ptr node) + let () = seal nif + + let node_nif = field node "nif" nif + + type nfor + + let nfor : nfor structure typ = structure "nfor" + let nfor_type = field nfor "type" int + let nfor_linno = field nfor "linno" int + let nfor_args = field nfor "args" (ptr node) + let nfor_body = field nfor "body" (ptr node) + let nfor_var = field nfor "var" string + let () = seal nfor + + let node_nfor = field node "nfor" nfor + + type ncase + + let ncase : ncase structure typ = structure "ncase" + let ncase_type = field ncase "type" int + let ncase_linno = field ncase "linno" int + let ncase_expr = field ncase "expr" (ptr node) + let ncase_cases = field ncase "cases" (ptr node) + let () = seal ncase + + let node_ncase = field node "ncase" ncase + + type nclist + + let nclist : nclist structure typ = structure "nclist" + let nclist_type = field nclist "type" int + let nclist_next = field nclist "next" (ptr node) + let nclist_pattern = field nclist "pattern" (ptr node) + let nclist_body = field nclist "body" (ptr node) + let () = seal nclist + + let node_nclist = field node "nclist" nclist + + type ndefun + + let ndefun : ndefun structure typ = structure "ndefun" + let ndefun_type = field ndefun "type" int + let ndefun_linno = field ndefun "linno" int + let ndefun_text = field ndefun "text" string + let ndefun_body = field ndefun "body" (ptr node) + let () = seal ndefun + + let node_ndefun = field node "ndefun" ndefun + + type narg + + let narg : narg structure typ = structure "narg" + let narg_type = field narg "type" int + let narg_next = field narg "next" (ptr node) + let narg_text = field narg "text" string + let narg_backquote = field narg "backquote" (ptr nodelist) + let () = seal narg + + let node_narg = field node "narg" narg + + type nfile + + let nfile : nfile structure typ = structure "nfile" + let nfile_type = field nfile "type" int + let nfile_next = field nfile "next" (ptr node) + let nfile_fd = field 
nfile "fd" int + let nfile_fname = field nfile "fname" (ptr node) + let nfile_expfname = field nfile "expfname" string + let () = seal nfile + + let node_nfile = field node "nfile" nfile + + type ndup + + let ndup : ndup structure typ = structure "ndup" + let ndup_type = field ndup "type" int + let ndup_next = field ndup "next" (ptr node) + let ndup_fd = field ndup "fd" int + let ndup_dupfd = field ndup "dupfd" int + let ndup_vname = field ndup "vname" (ptr node) + let () = seal ndup + + let node_ndup = field node "ndup" ndup + + type nhere + + let nhere : nhere structure typ = structure "nhere" + let nhere_type = field nhere "type" int + let nhere_next = field nhere "next" (ptr node) + let nhere_fd = field nhere "fd" int + let nhere_doc = field nhere "doc" (ptr node) + let () = seal nhere + + let node_nhere = field node "nhere" nhere + + type nnot + + let nnot : nnot structure typ = structure "nnot" + let nnot_type = field nnot "type" int + let nnot_com = field nnot "com" (ptr node) + let () = seal nnot + + let node_nnot = field node "nnot" nnot + let () = seal node + +end diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..2561f05 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,25 @@ +[project] +name = "libdash" +version = "0.3.1" +authors = [ + { name="Michael Greenberg", email="michael@greenberg.science" }, + { name="PaSh contributors" }, +] +license = { file = "COPYING" } +description = "Bindings for the dash shell as a library" +readme = "README.md" +requires-python = ">=3.7" +classifiers = [ + "Programming Language :: Python :: 3", + "Topic :: System :: System Shells", + "Operating System :: POSIX", +] + + +[project.urls] +"Homepage" = "https://github.com/binpash/libdash" +"Bug Tracker" = "https://github.com/binpash/libdash/issues" + +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" diff --git a/python/.gitignore b/python/.gitignore new file mode 100644 index 0000000..e5f78f1 --- /dev/null 
+++ b/python/.gitignore @@ -0,0 +1,4 @@ +*.o +*.so +*.dylib +python.log diff --git a/python/LICENSE b/python/LICENSE new file mode 100644 index 0000000..a78c7de --- /dev/null +++ b/python/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Michael Greenberg, Konstantinos Kallas, and Thurston Dang + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/python/Makefile b/python/Makefile new file mode 100644 index 0000000..b446cce --- /dev/null +++ b/python/Makefile @@ -0,0 +1,9 @@ +.PHONY: test clean + +test: rt.py ../libdash/*.py + @find ../test/tests ../test/pash_tests -type f | while read f; do ../test/round_trip.sh ./rt.py "$$f"; done | tee python.log + @cat python.log | egrep '^[A-Z0-9_]+:' | cut -d ':' -f 1 | sort | uniq -c + @grep ':' python.log && echo "FAILED" && exit 1 || exit 0 + +clean: + rm *.o *.so *.log diff --git a/python/rt.py b/python/rt.py new file mode 100755 index 0000000..4706df8 --- /dev/null +++ b/python/rt.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 + +import sys + +import libdash + +sys.setrecursionlimit (9001) + +def print_asts(new_asts): + for (ast, lines, linno_before, linno_after) in new_asts: + print(libdash.to_string(ast)) + +if (len(sys.argv) == 1): + new_asts = libdash.parse("-", True) +else: + new_asts = libdash.parse(sys.argv[1], True) + +print_asts(new_asts) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..d248ae8 --- /dev/null +++ b/setup.py @@ -0,0 +1,59 @@ +from setuptools import setup +from setuptools.command.build_py import build_py + +import os +import platform +import shutil +import subprocess +import sys + +from pathlib import Path +long_description = (Path(__file__).parent / "README.md").read_text() + +def try_exec(*cmds): + proc = subprocess.run(cmds) + + if proc.returncode != 0: + print('`{}` failed'.format(' '.join(cmds)), file=sys.stderr) + proc.check_returncode() + +class libdash_build_py(build_py): + def run(self): + build_py.run(self) + + if sys.platform == 'darwin': + libtoolize = "glibtoolize" + + target_arch = os.environ.get("ARCHFLAGS") + host_arch = platform.machine() + if not target_arch: + target_arch = f"-arch {host_arch}" + os.environ["ARCHFLAGS"] = target_arch + if host_arch not in target_arch and "MACOSX_DEPLOYMENT_TARGET" not in os.environ: + os.environ["MACOSX_DEPLOYMENT_TARGET"] = "11.0" + + print(f'ARCHFLAGS: 
{target_arch} MACOSX_DEPLOYMENT_TARGET: {os.environ.get("MACOSX_DEPLOYMENT_TARGET", "")}') + else: + libtoolize = "libtoolize" + + try_exec('arch') + try_exec(libtoolize) + try_exec('aclocal') + try_exec('autoheader') + try_exec('automake', '--add-missing') + try_exec('autoconf') + try_exec('./configure') + try_exec('make') + + shutil.copy2('src/.libs/dlldash.so', os.path.join(self.build_lib, 'libdash/libdash.so')) + if sys.platform == 'darwin': + shutil.copy2('src/.libs/libdash.dylib', os.path.join(self.build_lib, 'libdash/libdash.dylib')) + +setup(name='libdash', + packages=['libdash'], + cmdclass={'build_py': libdash_build_py}, + version='0.3.1', + long_description=long_description, + long_description_content_type='text/markdown', + include_package_data=True, + has_ext_modules=lambda: True) diff --git a/src/.gitignore b/src/.gitignore index 644eccb..2f5860e 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -11,3 +11,6 @@ nodes.[ch] signames.c syntax.[ch] token.h +.libs +libdash.la +dlldash.la diff --git a/src/Makefile.am b/src/Makefile.am index 1732465..1871997 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -16,6 +16,29 @@ COMPILE_FOR_BUILD = \ bin_PROGRAMS = dash +dash_CFLAGS = -DMAIN + +lib_LIBRARIES = libdash.a +lib_LTLIBRARIES = libdash.la dlldash.la + +libdash_la_SOURCES = \ + alias.c arith_yacc.c arith_yylex.c cd.c error.c eval.c exec.c expand.c \ + histedit.c input.c jobs.c mail.c main.c memalloc.c miscbltin.c \ + mystring.c options.c parser.c redir.c show.c trap.c output.c \ + bltin/printf.c system.c bltin/test.c bltin/times.c var.c \ + builtins.c init.c nodes.c signames.c syntax.c +libdash_la_CFLAGS = $(AM_CFLAGS) +libdash_la_LDFLAGS = -shared -dynamic + +dlldash_la_SOURCES = \ + alias.c arith_yacc.c arith_yylex.c cd.c error.c eval.c exec.c expand.c \ + histedit.c input.c jobs.c mail.c main.c memalloc.c miscbltin.c \ + mystring.c options.c parser.c redir.c show.c trap.c output.c \ + bltin/printf.c system.c bltin/test.c bltin/times.c var.c \ + 
builtins.c init.c nodes.c signames.c syntax.c +dlldash_la_CFLAGS = $(AM_CFLAGS) +dlldash_la_LDFLAGS = -shared -dynamic -module + dash_CFILES = \ alias.c arith_yacc.c arith_yylex.c cd.c error.c eval.c exec.c expand.c \ histedit.c input.c jobs.c mail.c main.c memalloc.c miscbltin.c \ @@ -30,6 +53,9 @@ dash_SOURCES = \ show.h system.h trap.h var.h dash_LDADD = builtins.o init.o nodes.o signames.o syntax.o +libdash_a_SOURCES = $(dash_CFILES) +libdash_a_LIBADD = $(dash_LDADD) + HELPERS = mkinit mksyntax mknodes mksignames BUILT_SOURCES = builtins.h nodes.h syntax.h token.h token_vars.h diff --git a/src/TOUR b/src/TOUR index 056e79b..e30836e 100644 --- a/src/TOUR +++ b/src/TOUR @@ -100,7 +100,7 @@ string was going to be: p = stackptr; *p++ = c; /* repeated as many times as needed */ stackptr = p; -The folloing three macros (defined in memalloc.h) perform these +The following three macros (defined in memalloc.h) perform these operations, but grow the stack if you run off the end: STARTSTACKSTR(p); STPUTC(c, p); /* repeated as many times as needed */ @@ -198,7 +198,7 @@ EXECUTION: Command execution is handled by the following files: eval.c The top level routines. redir.c Code to handle redirection of input and output. jobs.c Code to handle forking, waiting, and job control. - exec.c Code to to path searches and the actual exec sys call. + exec.c Code to path searches and the actual exec sys call. expand.c Code to evaluate arguments. var.c Maintains the variable symbol table. Called from expand.c. diff --git a/src/alias.c b/src/alias.c index daeacbb..b8c2704 100644 --- a/src/alias.c +++ b/src/alias.c @@ -41,39 +41,51 @@ #include "mystring.h" #include "alias.h" #include "options.h" /* XXX for argptr (should remove?) 
*/ +#include "syntax.h" +#include "var.h" #define ATABSIZE 39 struct alias *atab[ATABSIZE]; +/* STATIC void setalias(const char *, const char *); +*/ // libdash STATIC struct alias *freealias(struct alias *); STATIC struct alias **__lookupalias(const char *); +/* STATIC +*/ // libdash void setalias(const char *name, const char *val) { struct alias *ap, **app; + const char *p = name; + size_t namelen; + + do { + if (BASESYNTAX[(signed char)*p] != CWORD) + sh_error("Invalid alias name: %s", name); + } while (*++p != '='); app = __lookupalias(name); ap = *app; INTOFF; if (ap) { - if (!(ap->flag & ALIASINUSE)) { - ckfree(ap->val); - } - ap->val = savestr(val); + if (!(ap->flag & ALIASINUSE)) + ckfree(ap->name); ap->flag &= ~ALIASDEAD; } else { /* not found */ ap = ckmalloc(sizeof (struct alias)); - ap->name = savestr(name); - ap->val = savestr(val); ap->flag = 0; ap->next = 0; *app = ap; } + namelen = val - name; + ap->name = savestr(name); + ap->val = ap->name + namelen; INTON; } @@ -143,15 +155,15 @@ aliascmd(int argc, char **argv) return (0); } while ((n = *++argv) != NULL) { - if ((v = strchr(n+1, '=')) == NULL) { /* n+1: funny ksh stuff */ + /* n + 1: funny ksh stuff (from 44lite) */ + if (!*n || !(v = strchr(n + 1, '='))) { if ((ap = *__lookupalias(n)) == NULL) { outfmt(out2, "%s: %s not found\n", "alias", n); ret = 1; } else printalias(ap); } else { - *v++ = '\0'; - setalias(n, v); + setalias(n, v + 1); } } @@ -179,7 +191,10 @@ unaliascmd(int argc, char **argv) return (i); } +/* STATIC struct alias * +*/ // libdash +struct alias * freealias(struct alias *ap) { struct alias *next; @@ -190,35 +205,22 @@ freealias(struct alias *ap) { next = ap->next; ckfree(ap->name); - ckfree(ap->val); ckfree(ap); return next; } -void -printalias(const struct alias *ap) { - out1fmt("%s=%s\n", ap->name, single_quote(ap->val)); +void __attribute__((noinline)) printalias(const struct alias *ap) { + out1fmt(snlfmt, single_quote(ap->name)); } STATIC struct alias ** __lookupalias(const 
char *name) { - unsigned int hashval; struct alias **app; - const char *p; - unsigned int ch; - p = name; - - ch = (unsigned char)*p; - hashval = ch << 4; - while (ch) { - hashval += ch; - ch = (unsigned char)*++p; - } - app = &atab[hashval % ATABSIZE]; + app = &atab[hashval(name) % ATABSIZE]; for (; *app; app = &(*app)->next) { - if (equal(name, (*app)->name)) { + if (varequal(name, (*app)->name)) { break; } } diff --git a/src/alias.h b/src/alias.h index fb841d6..3aec80d 100644 --- a/src/alias.h +++ b/src/alias.h @@ -45,6 +45,7 @@ struct alias { }; struct alias *lookupalias(const char *, int); +void setalias(const char *, const char *); // libdash int aliascmd(int, char **); int unaliascmd(int, char **); void rmaliases(void); diff --git a/src/arith_yacc.c b/src/arith_yacc.c index 1a087c3..b978ef0 100644 --- a/src/arith_yacc.c +++ b/src/arith_yacc.c @@ -98,8 +98,8 @@ static intmax_t do_binop(int op, intmax_t a, intmax_t b) default: case ARITH_REM: case ARITH_DIV: - if (!b) - yyerror("division by zero"); + if (!b || (a == INTMAX_MIN && b == -1)) + yyerror("division error"); return op == ARITH_REM ? a % b : a / b; case ARITH_MUL: return a * b; diff --git a/src/bltin/.gitignore b/src/bltin/.gitignore new file mode 100644 index 0000000..ec96903 --- /dev/null +++ b/src/bltin/.gitignore @@ -0,0 +1,2 @@ +.deps +.dirstamp diff --git a/src/bltin/echo.1 b/src/bltin/echo.1 index fbc7fb4..4d1890f 100644 --- a/src/bltin/echo.1 +++ b/src/bltin/echo.1 @@ -66,13 +66,15 @@ and may be given. .Pp If any of the following sequences of characters is encountered during -output, the sequence is not output. Instead, the specified action is +output, the sequence is not output. +Instead, the specified action is performed: .Bl -tag -width indent .It Li \eb A backspace character is output. .It Li \ec -Subsequent output is suppressed. This is normally used at the end of the +Subsequent output is suppressed. 
+This is normally used at the end of the last argument to suppress the trailing newline that .Nm would otherwise output. diff --git a/src/bltin/printf.1 b/src/bltin/printf.1 index 3873173..409d434 100644 --- a/src/bltin/printf.1 +++ b/src/bltin/printf.1 @@ -202,7 +202,7 @@ and formats, or the maximum number of characters to be printed from a string .Sm off -.Pf ( Cm b No , +.Pf ( Cm b Ns \&, .Sm on .Cm B and @@ -281,16 +281,16 @@ value is the 1\-, 2\-, or 3\-digit octal number .Ar num . .It Cm \e^ Ns Ar c -Write the control character +Write the control character .Ar c . Generates characters `\e000' through `\e037`, and `\e177' (from `\e^?'). .It Cm \eM\- Ns Ar c -Write the character +Write the character .Ar c with the 8th bit set. Generates characters `\e241' through `\e376`. .It Cm \eM^ Ns Ar c -Write the control character +Write the control character .Ar c with the 8th bit set. Generates characters `\e000' through `\e037`, and `\e177' (from `\eM^?'). @@ -330,7 +330,7 @@ exits 0 on success, 1 on failure. .Sh SEE ALSO .Xr echo 1 , .Xr printf 3 , -.Xr printf 9 +.Xr printf 9 , .Xr vis 3 .Sh STANDARDS The @@ -350,5 +350,6 @@ to floating-point and then back again, floating-point precision may be lost. .Pp Hexadecimal character constants are restricted to, and should be specified -as, two character constants. This is contrary to the ISO C standard but +as, two character constants. +This is contrary to the ISO C standard but does guarantee detection of the end of the constant. diff --git a/src/bltin/printf.c b/src/bltin/printf.c index 7785735..106aecd 100644 --- a/src/bltin/printf.c +++ b/src/bltin/printf.c @@ -29,8 +29,7 @@ * SUCH DAMAGE. 
*/ -#include - +#include #include #include #include @@ -38,10 +37,10 @@ #include #include #include +#include #include static int conv_escape_str(char *, char **); -static char *conv_escape(char *, int *); static int getchr(void); static double getdouble(void); static uintmax_t getuintmax(int); @@ -56,6 +55,7 @@ static char **gargv; #define octtobin(c) ((c) - '0') #include "bltin.h" +#include "parser.h" #include "system.h" #define PF(f, func) { \ @@ -164,13 +164,17 @@ int printfcmd(int argc, char *argv[]) int *param; if (ch == '\\') { - int c_ch; - fmt = conv_escape(fmt, &c_ch); - ch = c_ch; - goto pc; + unsigned ret; + char *cp; + + STARTSTACKSTR(cp); + CHECKSTRSPACE(4, cp); + ret = conv_escape(fmt, cp, false); + fmt += ret >> 4; + out1mem(cp, ret & 15); + continue; } if (ch != '%' || (*fmt == '%' && (++fmt || 1))) { -pc: putchar(ch); continue; } @@ -275,82 +279,181 @@ int printfcmd(int argc, char *argv[]) static int conv_escape_str(char *str, char **sp) { - int c; - int ch; char *cp; + int c; /* convert string into a temporary buffer... */ STARTSTACKSTR(cp); do { - c = ch = *str++; - if (ch != '\\') - continue; + unsigned ret; + int ch; + + CHECKSTRSPACE(4, cp); c = *str++; - if (c == 'c') { - /* \c as in SYSV echo - abort all processing.... */ - c = ch = 0x100; + if (c != '\\') { +putchar: + USTPUTC(c, cp); continue; } + ch = *str; + if (ch == 'c') { + /* \c as in SYSV echo - abort all processing.... */ + c = 0x100; + goto putchar; + } + /* * %b string octal constants are not like those in C. * They start with a \0, and are followed by 0, 1, 2, * or 3 octal digits. 
*/ - if (c == '0' && isodigit(*str)) + if (ch == '0' && isodigit(str[1])) str++; /* Finally test for sequences valid in the format string */ - str = conv_escape(str - 1, &c); - } while (STPUTC(c, cp), (char)ch); + ret = conv_escape(str, cp, false); + str += ret >> 4; + cp += ret & 15; + } while (c & 0xff); *sp = cp; - return ch; + return c; } /* * Print "standard" escape characters */ -static char * -conv_escape(char *str, int *conv_ch) +unsigned conv_escape(char *str0, char *out0, bool mbchar) { - int value; + char *out = out0; + char *str = str0; + unsigned value; int ch; ch = *str; + value = ch; switch (ch) { default: - if (!isodigit(*str)) { - value = '\\'; - goto out; + if (mbchar && (ch == '"' || ch == '\'')) + break; + + if (ch == 'U') { + ch = 8; + goto hex; } - ch = 3; + value = '\\'; + + if (isodigit(ch)) { + ch = 3; + value = 0; + do { + value <<= 3; + value += octtobin(*str++); + } while (--ch && isodigit(*str)); + } + + str--; + break; + + case 'x': + ch = 2; + +hex: value = 0; do { - value <<= 3; - value += octtobin(*str++); - } while (isodigit(*str) && --ch); - goto out; + int c = *++str; + int d; + + if (c >= '0' && c <= '9') + d = c - '0'; + else { + int cl; + + cl = c & ~0x20; + if (cl >= 'A' && cl <= 'F') + d = cl - 'A' + 10; + else { + str--; + break; + } + } + + value <<= 4; + value += d; + } while (--ch); + + if (value < 0x80) + break; + + if (value < 0x110000) { + int mboff = (mbchar - 1) * 2; + unsigned uni = value; + int len; + + value = 0x80 << 8 | (value & 0xfc0) << 2 | + 0x80 | (value & 0x3f); + + if (uni < 0x800) { + value |= 0x40 << 8; + len = 2; + } else { + value |= 0x80 << 16 | (uni & 0x3f000) << 4; + if (uni < 0x10000) { + value |= 0x60 << 16; + len = 3; + } else { + value |= 0xf0 << 24 | + (uni & ~0x3ffff) << 6; + len = 4; + } + } - case '\\': value = '\\'; break; /* backslash */ - case 'a': value = '\a'; break; /* alert */ - case 'b': value = '\b'; break; /* backspace */ - case 'f': value = '\f'; break; /* form-feed */ + value = 
htonl(value << (4 - len) * 8); + + USTPUTC(CTLMBCHAR, out); + USTPUTC(len, out); + STADJUST(mboff, out); + *(uint32_t *)out = value; + STADJUST(len, out); + USTPUTC(len, out); + USTPUTC(CTLMBCHAR, out); + STADJUST(mboff, out); + } + + goto out_noput; + + case 'u': + ch = 4; + goto hex; + + case '\\': + break; + + case 'a': /* alert */ + case 'b': /* backspace */ + case 'f': /* form-feed */ + value -= 'a'; + value += '\a'; + break; + + case 'e': value = '\033'; break; /* */ case 'n': value = '\n'; break; /* newline */ case 'r': value = '\r'; break; /* carriage-return */ case 't': value = '\t'; break; /* tab */ case 'v': value = '\v'; break; /* vertical-tab */ } + USTPUTC(value, out); + +out_noput: str++; -out: - *conv_ch = value; - return str; + return (out - out0) | (str - str0) << 4; } static char * diff --git a/src/bltin/test.1 b/src/bltin/test.1 index 42435fb..03abce8 100644 --- a/src/bltin/test.1 +++ b/src/bltin/test.1 @@ -43,7 +43,7 @@ .Nm test .Ar expression .Nm \&[ -.Ar expression Cm ] +.Ar expression Cm \&] .Sh DESCRIPTION The .Nm test diff --git a/src/bltin/test.c b/src/bltin/test.c index b7188df..ac479bd 100644 --- a/src/bltin/test.c +++ b/src/bltin/test.c @@ -8,16 +8,17 @@ * This program is in the Public Domain. 
*/ -#include -#include - +#include "bltin.h" +#include "../exec.h" #include #include +#include +#include #include #include +#include +#include #include -#include -#include "bltin.h" /* test(1) accepts the following grammar: oexpr ::= aexpr | aexpr "-o" oexpr ; @@ -145,14 +146,9 @@ static int binop(void); static int filstat(char *, enum token); static enum token t_lex(char **); static int isoperand(char **); -static int newerf(const char *, const char *); -static int olderf(const char *, const char *); +static bool newerf(const char *, const char *); +static bool olderf(const char *, const char *); static int equalf(const char *, const char *); -#ifdef HAVE_FACCESSAT -static int test_file_access(const char *, int); -#else -static int test_access(const struct stat64 *, int); -#endif #ifdef HAVE_FACCESSAT # ifdef HAVE_TRADITIONAL_FACCESSAT @@ -254,6 +250,8 @@ oexpr(enum token n) for (;;) { res |= aexpr(n); + if (!*t_wp) + break; n = t_lex(t_wp + 1); if (n != BOR) break; @@ -270,6 +268,8 @@ aexpr(enum token n) for (;;) { if (!nexpr(n)) res = 0; + if (!*t_wp) + break; n = t_lex(t_wp + 1); if (n != BAND) break; @@ -361,9 +361,9 @@ binop(void) case STRNE: return strcmp(opnd1, opnd2) != 0; case STRLT: - return strcmp(opnd1, opnd2) < 0; + return strcoll(opnd1, opnd2) < 0; case STRGT: - return strcmp(opnd1, opnd2) > 0; + return strcoll(opnd1, opnd2) > 0; case INTEQ: return getn(opnd1) == getn(opnd2); case INTNE: @@ -422,8 +422,10 @@ filstat(char *nm, enum token mode) return (s.st_mode & S_ISUID) != 0; case FILSGID: return (s.st_mode & S_ISGID) != 0; +#ifdef S_ISVTX case FILSTCK: return (s.st_mode & S_ISVTX) != 0; +#endif case FILGZ: return !!s.st_size; case FILUID: @@ -470,49 +472,49 @@ static int isoperand(char **tp) return op && op->op_type == BINOP; } -static int -newerf (const char *f1, const char *f2) +static bool newerf(const char *f1, const char *f2) { - struct stat b1, b2; + struct stat64 b1, b2; + + if (stat64(f1, &b1) != 0) + return false; + if (stat64(f2, &b2) != 
0) + return true; #ifdef HAVE_ST_MTIM - return (stat (f1, &b1) == 0 && - stat (f2, &b2) == 0 && - ( b1.st_mtim.tv_sec > b2.st_mtim.tv_sec || - (b1.st_mtim.tv_sec == b2.st_mtim.tv_sec && (b1.st_mtim.tv_nsec > b2.st_mtim.tv_nsec ))) - ); + return b1.st_mtim.tv_sec > b2.st_mtim.tv_sec || + (b1.st_mtim.tv_sec == b2.st_mtim.tv_sec && + b1.st_mtim.tv_nsec > b2.st_mtim.tv_nsec); #else - return (stat (f1, &b1) == 0 && - stat (f2, &b2) == 0 && - b1.st_mtime > b2.st_mtime); + return b1.st_mtime > b2.st_mtime; #endif } -static int -olderf (const char *f1, const char *f2) +static bool olderf(const char *f1, const char *f2) { - struct stat b1, b2; + struct stat64 b1, b2; + + if (stat64(f2, &b2) != 0) + return false; + if (stat64(f1, &b1) != 0) + return true; #ifdef HAVE_ST_MTIM - return (stat (f1, &b1) == 0 && - stat (f2, &b2) == 0 && - (b1.st_mtim.tv_sec < b2.st_mtim.tv_sec || - (b1.st_mtim.tv_sec == b2.st_mtim.tv_sec && (b1.st_mtim.tv_nsec < b2.st_mtim.tv_nsec ))) - ); + return b1.st_mtim.tv_sec < b2.st_mtim.tv_sec || + (b1.st_mtim.tv_sec == b2.st_mtim.tv_sec && + b1.st_mtim.tv_nsec < b2.st_mtim.tv_nsec); #else - return (stat (f1, &b1) == 0 && - stat (f2, &b2) == 0 && - b1.st_mtime < b2.st_mtime); + return b1.st_mtime < b2.st_mtime; #endif } static int equalf (const char *f1, const char *f2) { - struct stat b1, b2; + struct stat64 b1, b2; - return (stat (f1, &b1) == 0 && - stat (f2, &b2) == 0 && + return (stat64(f1, &b1) == 0 && + stat64(f2, &b2) == 0 && b1.st_dev == b2.st_dev && b1.st_ino == b2.st_ino); } @@ -527,7 +529,7 @@ static int has_exec_bit_set(const char *path) return st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH); } -static int test_file_access(const char *path, int mode) +int test_file_access(const char *path, int mode) { if (faccessat_confused_about_superuser() && mode == X_OK && geteuid() == 0 && !has_exec_bit_set(path)) @@ -657,7 +659,7 @@ static int test_file_access(const char *path, int mode) * (euid==uid&&egid==gid), but uses st_mode for '-x' iff running as 
root. * i.e. it does strictly conform to 1003.1-2001 (and presumably 1003.2b). */ -static int test_access(const struct stat64 *sp, int stmode) +int test_access(const struct stat64 *sp, int stmode) { gid_t *groups; register int n; diff --git a/src/bltin/times.c b/src/bltin/times.c index 8eabc1f..252b084 100644 --- a/src/bltin/times.c +++ b/src/bltin/times.c @@ -12,19 +12,31 @@ #endif #include "system.h" -int timescmd() { +int timescmd(int argc, char *argv[]) { struct tms buf; long int clk_tck = sysconf(_SC_CLK_TCK); + int mutime, mstime, mcutime, mcstime; + double utime, stime, cutime, cstime; times(&buf); - printf("%dm%fs %dm%fs\n%dm%fs %dm%fs\n", - (int) (buf.tms_utime / clk_tck / 60), - ((double) buf.tms_utime) / clk_tck, - (int) (buf.tms_stime / clk_tck / 60), - ((double) buf.tms_stime) / clk_tck, - (int) (buf.tms_cutime / clk_tck / 60), - ((double) buf.tms_cutime) / clk_tck, - (int) (buf.tms_cstime / clk_tck / 60), - ((double) buf.tms_cstime) / clk_tck); + + utime = (double)buf.tms_utime / clk_tck; + mutime = utime / 60; + utime -= mutime * 60.0; + + stime = (double)buf.tms_stime / clk_tck; + mstime = stime / 60; + stime -= mstime * 60.0; + + cutime = (double)buf.tms_cutime / clk_tck; + mcutime = cutime / 60; + cutime -= mcutime * 60.0; + + cstime = (double)buf.tms_cstime / clk_tck; + mcstime = cstime / 60; + cstime -= mcstime * 60.0; + + printf("%dm%fs %dm%fs\n%dm%fs %dm%fs\n", mutime, utime, mstime, stime, + mcutime, cutime, mcstime, cstime); return 0; } diff --git a/src/cd.c b/src/cd.c index b6742af..bcd1484 100644 --- a/src/cd.c +++ b/src/cd.c @@ -96,7 +96,7 @@ cdcmd(int argc, char **argv) const char *path; const char *p; char c; - struct stat statb; + struct stat64 statb; int flags; int len; @@ -126,13 +126,13 @@ cdcmd(int argc, char **argv) } } if (!*dest) - dest = "."; + dest = dotdir; path = bltinlookup("CDPATH"); while (p = path, (len = padvance_magic(&path, dest, 0)) >= 0) { c = *p; p = stalloc(len); - if (stat(p, &statb) >= 0 && 
S_ISDIR(statb.st_mode)) { + if (stat64(p, &statb) >= 0 && S_ISDIR(statb.st_mode)) { if (c && c != ':') flags |= CD_PRINT; docd: diff --git a/src/dash.1 b/src/dash.1 index 32f6ac0..dbc34c9 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -33,8 +33,8 @@ .\" @(#)sh.1 8.6 (Berkeley) 5/4/95 .\" .Dd January 19, 2003 -.Os .Dt DASH 1 +.Os .Sh NAME .Nm dash .Nd command interpreter (shell) @@ -402,13 +402,11 @@ Append standard output (or n) to file. .It [n] Ns \*[Lt] file Redirect standard input (or n) from file. .It [n1] Ns \*[Lt]& Ns n2 -Copy file descriptor n2 as stdout (or fd n1). -fd n2. +Copy file descriptor n2 as stdin (or fd n1). .It [n] Ns \*[Lt]&- Close standard input (or n). .It [n1] Ns \*[Gt]& Ns n2 -Copy file descriptor n2 as stdin (or fd n1). -fd n2. +Copy file descriptor n2 as stdout (or fd n1). .It [n] Ns \*[Gt]&- Close standard output (or n). .It [n] Ns \*[Lt]\*[Gt] file @@ -439,7 +437,7 @@ instead of then leading tabs in the here-doc-text are stripped. .Ss Search and Execution There are three types of commands: shell functions, builtin commands, and -normal programs -- and the command is searched for (by name) in that order. +normal programs \(en and the command is searched for (by name) in that order. They each are executed in a different way. .Pp When a shell function is executed, all of the shell positional parameters @@ -553,13 +551,17 @@ by redirection operators that are part of the command. If the pipeline is not in the background (discussed later), the shell waits for all commands to complete. .Pp -If the reserved word ! does not precede the pipeline, the exit status is -the exit status of the last command specified in the pipeline. -Otherwise, the exit status is the logical NOT of the exit status of the -last command. -That is, if the last command returns zero, the exit status -is 1; if the last command returns greater than zero, the exit status is -zero. 
+If the +.Em pipefail +option was enabled when the shell began execution of the pipeline, the +pipeline's exit status is the exit status of the last command specified in +the pipeline that exited with non-zero status, or zero if all commands in +the pipeline exited with a status of zero. If the +.Em pipefail +option was not enabled, the pipeline's exit status is the exit status of +the last command specified in the pipeline; the exit statuses of any other +commands are not used. If the reserved word ! precedes the pipeline, its +exit status is the logical NOT of the exit status described above. .Pp Because pipeline assignment of standard input or standard output or both takes place before redirection, it can be modified by redirection. @@ -578,11 +580,11 @@ the preceding AND-OR-list. .Pp Note that unlike some other shells, each process in the pipeline is a child of the invoking shell (unless it is a shell builtin, in which case -it executes in the current shell -- but any effect it has on the +it executes in the current shell \(en but any effect it has on the environment is wiped). -.Ss Background Commands -- & +.Ss Background Commands \(en & If a command is terminated by the control operator ampersand (&), the -shell executes the command asynchronously -- that is, the shell does not +shell executes the command asynchronously \(en that is, the shell does not wait for the command to finish before executing the next command. .Pp The format for running a command in background is: @@ -592,7 +594,7 @@ The format for running a command in background is: If the shell is not interactive, the standard input of an asynchronous command is set to .Pa /dev/null . -.Ss Lists -- Generally Speaking +.Ss Lists \(en Generally Speaking A list is a sequence of zero or more commands separated by newlines, semicolons, or ampersands, and optionally terminated by one of these three characters. @@ -615,7 +617,7 @@ of the first command is nonzero. and .Dq || both have the same priority. 
-.Ss Flow-Control Constructs -- if, while, for, case +.Ss Flow-Control Constructs \(en if, while, for, case The syntax of the if command is .Bd -literal -offset indent if list @@ -694,7 +696,7 @@ Builtin commands grouped into a (list) will not affect the current shell. The second form does not fork another shell so is slightly more efficient. Grouping commands together this way allows you to redirect their output as though they were one program: -.Pp +.\".Pp .Bd -literal -offset indent { printf \*q hello \*q ; printf \*q world\\n" ; } \*[Gt] greeting .Ed @@ -1021,6 +1023,19 @@ The shell treats each character of the .Ev IFS as a delimiter and uses the delimiters to split the results of parameter expansion and command substitution into fields. +.Pp +If +.Ev IFS +is empty, field splitting yields no fields if the input string was empty, +and one string with the unchanged value of the input otherwise. +For example, with the default +.Ev IFS , +.Dq Ic read Fl r Ev l +will remove any initial whitespace, +but +.Dq Ev IFS Ns = Ic read Fl r Ev l +will leave the entire line in +.Ev l . .Ss Pathname Expansion (File Name Generation) Unless the .Fl f @@ -1095,6 +1110,8 @@ etc). .It : .It true A null command that returns a 0 (true) exit value. +.It false +A null command that returns a 1 (false) exit value. .It \&. file The commands in the specified file are read and executed by the shell. .It alias Op Ar name Ns Op Ar "=string ..." @@ -1143,8 +1160,8 @@ Do not execute the command but search for the command and print the absolute pathname of utilities, the name for builtins or the expansion of aliases. .El -.It cd Ar - -.It Xo cd Op Fl LP +.It cd|chdir Ar - +.It Xo cd|chdir Op Fl LP .Op Ar directory .Xc Switch to the specified directory (default @@ -1177,13 +1194,14 @@ mechanism was used or because the argument is a single dash. The .Fl P option causes the physical directory structure to be used, that is, all -symbolic links are resolved to their respective values. 
The +symbolic links are resolved to their respective values. +The .Fl L option turns off the effect of any preceding .Fl P options. .It Xo echo Op Fl n -.Ar args... +.Ar args... .Xc Print the arguments on the standard output, separated by spaces. Unless the @@ -1191,13 +1209,15 @@ Unless the option is present, a newline is output following the arguments. .Pp If any of the following sequences of characters is encountered during -output, the sequence is not output. Instead, the specified action is +output, the sequence is not output. +Instead, the specified action is performed: .Bl -tag -width indent .It Li \eb A backspace character is output. .It Li \ec -Subsequent output is suppressed. This is normally used at the end of the +Subsequent output is suppressed. +This is normally used at the end of the last argument to suppress the trailing newline that .Ic echo would otherwise output. @@ -1339,13 +1359,12 @@ The number of previous commands that are accessible. .El .It fg Op Ar job Move the specified job or the current job to the foreground. -.It getopts Ar optstring var +.It getopts Ar optstring var Op Ar arg ... The .Tn POSIX .Ic getopts command, not to be confused with the -.Em Bell Labs --derived +.Em Bell Labs Ns -derived .Xr getopt 1 . .Pp The first argument should be a series of letters, each of which may be @@ -1383,6 +1402,12 @@ then .Ev OPTARG will be unset. .Pp +By default, the variables +.Va $1 , ... , $n +are inspected; if +.Ar arg Ns s +are specified, they'll be parsed instead. +.Pp .Va optstring is a string of recognized option letters (see .Xr getopt 3 ) . @@ -1417,7 +1442,7 @@ and and the option .Op c , which requires an argument. -.Pp +.\".Pp .Bd -literal -offset indent while getopts abc: f do @@ -1427,18 +1452,19 @@ do \\?) 
echo $USAGE; exit 1;; esac done -shift `expr $OPTIND - 1` +shift $((OPTIND - 1)) .Ed .Pp This code will accept any of the following as equivalent: -.Pp +.\".Pp .Bd -literal -offset indent cmd \-acarg file file cmd \-a \-c arg file file cmd \-carg -a file file cmd \-a \-carg \-\- file file .Ed -.It hash Fl rv Ar command ... +.It hash Op Ar command ... +.It hash Fl r The shell maintains a hash table which remembers the locations of commands. With no arguments whatsoever, @@ -1454,13 +1480,51 @@ With arguments, the .Ic hash command removes the specified commands from the hash table (unless they are functions) and then locates them. -With the -.Fl v -option, hash prints the locations of the commands as it finds them. The .Fl r option causes the hash command to delete all the entries in the hash table except for functions. +.It jobs Oo Fl lp Oc Op Ar job ... +Display the status of all, or just the specified, +.Ar job Ns s : +.Bl -tag -compact -offset 5n -width "By default" +.It By default +display the job number, currency +.Pq Sy +- +status, if any, the job state, and its shell command. +.It Fl l +also output the PID of the group leader, and just the PID and shell commands +of other members of the job. +.It Fl p +Display only leader PIDs, one per line. +.El +.It kill Oo Fl s Ar sigspec | Fl Ns Ar signum | Fl Ns Ar sigspec Oc Op Ar pid | job ... +Equivalent to +.Xr kill 1 , +but a +.Ar job +spec may also be specified. +Signals can be either case-insensitive names without +.Dv SIG +prefixes or decimal numbers; the default is +.Dv TERM . +.It kill Fl l Op Ar signum | exitstatus +List available signal names without the +.Dv SIG +prefix +.Pq Ar sigspec Ns s . +If +.Ar signum +specified, display just the +.Ar sigspec +for that signal. +If +.Ar exitstatus +specified +.Pq > Sy 128 , +display just the +.Ar sigspec +that caused it. .It pwd Op Fl LP builtin command remembers what the current directory is rather than recomputing it each time. 
@@ -1471,7 +1535,8 @@ will continue to print the old name for the directory. The .Fl P option causes the physical value of the current working directory to be shown, -that is, all symbolic links are resolved to their respective values. The +that is, all symbolic links are resolved to their respective values. +The .Fl L option turns off the effect of any preceding .Fl P @@ -1522,43 +1587,38 @@ variables. With the .Fl p option specified the output will be formatted suitably for non-interactive use. -.Pp +.\".Pp .It Xo printf Ar format -.Op Ar arguments ... +.Oo Ar value Oc Ns ... .Xc .Ic printf -formats and prints its arguments, after the first, under control -of the -.Ar format . -The -.Ar format -is a character string which contains three types of objects: plain characters, +formats and prints its arguments according to +.Ar format , +a character string which contains three types of objects: plain characters, which are simply copied to standard output, character escape sequences which are converted and copied to the standard output, and format specifications, each of which causes printing of the next successive -.Ar argument . +.Ar value . .Pp -The -.Ar arguments -after the first are treated as strings if the corresponding format is +Each +.Ar value +is treated as a string if the corresponding format specification is either .Cm b , -.Cm c +.Cm c , or .Cm s ; -otherwise it is evaluated as a C constant, with the following extensions: -.Pp +otherwise it is evaluated as a C constant, with the following additions: .Bl -bullet -offset indent -compact .It A leading plus or minus sign is allowed. .It -If the leading character is a single or double quote, the value is the -.Tn ASCII -code of the next character. +If the leading character is a single or double quote, the value of the next byte. .El .Pp -The format string is reused as often as necessary to satisfy the -.Ar arguments . +The format string is reused as often as necessary until all +.Ar value Ns s +are consumed. 
Any extra format specifications are evaluated with zero or the null string. .Pp @@ -1780,11 +1840,26 @@ If options are given, it sets the specified option flags, or clears them as described in the section called .Sx Argument List Processing . As a special case, if the option is -o or +o and no argument is -supplied, the shell prints the settings of all its options. If the -option is -o, the settings are printed in a human-readable format; if -the option is +o, the settings are printed in a format suitable for +supplied, the shell prints the settings of all its options. +If the option is -o, +the settings are printed in a human-readable format; +if the option is +o, +the settings are printed in a format suitable for reinput to the shell to affect the same option settings. .Pp +In addition to the option names listed in the +.Sx Argument List Processing +section, the following options may be specified as arguments +to -o or +o: +.Bl -tag -width pipefail +.It Em pipefail +Derive the exit status of a pipeline from the exit statuses of all +of the commands in the pipeline, not just the last command, as +described in the +.Sx Pipelines +section. +.El +.Pp The third use of the set command is to set the values of the shell's positional parameters to the specified args. To change the positional @@ -1944,7 +2019,12 @@ and exist and .Ar file1 is newer than -.Ar file2 . +.Ar file2 , +or if +.Ar file1 +exists but +.Ar file2 +doesn't. .It Ar file1 Fl ot Ar file2 True if .Ar file1 @@ -1953,7 +2033,12 @@ and exist and .Ar file1 is older than -.Ar file2 . +.Ar file2 , +or if +.Ar file2 +exists but +.Ar file1 +doesn't. .It Ar file1 Fl ef Ar file2 True if .Ar file1 @@ -2058,7 +2143,8 @@ operator has higher precedence than the operator. .It times Print the accumulated user and system times for the shell and for processes -run from the shell. The return status is 0. +run from the shell. +The return status is 0. .It Xo trap .Op Ar action Ar signal ... 
.Xc @@ -2071,7 +2157,8 @@ is .Li 0 or .Li EXIT , -the action is executed when the shell exits. +the action is executed when the shell exits normally (that is not via an +unhandled signal like SIGINT). .Ar action may be empty .Li ( "''" ) , @@ -2114,7 +2201,7 @@ printed; for commands and tracked aliases the complete pathname of the command is printed. .It ulimit Xo .Op Fl H \*(Ba Fl S -.Op Fl a \*(Ba Fl tfdscmlpnv Op Ar value +.Op Fl a \*(Ba Fl tfdscmlpnvwr Op Ar value .Xc Inquire about or set the hard or soft limits on processes or set new limits. @@ -2167,19 +2254,26 @@ show or set the limit on the number files a process can have open at once .It Fl v show or set the limit on the total virtual memory that can be in use by a process (in kilobytes) +.It Fl w +show or set the limit on the total number of locks held by a process .It Fl r show or set the limit on the real-time scheduling priority of a process .El .Pp If none of these is specified, it is the limit on file size that is shown or set. -If value is specified, the limit is set to that number; otherwise -the current limit is displayed. +If +.Ar value +is specified, the limit is set to that number; otherwise +the current limit is displayed. The special +.Ar value +.Cm unlimited +represents the lack of any limit. .Pp Limits of an arbitrary process can be displayed or set using the .Xr sysctl 8 utility. -.Pp +.\".Pp .It umask Op Ar mask Set the value of umask (see .Xr umask 2 ) @@ -2308,11 +2402,13 @@ children of the shell, and is used in the history editing modes. .It Ev HISTSIZE The number of lines in the history buffer for the shell. .It Ev PWD -The logical value of the current working directory. This is set by the +The logical value of the current working directory. +This is set by the .Ic cd command. .It Ev OLDPWD -The previous logical value of the current working directory. This is set by +The previous logical value of the current working directory. +This is set by the .Ic cd command. 
@@ -2320,7 +2416,7 @@ command. The process ID of the parent process of the shell. .El .Sh FILES -.Bl -item -width HOMEprofilexxxx +.Bl -item .It .Pa $HOME/.profile .It @@ -2352,6 +2448,3 @@ in 2002. .Sh BUGS Setuid shell scripts should be avoided at all costs, as they are a significant security risk. -.Pp -PS1, PS2, and PS4 should be subject to parameter expansion before -being displayed. diff --git a/src/error.h b/src/error.h index 94e30a2..661a8a0 100644 --- a/src/error.h +++ b/src/error.h @@ -116,11 +116,7 @@ void __inton(void); #define int_pending() intpending void exraise(int) __attribute__((__noreturn__)); -#ifdef USE_NORETURN void onint(void) __attribute__((__noreturn__)); -#else -void onint(void); -#endif extern int errlinno; void sh_error(const char *, ...) __attribute__((__noreturn__)); void exerror(int, const char *, ...) __attribute__((__noreturn__)); diff --git a/src/eval.c b/src/eval.c index 1aad31a..0f2a7ba 100644 --- a/src/eval.c +++ b/src/eval.c @@ -41,6 +41,8 @@ * Evaluate a command. */ +#include "init.h" +#include "main.h" #include "shell.h" #include "nodes.h" #include "syntax.h" @@ -76,6 +78,10 @@ int exitstatus; /* exit status of last command */ int back_exitstatus; /* exit status of backquoted command */ int savestatus = -1; /* exit status of last command outside traps */ +/* Prevent PS4 nesting. 
*/ +MKINIT int inps4; + +MKINIT int tpip[2] = { -1 }; #if !defined(__alpha__) || (defined(__GNUC__) && __GNUC__ >= 3) STATIC @@ -121,6 +127,12 @@ EXITRESET { } evalskip = 0; loopnest = 0; + inps4 = 0; + + if (tpip[0] >= 0) { + close(tpip[0]); + close(tpip[1]); + } } #endif @@ -207,6 +219,9 @@ evaltree(union node *n, int flags) setstackmark(&smark); + if (nflag) + goto out; + if (n == NULL) { TRACE(("evaltree(NULL) called\n")); goto out; @@ -229,32 +244,29 @@ evaltree(union node *n, int flags) break; #endif case NNOT: - status = !evaltree(n->nnot.com, EV_TESTED); - goto setstatus; + status = evaltree(n->nnot.com, EV_TESTED); + if (!evalskip) + status = !status; + break; case NREDIR: errlinno = lineno = n->nredir.linno; if (funcline) lineno -= funcline - 1; expredir(n->nredir.redirect); pushredir(n->nredir.redirect); - status = redirectsafe(n->nredir.redirect, REDIR_PUSH) ?: - evaltree(n->nredir.n, flags & EV_TESTED); + status = redirectsafe(n->nredir.redirect, REDIR_PUSH); + if (status) + checkexit = EV_TESTED; + else + status = evaltree(n->nredir.n, flags & EV_TESTED); if (n->nredir.redirect) popredir(0); - goto setstatus; + break; case NCMD: -#ifdef notyet - if (eflag && !(flags & EV_TESTED)) - checkexit = ~0; - status = evalcommand(n, flags, (struct backcmd *)NULL); - goto setstatus; -#else evalfn = evalcommand; checkexit: - if (eflag && !(flags & EV_TESTED)) - checkexit = ~0; + checkexit = EV_TESTED; goto calleval; -#endif case NFOR: evalfn = evalfor; goto calleval; @@ -291,7 +303,7 @@ evaltree(union node *n, int flags) evalfn = evaltree; calleval: status = evalfn(n, flags); - goto setstatus; + break; case NIF: status = evaltree(n->nif.test, EV_TESTED); if (evalskip) @@ -304,17 +316,18 @@ evaltree(union node *n, int flags) goto evaln; } status = 0; - goto setstatus; + break; case NDEFUN: defun(n); -setstatus: - exitstatus = status; break; } + + exitstatus = status; + out: dotrap(); - if (checkexit & status) + if (eflag && (~flags & checkexit) && status) goto 
exexit; if (flags & EV_EXIT) { @@ -444,7 +457,8 @@ evalcase(union node *n, int flags) lineno -= funcline - 1; arglist.lastp = &arglist.list; - expandarg(n->ncase.expr, &arglist, EXP_TILDE); + expandarg(n->ncase.expr, &arglist, FNMATCH_IS_ENABLED ? EXP_TILDE : + EXP_TILDE | EXP_MBCHAR); for (cp = n->ncase.cases ; cp && evalskip == 0 ; cp = cp->nclist.next) { for (patp = cp->nclist.pattern ; patp ; patp = patp->narg.next) { if (casematch(patp, arglist.list->text)) { @@ -482,16 +496,18 @@ evalsubshell(union node *n, int flags) lineno -= funcline - 1; expredir(n->nredir.redirect); - if (!backgnd && flags & EV_EXIT && !have_traps()) - goto nofork; INTOFF; - jp = makejob(n, 1); - if (forkshell(jp, n, backgnd) == 0) { - INTON; + if (!backgnd && flags & EV_EXIT && !have_traps()) { + forkreset(NULL); + goto nofork; + } + jp = makejob(1); + if (forkshell(jp, n->nredir.n, backgnd) == 0) { flags |= EV_EXIT; if (backgnd) flags &=~ EV_TESTED; nofork: + INTON; redirect(n->nredir.redirect, 0); evaltreenr(n->nredir.n, flags); /* never returns */ @@ -562,7 +578,7 @@ evalpipe(union node *n, int flags) pipelen++; flags |= EV_EXIT; INTOFF; - jp = makejob(n, pipelen); + jp = makejob(pipelen); prevfd = -1; for (lp = n->npipe.cmdlist ; lp ; lp = lp->next) { prehash(lp->n); @@ -579,6 +595,7 @@ evalpipe(union node *n, int flags) close(pip[0]); } if (prevfd > 0) { + reset_input(); dup2(prevfd, 0); close(prevfd); } @@ -615,8 +632,9 @@ evalpipe(union node *n, int flags) void evalbackcmd(union node *n, struct backcmd *result) { - int pip[2]; struct job *jp; + int pip[2]; + int pid; result->fd = -1; result->buf = NULL; @@ -626,10 +644,13 @@ evalbackcmd(union node *n, struct backcmd *result) goto out; } - if (pipe(pip) < 0) - sh_error("Pipe call failed"); - jp = makejob(n, 1); - if (forkshell(jp, n, FORK_NOJOB) == 0) { + sh_pipe(pip, 0); + tpip[0] = pip[0]; + tpip[1] = pip[1]; + jp = makejob(1); + pid = forkshell(jp, n, FORK_NOJOB); + tpip[0] = -1; + if (pid == 0) { FORCEINTON; close(pip[0]); if 
(pip[1] != 1) { @@ -851,12 +872,14 @@ evalcommand(union node *cmd, int flags) } /* Print the command if xflag is set. */ - if (xflag) { + if (xflag && !inps4) { struct output *out; int sep; out = &preverrout; + inps4 = 1; outstr(expandstr(ps4val()), out); + inps4 = 0; sep = 0; sep = eprintlist(out, varlist.list, sep); eprintlist(out, osp, sep); @@ -885,6 +908,8 @@ evalcommand(union node *cmd, int flags) goto bail; default: + flush_input(); + /* Fork off a child process if necessary. */ if (!(flags & EV_EXIT) || have_traps()) { INTOFF; @@ -1120,6 +1145,7 @@ execcmd(int argc, char **argv) iflag = 0; /* exit on error */ mflag = 0; optschanged(); + flush_input(); shellexec(argv + 1, pathval(), 0); } return 0; diff --git a/src/exec.c b/src/exec.c index 87354d4..6fe0fed 100644 --- a/src/exec.c +++ b/src/exec.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #ifdef HAVE_PATHS_H #include @@ -271,11 +272,16 @@ hashcmd(int argc, char **argv) int c; struct cmdentry entry; char *name; + bool clear; - while ((c = nextopt("r")) != '\0') { + clear = false; + while ((c = nextopt("r")) != '\0') + clear = true; + if(clear) { clearcmdentry(); return 0; } + if (*argptr == NULL) { for (pp = cmdtable ; pp < &cmdtable[CMDTABLESIZE] ; pp++) { for (cmdp = *pp ; cmdp ; cmdp = cmdp->next) { @@ -319,7 +325,22 @@ printentry(struct tblentry *cmdp) out1fmt(snlfmt, cmdp->rehash ? "*" : nullstr); } +static int test_exec(const char *fullname, struct stat64 *statb) +{ + if (!S_ISREG(statb->st_mode)) + return 0; + if ((statb->st_mode & 0111) != 0111 && +#ifdef HAVE_FACCESSAT + !test_file_access(fullname, X_OK) +#else + !test_access(statb, X_OK) +#endif + ) + return 0; + + return 1; +} /* * Resolve a command name. 
If you change this routine, you may have to @@ -348,9 +369,12 @@ find_command(char *name, struct cmdentry *entry, int act, const char *path) if (errno == EINTR) continue; #endif +absfail: entry->cmdtype = CMDUNKNOWN; return; } + if (!test_exec(name, &statb)) + goto absfail; } entry->cmdtype = CMDNORMAL; return; @@ -445,9 +469,6 @@ find_command(char *name, struct cmdentry *entry, int act, const char *path) e = errno; goto loop; } - e = EACCES; /* if we fail, this will be the error */ - if (!S_ISREG(statb.st_mode)) - continue; if (lpathopt) { /* this is a %func directory */ stalloc(len); readcmdfile(fullname); @@ -458,20 +479,9 @@ find_command(char *name, struct cmdentry *entry, int act, const char *path) stunalloc(fullname); goto success; } -#ifdef notdef - /* XXX this code stops root executing stuff, and is buggy - if you need a group from the group list. */ - if (statb.st_uid == geteuid()) { - if ((statb.st_mode & 0100) == 0) - goto loop; - } else if (statb.st_gid == getegid()) { - if ((statb.st_mode & 010) == 0) - goto loop; - } else { - if ((statb.st_mode & 01) == 0) - goto loop; - } -#endif + e = EACCES; /* if we fail, this will be the error */ + if (!test_exec(fullname, &statb)) + continue; TRACE(("searchexec \"%s\" returns \"%s\"\n", name, fullname)); if (!updatetbl) { entry->cmdtype = CMDNORMAL; @@ -760,21 +770,17 @@ unsetfunc(const char *name) int typecmd(int argc, char **argv) { - int i; int err = 0; - for (i = 1; i < argc; i++) { - err |= describe_command(out1, argv[i], NULL, 1); + nextopt(nullstr); + while (*argptr) { + err |= describe_command(out1, *argptr++, NULL, 1); } return err; } -STATIC int -describe_command(out, command, path, verbose) - struct output *out; - char *command; - const char *path; - int verbose; +static int describe_command(struct output *out, char *command, + const char *path, int verbose) { struct cmdentry entry; struct tblentry *cmdp; @@ -875,10 +881,7 @@ describe_command(out, command, path, verbose) return 0; } -int 
-commandcmd(argc, argv) - int argc; - char **argv; +int commandcmd(int argc, char **argv) { char *cmd; int c; diff --git a/src/exec.h b/src/exec.h index 423b07e..8707d36 100644 --- a/src/exec.h +++ b/src/exec.h @@ -62,6 +62,8 @@ union node; extern const char *pathopt; /* set by padvance */ +struct stat64; + void shellexec(char **, const char *, int) __attribute__((__noreturn__)); int padvance_magic(const char **path, const char *name, int magic); @@ -78,6 +80,9 @@ void unsetfunc(const char *); int typecmd(int, char **); int commandcmd(int, char **); +int test_file_access(const char *path, int mode); +int test_access(const struct stat64 *sp, int stmode); + static inline int padvance(const char **path, const char *name) { return padvance_magic(path, name, 1); diff --git a/src/expand.c b/src/expand.c index af9cac9..bbf8454 100644 --- a/src/expand.c +++ b/src/expand.c @@ -32,49 +32,52 @@ * SUCH DAMAGE. */ -#include -#include -#include +#include #include -#include -#ifdef HAVE_GETPWNAM -#include -#endif -#include -#include -#include -#include -#include #ifdef HAVE_FNMATCH #include #endif #ifdef HAVE_GLOB #include #endif -#include +#include +#include +#ifdef HAVE_GETPWNAM +#include +#endif +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* * Routines to expand arguments to commands. We have to deal with * backquotes, shell variables, and file metacharacters. 
*/ -#include "shell.h" -#include "main.h" -#include "nodes.h" +#include "error.h" #include "eval.h" #include "expand.h" -#include "syntax.h" -#include "parser.h" #include "jobs.h" -#include "options.h" -#include "var.h" -#include "output.h" +#include "main.h" #include "memalloc.h" -#include "error.h" #include "mystring.h" +#include "nodes.h" +#include "options.h" +#include "output.h" +#include "parser.h" +#include "shell.h" #include "show.h" +#include "syntax.h" #include "system.h" +#include "var.h" /* * _rmescape() flags @@ -99,6 +102,14 @@ struct ifsregion { int nulonly; /* search for nul bytes only */ }; +struct ifs_state { + const char *ifs; + char *start; + char *r; + int maxargs; + int ifsspc; +}; + /* output of current string */ static char *expdest; /* list of back quote expressions */ @@ -110,6 +121,11 @@ static struct ifsregion *ifslastp; /* holds expanded arg list */ static struct arglist exparg; +static char ifsmap[128]; +static const char *ncifs; +static size_t ifsmb0len; +static wchar_t *wcifs; + static char *argstr(char *p, int flag); static char *exptilde(char *startp, int flag); static char *expari(char *start, int flag); @@ -117,26 +133,16 @@ STATIC void expbackq(union node *, int); STATIC char *evalvar(char *, int); static size_t strtodest(const char *p, int flags); static size_t memtodest(const char *p, size_t len, int flags); -STATIC ssize_t varvalue(char *, int, int, int); -STATIC void expandmeta(struct strlist *, int); -#ifdef HAVE_GLOB -STATIC void addglob(const glob_t *); -#else -STATIC void expmeta(char *, unsigned, unsigned); +STATIC ssize_t varvalue(char *, int, unsigned); +STATIC void expandmeta(struct strlist *); +static void addglob(const glob64_t *); +static char *expmeta(char *, unsigned, size_t); STATIC struct strlist *expsort(struct strlist *); STATIC struct strlist *msort(struct strlist *, int); -#endif STATIC void addfname(char *); STATIC int patmatch(char *, const char *); -#ifndef HAVE_FNMATCH -STATIC int pmatch(const char *, 
const char *); -#else -#define pmatch(a, b) !fnmatch((a), (b), 0) -#endif +STATIC int pmatch(char *, const char *); static size_t cvtnum(intmax_t num, int flags); -STATIC size_t esclen(const char *, const char *); -STATIC char *scanleft(char *, char *, char *, char *, int, int); -STATIC char *scanright(char *, char *, char *, char *, int, int); STATIC void varunset(const char *, const char *, const char *, int) __attribute__((__noreturn__)); @@ -149,21 +155,53 @@ STATIC void varunset(const char *, const char *, const char *, int) STATIC inline char * preglob(const char *pattern, int flag) { + if (FNMATCH_IS_ENABLED) { + if (!flag) + flag = RMESCAPE_GROW; + flag |= RMESCAPE_ALLOC; + } flag |= RMESCAPE_GLOB; return _rmescapes((char *)pattern, flag); } -STATIC size_t -esclen(const char *start, const char *p) { +static size_t mesclen(const char *start, const char *p, char mesc) { size_t esc = 0; - while (p > start && *--p == (char)CTLESC) { + while (p > start && *--p == mesc) { esc++; } return esc; } +static size_t esclen(const char *start, const char *p) { + return mesclen(start, p, CTLESC); +} + +static __attribute__((noinline)) unsigned mbnext(const char *p) +{ + unsigned start = 0; + unsigned end = 0; + unsigned ml; + int c; + + c = (signed char)p[end++]; + + switch (__builtin_expect(c, 0)) { + case CTLMBCHAR: + if ((signed char)p[end] == CTLESC) + end++; + ml = (unsigned char)p[end++]; + start = end; + end = ml + 2; + break; + case CTLESC: + start++; + break; + } + + return start | end << 8; +} static inline const char *getpwhome(const char *name) { @@ -205,7 +243,7 @@ expandarg(union node *arg, struct arglist *arglist, int flag) ifsbreakup(p, -1, &exparg); *exparg.lastp = NULL; exparg.lastp = &exparg.list; - expandmeta(exparg.list, flag); + expandmeta(exparg.list); } else { sp = (struct strlist *)stalloc(sizeof (struct strlist)); sp->text = p; @@ -240,6 +278,7 @@ static char *argstr(char *p, int flag) CTLESC, CTLVAR, CTLBACKQ, + CTLMBCHAR, CTLARI, CTLENDARI, 0 @@ 
-264,6 +303,8 @@ static char *argstr(char *p, int flag) start: startloc = expdest - (char *)stackblock(); for (;;) { + unsigned ml; + unsigned mb; int end; length += strcspn(p + length, reject); @@ -285,7 +326,7 @@ static char *argstr(char *p, int flag) q = stnputs(p, length, expdest); q[-1] &= end - 1; expdest = q - (flag & EXP_WORD ? end : 0); - newloc = expdest - (char *)stackblock() - end; + newloc = q - (char *)stackblock() - end; if (breakall && !inquotes && newloc > startloc) { recordregion(startloc, newloc, 0); } @@ -313,8 +354,7 @@ static char *argstr(char *p, int flag) continue; case CTLQUOTEMARK: /* "$@" syntax adherence hack */ - if (!inquotes && !memcmp(p, dolatstr + 1, - DOLATSTRLEN - 1)) { + if (!inquotes && !strcmp(p, dolatstr + 1)) { p = evalvar(p + 1, flag | EXP_QUOTED) + 1; goto start; } @@ -326,6 +366,22 @@ static char *argstr(char *p, int flag) startloc++; } break; + case CTLMBCHAR: + c = (signed char)*p--; + mb = mbnext(p); + ml = (mb >> 8) - 2; + if (flag & (QUOTES_ESC | EXP_MBCHAR)) { + length = (mb >> 8) + (mb & 0xff); + if (c == (char)CTLESC) + startloc += length; + break; + } + if (c == CTLESC) + startloc += ml; + p += mb & 0xff; + expdest = stnputs(p, ml, expdest); + p += mb >> 8; + break; case CTLESC: startloc++; length++; @@ -456,9 +512,6 @@ static char *expari(char *start, int flag) removerecordregions(begoff); - if (likely(flag & QUOTES_ESC)) - rmescapes(start); - pushstackmark(&sm, endoff); result = arith(start); popstackmark(&sm); @@ -525,7 +578,7 @@ expbackq(union node *cmd, int flag) /* Eat all trailing newlines */ dest = expdest; - for (; dest > (char *)stackblock() && dest[-1] == '\n';) + for (; dest > ((char *)stackblock() + startloc) && dest[-1] == '\n';) STUNPUTC(dest); expdest = dest; @@ -541,10 +594,8 @@ expbackq(union node *cmd, int flag) } -STATIC char * -scanleft( - char *startp, char *rmesc, char *rmescend, char *str, int quotes, - int zero +static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend, + 
char *str, int quotes, int zero ) { char *loc; char *loc2; @@ -553,57 +604,71 @@ scanleft( loc = startp; loc2 = rmesc; do { + char *s = FNMATCH_IS_ENABLED ? loc2 : loc; + unsigned mb; + unsigned ml; int match; - const char *s = loc2; - c = *loc2; + + c = *s; if (zero) { - *loc2 = '\0'; - s = rmesc; + *s = '\0'; + s = FNMATCH_IS_ENABLED ? rmesc : startp; } match = pmatch(str, s); - *loc2 = c; + *(FNMATCH_IS_ENABLED ? loc2 : loc) = c; if (match) - return loc; - if (quotes && *loc == (char)CTLESC) - loc++; - loc++; - loc2++; - } while (c); + return quotes ? loc : loc2; + + if (!c) + break; + + mb = mbnext(loc); + loc += (mb & 0xff) + (mb >> 8); + ml = (mb >> 8) > 3 ? (mb >> 8) - 2 : 1; + loc2 += ml; + } while (1); return 0; } -STATIC char * -scanright( - char *startp, char *rmesc, char *rmescend, char *str, int quotes, - int zero +static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend, + char *str, int quotes, int zero ) { - int esc = 0; + size_t esc = 0; char *loc; char *loc2; - for (loc = str - 1, loc2 = rmescend; loc >= startp; loc2--) { + for (loc = endp, loc2 = rmescend;; loc2--) { + char *s = FNMATCH_IS_ENABLED ? loc2 : loc; + char c = *s; + unsigned ml; int match; - char c = *loc2; - const char *s = loc2; + if (zero) { - *loc2 = '\0'; - s = rmesc; + *s = '\0'; + s = FNMATCH_IS_ENABLED ? rmesc : startp; } match = pmatch(str, s); - *loc2 = c; + *(FNMATCH_IS_ENABLED ? loc2 : loc) = c; if (match) - return loc; - loc--; - if (quotes) { - if (--esc < 0) { - esc = esclen(startp, loc); - } - if (esc % 2) { - esc--; - loc--; - } + return quotes ? 
loc : loc2; + if (--loc < startp) + break; + if (!esc--) + esc = esclen(startp, loc); + if (esc % 2) { + esc--; + loc--; + continue; } + if (*loc != (char)CTLMBCHAR) + continue; + + ml = (unsigned char)*--loc; + loc -= ml + 2; + if (*loc == (char)CTLESC) + loc--; + loc2 -= ml - 1; } return 0; } @@ -618,7 +683,9 @@ static char *subevalvar(char *start, char *str, int strloc, int startloc, long amount; char *rmesc, *rmescend; int zero; - char *(*scan)(char *, char *, char *, char *, int , int); + char *(*scan)(char *, char *, char *, char *, char *, int , int); + int nstrloc = strloc; + char *endp; char *p; p = argstr(start, (flag & EXP_DISCARD) | EXP_TILDE | @@ -646,35 +713,54 @@ static char *subevalvar(char *start, char *str, int strloc, int startloc, abort(); #endif - rmesc = startp; rmescend = stackblock() + strloc; - if (quotes) { + str = preglob(rmescend, 0); + if (FNMATCH_IS_ENABLED) { + startp = stackblock() + startloc; + rmescend = stackblock() + strloc; + nstrloc = str - (char *)stackblock(); + } + + rmesc = startp; + if (FNMATCH_IS_ENABLED || !quotes) { rmesc = _rmescapes(startp, RMESCAPE_ALLOC | RMESCAPE_GROW); - if (rmesc != startp) { + if (rmesc != startp) rmescend = expdest; - startp = stackblock() + startloc; - } + startp = stackblock() + startloc; + str = stackblock() + nstrloc; } rmescend--; - str = stackblock() + strloc; - preglob(str, 0); /* zero = subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX */ zero = subtype >> 1; /* VSTRIMLEFT/VSTRIMRIGHTMAX -> scanleft */ scan = (subtype & 1) ^ zero ? 
scanleft : scanright; - loc = scan(startp, rmesc, rmescend, str, quotes, zero); - if (loc) { - if (zero) { - memmove(startp, loc, str - loc); - loc = startp + (str - loc) - 1; + endp = stackblock() + strloc - 1; + loc = scan(startp, endp, rmesc, rmescend, str, quotes, zero); + if (!loc) { + if (quotes) { + rmesc = startp; + rmescend = endp; } - *loc = '\0'; - } else - loc = str - 1; + } else if (!quotes) { + if (zero) + rmesc = loc; + else + rmescend = loc; + } else if (zero) { + rmesc = loc; + rmescend = endp; + } else { + rmesc = startp; + rmescend = loc; + } + + memmove(startp, rmesc, rmescend - rmesc); + loc = startp + (rmescend - rmesc); out: + *loc = '\0'; amount = loc - expdest; STADJUST(amount, expdest); @@ -700,8 +786,9 @@ evalvar(char *p, int flag) ssize_t varlen; int discard; int quoted; + int mbchar; - varflags = *p++; + varflags = *p++ & ~VSBIT; subtype = varflags & VSTYPE; quoted = flag & EXP_QUOTED; @@ -709,8 +796,18 @@ evalvar(char *p, int flag) startloc = expdest - (char *)stackblock(); p = strchr(p, '=') + 1; + mbchar = 0; + switch (subtype) { + case VSTRIMLEFT: + case VSTRIMLEFTMAX: + case VSTRIMRIGHT: + case VSTRIMRIGHTMAX: + mbchar = EXP_MBCHAR; + break; + } + again: - varlen = varvalue(var, varflags, flag, quoted); + varlen = varvalue(var, varflags, flag | mbchar); if (varflags & VSNUL) varlen--; @@ -793,6 +890,52 @@ evalvar(char *p, int flag) return p; } +static char *chtodest(int c, const char *syntax, char *out) +{ + if (syntax[c] == CCTL) + USTPUTC(CTLESC, out); + USTPUTC(c, out); + + return out; +} + +struct mbpair { + unsigned ml; + unsigned ql; +}; + +static struct mbpair mbtodest(const char *p, char *q, const char *syntax, + size_t len) +{ + mbstate_t mbs = {}; + struct mbpair mbp; + char *q0 = q; + size_t ml; + + ml = mbrlen(--p, len, &mbs); + if (ml == -2 || ml == -1 || ml < 2) { + q = chtodest((signed char)*p, syntax, q); + ml = 1; + goto out; + } + + if (syntax[CTLMBCHAR] == CCTL) { + USTPUTC(CTLMBCHAR, q); + USTPUTC(ml, q); + } + 
+ q = mempcpy(q, p, ml); + + if (syntax[CTLMBCHAR] == CCTL) { + USTPUTC(ml, q); + USTPUTC(CTLMBCHAR, q); + } + +out: + mbp.ml = ml - 1; + mbp.ql = q - q0; + return mbp; +} /* * Put a string on the stack. @@ -800,38 +943,77 @@ evalvar(char *p, int flag) static size_t memtodest(const char *p, size_t len, int flags) { - const char *syntax = flags & EXP_QUOTED ? DQSYNTAX : BASESYNTAX; + const char *syntax; + size_t count = 0; + int expq; char *q; - char *s; if (unlikely(!len)) return 0; - q = makestrspace(len * 2, expdest); - s = q; + /* CTLMBCHAR, 2, c, c, 2, CTLMBCHAR */ + q = makestrspace(len * 3, expdest); - do { +#if QUOTES_ESC != 0x11 || EXP_MBCHAR != 0x20 || EXP_QUOTED != 0x100 +#error QUOTES_ESC != 0x11 || EXP_MBCHAR != 0x20 || EXP_QUOTED != 0x100 +#endif + expq = flags & EXP_QUOTED; + if (likely(!(flags & (expq >> 3 | expq >> 4 | expq >> 8) & + (QUOTES_ESC | EXP_MBCHAR)))) { + while (len >= 8) { + uint64_t x; + + memcpy(&x, p + count, sizeof(x)); + + if ((x | (x - 0x0101010101010101)) & + 0x8080808080808080) + break; + + memcpy(q + count, &x, sizeof(x)); + + count += 8; + len -= 8; + } + + q += count; + p += count; + + syntax = flags & (QUOTES_ESC | EXP_MBCHAR) ? 
+ BASESYNTAX : is_type; + } else + syntax = SQSYNTAX; + + for (; len; len--) { int c = (signed char)*p++; - if (c) { - if ((flags & QUOTES_ESC) && - ((syntax[c] == CCTL) || - (flags & EXP_QUOTED && syntax[c] == CBACK))) - USTPUTC(CTLESC, q); - } else if (!(flags & EXP_KEEPNUL)) + + if (unlikely(!c && !(flags & EXP_KEEPNUL))) continue; - USTPUTC(c, q); - } while (--len); + + count++; + + if (unlikely(c < 0)) { + struct mbpair mbp = mbtodest(p, q, syntax, len); + unsigned mlm; + + q += mbp.ql; + mlm = mbp.ml; + p += mlm; + len -= mlm; + continue; + } + + q = chtodest(c, syntax, q); + } expdest = q; - return q - s; + return count; } static size_t strtodest(const char *p, int flags) { size_t len = strlen(p); - memtodest(p, len, flags); - return len; + return memtodest(p, len, flags); } @@ -840,31 +1022,33 @@ static size_t strtodest(const char *p, int flags) * Add the value of a specialized variable to the stack string. */ -STATIC ssize_t -varvalue(char *name, int varflags, int flags, int quoted) +static ssize_t varvalue(char *name, int varflags, unsigned flags) { + int subtype = varflags & VSTYPE; + unsigned long seplen; + const char *seps; + ssize_t len = 0; + size_t start; + int discard; + char **ap; int num; char *p; int i; - int sep; - char sepc; - char **ap; - int subtype = varflags & VSTYPE; - int discard = (subtype == VSPLUS || subtype == VSLENGTH) | - (flags & EXP_DISCARD); - ssize_t len = 0; - char c; - if (!subtype) { + discard = (subtype == VSPLUS || subtype == VSLENGTH) | + (flags & EXP_DISCARD); + + if (unlikely(!subtype)) { if (discard) return -1; sh_error("Bad substitution"); } - flags |= EXP_KEEPNUL; flags &= discard ? 
~QUOTES_ESC : ~0; - sep = (flags & EXP_FULL) << CHAR_BIT; + seps = nullstr; + seplen = flags & EXP_FULL; + start = expdest - (char *)stackblock(); switch (*name) { case '$': @@ -894,13 +1078,14 @@ varvalue(char *name, int varflags, int flags, int quoted) expdest = p; break; case '@': - if (quoted && sep) + if ((flags & (EXP_QUOTED | EXP_FULL)) == + (EXP_QUOTED | EXP_FULL)) goto param; /* fall through */ case '*': - /* We will set c to 0 or ~0 depending on whether + /* We will set seplen to 0 or !0 depending on whether * we're doing field splitting. We won't do field - * splitting if either we're quoted or sep is zero. + * splitting if either we're quoted or seplen is zero. * * Instead of testing (quoted || !sep) the following * trick optimises away any branches by using the @@ -912,20 +1097,22 @@ varvalue(char *name, int varflags, int flags, int quoted) #if EXP_QUOTED >> CHAR_BIT != EXP_FULL #error The following two lines expect EXP_QUOTED == EXP_FULL << CHAR_BIT #endif - c = !((quoted | ~sep) & EXP_QUOTED) - 1; - sep &= ~quoted; - sep |= ifsset() ? 
(unsigned char)(c & ifsval()[0]) : ' '; + seplen &= ~(flags >> CHAR_BIT); + if (!seplen) + seps = ncifs; + seplen = ((seplen - 1) & (ifsmb0len - 1)) + 1; param: - sepc = sep; if (!(ap = shellparam.p)) return -1; - while ((p = *ap++)) { + if (!(p = *ap)) + break; + for (;;) { len += strtodest(p, flags); - if (*ap && sep) { - len++; - memtodest(&sepc, 1, flags); - } + if (!(p = *++ap)) + break; + + len += memtodest(seps, seplen, flags | EXP_KEEPNUL); } break; case '0': @@ -954,7 +1141,7 @@ varvalue(char *name, int varflags, int flags, int quoted) } if (discard) - STADJUST(-len, expdest); + expdest = (char *)stackblock() + start; return len; } @@ -986,7 +1173,127 @@ recordregion(int start, int end, int nulonly) ifslastp->nulonly = nulonly; } +static unsigned ifsisifs(const char *p, unsigned ml, const char *ifs) +{ + bool isdefifs = false; + bool isifs = false; + wchar_t wc = *p; + wchar_t ifs0; + + if (likely(ifs[0]) && unlikely(wcifs)) { + if (wc & 0x80) { + mbstate_t mbst = {}; + wchar_t wc2; + + if (mbrtowc(&wc2, p, ml, &mbst) != ml) + goto out; + wc = wc2; + } + + isifs = wcschr(wcifs, wc); + ifs0 = wcifs[0]; + } else if (likely(!ml)) { + isifs = strchr(ifs, wc); + ifs0 = ifs[0]; + } + + if (isifs) + isdefifs = iswspace(wc ?: ifs0); + +out: + return isifs << 1 | isdefifs; +} + +static char *ifsbreakup_slow(struct ifs_state *ifst, struct arglist *arglist, + int nulonly, char *p) +{ + struct strlist *sp; + unsigned ifschar; + unsigned sisifs; + bool isdefifs; + unsigned ml; + bool isifs; + char *q; + + q = p; + + ifschar = mbnext(p); + p += ifschar & 0xff; + ml = (ifschar >> 8) > 3 ? + (ifschar >> 8) - 2 : 0; + + sisifs = ifsisifs(p, ml, ifst->ifs); + p += ifschar >> 8; + + isifs = sisifs >> 1; + isdefifs = sisifs & 1; + + /* If only reading one more argument: + * If we have exactly one field, + * read that field without its terminator. + * If we have more than one field, + * read all fields including their terminators, + * except for trailing IFS whitespace. 
+ * + * This means that if we have only IFS + * characters left, and at most one + * of them is non-whitespace, we stop + * reading here. + * Otherwise, we read all the remaining + * characters except for trailing + * IFS whitespace. + * + * In any case, r indicates the start + * of the characters to remove, or NULL + * if no characters should be removed. + */ + if (!ifst->maxargs) { + if (isdefifs) { + if (!ifst->r) + ifst->r = q; + return p; + } + + if (!(isifs && ifst->ifsspc)) + ifst->r = NULL; + } else if (ifst->ifsspc) { + if (isifs) + q = p; + + ifst->start = q; + if (isdefifs) + return p; + } else if (isifs) { + int ifsspc = ifst->ifsspc; + + if (!nulonly) { + ifsspc = isdefifs; + ifst->ifsspc = ifsspc; + } + + /* Ignore IFS whitespace at start */ + if (q == ifst->start && ifsspc) { + ifst->start = p; + goto out_zero_ifsspc; + } + if (ifst->maxargs > 0 && !--ifst->maxargs) { + ifst->r = q; + return p; + } + *q = '\0'; + sp = (struct strlist *)stalloc(sizeof *sp); + sp->text = ifst->start; + *arglist->lastp = sp; + arglist->lastp = &sp->next; + ifst->start = p; + return p; + } + +out_zero_ifsspc: + ifst->ifsspc = 0; + return p; +} /* * Break the argument string into pieces based upon IFS and add the @@ -999,21 +1306,19 @@ void ifsbreakup(char *string, int maxargs, struct arglist *arglist) { struct ifsregion *ifsp; + struct ifs_state ifst; + const char *realifs; struct strlist *sp; - char *start; - char *p; - char *q; - char *r = NULL; - const char *ifs, *realifs; - int ifsspc; int nulonly; + char *p; - - start = string; + ifst.r = NULL; + ifst.start = string; + ifst.maxargs = maxargs; if (ifslastp != NULL) { - ifsspc = 0; + ifst.ifsspc = 0; nulonly = 0; - realifs = ifsset() ? ifsval() : defifs; + realifs = ncifs; ifsp = &ifsfirst; do { int afternul; @@ -1021,106 +1326,60 @@ ifsbreakup(char *string, int maxargs, struct arglist *arglist) p = string + ifsp->begoff; afternul = nulonly; nulonly = ifsp->nulonly; - ifs = nulonly ? 
nullstr : realifs; - ifsspc = 0; - while (p < string + ifsp->endoff) { - int c; - bool isifs; - bool isdefifs; - - q = p; - c = *p++; - if (c == (char)CTLESC) - c = *p++; - - isifs = strchr(ifs, c); - isdefifs = false; - if (isifs) - isdefifs = strchr(defifs, c); - - /* If only reading one more argument: - * If we have exactly one field, - * read that field without its terminator. - * If we have more than one field, - * read all fields including their terminators, - * except for trailing IFS whitespace. - * - * This means that if we have only IFS - * characters left, and at most one - * of them is non-whitespace, we stop - * reading here. - * Otherwise, we read all the remaining - * characters except for trailing - * IFS whitespace. - * - * In any case, r indicates the start - * of the characters to remove, or NULL - * if no characters should be removed. - */ - if (!maxargs) { - if (isdefifs) { - if (!r) - r = q; - continue; - } - - if (!(isifs && ifsspc)) - r = NULL; - - ifsspc = 0; - continue; + ifst.ifs = nulonly ? 
nullstr : realifs; + ifst.ifsspc = 0; + for (;;) { + char *p0 = p; + + while (string + ifsp->endoff - p >= 8) { + union { + uint64_t qw; + unsigned char b[8]; + } x; + + memcpy(&x.qw, p, sizeof(x.qw)); + + if ((x.qw & 0x8080808080808080)) + break; + if (ifsmap[x.b[0]] | + ifsmap[x.b[1]] | + ifsmap[x.b[2]] | + ifsmap[x.b[3]] | + ifsmap[x.b[4]] | + ifsmap[x.b[5]] | + ifsmap[x.b[6]] | + ifsmap[x.b[7]]) + break; + p += 8; } - if (ifsspc) { - if (isifs) - q = p; - - start = q; - - if (isdefifs) - continue; - - isifs = false; + if (p != p0) { + if (!ifst.maxargs) + ifst.r = NULL; + else if (ifst.ifsspc) + ifst.start = p0; + ifst.ifsspc = 0; } - if (isifs) { - if (!(afternul || nulonly)) - ifsspc = isdefifs; - /* Ignore IFS whitespace at start */ - if (q == start && ifsspc) { - start = p; - ifsspc = 0; - continue; - } - if (maxargs > 0 && !--maxargs) { - r = q; - continue; - } - *q = '\0'; - sp = (struct strlist *)stalloc(sizeof *sp); - sp->text = start; - *arglist->lastp = sp; - arglist->lastp = &sp->next; - start = p; - continue; - } + if (p >= string + ifsp->endoff) + break; - ifsspc = 0; + p = ifsbreakup_slow(&ifst, arglist, + afternul | nulonly, p); } } while ((ifsp = ifsp->next) != NULL); if (nulonly) goto add; + if (ifst.r) + *ifst.r = '\0'; } - if (r) - *r = '\0'; - - if (!*start) + if (!*ifst.start) return; add: sp = (struct strlist *)stalloc(sizeof *sp); - sp->text = start; + sp->text = ifst.start; *arglist->lastp = sp; arglist->lastp = &sp->next; } @@ -1146,31 +1405,97 @@ void ifsfree(void) ifslastp = NULL; } +void changeifs(const char *ifs) +{ + mbstate_t mbs = {}; + wchar_t *nwcifs; + unsigned mb = 0; + size_t len = 0; + const char *p; + size_t ml; + + if (!ifsset()) + ifs = defifs; + ncifs = ifs; + + memset(ifsmap, 0, sizeof(ifsmap)); + + for (p = ifs;; p++) { + unsigned c = (unsigned char)*p; + + mb |= c >> 7; + if (!(c >> 7)) + ifsmap[c] = 1; + + if (c == 0) + break; + + len++; + } + + nwcifs = NULL; + ifsmb0len = !!len; + + if (!mb) + goto out; + + ml = 
mbrlen(ifs, len, &mbs); + if (ml == -2 || ml == -1) + ml = 1; + ifsmb0len = ml; + + nwcifs = ckmalloc((len + 1) * sizeof(*wcifs)); + memset(nwcifs, 0, (len + 1) * sizeof(*wcifs)); + + p = ifs; + mbsrtowcs(nwcifs, &p, len + 1, &mbs); + +out: + ckfree(wcifs); + wcifs = nwcifs; +} /* * Expand shell metacharacters. At this point, the only control characters * should be escapes. The results are stored in the list exparg. */ -#ifdef HAVE_GLOB -STATIC void -expandmeta(str, flag) - struct strlist *str; - int flag; +#ifdef __GLIBC__ +static void *opendir_interruptible(const char *pathname) { - /* TODO - EXP_REDIR */ + if (int_pending()) { + suppressint = 0; + onint(); + } + return opendir(pathname); +} +#else +#define GLOB_ALTDIRFUNC 0 +#endif + +static void expandmeta_glob(struct strlist *str) +{ while (str) { const char *p; - glob_t pglob; + glob64_t pglob; int i; if (fflag) goto nometa; + +#ifdef __GLIBC__ + pglob.gl_closedir = (void *)closedir; + pglob.gl_readdir = (void *)readdir64; + pglob.gl_opendir = opendir_interruptible; + pglob.gl_lstat = lstat64; + pglob.gl_stat = stat64; +#endif + INTOFF; - p = preglob(str->text, RMESCAPE_ALLOC | RMESCAPE_HEAP); - i = glob(p, GLOB_NOMAGIC, 0, &pglob); + p = preglob(str->text, RMESCAPE_HEAP); + i = glob64(p, GLOB_ALTDIRFUNC | GLOB_NOMAGIC, 0, &pglob); if (p != str->text) ckfree(p); switch (i) { @@ -1179,12 +1504,12 @@ expandmeta(str, flag) (GLOB_NOMAGIC | GLOB_NOCHECK)) goto nometa2; addglob(&pglob); - globfree(&pglob); + globfree64(&pglob); INTON; break; case GLOB_NOMATCH: nometa2: - globfree(&pglob); + globfree64(&pglob); INTON; nometa: *exparg.lastp = str; @@ -1203,9 +1528,7 @@ expandmeta(str, flag) * Add the result of glob(3) to the list. 
*/ -STATIC void -addglob(pglob) - const glob_t *pglob; +static void addglob(const glob64_t *pglob) { char **p = pglob->gl_pathv; @@ -1214,20 +1537,14 @@ addglob(pglob) } while (*++p); } - -#else /* HAVE_GLOB */ -STATIC char *expdir; -STATIC unsigned expdir_max; - - STATIC void -expandmeta(struct strlist *str, int flag) +expandmeta(struct strlist *str) { - static const char metachars[] = { - '*', '?', '[', 0 - }; /* TODO - EXP_REDIR */ + if (GLOB_IS_ENABLED) + return expandmeta_glob(str); + while (str) { struct strlist **savelastp; struct strlist *sp; @@ -1236,18 +1553,15 @@ expandmeta(struct strlist *str, int flag) if (fflag) goto nometa; - if (!strpbrk(str->text, metachars)) + if (!strpbrk(str->text, "*?]") || !strcmp(str->text, "]")) goto nometa; savelastp = exparg.lastp; INTOFF; p = preglob(str->text, RMESCAPE_ALLOC | RMESCAPE_HEAP); len = strlen(p); - expdir_max = len + PATH_MAX; - expdir = ckmalloc(expdir_max); expmeta(p, len, 0); - ckfree(expdir); if (p != str->text) ckfree(p); INTON; @@ -1270,131 +1584,188 @@ expandmeta(struct strlist *str, int flag) } } +static void addfname_common(char *name) +{ + struct strlist *sp; + + sp = (struct strlist *)stalloc(sizeof *sp); + sp->text = name; + *exparg.lastp = sp; + exparg.lastp = &sp->next; +} + +static char *addfnamealt(char *enddir, size_t expdir_len) +{ + char *name; + + name = grabstackstr(enddir); + addfname_common(name); + + STARTSTACKSTR(enddir); + return stnputs(name, expdir_len, enddir) - expdir_len; +} + +static char *expmeta_rmescapes(char *enddir, const char *name) +{ + const char *p; + + if (!FNMATCH_IS_ENABLED) + return strchrnul(rmescapes(strcpy(enddir, name)), 0); + + p = name; + do { + char *q = strchrnul(p, '\\'); + + enddir = mempcpy(enddir, p, q - p + 1); + p = q; + if (!*p) + break; + if (*++p) + enddir[-1] = *p++; + } while (1); + + return enddir - 1; +} + +#ifndef HAVE_MEMRCHR +static void *memrchr(const void *s, int c, size_t n) +{ + const unsigned char *str = s; + const unsigned char *cp; + 
+ for (cp = str + n - 1; cp >= str; cp--) + if (*cp == c) + return cp; + return NULL; +} +#endif /* * Do metacharacter (i.e. *, ?, [...]) expansion. */ -STATIC void -expmeta(char *name, unsigned name_len, unsigned expdir_len) +static char *expmeta(char *name, unsigned name_len, size_t expdir_len) { - char *enddir = expdir + expdir_len; - char *p; - const char *cp; - char *start; - char *endname; - int metaflag; + const char mesc = FNMATCH_IS_ENABLED ? '\\' : CTLESC; + struct jmploc *volatile savehandler; + struct jmploc jmploc; struct stat64 statb; - DIR *dirp; - struct dirent *dp; - int atend; + struct dirent64 *dp; + volatile int err; + char *endname; + char *zeroedp; + char *enddir; int matchdot; - int esc; - - metaflag = 0; - start = name; - for (p = name; esc = 0, *p; p += esc + 1) { - if (*p == '*' || *p == '?') - metaflag = 1; - else if (*p == '[') { - char *q = p + 1; - if (*q == '!') - q++; - for (;;) { - if (*q == '\\') - q++; - if (*q == '/' || *q == '\0') - break; - if (*++q == ']') { - metaflag = 1; - break; - } - } - } else { - if (*p == '\\' && p[1]) - esc++; - if (p[esc] == '/') { - if (metaflag) - break; - start = p + esc + 1; - } - } - } - if (metaflag == 0) { /* we've reached the end of the file name */ + unsigned esc; + char *start; + size_t len; + DIR *dirp; + char *pat; + char *cp; + char *p; + int c; + + *(DIR *volatile *)&dirp = NULL; + savehandler = handler; + if (unlikely(err = setjmp(jmploc.loc))) + goto out; + + len = expdir_len + name_len + 1; + cp = growstackto(len); + enddir = cp + expdir_len; + + p = name; + esc = 0; + do { + p = strpbrk(p + esc, "*?]"); + if (!p) + break; + esc = mesclen(name, p, mesc) & 1; + } while (esc); + /* No meta characters */ + if (likely(!p)) { if (!expdir_len) - return; - p = name; - do { - if (*p == '\\' && p[1]) - p++; - *enddir++ = *p; - } while (*p++); - if (lstat64(expdir, &statb) >= 0) - addfname(expdir); - return; - } - endname = p; - if (name < start) { - p = name; - do { - if (*p == '\\' && p[1]) 
- p++; - *enddir++ = *p++; - } while (p < start); + goto out_opendir; + enddir = expmeta_rmescapes(enddir, name); + if (lstat64(cp, &statb) >= 0) + cp = addfnamealt(enddir + 1, expdir_len); + goto out_opendir; } + start = memrchr(name, '/', p - name); + if (start) { + c = *++start; + *start = 0; + enddir = expmeta_rmescapes(enddir, name); + *start = c; + expdir_len = enddir - cp; + } else + start = name; *enddir = 0; - cp = expdir; - expdir_len = enddir - cp; - if (!expdir_len) - cp = "."; - if ((dirp = opendir(cp)) == NULL) - return; - if (*endname == 0) { - atend = 1; - } else { - atend = 0; - *endname = '\0'; - endname += esc + 1; + + *(DIR *volatile *)&dirp = opendir(expdir_len ? cp : dotdir); + if (!dirp) + goto out_opendir; + esc = 0; + p = strchrnul(p + 1, '/'); + zeroedp = p; + endname = p; + if (*p) { + esc = mesclen(name, p, mesc) & 1; + zeroedp -= esc; + endname++; } + c = *zeroedp; + *zeroedp = '\0'; name_len -= endname - name; matchdot = 0; - p = start; - if (*p == '\\') + pat = start; + p = pat; + if (*p == mesc) p++; if (*p == '.') matchdot++; - while (! int_pending() && (dp = readdir(dirp)) != NULL) { - if (dp->d_name[0] == '.' && ! matchdot) - continue; - if (pmatch(start, dp->d_name)) { - if (atend) { - scopy(dp->d_name, enddir); - addfname(expdir); - } else { - unsigned offset; - unsigned len; - - p = stpcpy(enddir, dp->d_name); - *p = '/'; - - offset = p - expdir + 1; - len = offset + name_len + NAME_MAX; - if (len > expdir_max) { - len += PATH_MAX; - expdir = ckrealloc(expdir, len); - expdir_max = len; - } - - expmeta(endname, name_len, offset); - enddir = expdir + expdir_len; + while ((dp = readdir64(dirp))) { + char *dname = dp->d_name; + + if (*dname == '.' 
&& !matchdot) + goto check_int; + if (c && dp->d_type != DT_DIR && dp->d_type != DT_LNK && + dp->d_type != DT_UNKNOWN) + goto check_int; + len = strlen(dname) + 1; + p = dname; + if (!FNMATCH_IS_ENABLED) { + expdest = enddir; + memtodest(p, len, EXP_MBCHAR | EXP_KEEPNUL); + cp = stackblock(); + enddir = cp + expdir_len; + p = enddir; + } + if (pmatch(pat, p)) { + enddir = stnputs(dname, len, enddir); + if (!c) + cp = addfnamealt(enddir, expdir_len); + else { + enddir[-1] = '/'; + len += expdir_len; + cp = expmeta(endname, name_len, len); } + enddir = cp + expdir_len; } +check_int: + if (int_pending()) + break; } - closedir(dirp); - if (! atend) - endname[-esc - 1] = esc ? '\\' : '/'; + *zeroedp = c; + +out: + closedir(*(DIR *volatile *)&dirp); +out_opendir: + handler = savehandler; + if (err) + longjmp(handler->loc, 1); + return cp; } -#endif /* HAVE_GLOB */ /* @@ -1404,16 +1775,10 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len) STATIC void addfname(char *name) { - struct strlist *sp; - - sp = (struct strlist *)stalloc(sizeof *sp); - sp->text = sstrdup(name); - *exparg.lastp = sp; - exparg.lastp = &sp->next; + addfname_common(sstrdup(name)); } -#ifndef HAVE_GLOB /* * Sort the results of file name expansion. 
It calculates the number of * strings to sort and then calls msort (short for merge sort) to do the @@ -1454,7 +1819,7 @@ msort(struct strlist *list, int len) p = msort(p, len - half); /* sort second half */ lpp = &list; for (;;) { - if (strcmp(p->text, q->text) < 0) { + if (strcoll(p->text, q->text) < 0) { *lpp = p; lpp = &p->next; if ((p = *lpp) == NULL) { @@ -1472,7 +1837,6 @@ msort(struct strlist *list, int len) } return list; } -#endif /* @@ -1486,141 +1850,189 @@ patmatch(char *pattern, const char *string) } -#ifndef HAVE_FNMATCH -STATIC int ccmatch(const char *p, int chr, const char **r) +static __attribute__((noinline)) int ccmatch(char *p, const char *mbc, int ml, + char **r) { - static const struct class { - char name[10]; - int (*fn)(int); - } classes[] = { - { .name = ":alnum:]", .fn = isalnum }, - { .name = ":cntrl:]", .fn = iscntrl }, - { .name = ":lower:]", .fn = islower }, - { .name = ":space:]", .fn = isspace }, - { .name = ":alpha:]", .fn = isalpha }, - { .name = ":digit:]", .fn = isdigit }, - { .name = ":print:]", .fn = isprint }, - { .name = ":upper:]", .fn = isupper }, - { .name = ":blank:]", .fn = isblank }, - { .name = ":graph:]", .fn = isgraph }, - { .name = ":punct:]", .fn = ispunct }, - { .name = ":xdigit:]", .fn = isxdigit }, - }; - const struct class *class, *end; + mbstate_t mbst = {}; + wctype_t type; + wchar_t wc; + char *q; - end = classes + sizeof(classes) / sizeof(classes[0]); - for (class = classes; class < end; class++) { - const char *q; + *r = 0; - q = prefix(p, class->name); - if (!q) - continue; - *r = q; - return class->fn(chr); - } + if (*p++ != ':') + return 0; - *r = 0; - return 0; + q = strstr(p, ":]"); + if (!q) + return 0; + + *q = 0; + type = wctype(p); + *q = ':'; + + if (!type) + return 0; + + *r = q + 2; + + if (mbrtowc(&wc, mbc, ml, &mbst) != ml) + return 0; + + return iswctype(wc, type); } -STATIC int -pmatch(const char *pattern, const char *string) +static int pmatch(char *pattern, const char *string) { - const 
char *p, *q; + const char *q; + unsigned mb; + char *p; char c; + if (FNMATCH_IS_ENABLED) + return !fnmatch(pattern, string, 0); + p = pattern; q = string; for (;;) { - switch (c = *p++) { + switch ((signed char)(c = *p++)) { case '\0': goto breakloop; - case '\\': - if (*p) { - c = *p++; - } - goto dft; + case CTLESC: + c = *p++; + break; case '?': - if (*q++ == '\0') + if (*q == '\0') return 0; - break; + mb = mbnext(q); + q += (mb >> 8) + (mb & 0xff); + continue; case '*': c = *p; while (c == '*') c = *++p; - if (c != '\\' && c != '?' && c != '*' && c != '[') { - while (*q != c) { - if (*q == '\0') + if (!c) + return 1; + if (c == '?' || c == '[') + c = CTLESC; + for (;;) { + if (c != (char)CTLESC) { + /* Stop should be null-terminated + * as it is passed as a string to + * strpbrk(3). + */ + const char stop[] = { + c, CTLESC, CTLMBCHAR, 0, + }; + q = strpbrk(q, stop); + if (!q) return 0; - q++; } - } - do { if (pmatch(p, q)) return 1; - } while (*q++ != '\0'); + if (!*q) + break; + mb = mbnext(q); + q += (mb >> 8) + (mb & 0xff); + } return 0; case '[': { - const char *startp; + char *startp; int invert, found; char chr; startp = p; invert = 0; - if (*p == '!') { + if (*p == '!' || *p == '^') { invert++; p++; } found = 0; + mb = mbnext(q); + q += mb & 0xff; + mb >>= 8; chr = *q; if (chr == '\0') return 0; c = *p++; do { + unsigned mbp = 0; + const char *mbs = &c; + if (!c) { p = startp; c = '['; goto dft; } if (c == '[') { - const char *r; + char *r; - found |= !!ccmatch(p, chr, &r); + found |= !!ccmatch(p, q, mb > 1 ? 
+ mb - 2 : mb, + &r); if (r) { p = r; continue; } - } else if (c == '\\') + } else if (c == (char)CTLESC) c = *p++; - if (*p == '-' && p[1] != ']') { + else if (c == (char)CTLMBCHAR) { + mbp = mbnext(--p); + p += mbp & 0xff; + mbs = p; + mbp >>= 8; + p += mbp; + } + if (*p == '-' && p[1] && p[1] != ']') { p++; - if (*p == '\\') + if (*p == (char)CTLESC) p++; - if (chr >= c && chr <= *p) + else if (*p == (char)CTLMBCHAR) { + mbp = mbnext(p); + p += mbp & 0xff; + p += mbp >> 8; + continue; + } + if (!(mbp | (mb - 1)) && + chr >= c && chr <= *p) found = 1; p++; - } else { - if (chr == c) - found = 1; - } + } else if (!strncmp(mbs, q, mb)) + found = 1; } while ((c = *p++) != ']'); if (found == invert) return 0; - q++; - break; + q += mb; + continue; } -dft: default: - if (*q++ != c) + case CTLMBCHAR: + mb = mbnext(--p); + p += mb & 0xff; + mb = mbnext(q); + q += mb & 0xff; + mb >>= 8; + + if (strncmp(p - 1, q - 1, mb + 1)) return 0; - break; + + p += mb; + q += mb; + continue; } +dft: + mb = mbnext(q); + if ((mb >> 8) > 1) + return 0; + q += mb & 0xff; + if (*q != c) + return 0; + q += mb >> 8; } breakloop: if (*q != '\0') return 0; return 1; } -#endif @@ -1634,16 +2046,24 @@ _rmescapes(char *str, int flag) char *p, *q, *r; int notescaped; int globbing; + int inquotes; - p = strpbrk(str, qchars); + p = strpbrk(str, cqchars); if (!p) { return str; } q = p; r = str; + globbing = flag & RMESCAPE_GLOB; + if (flag & RMESCAPE_ALLOC) { size_t len = p - str; - size_t fulllen = len + strlen(p) + 1; + size_t fulllen = strlen(p); + + if (FNMATCH_IS_ENABLED && globbing) + fulllen *= 2; + + fulllen += len + 1; if (flag & RMESCAPE_GROW) { int strloc = str - (char *)stackblock(); @@ -1661,30 +2081,68 @@ _rmescapes(char *str, int flag) q = mempcpy(q, str, len); } } - globbing = flag & RMESCAPE_GLOB; + inquotes = 0; notescaped = globbing; while (*p) { - if (*p == (char)CTLQUOTEMARK) { + int c = (signed char)*p; + int newnesc = globbing; + unsigned mb; + unsigned ml; + + if (c == 
CTLQUOTEMARK) { p++; - notescaped = globbing; + inquotes ^= globbing; continue; - } - if (*p == '\\') { + } else if (c == '\\') { /* naked back slash */ - notescaped = 0; - goto copy; - } - if (*p == (char)CTLESC) { - p++; - if (notescaped) - *q++ = '\\'; + newnesc ^= notescaped; + /* naked backslashes can only occur outside quotes */ + inquotes = 0; + if (!FNMATCH_IS_ENABLED && notescaped) + c = CTLESC; + } else if (c == CTLESC) { + if ((notescaped ^ inquotes) & inquotes) { + if (FNMATCH_IS_ENABLED) + *q++ = '\\'; + else + q[-1] = '\\'; + } + if (globbing) + *q++ = FNMATCH_IS_ENABLED ? '\\' : CTLESC; + + c = *++p; + } else if (c == CTLMBCHAR) { + unsigned tail = 2; + + if (!FNMATCH_IS_ENABLED && (globbing ^ notescaped)) + q--; + + mb = mbnext(p); + ml = mb >> 8; + + if (!globbing || FNMATCH_IS_ENABLED) { + p += mb & 0xff; + ml -= 2; + } else { + ml += mb & 0xff; + tail = 0; + } + + memmove(q, p, ml); + q += ml; + p += ml + tail; + goto setnesc; } - notescaped = globbing; -copy: - *q++ = *p++; + + *q++ = c; + p++; +setnesc: + notescaped = newnesc; } + if (!FNMATCH_IS_ENABLED && (globbing ^ notescaped)) + q[-1] = '\\'; *q = '\0'; - if (flag & RMESCAPE_GROW) { + if (flag & (RMESCAPE_ALLOC | RMESCAPE_GROW)) { expdest = r; STADJUST(q - r + 1, expdest); } @@ -1744,6 +2202,16 @@ varunset(const char *end, const char *var, const char *umsg, int varflags) sh_error("%.*s: %s%s", end - var - 1, var, msg, tail); } +void restore_handler_expandarg(struct jmploc *savehandler, int err) +{ + handler = savehandler; + if (err) { + if (exception != EXERROR) + longjmp(handler->loc, 1); + ifsfree(); + } +} + #ifdef mkinit INCLUDE "expand.h" diff --git a/src/expand.h b/src/expand.h index c44b848..7bcff75 100644 --- a/src/expand.h +++ b/src/expand.h @@ -55,6 +55,7 @@ struct arglist { #define EXP_VARTILDE 0x4 /* expand tildes in an assignment */ #define EXP_REDIR 0x8 /* file glob for a redirection (1 match only) */ #define EXP_CASE 0x10 /* keeps quotes around for CASE pattern */ +#define 
EXP_MBCHAR 0x20 /* mark multi-byte characters */ #define EXP_VARTILDE2 0x40 /* expand tildes after colons only */ #define EXP_WORD 0x80 /* expand word in parameter expansion */ #define EXP_QUOTED 0x100 /* expand word in double quotes */ @@ -62,7 +63,9 @@ struct arglist { #define EXP_DISCARD 0x400 /* discard result of expansion */ +struct jmploc; union node; + void expandarg(union node *, struct arglist *, int); #define rmescapes(p) _rmescapes((p), 0) char *_rmescapes(char *, int); @@ -71,6 +74,8 @@ void recordregion(int, int, int); void removerecordregions(int); void ifsbreakup(char *, int, struct arglist *); void ifsfree(void); +void restore_handler_expandarg(struct jmploc *savehandler, int err); +void changeifs(const char *); /* From arith.y */ intmax_t arith(const char *); diff --git a/src/histedit.c b/src/histedit.c index f5c90ab..5ab718b 100644 --- a/src/histedit.c +++ b/src/histedit.c @@ -115,7 +115,7 @@ histedit(void) if (el != NULL) { if (hist) el_set(el, EL_HIST, history, hist); - el_set(el, EL_PROMPT, getprompt); + el_set(el, EL_PROMPT_ESC, getprompt, '\1'); } else { bad: out2str("sh: can't initialize editing\n"); @@ -212,15 +212,12 @@ histcmd(int argc, char **argv) if (hist == NULL) sh_error("history not active"); - if (argc == 1) - sh_error("missing history argument"); - #ifdef __GLIBC__ optind = 0; #else optreset = 1; optind = 1; /* initialize getopt */ #endif - while (not_fcnumber(argv[optind]) && + while (not_fcnumber(argv[optind ?: 1]) && (ch = getopt(argc, argv, ":e:lnrs")) != -1) switch ((char)ch) { case 'e': @@ -246,6 +243,7 @@ histcmd(int argc, char **argv) sh_error("unknown option: -%c", optopt); /* NOTREACHED */ } + optind = optind ?: 1; argc -= optind, argv += optind; /* @@ -288,14 +286,18 @@ histcmd(int argc, char **argv) } /* - * If executing, parse [old=new] now + * If -s is specified, accept [old=new] first only */ - if (lflg == 0 && argc > 0 && - ((repl = strchr(argv[0], '=')) != NULL)) { - pat = argv[0]; - *repl++ = '\0'; - argc--, 
argv++; + if (sflg) { + if (argc > 0 && ((repl = strchr(argv[0], '=')) != NULL)) { + pat = argv[0]; + *repl++ = '\0'; + argc--, argv++; + } + if (argc >= 2) + sh_error("too many args"); } + /* * determine [first] and [last] */ @@ -375,12 +377,10 @@ histcmd(int argc, char **argv) evalstring(s, 0); if (displayhist && hist) { - /* - * XXX what about recursive and - * relative histnums. - */ history(hist, &he, H_ENTER, s); } + + break; } else fputs(s, efp); } @@ -476,6 +476,8 @@ str_to_event(const char *str, int last) */ retval = history(hist, &he, last ? H_FIRST : H_LAST); + if (retval != -1 && last) + retval = history(hist, &he, H_NEXT); } } if (retval == -1) diff --git a/src/init.h b/src/init.h index 49791a0..e117895 100644 --- a/src/init.h +++ b/src/init.h @@ -34,6 +34,10 @@ * @(#)init.h 8.2 (Berkeley) 5/4/95 */ +union node; + void init(void); void exitreset(void); +void forkreset(union node *); +void postexitreset(void); void reset(void); diff --git a/src/input.c b/src/input.c index ae0c4c8..7856e29 100644 --- a/src/input.c +++ b/src/input.c @@ -32,54 +32,69 @@ * SUCH DAMAGE. */ -#include /* defines BUFSIZ */ #include -#include +#include +#include /* defines BUFSIZ */ #include #include +#include +#include /* * This file implements the input routines used by the parser. 
*/ +#include "alias.h" +#include "error.h" #include "eval.h" -#include "shell.h" -#include "redir.h" -#include "syntax.h" #include "input.h" -#include "output.h" -#include "options.h" -#include "memalloc.h" -#include "error.h" -#include "alias.h" -#include "parser.h" #include "main.h" -#ifndef SMALL +#include "memalloc.h" #include "myhistedit.h" -#endif +#include "options.h" +#include "output.h" +#include "parser.h" +#include "redir.h" +#include "shell.h" +#include "syntax.h" +#include "system.h" +#include "trap.h" -#define EOF_NLEFT -99 /* value of parsenleft when EOF pushed back */ -#define IBUFSIZ (BUFSIZ + 1) +#define IBUFSIZ (BUFSIZ + PUNGETC_MAX + 1) +MKINIT +struct stdin_state { + tcflag_t canon; + off_t seekable; + struct termios tios; + int pip[2]; + int pending; +}; MKINIT struct parsefile basepf; /* top level input file */ MKINIT char basebuf[IBUFSIZ]; /* buffer for top level input file */ +MKINIT struct parsefile *toppf = &basepf; +MKINIT struct stdin_state stdin_state; struct parsefile *parsefile = &basepf; /* current input file */ int whichprompt; /* 1 == PS1, 2 == PS2 */ - -#ifndef SMALL -EditLine *el; /* cookie for editline package */ -#endif +int stdin_istty = -1; STATIC void pushfile(void); +static void popstring(void); static int preadfd(void); +/* static void setinputfd(int fd, int push); +*/ // libdash static int preadbuffer(void); #ifdef mkinit INCLUDE +INCLUDE +INCLUDE +INCLUDE INCLUDE "input.h" INCLUDE "error.h" +INCLUDE "syntax.h" INIT { basepf.nextc = basepf.buf = basebuf; @@ -87,63 +102,206 @@ INIT { } RESET { + int c; + /* clear input buffer */ - basepf.lleft = basepf.nleft = 0; popallfiles(); + + c = PEOF; + if (toppf->nextc - toppf->buf > toppf->unget) + c = toppf->nextc[-toppf->unget - 1]; + while (c != '\n' && c != PEOF && !int_pending()) + c = pgetc(); +} + +FORKRESET { + popallfiles(); + if (parsefile->fd > 0) { + close(parsefile->fd); + parsefile->fd = 0; + } + if (stdin_state.pip[0]) { + close(stdin_state.pip[0]); + 
close(stdin_state.pip[1]); + memset(stdin_state.pip, 0, sizeof(stdin_state.pip)); + } +} + +POSTEXITRESET { + flush_input(); } #endif +void input_init(void) +{ + struct stdin_state *st = &stdin_state; + int istty; -/* - * Read a character from the script, returning PEOF on end of file. - * Nul characters in the input are silently discarded. - */ + istty = tcgetattr(0, &st->tios) + 1; + st->seekable = istty ? 0 : lseek(0, 0, SEEK_CUR) + 1; + st->canon = istty ? st->tios.c_lflag & ICANON : 0; + stdin_istty = istty; +} -int -pgetc(void) +static bool stdin_bufferable(void) +{ + struct stdin_state *st = &stdin_state; + + if (stdin_istty < 0) + input_init(); + + return st->canon || st->seekable; +} + +static void flush_tee(void *buf, int nr, int pending) +{ + while (pending > 0) { + int err; + + err = read(0, buf, nr > pending ? pending : nr); + if (err > 0) + pending -= err; + } +} + +static int stdin_tee(void *buf, int nr) +{ + int err; + + if (stdin_istty) + return 0; + + if (!stdin_state.pip[0]) { + err = pipe(stdin_state.pip); + if (err < 0) + return err; + if (stdin_state.pip[0] < 10) + stdin_state.pip[0] = savefd(stdin_state.pip[0], + stdin_state.pip[0]); + if (stdin_state.pip[1] < 10) + stdin_state.pip[1] = savefd(stdin_state.pip[1], + stdin_state.pip[1]); + } + + flush_tee(buf, nr, stdin_state.pending); + + if (USE_TEE) + err = tee(0, stdin_state.pip[1], nr, 0); + else { + errno = EINVAL; + err = -1; + } + stdin_state.pending = err; + return err; +} + +static void freestrings(struct strpush *sp) +{ + INTOFF; + do { + struct strpush *psp; + + if (sp->ap) { + sp->ap->flag &= ~ALIASINUSE; + if (sp->ap->flag & ALIASDEAD) { + unalias(sp->ap->name); + } + } + + psp = sp; + sp = sp->spfree; + + if (psp != &(parsefile->basestrpush)) + ckfree(psp); + } while (sp); + + parsefile->spfree = NULL; + INTON; +} + + +static int __pgetc(void) { int c; - if (parsefile->unget) - return parsefile->lastc[--parsefile->unget]; + if (parsefile->unget) { + long unget = 
-(long)(unsigned)parsefile->unget--; + + return parsefile->nextc[unget]; + } - if (--parsefile->nleft >= 0) + if (parsefile->nleft > 0) { + parsefile->nleft--; c = (signed char)*parsefile->nextc++; - else + } else c = preadbuffer(); - parsefile->lastc[1] = parsefile->lastc[0]; - parsefile->lastc[0] = c; - return c; } /* - * Same as pgetc(), but ignores PEOA. + * Read a character from the script, returning PEOF on end of file. + * Nul characters in the input are silently discarded. */ -int -pgetc2() +int __attribute__((noinline)) pgetc(void) { - int c; - do { - c = pgetc(); - } while (c == PEOA); - return c; + struct strpush *sp = parsefile->spfree; + + if (unlikely(sp)) + freestrings(sp); + + return __pgetc(); } +int pgetc_eoa(void) +{ + return parsefile->strpush && parsefile->nleft == -1 && + parsefile->strpush->ap ? PEOA : pgetc(); +} + +static int stdin_clear_nonblock(void) +{ + int flags = fcntl(0, F_GETFL, 0); + + if (flags >= 0) { + flags &=~ O_NONBLOCK; + flags = fcntl(0, F_SETFL, flags); + } + + return flags; +} static int preadfd(void) { + char *buf = parsefile->buf; + int fd = parsefile->fd; + int unget; + int pnr; int nr; - char *buf = parsefile->buf; + + nr = input_get_lleft(parsefile); + + unget = parsefile->nextc - buf; + if (unget > PUNGETC_MAX) + unget = PUNGETC_MAX; + + memmove(buf, parsefile->nextc - unget, unget + nr); + buf += unget; parsefile->nextc = buf; + buf += nr; + + nr = BUFSIZ - nr; + if (!IS_DEFINED_SMALL && !nr) + return nr; + pnr = nr; retry: + nr = pnr; #ifndef SMALL - if (parsefile->fd == 0 && el) { + if (fd == 0 && el) { static const char *rl_cp; static int el_len; @@ -156,9 +314,8 @@ preadfd(void) if (rl_cp == NULL) nr = 0; else { - nr = el_len; - if (nr > IBUFSIZ - 1) - nr = IBUFSIZ - 1; + if (nr > el_len) + nr = el_len; memcpy(buf, rl_cp, nr); if (nr != el_len) { el_len -= nr; @@ -167,23 +324,29 @@ preadfd(void) rl_cp = 0; } - } else + return nr; + } #endif - nr = read(parsefile->fd, buf, IBUFSIZ - 1); + if (!fd && 
!stdin_bufferable()) { + nr = stdin_tee(buf, nr); + fd = stdin_state.pip[0]; + if (nr < 0 && errno == EINVAL) { + fd = 0; + nr = 1; + } + } + + if (nr > 0) + nr = read(fd, buf, nr); if (nr < 0) { - if (errno == EINTR) + if (errno == EINTR && !(basepf.prev && pending_sig)) + goto retry; + if (fd == 0 && errno == EWOULDBLOCK && + stdin_clear_nonblock() >= 0) { + out2str("sh: turning off NDELAY mode\n"); goto retry; - if (parsefile->fd == 0 && errno == EWOULDBLOCK) { - int flags = fcntl(0, F_GETFL, 0); - if (flags >= 0 && flags & O_NONBLOCK) { - flags &=~ O_NONBLOCK; - if (fcntl(0, F_SETFL, flags) >= 0) { - out2str("sh: turning off NDELAY mode\n"); - goto retry; - } - } } } return nr; @@ -193,102 +356,105 @@ preadfd(void) * Refill the input buffer and return the next input character: * * 1) If a string was pushed back on the input, pop it; - * 2) If an EOF was pushed back (parsenleft == EOF_NLEFT) or we are reading - * from a string so we can't refill the buffer, return EOF. - * 3) If the is more stuff in this buffer, use it else call read to fill it. + * 2) If we are reading from a string we can't refill the buffer, return EOF. + * 3) If there is more stuff in this buffer, use it else call read to fill it. * 4) Process input up to the next newline, deleting nul characters. 
*/ static int preadbuffer(void) { - char *q; - int more; -#ifndef SMALL + int first = whichprompt == 1; int something; -#endif char savec; + int more; + char *q; if (unlikely(parsefile->strpush)) { - if ( - parsefile->nleft == -1 && - parsefile->strpush->ap && - parsefile->nextc[-1] != ' ' && - parsefile->nextc[-1] != '\t' - ) { - return PEOA; - } popstring(); - return pgetc(); + return __pgetc(); } - if (unlikely(parsefile->nleft == EOF_NLEFT || - parsefile->buf == NULL)) + if (parsefile->eof & 2) { +eof: + parsefile->eof = 3; return PEOF; + } flushall(); - more = parsefile->lleft; + q = parsefile->nextc; + something = !first; + + more = input_get_lleft(parsefile); + + INTOFF; if (more <= 0) { + int nr; + again: - if ((more = preadfd()) <= 0) { - parsefile->lleft = parsefile->nleft = EOF_NLEFT; - return PEOF; + nr = q - parsefile->nextc; + input_set_lleft(parsefile, nr); + more = preadfd(); + q = parsefile->nextc + nr; + if (more <= 0) { + input_set_lleft(parsefile, parsefile->nleft = 0); + if (!IS_DEFINED_SMALL && nr > 0) + goto save; + INTON; + goto eof; } } - q = parsefile->nextc; - /* delete nul characters */ -#ifndef SMALL - something = 0; -#endif for (;;) { int c; more--; c = *q; - if (!c) + if (!c) { memmove(q, q + 1, more); - else { - q++; + goto check; + } - if (c == '\n') { - parsefile->nleft = q - parsefile->nextc - 1; - break; - } + q++; -#ifndef SMALL - switch (c) { - default: - something = 1; - /* fall through */ - case '\t': - case ' ': - break; - } -#endif + if (IS_DEFINED_SMALL) + goto check; + + switch (c) { + case '\n': + goto done; + + default: + something = 1; + /* fall through */ + + case '\t': + case ' ': + break; } +check: if (more <= 0) { - parsefile->nleft = q - parsefile->nextc - 1; - if (parsefile->nleft < 0) + if (!IS_DEFINED_SMALL) goto again; break; } } - parsefile->lleft = more; +done: + input_set_lleft(parsefile, more); - savec = *q; +save: + parsefile->nleft = q - parsefile->nextc - 1; + if (!IS_DEFINED_SMALL) + savec = *q; *q = 
'\0'; -#ifndef SMALL if (parsefile->fd == 0 && hist && something) { HistEvent he; - INTOFF; - history(hist, &he, whichprompt == 1? H_ENTER : H_APPEND, + history(hist, &he, first ? H_ENTER : H_APPEND, parsefile->nextc); - INTON; } -#endif + INTON; if (vflag) { out2str(parsefile->nextc); @@ -297,11 +463,17 @@ static int preadbuffer(void) #endif } - *q = savec; + if (!IS_DEFINED_SMALL) + *q = savec; return (signed char)*parsefile->nextc++; } +void pungetn(int n) +{ + parsefile->unget += n; +} + /* * Undo a call to pgetc. Only two characters may be pushed back. * PEOF may be pushed back. @@ -310,7 +482,8 @@ static int preadbuffer(void) void pungetc(void) { - parsefile->unget++; + pungetn(1 - (parsefile->eof & 1)); + parsefile->eof &= ~1; } /* @@ -326,7 +499,8 @@ pushstring(char *s, void *ap) len = strlen(s); INTOFF; /*dprintf("*** calling pushstring: %s, %d\n", s, len);*/ - if (parsefile->strpush) { + if ((unsigned long)parsefile->strpush | + (unsigned long)parsefile->spfree) { sp = ckmalloc(sizeof (struct strpush)); sp->prev = parsefile->strpush; parsefile->strpush = sp; @@ -335,45 +509,39 @@ pushstring(char *s, void *ap) sp->prevstring = parsefile->nextc; sp->prevnleft = parsefile->nleft; sp->unget = parsefile->unget; - memcpy(sp->lastc, parsefile->lastc, sizeof(sp->lastc)); + sp->spfree = parsefile->spfree; sp->ap = (struct alias *)ap; if (ap) { ((struct alias *)ap)->flag |= ALIASINUSE; - sp->string = s; + sp->string = ((struct alias *)ap)->name; } parsefile->nextc = s; parsefile->nleft = len; parsefile->unget = 0; + parsefile->spfree = NULL; INTON; } -void -popstring(void) +static void popstring(void) { struct strpush *sp = parsefile->strpush; INTOFF; - if (sp->ap) { + if (sp->ap && parsefile->nextc > sp->string) { if (parsefile->nextc[-1] == ' ' || parsefile->nextc[-1] == '\t') { checkkwd |= CHKALIAS; } - if (sp->string != sp->ap->val) { + if (sp->string != sp->ap->name) { ckfree(sp->string); } - sp->ap->flag &= ~ALIASINUSE; - if (sp->ap->flag & ALIASDEAD) { - 
unalias(sp->ap->name); - } } parsefile->nextc = sp->prevstring; parsefile->nleft = sp->prevnleft; parsefile->unget = sp->unget; - memcpy(parsefile->lastc, sp->lastc, sizeof(sp->lastc)); /*dprintf("*** calling popstring: restoring to '%s'\n", parsenextc);*/ parsefile->strpush = sp->prev; - if (sp != &(parsefile->basestrpush)) - ckfree(sp); + parsefile->spfree = sp; INTON; } @@ -388,12 +556,9 @@ setinputfile(const char *fname, int flags) int fd; INTOFF; - if ((fd = open(fname, O_RDONLY)) < 0) { - if (flags & INPUT_NOFILE_OK) - goto out; - exitstatus = 127; - exerror(EXERROR, "Can't open %s", fname); - } + fd = sh_open(fname, O_RDONLY, flags & INPUT_NOFILE_OK); + if (fd < 0) + goto out; if (fd < 10) fd = savefd(fd, fd); setinputfd(fd, flags & INPUT_PUSH_FILE); @@ -408,18 +573,17 @@ setinputfile(const char *fname, int flags) * interrupts off. */ +/* static void +*/ +void // libdash setinputfd(int fd, int push) { - if (push) { - pushfile(); - parsefile->buf = 0; - } + pushfile(); + if (!push) + toppf = parsefile; parsefile->fd = fd; - if (parsefile->buf == NULL) - parsefile->buf = ckmalloc(IBUFSIZ); - parsefile->lleft = parsefile->nleft = 0; - plinno = 1; + parsefile->nextc = parsefile->buf = ckmalloc(IBUFSIZ); } @@ -434,8 +598,7 @@ setinputstring(char *string) pushfile(); parsefile->nextc = string; parsefile->nleft = strlen(string); - parsefile->buf = NULL; - plinno = 1; + parsefile->eof = 2; INTON; } @@ -452,14 +615,20 @@ pushfile(void) struct parsefile *pf; pf = (struct parsefile *)ckmalloc(sizeof (struct parsefile)); + memset(pf, 0, sizeof(*pf)); pf->prev = parsefile; + pf->linno = 1; pf->fd = -1; - pf->strpush = NULL; - pf->basestrpush.prev = NULL; - pf->unget = 0; parsefile = pf; } +void pushstdin(void) +{ + INTOFF; + basepf.prev = parsefile; + parsefile = &basepf; + INTON; +} void popfile(void) @@ -467,21 +636,30 @@ popfile(void) struct parsefile *pf = parsefile; INTOFF; + parsefile = pf->prev; + pf->prev = NULL; + if (pf == &basepf) + goto out; + if (pf->fd >= 
0) close(pf->fd); - if (pf->buf) - ckfree(pf->buf); - while (pf->strpush) + ckfree(pf->buf); + if (parsefile->spfree) + freestrings(parsefile->spfree); + while (pf->strpush) { popstring(); - parsefile = pf->prev; + freestrings(parsefile->spfree); + } ckfree(pf); + +out: INTON; } -void unwindfiles(struct parsefile *stop) +void __attribute__((noinline)) unwindfiles(struct parsefile *stop) { - while (parsefile != stop) + while (basepf.prev || parsefile != stop) popfile(); } @@ -493,22 +671,27 @@ void unwindfiles(struct parsefile *stop) void popallfiles(void) { - unwindfiles(&basepf); + unwindfiles(toppf); } +void __attribute__((noinline)) flush_input(void) +{ + int left = basepf.nleft + input_get_lleft(&basepf); + INTOFF; + if (stdin_state.seekable && left) + lseek(0, -left, SEEK_CUR); + else if (stdin_state.pending > left) { + flush_tee(basebuf, BUFSIZ, stdin_state.pending - left); + stdin_state.pending = 0; + } + input_set_lleft(&basepf, basepf.nleft = 0); + INTON; +} -/* - * Close the file(s) that the shell is reading commands from. Called - * after a fork is done. - */ - -void -closescript(void) +void reset_input(void) { - popallfiles(); - if (parsefile->fd > 0) { - close(parsefile->fd); - parsefile->fd = 0; - } + stdin_istty = -1; + basepf.eof = 0; + flush_input(); } diff --git a/src/input.h b/src/input.h index a9c0517..18fe2b4 100644 --- a/src/input.h +++ b/src/input.h @@ -34,7 +34,19 @@ * @(#)input.h 8.2 (Berkeley) 5/4/95 */ +#include +#include + +#ifdef SMALL +#define IS_DEFINED_SMALL 1 +#else +#define IS_DEFINED_SMALL 0 +#endif + +#define PUNGETC_MAX (MB_LEN_MAX > 16 ? MB_LEN_MAX : 16) + /* PEOF (the end of file marker) is defined in syntax.h */ +#define PEOA ((PEOF) - 1) enum { INPUT_PUSH_FILE = 1, @@ -50,8 +62,8 @@ struct strpush { struct alias *ap; /* if push was associated with an alias */ char *string; /* remember the string since it may change */ - /* Remember last two characters for pungetc. 
*/ - int lastc[2]; + /* Delay freeing so we can stop nested aliases. */ + struct strpush *spfree; /* Number of outstanding calls to pungetc. */ int unget; @@ -67,20 +79,25 @@ struct parsefile { int linno; /* current line */ int fd; /* file descriptor (or -1 if string) */ int nleft; /* number of chars left in this line */ - int lleft; /* number of chars left in this buffer */ + int eof; /* do not read again once we hit EOF */ char *nextc; /* next char in buffer */ char *buf; /* input buffer */ struct strpush *strpush; /* for pushing strings at this level */ struct strpush basestrpush; /* so pushing one is fast */ - /* Remember last two characters for pungetc. */ - int lastc[2]; + /* Delay freeing so we can stop nested aliases. */ + struct strpush *spfree; + +#ifndef SMALL + int lleft; /* number of chars left in this buffer */ +#endif /* Number of outstanding calls to pungetc. */ int unget; }; extern struct parsefile *parsefile; +extern int stdin_istty; /* * The input line number. Input.c just defines this variable, and saves @@ -90,13 +107,33 @@ extern struct parsefile *parsefile; #define plinno (parsefile->linno) int pgetc(void); -int pgetc2(void); +int pgetc_eoa(void); void pungetc(void); +void pungetn(int); void pushstring(char *, void *); -void popstring(void); int setinputfile(const char *, int); +void setinputfd(int fd, int push); // libdash void setinputstring(char *); +void pushstdin(void); void popfile(void); void unwindfiles(struct parsefile *); void popallfiles(void); -void closescript(void); +void flush_input(void); +void reset_input(void); +void input_init(void); + +static inline int input_get_lleft(struct parsefile *pf) +{ +#ifdef SMALL + return 0; +#else + return pf->lleft; +#endif +} + +static inline void input_set_lleft(struct parsefile *pf, int len) +{ +#ifndef SMALL + pf->lleft = len; +#endif +} diff --git a/src/jobs.c b/src/jobs.c index 26a6248..4aa65b6 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -53,8 +53,10 @@ #include #undef CEOF /* syntax.h 
redefines this */ #endif +#include "builtins.h" #include "exec.h" #include "eval.h" +#include "init.h" #include "redir.h" #include "show.h" #include "main.h" @@ -77,9 +79,10 @@ #define CUR_STOPPED 0 /* mode flags for dowait */ -#define DOWAIT_NORMAL 0 +#define DOWAIT_NONBLOCK 0 #define DOWAIT_BLOCK 1 #define DOWAIT_WAITCMD 2 +#define DOWAIT_WAITCMD_ALL 4 /* array of jobs */ static struct job *jobtab; @@ -185,17 +188,27 @@ set_curjob(struct job *jp, unsigned mode) int jobctl; +static void xxtcsetpgrp(pid_t pgrp) +{ + int fd = ttyfd; + + if (fd < 0) + return; + + xtcsetpgrp(fd, pgrp); +} + void setjobctl(int on) { + int pgrp = -1; int fd; - int pgrp; if (on == jobctl || rootshell == 0) return; if (on) { int ofd; - ofd = fd = open(_PATH_TTY, O_RDWR); + ofd = fd = sh_open(_PATH_TTY, O_RDWR, 1); if (fd < 0) { fd += 3; while (!isatty(fd)) @@ -205,46 +218,50 @@ setjobctl(int on) fd = savefd(fd, ofd); do { /* while we are in the background */ if ((pgrp = tcgetpgrp(fd)) < 0) { +close: + close(fd); + fd = -1; out: + if (!iflag) + break; sh_warnx("can't access tty; job control turned off"); mflag = on = 0; - goto close; + return; } if (pgrp == getpgrp()) break; + if (!iflag) + goto close; killpg(0, SIGTTIN); } while (1); initialpgrp = pgrp; - - setsignal(SIGTSTP); - setsignal(SIGTTOU); - setsignal(SIGTTIN); pgrp = rootpid; - setpgid(0, pgrp); - xtcsetpgrp(fd, pgrp); } else { /* turning job control off */ fd = ttyfd; pgrp = initialpgrp; - xtcsetpgrp(fd, pgrp); + } + + setsignal(SIGTSTP); + setsignal(SIGTTOU); + setsignal(SIGTTIN); + if (fd >= 0) { setpgid(0, pgrp); - setsignal(SIGTSTP); - setsignal(SIGTTOU); - setsignal(SIGTTIN); -close: - close(fd); - fd = -1; + xtcsetpgrp(fd, pgrp); + + if (!on) { + close(fd); + fd = -1; + } } + ttyfd = fd; jobctl = on; } #endif -int -killcmd(argc, argv) - int argc; - char **argv; +int killcmd(int argc, char **argv) { extern char *signal_names[]; int signo = -1; @@ -328,7 +345,7 @@ killcmd(argc, argv) pid = **argv == '-' ? 
-number(*argv + 1) : number(*argv); if (kill(pid, signo) != 0) { - sh_warnx("%s\n", strerror(errno)); + sh_warnx(snlfmt, strerror(errno)); i = 1; } } while (*++argv); @@ -392,7 +409,7 @@ restartjob(struct job *jp, int mode) jp->state = JOBRUNNING; pgid = jp->ps->pid; if (mode == FORK_FG) - xtcsetpgrp(ttyfd, pgid); + xxtcsetpgrp(pgid); killpg(pgid, SIGCONT); ps = jp->ps; i = jp->nprocs; @@ -556,8 +573,8 @@ showjobs(struct output *out, int mode) TRACE(("showjobs(%x) called\n", mode)); - /* If not even one one job changed, there is nothing to do */ - dowait(DOWAIT_NORMAL, NULL); + /* If not even one job changed, there is nothing to do */ + dowait(DOWAIT_NONBLOCK, NULL); for (jp = curjob; jp; jp = jp->prev_job) { if (!(mode & SHOW_CHANGED) || jp->changed) @@ -614,7 +631,7 @@ waitcmd(int argc, char **argv) jp->waited = 1; jp = jp->prev_job; } - if (!dowait(DOWAIT_WAITCMD, 0)) + if (!dowait(DOWAIT_WAITCMD_ALL, 0)) goto sigout; } } @@ -746,11 +763,11 @@ getjob(const char *name, int getctl) * Called with interrupts off. 
*/ -struct job * -makejob(union node *node, int nprocs) +struct job *makejob(int nprocs) { - int i; + struct procstat *ps; struct job *jp; + int i; for (i = njobs, jp = jobtab ; ; jp++) { if (--i < 0) { @@ -767,6 +784,9 @@ makejob(union node *node, int nprocs) break; } memset(jp, 0, sizeof(*jp)); + ps = &jp->ps0; + if (nprocs > 1) + ps = ckmalloc(nprocs * sizeof(*ps)); #if JOBS if (jobctl) jp->jobctl = 1; @@ -774,11 +794,8 @@ makejob(union node *node, int nprocs) jp->prev_job = curjob; curjob = jp; jp->used = 1; - jp->ps = &jp->ps0; - if (nprocs > 1) { - jp->ps = ckmalloc(nprocs * sizeof (struct procstat)); - } - TRACE(("makejob(0x%lx, %d) returns %%%d\n", (long)node, nprocs, + jp->ps = ps; + TRACE(("makejob(%d) returns %%%d\n", nprocs, jobno(jp))); return jp; } @@ -855,10 +872,10 @@ static void forkchild(struct job *jp, union node *n, int mode) lvforked = vforked; if (!lvforked) { + mypid = 0; shlvl++; - closescript(); - clear_traps(); + forkreset(mode == FORK_NOJOB ? n : NULL); #if JOBS /* do job control only in root shell */ @@ -871,13 +888,13 @@ static void forkchild(struct job *jp, union node *n, int mode) pid_t pgrp; if (jp->nprocs == 0) - pgrp = getpid(); + mypid = pgrp = getpid(); else pgrp = jp->ps[0].pid; /* This can fail because we are doing it in the parent also */ (void)setpgid(0, pgrp); if (mode == FORK_FG) - xtcsetpgrp(ttyfd, pgrp); + xxtcsetpgrp(pgrp); setsignal(SIGTSTP); setsignal(SIGTTOU); } else @@ -887,21 +904,29 @@ static void forkchild(struct job *jp, union node *n, int mode) ignoresig(SIGQUIT); if (jp->nprocs == 0) { close(0); - if (open(_PATH_DEVNULL, O_RDONLY) != 0) - sh_error("Can't open %s", _PATH_DEVNULL); + sh_open(_PATH_DEVNULL, O_RDONLY, 0); + /* Should call reset_input here, but it's harmless + * for now. 
+ */ } } if (!oldlvl && iflag) { - if (mode != FORK_BG) { - setsignal(SIGINT); - setsignal(SIGQUIT); - } + setsignal(SIGINT); + setsignal(SIGQUIT); setsignal(SIGTERM); } if (lvforked) return; + if (!jp) + return; + + freejob(jp); + + if (issimplecmd(n, JOBSCMD->name)) + return; + for (jp = curjob; jp; jp = jp->prev_job) freejob(jp); } @@ -951,6 +976,9 @@ forkshell(struct job *jp, union node *n, int mode) int pid; TRACE(("forkshell(%%%d, %p, %d) called\n", jobno(jp), n, mode)); + + flush_input(); + pid = fork(); if (pid == 0) forkchild(jp, n, mode); @@ -965,22 +993,21 @@ struct job *vforkexec(union node *n, char **argv, const char *path, int idx) struct job *jp; int pid; - jp = makejob(n, 1); + jp = makejob(1); - sigblockall(NULL); - vforked++; + if (!mypid) + mypid = getpid(); + vforked = mypid; pid = vfork(); if (!pid) { forkchild(jp, n, FORK_FG); - sigclearmask(); shellexec(argv, path, idx); /* NOTREACHED */ } vforked = 0; - sigclearmask(); forkparent(jp, n, FORK_FG, pid); return jp; @@ -1013,14 +1040,14 @@ waitforjob(struct job *jp) int st; TRACE(("waitforjob(%%%d) called\n", jp ? jobno(jp) : 0)); - dowait(jp ? DOWAIT_BLOCK : DOWAIT_NORMAL, jp); + dowait(jp ? DOWAIT_BLOCK : DOWAIT_NONBLOCK, jp); if (!jp) return exitstatus; st = getstatus(jp); #if JOBS if (jp->jobctl) { - xtcsetpgrp(ttyfd, rootpid); + xxtcsetpgrp(rootpid); /* * This is truly gross. * If we're doing job control, then we did a TIOCSPGRP which @@ -1123,15 +1150,28 @@ static int waitone(int block, struct job *job) static int dowait(int block, struct job *jp) { - int pid = block == DOWAIT_NORMAL ? gotsigchld : 1; + int gotchld = *(volatile int *)&gotsigchld; + int rpid; + int pid; - while (jp ? 
jp->state == JOBRUNNING : pid > 0) { - if (!jp) - gotsigchld = 0; + if (jp && jp->state != JOBRUNNING) + block = DOWAIT_NONBLOCK; + + if (block == DOWAIT_NONBLOCK && !gotchld) + return 1; + + rpid = 1; + + do { pid = waitone(block, jp); - } + rpid &= !!pid; - return pid; + block &= ~DOWAIT_WAITCMD_ALL; + if (!pid || (jp && jp->state != JOBRUNNING)) + block = DOWAIT_NONBLOCK; + } while (pid >= 0); + + return rpid; } /* @@ -1163,7 +1203,15 @@ waitproc(int block, int *status) #endif do { - err = wait3(status, flags, NULL); + gotsigchld = 0; + do +#ifdef HAVE_WAIT3 + err = wait3(status, flags, NULL); +#else + err = waitpid((pid_t)-1, status, flags, NULL); +#endif + while (err < 0 && errno == EINTR); + if (err || (err = -!block)) break; @@ -1173,8 +1221,6 @@ waitproc(int block, int *status) sigsuspend(&oldmask); sigclearmask(); - - err = 0; } while (gotsigchld); return err; @@ -1191,6 +1237,8 @@ stoppedjobs(void) int retval; retval = 0; + if (!JOBS) + goto out; if (job_warning) goto out; jp = curjob; @@ -1216,13 +1264,19 @@ commandtext(union node *n) { char *name; - STARTSTACKSTR(cmdnextc); - cmdtxt(n); + STARTSTACKSTR(name); + commandtextcont(n, name); name = stackblock(); TRACE(("commandtext: name %p, end %p\n", name, cmdnextc)); return savestr(name); } +char *commandtextcont(union node *n, char *next) +{ + cmdnextc = next; + cmdtxt(n); + return cmdnextc; +} STATIC void cmdtxt(union node *n) @@ -1497,7 +1551,13 @@ showpipe(struct job *jp, struct output *out) STATIC void xtcsetpgrp(int fd, pid_t pgrp) { - if (tcsetpgrp(fd, pgrp)) + int err; + + sigblockall(NULL); + err = tcsetpgrp(fd, pgrp); + sigclearmask(); + + if (err) sh_error("Cannot set tty process group (%s)", strerror(errno)); } #endif @@ -1507,8 +1567,15 @@ STATIC int getstatus(struct job *job) { int status; int retval; + struct procstat *ps; + + ps = job->ps + job->nprocs - 1; + status = ps->status; + if (pipefail) { + while (status == 0 && --ps >= job->ps) + status = ps->status; + } - status = 
job->ps[job->nprocs - 1].status; retval = WEXITSTATUS(status); if (!WIFEXITED(status)) { #if JOBS diff --git a/src/jobs.h b/src/jobs.h index 6ac6c56..a58d2a2 100644 --- a/src/jobs.h +++ b/src/jobs.h @@ -102,11 +102,12 @@ int jobscmd(int, char **); struct output; void showjobs(struct output *, int); int waitcmd(int, char **); -struct job *makejob(union node *, int); +struct job *makejob(int); int forkshell(struct job *, union node *, int); struct job *vforkexec(union node *n, char **argv, const char *path, int idx); int waitforjob(struct job *); int stoppedjobs(void); +char *commandtextcont(union node *n, char *next); #if ! JOBS #define setjobctl(on) ((void)(on)) /* do nothing */ diff --git a/src/mail.c b/src/mail.c index 8eacb2d..49cd5fa 100644 --- a/src/mail.c +++ b/src/mail.c @@ -80,7 +80,7 @@ chkmail(void) int len; len = padvance_magic(&mpath, nullstr, 2); - if (!len) + if (len < 0) break; p = stackblock(); if (*p == '\0') diff --git a/src/main.c b/src/main.c index 6b3a090..7eb8b42 100644 --- a/src/main.c +++ b/src/main.c @@ -32,6 +32,7 @@ * SUCH DAMAGE. */ +#include #include #include #include @@ -63,6 +64,7 @@ #define PROFILE 0 int rootpid; +int mypid; int shlvl; #ifdef __GLIBC__ int *dash_errno; @@ -71,11 +73,24 @@ int *dash_errno; short profile_buf[16384]; extern int etext(); #endif +MKINIT struct jmploc main_handler; STATIC void read_profile(const char *); STATIC char *find_dot_file(char *); static int cmdloop(int); + +//libdash +void +initialize_dash_errno() +{ +#ifdef __GLIBC__ + dash_errno = __errno_location(); +#endif +} + +#ifdef MAIN // libdash int main(int, char **); +#endif //MAIN // libdash /* * Main routine. We initialize things, parse the arguments, execute @@ -85,12 +100,12 @@ int main(int, char **); * is used to figure out how far we had gotten. 
*/ +#ifdef MAIN //libdash int main(int argc, char **argv) { char *shinit; volatile int state; - struct jmploc jmploc; struct stackmark smark; int login; @@ -101,8 +116,11 @@ main(int argc, char **argv) #if PROFILE monitor(4, etext, profile_buf, sizeof profile_buf, 50); #endif + + setlocale(LC_ALL, ""); + state = 0; - if (unlikely(setjmp(jmploc.loc))) { + if (unlikely(setjmp(main_handler.loc))) { int e; int s; @@ -112,7 +130,7 @@ main(int argc, char **argv) s = state; if (e == EXEND || e == EXEXIT || s == 0 || iflag == 0 || shlvl) - exitshell(); + goto exit; reset(); @@ -137,15 +155,15 @@ main(int argc, char **argv) else goto state4; } - handler = &jmploc; + handler = &main_handler; #ifdef DEBUG opentrace(); trputs("Shell args: "); trargs(argv); #endif - rootpid = getpid(); + mypid = rootpid = getpid(); init(); setstackmark(&smark); - login = procargs(argc, argv); + login = procargs(argv); if (login) { state = 1; read_profile("/etc/profile"); @@ -175,6 +193,7 @@ main(int argc, char **argv) state4: /* XXX ??? - why isn't this before the "if" statement */ cmdloop(1); } +exit: #if PROFILE monitor(0); #endif @@ -188,6 +207,7 @@ main(int argc, char **argv) /* NOTREACHED */ } +#endif // MAIN // libdash /* * Read and execute commands. "Top" is nonzero for the top level command @@ -233,7 +253,7 @@ cmdloop(int top) out2str("\nUse \"exit\" to leave shell.\n"); } numeof++; - } else if (nflag == 0) { + } else { int i; job_warning = (job_warning == 2) ? 1 : 0; @@ -298,7 +318,7 @@ find_dot_file(char *basename) { char *fullname; const char *path = pathval(); - struct stat statb; + struct stat64 statb; int len; /* don't try this for absolute or relative paths */ @@ -308,7 +328,7 @@ find_dot_file(char *basename) while ((len = padvance(&path, basename)) >= 0) { fullname = stackblock(); if ((!pathopt || *pathopt == 'f') && - !stat(fullname, &statb) && S_ISREG(statb.st_mode)) { + !stat64(fullname, &statb) && S_ISREG(statb.st_mode)) { /* This will be freed by the caller. 
*/ return stalloc(len); } @@ -353,3 +373,11 @@ exitcmd(int argc, char **argv) exraise(EXEXIT); /* NOTREACHED */ } + +#ifdef mkinit +INCLUDE "error.h" + +FORKRESET { + handler = &main_handler; +} +#endif diff --git a/src/main.h b/src/main.h index 19e4983..c88ab53 100644 --- a/src/main.h +++ b/src/main.h @@ -38,6 +38,8 @@ /* pid of main shell */ extern int rootpid; +/* pid of current shell */ +extern int mypid; /* shell level: 0 for the main shell, 1 for its children, and so on */ extern int shlvl; #define rootshell (!shlvl) @@ -48,6 +50,7 @@ extern int *dash_errno; #undef errno #define errno (*dash_errno) #endif +void initialize_dash_errno(); // libdash void readcmdfile(char *); int dotcmd(int, char **); diff --git a/src/memalloc.c b/src/memalloc.c index 60637da..3275e51 100644 --- a/src/memalloc.c +++ b/src/memalloc.c @@ -43,19 +43,28 @@ #include "mystring.h" #include "system.h" +static __attribute__((__always_inline__)) inline void outofspace(void) +{ + sh_error("Out of space"); +} + +static void *checknull(void *p) +{ + if (!p) + outofspace(); + return p; +} + /* * Like malloc, but returns an error when out of space. */ -pointer -ckmalloc(size_t nbytes) +__attribute__((__noinline__)) void *ckmalloc(size_t nbytes) { - pointer p; + void *p; p = malloc(nbytes); - if (p == NULL) - sh_error("Out of space"); - return p; + return checknull(p); } @@ -63,13 +72,10 @@ ckmalloc(size_t nbytes) * Same for realloc. 
*/ -pointer -ckrealloc(pointer p, size_t nbytes) +__attribute__((__noinline__)) void *ckrealloc(void *p, size_t nbytes) { p = realloc(p, nbytes); - if (p == NULL) - sh_error("Out of space"); - return p; + return checknull(p); } @@ -80,10 +86,7 @@ ckrealloc(pointer p, size_t nbytes) char * savestr(const char *s) { - char *p = strdup(s); - if (!p) - sh_error("Out of space"); - return p; + return checknull(strdup(s)); } @@ -110,8 +113,7 @@ char *stacknxt = stackbase.space; size_t stacknleft = MINSIZE; char *sstrend = stackbase.space + MINSIZE; -pointer -stalloc(size_t nbytes) +void *stalloc(size_t nbytes) { char *p; size_t aligned; @@ -127,7 +129,7 @@ stalloc(size_t nbytes) blocksize = MINSIZE; len = sizeof(struct stack_block) - MINSIZE + blocksize; if (len < blocksize) - sh_error("Out of space"); + outofspace(); INTOFF; sp = ckmalloc(len); sp->prev = stackp; @@ -144,8 +146,7 @@ stalloc(size_t nbytes) } -void -stunalloc(pointer p) +void stunalloc(void *p) { #ifdef DEBUG if (!p || (stacknxt < (char *)p) || ((char *)p < stackp->space)) { @@ -159,7 +160,8 @@ stunalloc(pointer p) -void pushstackmark(struct stackmark *mark, size_t len) +__attribute__((__noinline__)) void pushstackmark(struct stackmark *mark, + size_t len) { mark->stackp = stackp; mark->stacknxt = stacknxt; @@ -201,13 +203,14 @@ popstackmark(struct stackmark *mark) * part of the block that has been used. 
*/ -static void growstackblock(size_t min) +static char *growstackblock(size_t min) { size_t newlen; + char *p; newlen = stacknleft * 2; if (newlen < stacknleft) - sh_error("Out of space"); + outofspace(); min = SHELL_ALIGN(min | 128); if (newlen < min) newlen += min; @@ -221,22 +224,25 @@ static void growstackblock(size_t min) sp = stackp; prevstackp = sp->prev; grosslen = newlen + sizeof(struct stack_block) - MINSIZE; - sp = ckrealloc((pointer)sp, grosslen); + sp = ckrealloc(sp, grosslen); sp->prev = prevstackp; stackp = sp; - stacknxt = sp->space; + p = stacknxt = sp->space; stacknleft = newlen; sstrend = sp->space + newlen; INTON; } else { char *oldspace = stacknxt; int oldlen = stacknleft; - char *p = stalloc(newlen); + + p = stalloc(newlen); /* free the space we just allocated */ stacknxt = memcpy(p, oldspace, oldlen); stacknleft += newlen; } + + return p; } /* @@ -262,14 +268,13 @@ growstackstr(void) { size_t len = stackblocksize(); - growstackblock(0); - return stackblock() + len; + return growstackblock(0) + len; } -char *growstackto(size_t len) +__attribute__((__noinline__)) char *growstackto(size_t len) { if (stackblocksize() < len) - growstackblock(len); + return growstackblock(len); return stackblock(); } @@ -277,16 +282,14 @@ char *growstackto(size_t len) * Called from CHECKSTRSPACE. 
*/ -char * -makestrspace(size_t newlen, char *p) +__attribute__((__noinline__)) char *makestrspace(size_t newlen, char *p) { size_t len = p - stacknxt; return growstackto(len + newlen) + len; } -char * -stnputs(const char *s, size_t n, char *p) +__attribute__((__noinline__)) char *stnputs(const char *s, size_t n, char *p) { p = makestrspace(n, p); p = mempcpy(p, s, n); diff --git a/src/memalloc.h b/src/memalloc.h index b9c63da..1895c1e 100644 --- a/src/memalloc.h +++ b/src/memalloc.h @@ -35,6 +35,7 @@ */ #include +#include struct stackmark { struct stack_block *stackp; @@ -47,11 +48,11 @@ extern char *stacknxt; extern size_t stacknleft; extern char *sstrend; -pointer ckmalloc(size_t); -pointer ckrealloc(pointer, size_t); +void *ckmalloc(size_t); +void *ckrealloc(void *, size_t); char *savestr(const char *); -pointer stalloc(size_t); -void stunalloc(pointer); +void *stalloc(size_t); +void stunalloc(void *); void pushstackmark(struct stackmark *mark, size_t len); void setstackmark(struct stackmark *); void popstackmark(struct stackmark *); @@ -80,11 +81,11 @@ static inline char *_STPUTC(int c, char *p) { #define STPUTC(c, p) ((p) = _STPUTC((c), (p))) #define CHECKSTRSPACE(n, p) \ ({ \ - char *q = (p); \ - size_t l = (n); \ - size_t m = sstrend - q; \ - if (l > m) \ - (p) = makestrspace(l, q); \ + char *_q = (p); \ + size_t _l = (n); \ + size_t _m = sstrend - _q; \ + if (_l > _m) \ + (p) = makestrspace(_l, _q); \ 0; \ }) #define USTPUTC(c, p) (*p++ = (c)) @@ -97,4 +98,4 @@ static inline char *_STPUTC(int c, char *p) { #define ungrabstackstr(s, p) stunalloc((s)) #define stackstrend() ((void *)sstrend) -#define ckfree(p) free((pointer)(p)) +#define ckfree(p) free((void *)(p)) diff --git a/src/miscbltin.c b/src/miscbltin.c index 5ccbbcb..e66666c 100644 --- a/src/miscbltin.c +++ b/src/miscbltin.c @@ -36,28 +36,31 @@ * Miscelaneous builtins. 
*/ +#include +#include +#include +#include #include /* quad_t */ #include /* BSD4_4 */ #include #include #include #include -#include -#include -#include -#include "shell.h" -#include "options.h" -#include "var.h" -#include "output.h" -#include "memalloc.h" #include "error.h" +#include "expand.h" +#include "input.h" +#include "memalloc.h" #include "miscbltin.h" #include "mystring.h" #include "main.h" -#include "expand.h" +#include "options.h" +#include "output.h" #include "parser.h" +#include "shell.h" +#include "syntax.h" #include "trap.h" +#include "var.h" #undef rflag @@ -115,14 +118,13 @@ readcmd_handle_line(char *s, int ac, char **ap) int readcmd(int argc, char **argv) { - char **ap; - char c; - int rflag; char *prompt; - char *p; int startloc; int newloc; int status; + char **ap; + int rflag; + char *p; int i; rflag = 0; @@ -145,25 +147,30 @@ readcmd(int argc, char **argv) status = 0; STARTSTACKSTR(p); + pushstdin(); + goto start; for (;;) { - switch (read(0, &c, 1)) { - case 1: - break; - default: - if (errno == EINTR && !pending_sig) - continue; - /* fall through */ - case 0: + unsigned ml; + int c; + + CHECKSTRSPACE((MB_LEN_MAX > 16 ? 
MB_LEN_MAX : 16) + 4, p); + c = pgetc(); + if (c == PEOF) { status = 1; - goto out; + break; } if (c == '\0') continue; + ml = getmbc(c, p, 0); + if (ml) { + p += ml; + goto record; + } if (newloc >= startloc) { if (c == '\n') - goto resetbs; + goto record; goto put; } if (!rflag && c == '\\') { @@ -173,20 +180,19 @@ readcmd(int argc, char **argv) if (c == '\n') break; put: - CHECKSTRSPACE(2, p); if (strchr(qchars, c)) USTPUTC(CTLESC, p); USTPUTC(c, p); +record: if (newloc >= startloc) { -resetbs: recordregion(startloc, newloc, 0); start: startloc = p - (char *)stackblock(); newloc = startloc - 1; } } -out: + popfile(); recordregion(startloc, p - (char *)stackblock(), 0); STACKSTRNUL(p); readcmd_handle_line(p + 1, argc - (ap - argv), ap); @@ -237,7 +243,7 @@ umaskcmd(int argc, char **argv) *ap++ = ','; } ap[-1] = '\0'; - out1fmt("%s\n", buf); + out1fmt(snlfmt, buf); } else { out1fmt("%.4o\n", mask); } @@ -268,7 +274,10 @@ umaskcmd(int argc, char **argv) } if (!positions) positions = 0111; /* default is a */ - if (!strchr("=+-", op = *ap)) + op = *ap; + if (!op) + goto error; + if (!strchr("=+-", op)) break; ap++; new_val = 0; @@ -308,6 +317,7 @@ umaskcmd(int argc, char **argv) break; } if (*ap) { +error: sh_error("Illegal mode: %s", *argptr); return 1; } @@ -440,6 +450,9 @@ ulimitcmd(int argc, char **argv) #endif #ifdef RLIMIT_LOCKS "w" +#endif +#ifdef RLIMIT_RTPRIO + "r" #endif )) != '\0') switch (optc) { diff --git a/src/mkinit.c b/src/mkinit.c index 5bca9ee..2514ebf 100644 --- a/src/mkinit.c +++ b/src/mkinit.c @@ -91,6 +91,7 @@ struct event { char *name; /* name of event (e.g. INIT) */ char *routine; /* name of routine called on event */ char *comment; /* comment describing routine */ + char *args; /* arguments to routine */ struct text code; /* code for handling event */ }; @@ -113,6 +114,16 @@ char exitreset[] = "\ * but prior to exitshell. 
\n\ */\n"; +char forkreset[] = "\ +/*\n\ + * This routine is called when we enter a subshell.\n\ + */\n"; + +char postexitreset[] = "\ +/*\n\ + * This routine is called in exitshell.\n\ + */\n"; + char reset[] = "\ /*\n\ * This routine is called when an error or an interrupt occurs in an\n\ @@ -123,6 +134,8 @@ char reset[] = "\ struct event event[] = { {"INIT", "init", init}, {"EXITRESET", "exitreset", exitreset}, + {"FORKRESET", "forkreset", forkreset, "union node *n"}, + {"POSTEXITRESET", "postexitreset", postexitreset}, {"RESET", "reset", reset}, {NULL, NULL} }; @@ -382,7 +395,7 @@ output(void) for (ep = event ; ep->name ; ep++) { fputs("\n\n\n", fp); fputs(ep->comment, fp); - fprintf(fp, "\nvoid\n%s() {\n", ep->routine); + fprintf(fp, "\nvoid\n%s(%s) {\n", ep->routine, ep->args ?: ""); writetext(&ep->code, fp); fprintf(fp, "}\n"); } diff --git a/src/mksignames.c b/src/mksignames.c index a832eab..192728b 100644 --- a/src/mksignames.c +++ b/src/mksignames.c @@ -360,9 +360,7 @@ initialize_signames () } } -void -write_signames (stream) - FILE *stream; +void write_signames(FILE *stream) { register int i; diff --git a/src/mksyntax.c b/src/mksyntax.c index a23c18c..4d7280b 100644 --- a/src/mksyntax.c +++ b/src/mksyntax.c @@ -64,7 +64,6 @@ struct synclass synclass[] = { { "CEOF", "end of file" }, { "CCTL", "like CWORD, except it must be escaped" }, { "CSPCL", "these terminate a word" }, - { "CIGN", "character should be ignored" }, { NULL, NULL } }; @@ -145,9 +144,8 @@ main(int argc, char **argv) fprintf(hfile, "/* %s */\n", is_entry[i].comment); } putc('\n', hfile); - fprintf(hfile, "#define SYNBASE %d\n", 130); - fprintf(hfile, "#define PEOF %d\n\n", -130); - fprintf(hfile, "#define PEOA %d\n\n", -129); + fprintf(hfile, "#define SYNBASE %d\n", 129); + fprintf(hfile, "#define PEOF %d\n\n", -129); putc('\n', hfile); fputs("#define BASESYNTAX (basesyntax + SYNBASE)\n", hfile); fputs("#define DQSYNTAX (dqsyntax + SYNBASE)\n", hfile); @@ -170,7 +168,6 @@ main(int argc, 
char **argv) add("$", "CVAR"); add("}", "CENDVAR"); add("<>();&| \t", "CSPCL"); - syntax[1] = "CSPCL"; print("basesyntax"); init(); fputs("\n/* syntax table used when in double quotes */\n", cfile); @@ -181,14 +178,14 @@ main(int argc, char **argv) add("$", "CVAR"); add("}", "CENDVAR"); /* ':/' for tilde expansion, '-' for [a\-x] pattern ranges */ - add("!*?[=~:/-]", "CCTL"); + add("^!*?[=~:/-]", "CCTL"); print("dqsyntax"); init(); fputs("\n/* syntax table used when in single quotes */\n", cfile); add("\n", "CNL"); add("'", "CENDQUOTE"); /* ':/' for tilde expansion, '-' for [a\-x] pattern ranges */ - add("!*?[=~:/-]\\", "CCTL"); + add("^!*?[=~:/-]\\", "CCTL"); print("sqsyntax"); init(); fputs("\n/* syntax table used when in arithmetic */\n", cfile); @@ -223,7 +220,7 @@ filltable(char *dftval) { int i; - for (i = 0 ; i < 258; i++) + for (i = 0 ; i < 257; i++) syntax[i] = dftval; } @@ -239,9 +236,8 @@ init(void) filltable("CWORD"); syntax[0] = "CEOF"; - syntax[1] = "CIGN"; for (ctl = CTL_FIRST; ctl <= CTL_LAST; ctl++ ) - syntax[130 + ctl] = "CCTL"; + syntax[129 + ctl] = "CCTL"; } @@ -253,7 +249,7 @@ static void add(char *p, char *type) { while (*p) - syntax[(signed char)*p++ + 130] = type; + syntax[(signed char)*p++ + 129] = type; } @@ -271,7 +267,7 @@ print(char *name) fprintf(hfile, "extern const char %s[];\n", name); fprintf(cfile, "const char %s[] = {\n", name); col = 0; - for (i = 0 ; i < 258; i++) { + for (i = 0 ; i < 257; i++) { if (i == 0) { fputs(" ", cfile); } else if ((i & 03) == 0) { diff --git a/src/mktokens b/src/mktokens index cd52241..dcef676 100644 --- a/src/mktokens +++ b/src/mktokens @@ -37,8 +37,11 @@ # token marks the end of a list. The third column is the name to print in # error messages. -cat > /tmp/ka$$ <<\! +: "${TMPDIR:=/tmp}" + +cat > "${TMPDIR}"/ka$$ <<\! TEOF 1 end of file +TBLANK 0 blank TNL 0 newline TSEMI 0 ";" TBACKGND 0 "&" @@ -68,28 +71,28 @@ TWHILE 0 "while" TBEGIN 0 "{" TEND 1 "}" ! 
-nl=`wc -l /tmp/ka$$` +nl=`wc -l "${TMPDIR}"/ka$$` exec > token.h -awk '{print "#define " $1 " " NR-1}' /tmp/ka$$ +awk '{print "#define " $1 " " NR-1}' "${TMPDIR}"/ka$$ exec > token_vars.h echo ' /* Array indicating which tokens mark the end of a list */ static const char tokendlist[] = {' -awk '{print "\t" $2 ","}' /tmp/ka$$ +awk '{print "\t" $2 ","}' "${TMPDIR}"/ka$$ echo '}; static const char *const tokname[] = {' sed -e 's/"/\\"/g' \ -e 's/[^ ]*[ ][ ]*[^ ]*[ ][ ]*\(.*\)/ "\1",/' \ - /tmp/ka$$ + "${TMPDIR}"/ka$$ echo '}; ' -sed 's/"//g' /tmp/ka$$ | awk ' +sed 's/"//g' "${TMPDIR}"/ka$$ | awk ' /TNOT/{print "#define KWDOFFSET " NR-1; print ""; print "static const char *const parsekwd[] = {"} /TNOT/,/neverfound/{if (last) print " \"" last "\","; last = $3} END{print " \"" last "\"\n};"}' -rm /tmp/ka$$ +rm "${TMPDIR}"/ka$$ diff --git a/src/myhistedit.h b/src/myhistedit.h index 22e5c43..1736f62 100644 --- a/src/myhistedit.h +++ b/src/myhistedit.h @@ -31,9 +31,27 @@ * @(#)myhistedit.h 8.2 (Berkeley) 5/4/95 */ +#ifdef SMALL +typedef void History; +typedef void EditLine; +typedef int HistEvent; + +enum { + H_APPEND, + H_ENTER, +}; + +#define hist NULL + +static inline void history(History *h, HistEvent *he, int action, char *p) +{ +} +#else #include extern History *hist; +#endif + extern EditLine *el; extern int displayhist; diff --git a/src/mystring.c b/src/mystring.c index de624b8..97e240c 100644 --- a/src/mystring.c +++ b/src/mystring.c @@ -60,11 +60,15 @@ char nullstr[1]; /* zero length string */ const char spcstr[] = " "; const char snlfmt[] = "%s\n"; -const char dolatstr[] = { CTLQUOTEMARK, CTLVAR, VSNORMAL, '@', '=', +const char dolatstr[] = { CTLQUOTEMARK, CTLVAR, VSNORMAL | VSBIT, '@', '=', CTLQUOTEMARK, '\0' }; -const char qchars[] = { CTLESC, CTLQUOTEMARK, 0 }; +const char cqchars[] = { + '\\', + CTLESC, CTLMBCHAR, CTLQUOTEMARK, 0 +}; const char illnum[] = "Illegal number: %s"; const char homestr[] = "HOME"; +const char dotdir[] = "."; /* * equal - #defined 
in mystring.h @@ -125,9 +129,6 @@ intmax_t atomax(const char *s, int base) errno = 0; r = strtoimax(s, &p, base); - if (errno == ERANGE) - badnum(s); - /* * Disallow completely blank strings in non-arithmetic (base != 0) * contexts. diff --git a/src/mystring.h b/src/mystring.h index 083ea98..0857c32 100644 --- a/src/mystring.h +++ b/src/mystring.h @@ -37,13 +37,27 @@ #include #include +#ifdef HAVE_FNMATCH +#define FNMATCH_IS_ENABLED 1 +#ifdef HAVE_GLOB +#define GLOB_IS_ENABLED 1 +#else +#define GLOB_IS_ENABLED 0 +#endif +#else +#define FNMATCH_IS_ENABLED 0 +#define GLOB_IS_ENABLED 0 +#endif + extern const char snlfmt[]; extern const char spcstr[]; extern const char dolatstr[]; #define DOLATSTRLEN 6 -extern const char qchars[]; +extern const char cqchars[]; +#define qchars (cqchars + 1) extern const char illnum[]; extern const char homestr[]; +extern const char dotdir[]; #if 0 void scopyn(const char *, char *, int); diff --git a/src/nodes.c.pat b/src/nodes.c.pat index 9125bc7..636456c 100644 --- a/src/nodes.c.pat +++ b/src/nodes.c.pat @@ -87,18 +87,14 @@ copyfunc(union node *n) -STATIC void -calcsize(n) - union node *n; +static void calcsize(union node *n) { %CALCSIZE } -STATIC void -sizenodelist(lp) - struct nodelist *lp; +static void sizenodelist(struct nodelist *lp) { while (lp) { funcblocksize += SHELL_ALIGN(sizeof(struct nodelist)); @@ -109,9 +105,7 @@ sizenodelist(lp) -STATIC union node * -copynode(n) - union node *n; +static union node *copynode(union node *n) { union node *new; @@ -120,9 +114,7 @@ copynode(n) } -STATIC struct nodelist * -copynodelist(lp) - struct nodelist *lp; +static struct nodelist *copynodelist(struct nodelist *lp) { struct nodelist *start; struct nodelist **lpp; @@ -142,9 +134,7 @@ copynodelist(lp) -STATIC char * -nodesavestr(s) - char *s; +static char *nodesavestr(char *s) { char *rtn = funcstring; diff --git a/src/options.c b/src/options.c index 6f381e6..c4eedeb 100644 --- a/src/options.c +++ b/src/options.c @@ -80,6 +80,7 @@ static 
const char *const optnames[NOPTS] = { "notify", "nounset", "nolog", + "pipefail", "debug", }; @@ -101,6 +102,7 @@ const char optletters[NOPTS] = { 'u', 0, 0, + 0, }; char optlist[NOPTS]; @@ -117,31 +119,30 @@ STATIC int getopts(char *, char *, char **); */ int -procargs(int argc, char **argv) +procargs(char **xargv) { int i; - const char *xminusc; - char **xargv; int login; - xargv = argv; login = xargv[0] && xargv[0][0] == '-'; arg0 = xargv[0]; - if (argc > 0) + if (xargv[0]) xargv++; for (i = 0; i < NOPTS; i++) optlist[i] = 2; argptr = xargv; login |= options(1); xargv = argptr; - xminusc = minusc; if (*xargv == NULL) { - if (xminusc) + if (minusc) sh_error("-c requires an argument"); sflag = 1; } - if (iflag == 2 && sflag == 1 && isatty(0) && isatty(1)) - iflag = 1; + if (iflag == 2 && sflag == 1) { + input_init(); + if (stdin_istty && isatty(2)) + iflag = 1; + } if (mflag == 2) mflag = iflag; for (i = 0; i < NOPTS; i++) @@ -151,7 +152,7 @@ procargs(int argc, char **argv) debug = 1; #endif /* POSIX 1003.2: first arg after -c cmd is $0, remainder $1... 
*/ - if (xminusc) { + if (minusc) { minusc = *xargv++; if (*xargv) goto setarg0; @@ -159,7 +160,6 @@ procargs(int argc, char **argv) setinputfile(*xargv, 0); setarg0: arg0 = *xargv++; - commandname = arg0; } shellparam.p = xargv; @@ -390,11 +390,9 @@ setcmd(int argc, char **argv) } -void -getoptsreset(value) - const char *value; +void getoptsreset(const char *value) { - shellparam.optind = number(value) ?: 1; + shellparam.optind = 1; shellparam.optoff = -1; } @@ -410,8 +408,11 @@ getoptscmd(int argc, char **argv) { char **optbase; + nextopt(nullstr); + argc -= argptr - argv - 1; + argv = argptr - 1; if (argc < 3) - sh_error("Usage: getopts optstring var [arg]"); + sh_error("Usage: getopts optstring var [arg...]"); else if (argc == 3) { optbase = shellparam.p; if ((unsigned)shellparam.optind > shellparam.nparam + 1) { @@ -463,7 +464,7 @@ getopts(char *optstr, char *optvar, char **optfirst) } c = *p++; - for (q = optstr; *q != c; ) { + for (q = optstr[0] == ':' ? optstr + 1 : optstr; *q != c; ) { if (*q == '\0') { if (optstr[0] == ':') { s[0] = c; diff --git a/src/options.h b/src/options.h index 975fe33..0ad5535 100644 --- a/src/options.h +++ b/src/options.h @@ -60,9 +60,10 @@ struct shparam { #define bflag optlist[13] #define uflag optlist[14] #define nolog optlist[15] -#define debug optlist[16] +#define pipefail optlist[16] +#define debug optlist[17] -#define NOPTS 17 +#define NOPTS 18 extern const char optletters[NOPTS]; extern char optlist[NOPTS]; @@ -75,7 +76,7 @@ extern char **argptr; /* argument list for builtin commands */ extern char *optionarg; /* set by nextopt */ extern char *optptr; /* used by nextopt */ -int procargs(int, char **); +int procargs(char **); void optschanged(void); void setparam(char **); void freeparam(volatile struct shparam *); diff --git a/src/parser.c b/src/parser.c index 1f9e8ec..d013817 100644 --- a/src/parser.c +++ b/src/parser.c @@ -36,7 +36,11 @@ #include #endif +#include +#include #include +#include +#include #include "shell.h" 
#include "parser.h" @@ -46,10 +50,12 @@ #include "syntax.h" #include "options.h" #include "input.h" +#include "jobs.h" #include "output.h" #include "var.h" #include "error.h" #include "memalloc.h" +#include "init.h" /* defines reset() */ // libdash #include "mystring.h" #include "alias.h" #include "show.h" @@ -115,7 +121,6 @@ STATIC union node *simplecmd(void); STATIC union node *makename(void); STATIC void parsefname(void); STATIC void parseheredoc(void); -STATIC int peektoken(void); STATIC int readtoken(void); STATIC int xxreadtoken(void); STATIC int pgetc_eatbnl(); @@ -133,6 +138,12 @@ int isassignment(const char *p) return *q == '='; } +int issimplecmd(union node *n, const char *name) +{ + return n && n->type == NCMD && n->ncmd.args && + equal(n->ncmd.args->narg.text, name); +} + static inline int realeofmark(const char *eofmark) { return eofmark && eofmark != FAKEEOFMARK; @@ -154,28 +165,53 @@ parsecmd(int interact) if (doprompt) setprompt(doprompt); needprompt = 0; + return list(1); } +// libdash +/* 2018-09-25 manually install a handler here so we can return an appropriate error code */ +union node * +parsecmd_safe(int interact) +{ + struct jmploc jmploc; + + tokpushback = 0; + checkkwd = 0; + heredoclist = 0; + doprompt = interact; + if (doprompt) + setprompt(doprompt); + needprompt = 0; + + if (unlikely(setjmp(jmploc.loc))) { + return NERR; + } + handler = &jmploc; + + return list(1); +} STATIC union node * list(int nlflag) { + int chknl = nlflag & 1 ? 
0 : CHKNL; union node *n1, *n2, *n3; int tok; n1 = NULL; for (;;) { - switch (readtoken()) { + checkkwd = chknl | CHKKWD | CHKALIAS; + tok = readtoken(); + switch (tok) { case TNL: - if (!(nlflag & 1)) - break; parseheredoc(); return n1; case TEOF: - if (!n1 && (nlflag & 1)) + if (!n1 && !chknl) n1 = NEOF; +out_eof: parseheredoc(); tokpushback++; lasttoken = TEOF; @@ -183,8 +219,7 @@ list(int nlflag) } tokpushback++; - checkkwd = CHKNL | CHKKWD | CHKALIAS; - if (nlflag == 2 && tokendlist[peektoken()]) + if (nlflag == 2 && tokendlist[tok]) return n1; nlflag |= 2; @@ -214,15 +249,16 @@ list(int nlflag) n1 = n3; } switch (tok) { - case TNL: case TEOF: + goto out_eof; + case TNL: tokpushback++; /* fall through */ case TBACKGND: case TSEMI: break; default: - if ((nlflag & 1)) + if (!chknl) synexpect(-1); tokpushback++; return n1; @@ -430,6 +466,8 @@ TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : "")); cp->type = NCLIST; app = &cp->nclist.pattern; for (;;) { + if (lasttoken < TWORD) + synexpect(TWORD); *app = ap = (union node *)stalloc(sizeof (struct narg)); ap->type = NARG; ap->narg.text = wordtext; @@ -470,6 +508,9 @@ TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? 
wordtext : "")); break; case TWORD: case TREDIR: +// libdash +/* 2019-04-25 to allow for proper handling of empty aliases */ + case TNL: tokpushback++; return simplecmd(); } @@ -614,7 +655,7 @@ void fixredir(union node *n, const char *text, int err) else { if (err) - synerror("Bad fd number"); + sh_error("Bad fd number: %s", text); else n->ndup.vname = makename(); } @@ -627,9 +668,10 @@ parsefname(void) union node *n = redirnode; if (n->type == NHERE) - checkkwd = CHKEOFMARK; + checkkwd |= CHKEOFMARK; if (readtoken() != TWORD) synexpect(-1); + checkkwd &= ~CHKEOFMARK; if (n->type == NHERE) { struct heredoc *here = heredoc; struct heredoc *p; @@ -685,16 +727,6 @@ parseheredoc(void) } } -STATIC int -peektoken(void) -{ - int t; - - t = readtoken(); - tokpushback++; - return (t); -} - STATIC int readtoken(void) { @@ -713,10 +745,17 @@ readtoken(void) if (kwd & CHKNL) { while (t == TNL) { parseheredoc(); + checkkwd = 0; t = xxreadtoken(); } } + kwd |= checkkwd; + checkkwd = 0; + +// libdash +/* 2019-04-25 to handle empty aliases */ +ignorenl: if (t != TWORD || quoteflag) { goto out; } @@ -734,17 +773,21 @@ readtoken(void) } } - if (checkkwd & CHKALIAS) { + if (kwd & CHKALIAS) { struct alias *ap; if ((ap = lookupalias(wordtext, 1)) != NULL) { +// libdash +/* 2019-04-25 to handle empty aliases */ if (*ap->val) { pushstring(ap->val, ap); - } - goto top; + goto top; + } else { + t = xxreadtoken(); + goto ignorenl; + } } } out: - checkkwd = 0; #ifdef DEBUG if (!alreadyseen) TRACE(("token %s %s\n", tokname[t], t == TWORD ? 
wordtext : "")); @@ -799,10 +842,11 @@ xxreadtoken(void) setprompt(2); } for (;;) { /* until token or start of word found */ + int tok; + c = pgetc_eatbnl(); switch (c) { case ' ': case '\t': - case PEOA: continue; case '#': while ((c = pgetc()) != '\n' && c != PEOF); @@ -833,9 +877,10 @@ xxreadtoken(void) case ')': RETURN(TRP); } - break; + tok = readtoken1(c, BASESYNTAX, (char *)NULL, 0); + if (tok != TBLANK) + return tok; } - return readtoken1(c, BASESYNTAX, (char *)NULL, 0); #undef RETURN } @@ -844,7 +889,7 @@ static int pgetc_eatbnl(void) int c; while ((c = pgetc()) == '\\') { - if (pgetc2() != '\n') { + if (pgetc() != '\n') { pungetc(); break; } @@ -875,7 +920,96 @@ static void synstack_pop(struct synstack **stack) *stack = (*stack)->next; } +unsigned getmbc(int c, char *out, int mode) +{ + char *const start = out; + mbstate_t mbst = {}; + unsigned ml = 0; + size_t ml2; + wchar_t wc; + char *mbc; + + if (likely(c >= 0 || c <= PEOF)) + return 0; + mbc = (mode & 3) < 2 ? out + 2 + (mode == 1) : out; + mbc[ml] = c; + while ((ml2 = mbrtowc(&wc, mbc + ml++, 1, &mbst)) == -2) { + if (ml >= MB_LEN_MAX) + break; + c = pgetc_eoa(); + if (c == PEOA || c == PEOF) + break; + mbc[ml] = c; + } + + if (ml2 == 1 && ml > 1) { + if (mode == 4 && iswblank(wc)) + return 1; + + if ((mode & 3) < 2) { + USTPUTC(CTLMBCHAR, out); + if (mode == 1) + USTPUTC(CTLESC, out); + USTPUTC(ml, out); + } + STADJUST(ml, out); + if ((mode & 3) < 2) { + USTPUTC(ml, out); + USTPUTC(CTLMBCHAR, out); + } + + return out - start; + } + + if (ml > 1) + pungetn(ml - 1); + + return 0; +} + +static char *dollarsq_escape(char *out) +{ + /* 10 = length of UXXXXXXXX + NUL */ + char str[10]; + unsigned len; + char *p; + + for (len = 0; len < sizeof(str) - 1;) { + int c = pgetc(); + + if (c <= PEOF) + break; + + str[len++] = c; + + if (c == '\'') + break; + } + str[len] = 0; + + p = str; + if (*p != 'c') { + unsigned ret; + + ret = conv_escape(p, out, true); + p += ret >> 4; + out += ret & 15; + } else if 
(*++p) { + int conv_ch; + int c; + + c = (unsigned char)*p++; + + p += !((c ^ *p) | (c ^ '\\')); + + conv_ch = (c & ~((c & 0x40) >> 1)) ^ 0x40; + USTPUTC(conv_ch, out); + } + + pungetn(len - (p - str)); + return out; +} /* * If eofmark is NULL, read a word or a redirection symbol. If eofmark @@ -899,20 +1033,19 @@ static void synstack_pop(struct synstack **stack) STATIC int readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) { - int c = firstc; - char *out; - size_t len; - struct nodelist *bqlist; - int quotef; - int oldstyle; - /* syntax stack */ struct synstack synbase = { .syntax = syntax }; + int chkeofmark = checkkwd & CHKEOFMARK; struct synstack *synstack = &synbase; + struct nodelist *bqlist = NULL; + int dollarsq = 0; + int c = firstc; + int quotef = 0; + int oldstyle; + size_t len; + char *out; if (syntax == DQSYNTAX) synstack->dblquote = 1; - quotef = 0; - bqlist = NULL; STARTSTACKSTR(out); loop: { /* for each line, until end of word */ @@ -927,12 +1060,29 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) } #endif CHECKEND(); /* set c to PEOF if at end of here document */ - for (;;) { /* until end of line or end of word */ - CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */ + /* Until end of line or end of word */ + for (;; c = pgetc_top(synstack)) { + int fieldsplitting; + unsigned ml; + + /* Permit max(MB_LEN_MAX, 23) calls to USTPUTC. */ + CHECKSTRSPACE((MB_LEN_MAX > 16 ? MB_LEN_MAX : 16) + 7, + out); + fieldsplitting = synstack->syntax == BASESYNTAX && + !synstack->varnest ? 
4 : 0; + ml = getmbc(c, out, fieldsplitting); + if (ml == 1) { + if (out == stackblock()) + return TBLANK; + c = pgetc(); + break; + } + out += ml; + if (ml) + continue; switch(synstack->syntax[c]) { case CNL: /* '\n' */ - if (synstack->syntax == BASESYNTAX && - !synstack->varnest) + if (fieldsplitting) goto endword; /* exit outer loop */ USTPUTC(c, out); nlprompt(); @@ -942,6 +1092,10 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) USTPUTC(c, out); break; case CCTL: + if (c == dollarsq) { + out = dollarsq_escape(out); + break; + } if ((!eofmark) | synstack->dblquote | synstack->varnest) USTPUTC(CTLESC, out); @@ -949,33 +1103,41 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) break; /* backslash */ case CBACK: - c = pgetc2(); + c = pgetc(); if (c == PEOF) { USTPUTC(CTLESC, out); USTPUTC('\\', out); pungetc(); - } else { - if ( - synstack->dblquote && - c != '\\' && c != '`' && - c != '$' && ( - c != '"' || - (eofmark != NULL && - !synstack->varnest) - ) && ( - c != '}' || - !synstack->varnest - ) - ) { - USTPUTC(CTLESC, out); - USTPUTC('\\', out); - } + break; + } + + if ( + synstack->dblquote && + c != '\\' && c != '`' && + c != '$' && ( + c != '"' || + (eofmark != NULL && + !synstack->varnest) + ) && ( + c != '}' || + !synstack->varnest + ) + ) { USTPUTC(CTLESC, out); - USTPUTC(c, out); - quotef++; + USTPUTC('\\', out); } + quotef++; + + ml = getmbc(c, out, 1); + out += ml; + if (ml) + break; + + USTPUTC(CTLESC, out); + USTPUTC(c, out); break; case CSQUOTE: +csquote: synstack->syntax = SQSYNTAX; quotemark: if (eofmark == NULL) { @@ -996,6 +1158,14 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) } if (synstack->dqvarnest == 0) { + if (likely(dollarsq)) { + char *p = stackblock(); + + *out = 0; + out = p + strlen(p); + dollarsq = 0; + } + synstack->syntax = BASESYNTAX; synstack->dblquote = 0; } @@ -1017,53 +1187,44 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int 
striptabs) synstack_pop(&synstack); else if (synstack->dqvarnest > 0) synstack->dqvarnest--; - USTPUTC(CTLENDVAR, out); - } else { - USTPUTC(c, out); + if (!chkeofmark) + c = CTLENDVAR; } + USTPUTC(c, out); break; case CLP: /* '(' in arithmetic */ synstack->parenlevel++; USTPUTC(c, out); break; case CRP: /* ')' in arithmetic */ - if (synstack->parenlevel > 0) { - USTPUTC(c, out); + if (synstack->parenlevel > 0) --synstack->parenlevel; + else if (pgetc_eatbnl() == ')') { + synstack_pop(&synstack); + if (chkeofmark) + USTPUTC(c, out); + else + c = CTLENDARI; } else { - if (pgetc_eatbnl() == ')') { - USTPUTC(CTLENDARI, out); - synstack_pop(&synstack); - } else { - /* - * unbalanced parens - * (don't 2nd guess - no error) - */ - pungetc(); - USTPUTC(')', out); - } + /* + * unbalanced parens + * (don't 2nd guess - no error) + */ + pungetc(); } + USTPUTC(c, out); break; case CBQUOTE: /* '`' */ - if (checkkwd & CHKEOFMARK) { - USTPUTC('`', out); - break; - } - + USTPUTC('`', out); PARSEBACKQOLD(); break; case CEOF: goto endword; /* exit outer loop */ - case CIGN: - break; default: - if (synstack->varnest == 0) + if (fieldsplitting) goto endword; /* exit outer loop */ - if (c != PEOA) { - USTPUTC(c, out); - } + USTPUTC(c, out); } - c = pgetc_top(synstack); } } endword: @@ -1109,21 +1270,17 @@ checkend: { int markloc; char *p; - if (c == PEOA) { - c = pgetc2(); - } if (striptabs) { - while (c == '\t') { - c = pgetc2(); - } + while (c == '\t') + c = pgetc(); } markloc = out - (char *)stackblock(); for (p = eofmark; STPUTC(c, out), *p; p++) { - if (c != *p) + if (c != (signed char)*p) goto more_heredoc; - c = pgetc2(); + c = pgetc(); } if (c == '\n' || c == PEOF) { @@ -1137,7 +1294,7 @@ checkend: { len = out - p; if (len) { - len -= c < 0; + len -= c <= PEOF; c = p[-1]; if (len) { @@ -1227,34 +1384,34 @@ parseredir: { */ parsesub: { + const char *newsyn = synstack->syntax; + static const char types[] = "}-+?="; int subtype; - int typeloc; char *p; - static const char types[] 
= "}-+?="; + + USTPUTC('$', out); c = pgetc_eatbnl(); - if ( - (checkkwd & CHKEOFMARK) || - c <= PEOA || - (c != '(' && c != '{' && !is_name(c) && !is_special(c)) - ) { - USTPUTC('$', out); - pungetc(); - } else if (c == '(') { /* $(command) or $((arith)) */ + if (c == '(') { /* $(command) or $((arith)) */ + USTPUTC(c, out); if (pgetc_eatbnl() == '(') { PARSEARITH(); } else { pungetc(); PARSEBACKQNEW(); } - } else { - const char *newsyn = synstack->syntax; - - USTPUTC(CTLVAR, out); - typeloc = out - (char *)stackblock(); - STADJUST(1, out); + } else if (c == '\'' && newsyn['&']) { + STADJUST(-1, out); + dollarsq = '\\'; + goto csquote; + } else if (c == '{' || is_name(c) || is_special(c)) { + int typeloc = out - (char *)stackblock(); + + STADJUST(!chkeofmark, out); subtype = VSNORMAL; if (likely(c == '{')) { + if (chkeofmark) + USTPUTC('{', out); c = pgetc_eatbnl(); subtype = 0; } @@ -1268,7 +1425,8 @@ parsesub: { do { STPUTC(c, out); c = pgetc_eatbnl(); - } while (is_digit(c)); + } while ((subtype <= 0 || subtype >= VSLENGTH) && + is_digit(c)); } else if (c != '}') { int cc = c; @@ -1277,8 +1435,11 @@ parsesub: { if (!subtype && cc == '#') { subtype = VSLENGTH; - if (c == '_' || isalnum(c)) + if (c == '_' || isalnum(c)) { + if (chkeofmark) + USTPUTC('#', out); goto varname; + } cc = c; c = pgetc_eatbnl(); @@ -1287,7 +1448,8 @@ parsesub: { subtype = 0; c = cc; cc = '#'; - } + } else if (chkeofmark) + USTPUTC('#', out); } if (!is_special(cc)) { @@ -1303,10 +1465,15 @@ parsesub: { if (subtype == 0) { int cc = c; + if (chkeofmark) + STPUTC(c, out); + switch (c) { case ':': subtype = VSNUL; c = pgetc_eatbnl(); + if (chkeofmark) + STPUTC(c, out); /*FALLTHROUGH*/ default: p = strchr(types, c); @@ -1319,15 +1486,19 @@ parsesub: { subtype = c == '#' ? 
VSTRIMLEFT : VSTRIMRIGHT; c = pgetc_eatbnl(); - if (c == cc) + if (c == cc) { + if (chkeofmark) + STPUTC(c, out); subtype++; - else + } else pungetc(); newsyn = BASESYNTAX; break; } } else { + if (subtype == VSLENGTH && c != '}') + subtype = 0; badsub: pungetc(); } @@ -1346,14 +1517,21 @@ parsesub: { synstack->dblquote = newsyn != BASESYNTAX; } - *((char *)stackblock() + typeloc) = subtype; if (subtype != VSNORMAL) { synstack->varnest++; if (synstack->dblquote) synstack->dqvarnest++; } - STPUTC('=', out); - } + if (!chkeofmark) { + char *p = stackblock(); + + p[typeloc - 1] = CTLVAR; + p[typeloc] = subtype | VSBIT; + STPUTC('=', out); + } + } else + pungetc(); + goto parsesub_return; } @@ -1366,50 +1544,55 @@ parsesub: { */ parsebackq: { + int uninitialized_var(saveprompt); + struct heredoc *saveheredoclist; struct nodelist **nlpp; + size_t savelen; union node *n; + unsigned ml; + char *pstr; char *str; - size_t savelen; - struct heredoc *saveheredoclist; - int uninitialized_var(saveprompt); - str = NULL; - savelen = out - (char *)stackblock(); - if (savelen > 0) { - str = alloca(savelen); - memcpy(str, stackblock(), savelen); + if (!chkeofmark) { + STADJUST(oldstyle - 1, out); + out[-1] = CTLBACKQ; + } + if (!chkeofmark || !oldstyle) { + str = stackblock(); + savelen = out - (char *)stackblock(); + grabstackblock(savelen); + STARTSTACKSTR(out); } if (oldstyle) { /* We must read until the closing backquote, giving special treatment to some slashes, and then push the string and reread it as input, interpreting it normally. 
*/ - char *pout; + bool done = false; + char *pout = out; int pc; - size_t psavelen; - char *pstr; - - STARTSTACKSTR(pout); - for (;;) { + while (!done) { if (needprompt) { setprompt(2); } switch (pc = pgetc_eatbnl()) { case '`': - goto done; + done = true; + break; case '\\': - pc = pgetc_eatbnl(); + pc = pgetc(); if (pc != '\\' && pc != '`' && pc != '$' && (!synstack->dblquote || pc != '"')) STPUTC('\\', pout); - if (pc > PEOA) { - break; - } - /* fall through */ + CHECKSTRSPACE(MB_LEN_MAX, pout); + ml = getmbc(pc, pout, 2); + pout += ml; + if (ml) + continue; + break; case PEOF: - case PEOA: synerror("EOF in backquote substitution"); case '\n': @@ -1421,13 +1604,13 @@ parsebackq: { } STPUTC(pc, pout); } -done: - STPUTC('\0', pout); - psavelen = pout - (char *)stackblock(); - if (psavelen > 0) { - pstr = grabstackstr(pout); - setinputstring(pstr); - } + if (chkeofmark) { + out = pout; + goto parsebackq_oldreturn; + } + pout[-1] = 0; + pstr = grabstackstr(pout); + setinputstring(pstr); } nlpp = &bqlist; while (*nlpp) @@ -1451,27 +1634,30 @@ parsebackq: { if (readtoken() != TRP) synexpect(TRP); setinputstring(nullstr); - parseheredoc(); } + parseheredoc(); heredoclist = saveheredoclist; (*nlpp)->n = n; /* Start reading from old file again. */ popfile(); - /* Ignore any pushed back tokens left from the backquote parsing. */ - if (oldstyle) + + out = stnputs(str, savelen, stackblock()); + + if (oldstyle) { + /* Ignore any pushed back tokens left from the backquote + * parsing. 
+ */ tokpushback = 0; - out = growstackto(savelen + 1); - if (str) { - memcpy(out, str, savelen); - STADJUST(savelen, out); - } - USTPUTC(CTLBACKQ, out); - if (oldstyle) goto parsebackq_oldreturn; - else + } else { + if (chkeofmark) { + out = commandtextcont(n, out); + STPUTC(')', out); + } goto parsebackq_newreturn; + } } /* @@ -1483,7 +1669,12 @@ parsearith: { synstack->prev ?: alloca(sizeof(*synstack)), ARISYNTAX); synstack->dblquote = 1; - USTPUTC(CTLARI, out); + if (chkeofmark) + USTPUTC(c, out); + else { + STADJUST(-1, out); + out[-1] = CTLARI; + } goto parsearith_return; } @@ -1571,28 +1762,49 @@ setprompt(int which) const char * expandstr(const char *ps) { - union node n; + struct parsefile *file_stop; + struct jmploc *volatile savehandler; + struct heredoc *saveheredoclist; + const char *result; int saveprompt; + struct jmploc jmploc; + union node n; + int err; + + file_stop = parsefile; /* XXX Fix (char *) cast. */ setinputstring((char *)ps); + saveheredoclist = heredoclist; + heredoclist = NULL; saveprompt = doprompt; doprompt = 0; + needprompt = 0; + result = ps; + savehandler = handler; + if (unlikely(err = setjmp(jmploc.loc))) + goto out; + handler = &jmploc; readtoken1(pgetc_eatbnl(), DQSYNTAX, FAKEEOFMARK, 0); - doprompt = saveprompt; - - popfile(); - n.narg.type = NARG; n.narg.next = NULL; n.narg.text = wordtext; n.narg.backquote = backquotelist; expandarg(&n, NULL, EXP_QUOTED); - return stackblock(); + result = stackblock(); + +out: + restore_handler_expandarg(savehandler, err); + + doprompt = saveprompt; + unwindfiles(file_stop); + heredoclist = saveheredoclist; + + return result; } /* diff --git a/src/parser.h b/src/parser.h index 524ac1c..dd9d85c 100644 --- a/src/parser.h +++ b/src/parser.h @@ -36,12 +36,15 @@ #include "token.h" +union node; + /* control characters in argument strings */ #define CTL_FIRST -127 /* first 'special' character */ #define CTLESC -127 /* escape next character */ #define CTLVAR -126 /* variable defn */ #define 
CTLENDVAR -125 #define CTLBACKQ -124 +#define CTLMBCHAR -123 #define CTLARI -122 /* arithmetic expression */ #define CTLENDARI -121 #define CTLQUOTEMARK -120 @@ -50,6 +53,7 @@ /* variable substitution byte (follows CTLVAR) */ #define VSTYPE 0x0f /* type of variable substitution */ #define VSNUL 0x10 /* colon--treat the empty string as unset */ +#define VSBIT 0x20 /* Ensure subtype is not zero */ /* values of VSTYPE field */ #define VSNORMAL 0x1 /* normal variable: $var or ${var} */ @@ -62,6 +66,7 @@ #define VSTRIMLEFT 0x8 /* ${var#pattern} */ #define VSTRIMLEFTMAX 0x9 /* ${var##pattern} */ #define VSLENGTH 0xa /* ${#var} */ +/* VSLENGTH must come last. */ /* values of checkkwd variable */ #define CHKALIAS 0x1 @@ -78,17 +83,22 @@ extern int lasttoken; extern int tokpushback; #define NEOF ((union node *)&tokpushback) +/* 2018-09-25 similar story for an error return value */ // libdash +#define NERR ((union node *)&lasttoken) extern int whichprompt; /* 1 == PS1, 2 == PS2 */ extern int checkkwd; int isassignment(const char *p); +int issimplecmd(union node *n, const char *name); union node *parsecmd(int); +union node *parsecmd_safe(int); // libdash void fixredir(union node *, const char *, int); const char *getprompt(void *); const char *const *findkwd(const char *); char *endofname(const char *); const char *expandstr(const char *); +unsigned getmbc(int c, char *out, int mode); static inline int goodname(const char *p) diff --git a/src/redir.c b/src/redir.c index 6c81dd0..e823462 100644 --- a/src/redir.c +++ b/src/redir.c @@ -32,6 +32,7 @@ * SUCH DAMAGE. */ +#include #include #include #include /* PIPE_BUF */ @@ -45,16 +46,19 @@ * Code for dealing with input/output redirection. 
*/ +#include "error.h" +#include "expand.h" +#include "input.h" +#include "jobs.h" #include "main.h" -#include "shell.h" +#include "memalloc.h" #include "nodes.h" -#include "jobs.h" #include "options.h" -#include "expand.h" -#include "redir.h" #include "output.h" -#include "memalloc.h" -#include "error.h" +#include "redir.h" +#include "shell.h" +#include "system.h" +#include "trap.h" #define EMPTY -2 /* marks an unused slot in redirtab */ @@ -139,6 +143,8 @@ redirect(union node *redir, int flags) continue; fd = n->nfile.fd; + if (fd == 0) + reset_input(); if (sv) { int closed; @@ -180,56 +186,83 @@ redirect(union node *redir, int flags) } +static int sh_open_fail(const char *, int, int) __attribute__((__noreturn__)); +static int sh_open_fail(const char *pathname, int flags, int e) +{ + const char *word; + int action; + + word = "open"; + action = E_OPEN; + if (flags & O_CREAT) { + word = "create"; + action = E_CREAT; + } + + sh_error("cannot %s %s: %s", word, pathname, errmsg(e, action)); +} + + +int sh_open(const char *pathname, int flags, int mayfail) +{ + int fd; + int e; + + do { + fd = open64(pathname, flags, 0666); + e = errno; + } while (fd < 0 && e == EINTR && !pending_sig); + + if (mayfail || fd >= 0) + return fd; + + sh_open_fail(pathname, flags, e); +} + + STATIC int openredirect(union node *redir) { struct stat64 sb; char *fname; + int flags; int f; switch (redir->nfile.type) { case NFROM: - fname = redir->nfile.expfname; - if ((f = open64(fname, O_RDONLY)) < 0) - goto eopen; + flags = O_RDONLY; +do_open: + f = sh_open(redir->nfile.expfname, flags, 0); break; case NFROMTO: - fname = redir->nfile.expfname; - if ((f = open64(fname, O_RDWR|O_CREAT, 0666)) < 0) - goto ecreate; - break; + flags = O_RDWR|O_CREAT; + goto do_open; case NTO: /* Take care of noclobber mode. 
*/ if (Cflag) { fname = redir->nfile.expfname; if (stat64(fname, &sb) < 0) { - if ((f = open64(fname, O_WRONLY|O_CREAT|O_EXCL, 0666)) < 0) - goto ecreate; - } else if (!S_ISREG(sb.st_mode)) { - if ((f = open64(fname, O_WRONLY, 0666)) < 0) - goto ecreate; - if (!fstat64(f, &sb) && S_ISREG(sb.st_mode)) { - close(f); - errno = EEXIST; - goto ecreate; - } - } else { - errno = EEXIST; + flags = O_WRONLY|O_CREAT|O_EXCL; + goto do_open; + } + + if (S_ISREG(sb.st_mode)) + goto ecreate; + + f = sh_open(fname, O_WRONLY, 0); + if (!fstat64(f, &sb) && S_ISREG(sb.st_mode)) { + close(f); goto ecreate; } break; } /* FALLTHROUGH */ case NCLOBBER: - fname = redir->nfile.expfname; - if ((f = open64(fname, O_WRONLY|O_CREAT|O_TRUNC, 0666)) < 0) - goto ecreate; - break; + flags = O_WRONLY|O_CREAT|O_TRUNC; + goto do_open; case NAPPEND: - fname = redir->nfile.expfname; - if ((f = open64(fname, O_WRONLY|O_CREAT|O_APPEND, 0666)) < 0) - goto ecreate; - break; + flags = O_WRONLY|O_CREAT|O_APPEND; + goto do_open; case NTOFD: case NFROMFD: f = redir->ndup.dupfd; @@ -249,26 +282,32 @@ openredirect(union node *redir) return f; ecreate: - sh_error("cannot create %s: %s", fname, errmsg(errno, E_CREAT)); -eopen: - sh_error("cannot open %s: %s", fname, errmsg(errno, E_OPEN)); + sh_open_fail(fname, O_CREAT, EEXIST); } +static int sh_dup2(int ofd, int nfd, int cfd) +{ + if (nfd < 0) { + nfd = dup(ofd); + if (nfd >= 0) + cfd = -1; + } else + nfd = dup2(ofd, nfd); + if (likely(cfd >= 0)) + close(cfd); + if (nfd < 0) + sh_error("%d: %s", ofd, strerror(errno)); + + return nfd; +} -STATIC void #ifdef notyet -dupredirect(redir, f, memory) +static void dupredirect(union node *redir, int f, char memory[10]) #else -dupredirect(redir, f) -#endif - union node *redir; - int f; -#ifdef notyet - char memory[10]; +static void dupredirect(union node *redir, int f) #endif - { +{ int fd = redir->nfile.fd; - int err = 0; #ifdef notyet memory[fd] = 0; @@ -281,27 +320,30 @@ dupredirect(redir, f) memory[fd] = 1; else 
#endif - if (dup2(f, fd) < 0) { - err = errno; - goto err; - } + sh_dup2(f, fd, -1); return; } - f = fd; - } else if (dup2(f, fd) < 0) - err = errno; + close(fd); + } else + sh_dup2(f, fd, f); +} - close(f); - if (err < 0) - goto err; +int sh_pipe(int pip[2], int memfd) +{ + if (memfd) { + pip[0] = USE_MEMFD_CREATE ? memfd_create("dash", 0) : -1; + if (pip[0] >= 0) { + pip[1] = sh_dup2(pip[0], -1, pip[0]); + return 1; + } + } - return; + if (pipe(pip) < 0) + sh_error("Pipe call failed"); -err: - sh_error("%d: %s", f, strerror(err)); + return 0; } - /* * Handle here documents. Normally we fork off a process to write the * data to a pipe. If the document is short, we can stuff the data in @@ -311,12 +353,10 @@ dupredirect(redir, f) STATIC int openhere(union node *redir) { - char *p; - int pip[2]; size_t len = 0; - - if (pipe(pip) < 0) - sh_error("Pipe call failed"); + int pip[2]; + int memfd; + char *p; p = redir->nhere.doc->narg.text; if (redir->type == NXHERE) { @@ -325,8 +365,11 @@ openhere(union node *redir) } len = strlen(p); - if (len <= PIPESIZE) { + memfd = sh_pipe(pip, len > PIPESIZE); + + if (memfd || len <= PIPESIZE) { xwrite(pip[1], p, len); + lseek(pip[1], 0, SEEK_SET); goto out; } @@ -375,8 +418,11 @@ popredir(int drop) close(i); break; default: - if (!drop) + if (!drop) { + if (i == 0) + reset_input(); dup2(rp->renamed[i], i); + } close(rp->renamed[i]); break; } @@ -401,9 +447,33 @@ EXITRESET { unwindredir(0); } +FORKRESET { + redirlist = NULL; +} + #endif +/* + * Just a convenience because fcntl isn't well exposed in OCaml. + */ +// libdash +int +freshfd_ge10(int fd) +{ + int newfd; + int err; + + newfd = fcntl(fd, F_DUPFD_CLOEXEC, 10); + + err = newfd < 0 ? errno : 0; + if (err == EBADF) { + newfd = -1; + } else if (err) { + newfd = -2; + } + return newfd; +} /* * Move a file descriptor to > 10. 
Invokes sh_error on error unless @@ -416,13 +486,18 @@ savefd(int from, int ofd) int newfd; int err; +#if HAVE_F_DUPFD_CLOEXEC + newfd = fcntl(from, F_DUPFD_CLOEXEC, 10); +#else newfd = fcntl(from, F_DUPFD, 10); +#endif + err = newfd < 0 ? errno : 0; if (err != EBADF) { close(ofd); if (err) sh_error("%d: %s", from, strerror(err)); - else + else if(!HAVE_F_DUPFD_CLOEXEC) fcntl(newfd, F_SETFD, FD_CLOEXEC); } @@ -443,9 +518,7 @@ redirectsafe(union node *redir, int flags) handler = &jmploc; redirect(redir, flags); } - handler = savehandler; - if (err && exception != EXERROR) - longjmp(handler->loc, 1); + restore_handler_expandarg(savehandler, err); RESTOREINT(saveint); return err; } diff --git a/src/redir.h b/src/redir.h index 8e56995..e3945e4 100644 --- a/src/redir.h +++ b/src/redir.h @@ -45,9 +45,10 @@ struct redirtab; union node; void redirect(union node *, int); void popredir(int); -void clearredir(void); +int freshfd_ge10(int); // libdash int savefd(int, int); int redirectsafe(union node *, int); void unwindredir(struct redirtab *stop); struct redirtab *pushredir(union node *redir); - +int sh_open(const char *pathname, int flags, int mayfail); +int sh_pipe(int pip[2], int memfd); diff --git a/src/system.h b/src/system.h index 007952c..8cb4726 100644 --- a/src/system.h +++ b/src/system.h @@ -28,6 +28,7 @@ #include #include +#include #include #ifndef SSIZE_MAX @@ -54,6 +55,13 @@ static inline void sigclearmask(void) #endif } +#ifndef HAVE_MEMFD_CREATE +static inline int memfd_create(const char *name, unsigned int flags) +{ + return -1; +} +#endif + #ifndef HAVE_MEMPCPY void *mempcpy(void *, const void *, size_t); #endif @@ -111,8 +119,75 @@ long sysconf(int) __attribute__((__noreturn__)); int isblank(int c); #endif +#ifndef HAVE_TEE +static inline ssize_t tee(int fd_in, int fd_out, size_t len, unsigned int flags) +{ + return -1; +} +#endif + +#ifndef HAVE_FNMATCH +static inline int fnmatch(const char *pattern, const char *string, int flags) +{ + return -1; +} 
+#endif + +#ifndef HAVE_GLOB +#define GLOB_ERR (1 << 0)/* Return on read errors. */ +#define GLOB_MARK (1 << 1)/* Append a slash to each name. */ +#define GLOB_NOSORT (1 << 2)/* Don't sort the names. */ +#define GLOB_DOOFFS (1 << 3)/* Insert PGLOB->gl_offs NULLs. */ +#define GLOB_NOCHECK (1 << 4)/* If nothing matches, return the pattern. */ +#define GLOB_APPEND (1 << 5)/* Append to results of a previous call. */ +#define GLOB_NOESCAPE (1 << 6)/* Backslashes don't quote metacharacters. */ +#define GLOB_PERIOD (1 << 7)/* Leading `.' can be matched by metachars. */ +#define GLOB_MAGCHAR (1 << 8)/* Set in gl_flags if any metachars seen. */ +#define GLOB_ALTDIRFUNC (1 << 9)/* Use gl_opendir et al functions. */ +#define GLOB_BRACE (1 << 10)/* Expand "{a,b}" to "a" "b". */ +#define GLOB_NOMAGIC (1 << 11)/* If no magic chars, return the pattern. */ +#define GLOB_TILDE (1 << 12)/* Expand ~user and ~ to home directories. */ +#define GLOB_ONLYDIR (1 << 13)/* Match only directories. */ +#define GLOB_TILDE_CHECK (1 << 14)/* Like GLOB_TILDE but return an error + if the user name is not available. */ + +#define GLOB_NOSPACE 1 /* Ran out of memory. */ +#define GLOB_ABORTED 2 /* Read error. */ +#define GLOB_NOMATCH 3 /* No matches found. */ +#define GLOB_NOSYS 4 /* Not implemented. 
*/ + +struct dirent64; +struct stat64; + +typedef struct { + size_t gl_pathc; + char **gl_pathv; + size_t gl_offs; + int gl_flags; + + void (*gl_closedir)(void *); + struct dirent64 *(*gl_readdir)(void *); + void *(*gl_opendir)(const char *); + int (*gl_lstat)(const char *, struct stat64 *); + int (*gl_stat)(const char *, struct stat64 *); +} glob64_t; + +static inline int glob64(const char *pattern, int flags, + int (*errfunc)(const char *epath, int eerrno), + glob64_t *restrict pglob) +{ + return -1; +} + +static inline void globfree64(glob64_t *pglob) +{ +} +#endif + /* * A trick to suppress uninitialized variable warning without generating any * code */ #define uninitialized_var(x) x = x + +unsigned conv_escape(char *str, char *out, bool mbchar); diff --git a/src/trap.c b/src/trap.c index 58a7c60..23829a5 100644 --- a/src/trap.c +++ b/src/trap.c @@ -37,6 +37,7 @@ #include #include +#include "builtins.h" #include "shell.h" #include "main.h" #include "nodes.h" /* for other headers */ @@ -47,6 +48,7 @@ #include "options.h" #include "syntax.h" #include "output.h" +#include "parser.h" #include "memalloc.h" #include "error.h" #include "trap.h" @@ -67,6 +69,8 @@ /* trap handler commands */ static char *trap[NSIG]; +/* traps have not been fully cleared */ +static int ptrap; /* number of non-null traps */ int trapcnt; /* current value of signal */ @@ -76,18 +80,25 @@ static char gotsig[NSIG - 1]; /* last pending signal */ volatile sig_atomic_t pending_sig; /* received SIGCHLD */ -int gotsigchld; +volatile sig_atomic_t gotsigchld; extern char *signal_names[]; static int decode_signum(const char *); +MKINIT void clear_traps(union node *); #ifdef mkinit +INCLUDE "memalloc.h" INCLUDE "trap.h" + INIT { sigmode[SIGCHLD - 1] = S_DFL; setsignal(SIGCHLD); } + +FORKRESET { + clear_traps(n); +} #endif /* @@ -115,6 +126,8 @@ trapcmd(int argc, char **argv) } return 0; } + if (ptrap) + clear_traps(NULL); if (!ap[1] || decode_signum(*ap) >= 0) action = NULL; else @@ -154,21 +167,30 @@ 
trapcmd(int argc, char **argv) * Clear traps on a fork. */ -void -clear_traps(void) +void clear_traps(union node *n) { + int simplecmd; char **tp; + simplecmd = issimplecmd(n, TRAPCMD->name); + INTOFF; for (tp = trap ; tp < &trap[NSIG] ; tp++) { if (*tp && **tp) { /* trap not NULL or SIG_IGN */ - ckfree(*tp); + char *otp = *tp; + *tp = NULL; if (tp != &trap[0]) setsignal(tp - trap); + + if (simplecmd) + *tp = otp; + else + ckfree(*tp); } } trapcnt = 0; + ptrap = simplecmd; INTON; } @@ -274,11 +296,11 @@ setsignal(int signo) void ignoresig(int signo) { - if (sigmode[signo - 1] != S_IGN && sigmode[signo - 1] != S_HARD_IGN) { - signal(signo, SIG_IGN); - } + if (sigmode[signo - 1] == S_IGN || sigmode[signo - 1] == S_HARD_IGN) + return; + signal(signo, SIG_IGN); if (!vforked) - sigmode[signo - 1] = S_HARD_IGN; + sigmode[signo - 1] = S_IGN; } @@ -290,7 +312,7 @@ ignoresig(int signo) void onsig(int signo) { - if (vforked) + if (vforked && getpid() != vforked) return; if (signo == SIGCHLD) { @@ -396,12 +418,15 @@ exitshell(void) handler = &loc; if ((p = trap[0])) { trap[0] = NULL; + if (ptrap) + goto out; evalskip = 0; evalstring(p, 0); evalskip = SKIPFUNCDEF; } out: exitreset(); + postexitreset(); /* * Disable job control so that whoever had the foreground before we * started can get it back. 
diff --git a/src/trap.h b/src/trap.h index 5fd65af..beaf660 100644 --- a/src/trap.h +++ b/src/trap.h @@ -39,10 +39,9 @@ extern int trapcnt; extern char sigmode[]; extern volatile sig_atomic_t pending_sig; -extern int gotsigchld; +extern volatile sig_atomic_t gotsigchld; int trapcmd(int, char **); -void clear_traps(void); void setsignal(int); void ignoresig(int); void onsig(int); diff --git a/src/type_description.ml b/src/type_description.ml new file mode 100644 index 0000000..7ee7915 --- /dev/null +++ b/src/type_description.ml @@ -0,0 +1,184 @@ +open Ctypes + +module Types (F : Ctypes.TYPE) = struct + open F + + (* stackmarks [used for string allocation in dash] *) + type stackmark + + let stackmark : stackmark structure typ = structure "stackmark" + let stackp = field stackmark "stackp" (ptr void) + let nxt = field stackmark "nxt" string + let size = field stackmark "stacknleft" F.size_t + let () = seal stackmark + + (* AST nodes *) + + (* define the node type... *) + type node + let node : node union typ = union "node" + let node_type = field node "type" int + (* ...but don't seal it yet! 
*) + + type nodelist + let nodelist : nodelist structure typ = structure "nodelist" + let nodelist_next = field nodelist "next" (ptr nodelist) + let nodelist_n = field nodelist "n" (ptr node) + let () = seal nodelist + + type ncmd + + let ncmd : ncmd structure typ = structure "ncmd" + let ncmd_type = field ncmd "type" int + let ncmd_linno = field ncmd "linno" int + let ncmd_assign = field ncmd "assign" (ptr node) + let ncmd_args = field ncmd "args" (ptr node) + let ncmd_redirect = field ncmd "redirect" (ptr node) + let () = seal ncmd + + let node_ncmd = field node "ncmd" ncmd + + type npipe + + let npipe : npipe structure typ = structure "npipe" + let npipe_type = field npipe "type" int + let npipe_backgnd = field npipe "backgnd" int + let npipe_cmdlist = field npipe "cmdlist" (ptr nodelist) + let () = seal npipe + + let node_npipe = field node "npipe" npipe + + type nredir + + let nredir : nredir structure typ = structure "nredir" + let nredir_type = field nredir "type" int + let nredir_linno = field nredir "linno" int + let nredir_n = field nredir "n" (ptr node) + let nredir_redirect = field nredir "redirect" (ptr node) + let () = seal nredir + + let node_nredir = field node "nredir" nredir + + type nbinary + + let nbinary : nbinary structure typ = structure "nbinary" + let nbinary_type = field nbinary "type" int + let nbinary_ch1 = field nbinary "ch1" (ptr node) + let nbinary_ch2 = field nbinary "ch2" (ptr node) + let () = seal nbinary + + let node_nbinary = field node "nbinary" nbinary + + type nif + + let nif : nif structure typ = structure "nif" + let nif_type = field nif "type" int + let nif_test = field nif "test" (ptr node) + let nif_ifpart = field nif "ifpart" (ptr node) + let nif_elsepart = field nif "elsepart" (ptr node) + let () = seal nif + + let node_nif = field node "nif" nif + + type nfor + + let nfor : nfor structure typ = structure "nfor" + let nfor_type = field nfor "type" int + let nfor_linno = field nfor "linno" int + let nfor_args = field 
nfor "args" (ptr node) + let nfor_body = field nfor "body" (ptr node) + let nfor_var = field nfor "var" string + let () = seal nfor + + let node_nfor = field node "nfor" nfor + + type ncase + + let ncase : ncase structure typ = structure "ncase" + let ncase_type = field ncase "type" int + let ncase_linno = field ncase "linno" int + let ncase_expr = field ncase "expr" (ptr node) + let ncase_cases = field ncase "cases" (ptr node) + let () = seal ncase + + let node_ncase = field node "ncase" ncase + + type nclist + + let nclist : nclist structure typ = structure "nclist" + let nclist_type = field nclist "type" int + let nclist_next = field nclist "next" (ptr node) + let nclist_pattern = field nclist "pattern" (ptr node) + let nclist_body = field nclist "body" (ptr node) + let () = seal nclist + + let node_nclist = field node "nclist" nclist + + type ndefun + + let ndefun : ndefun structure typ = structure "ndefun" + let ndefun_type = field ndefun "type" int + let ndefun_linno = field ndefun "linno" int + let ndefun_text = field ndefun "text" string + let ndefun_body = field ndefun "body" (ptr node) + let () = seal ndefun + + let node_ndefun = field node "ndefun" ndefun + + type narg + + let narg : narg structure typ = structure "narg" + let narg_type = field narg "type" int + let narg_next = field narg "next" (ptr node) + let narg_text = field narg "text" string + let narg_backquote = field narg "backquote" (ptr nodelist) + let () = seal narg + + let node_narg = field node "narg" narg + + type nfile + + let nfile : nfile structure typ = structure "nfile" + let nfile_type = field nfile "type" int + let nfile_next = field nfile "next" (ptr node) + let nfile_fd = field nfile "fd" int + let nfile_fname = field nfile "fname" (ptr node) + let nfile_expfname = field nfile "expfname" string + let () = seal nfile + + let node_nfile = field node "nfile" nfile + + type ndup + + let ndup : ndup structure typ = structure "ndup" + let ndup_type = field ndup "type" int + let 
ndup_next = field ndup "next" (ptr node) + let ndup_fd = field ndup "fd" int + let ndup_dupfd = field ndup "dupfd" int + let ndup_vname = field ndup "vname" (ptr node) + let () = seal ndup + + let node_ndup = field node "ndup" ndup + + type nhere + + let nhere : nhere structure typ = structure "nhere" + let nhere_type = field nhere "type" int + let nhere_next = field nhere "next" (ptr node) + let nhere_fd = field nhere "fd" int + let nhere_doc = field nhere "doc" (ptr node) + let () = seal nhere + + let node_nhere = field node "nhere" nhere + + type nnot + + let nnot : nnot structure typ = structure "nnot" + let nnot_type = field nnot "type" int + let nnot_com = field nnot "com" (ptr node) + let () = seal nnot + + let node_nnot = field node "nnot" nnot + let () = seal node + +end diff --git a/src/var.c b/src/var.c index 0d7e1db..0ebab25 100644 --- a/src/var.c +++ b/src/var.c @@ -86,14 +86,14 @@ struct var varinit[] = { #if ATTY { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "ATTY\0", 0 }, #endif - { 0, VSTRFIXED|VTEXTFIXED, defifsvar, 0 }, + { 0, VSTRFIXED|VTEXTFIXED, defifsvar, changeifs }, { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "MAIL\0", changemail }, { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "MAILPATH\0", changemail }, { 0, VSTRFIXED|VTEXTFIXED, defpathvar, changepath }, { 0, VSTRFIXED|VTEXTFIXED, "PS1=$ ", 0 }, { 0, VSTRFIXED|VTEXTFIXED, "PS2=> ", 0 }, { 0, VSTRFIXED|VTEXTFIXED, "PS4=+ ", 0 }, - { 0, VSTRFIXED|VTEXTFIXED, defoptindvar, getoptsreset }, + { 0, VSTRFIXED|VTEXTFIXED|VNOFUNC, defoptindvar, getoptsreset }, #ifdef WITH_LINENO { 0, VSTRFIXED|VTEXTFIXED, linenovar, 0 }, #endif @@ -107,7 +107,7 @@ STATIC struct var *vartab[VTABSIZE]; STATIC struct var **hashvar(const char *); STATIC int vpcmp(const void *, const void *); -STATIC struct var **findvar(struct var **, const char *); +STATIC struct var **findvar(const char *); /* * Initialize the varable symbol tables and import the environment @@ -125,7 +125,7 @@ INIT { char **envp; static char ppid[32] = "PPID="; const char *p; - 
struct stat st1, st2; + struct stat64 st1, st2; initvar(); for (envp = environ ; *envp ; envp++) { @@ -143,7 +143,7 @@ INIT { p = lookupvar("PWD"); if (p) - if (*p != '/' || stat(p, &st1) || stat(".", &st2) || + if (*p != '/' || stat64(p, &st1) || stat64(dotdir, &st2) || st1.st_dev != st2.st_dev || st1.st_ino != st2.st_ino) p = 0; setpwd(p, 0); @@ -154,6 +154,10 @@ RESET { } #endif +static char *varnull(const char *s) +{ + return (strchr(s, '=') ?: nullstr - 1) + 1; +} /* * This routine initializes the builtin variables. It is called when the @@ -247,11 +251,12 @@ struct var *setvareq(char *s, int flags) { struct var *vp, **vpp; - vpp = hashvar(s); flags |= (VEXPORT & (((unsigned) (1 - aflag)) - 1)); - vpp = findvar(vpp, s); + vpp = findvar(s); vp = *vpp; if (vp) { + unsigned bits; + if (vp->flags & VREADONLY) { const char *n; @@ -262,17 +267,14 @@ struct var *setvareq(char *s, int flags) n); } - if (flags & VNOSET) - goto out; - - if (vp->func && (flags & VNOFUNC) == 0) - (*vp->func)(strchrnul(s, '=') + 1); - if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0) ckfree(vp->text); - if (((flags & (VEXPORT|VREADONLY|VSTRFIXED|VUNSET)) | - (vp->flags & VSTRFIXED)) == VUNSET) { + if ((flags & (VEXPORT|VREADONLY|VSTRFIXED|VUNSET)) != VUNSET) + bits = ~(VTEXTFIXED|VSTACK|VNOSAVE|VUNSET); + else if ((vp->flags & VSTRFIXED)) + bits = VSTRFIXED; + else { *vpp = vp->next; ckfree(vp); out_free: @@ -281,10 +283,8 @@ struct var *setvareq(char *s, int flags) goto out; } - flags |= vp->flags & ~(VTEXTFIXED|VSTACK|VNOSAVE|VUNSET); + flags |= vp->flags & bits; } else { - if (flags & VNOSET) - goto out; if ((flags & (VEXPORT|VREADONLY|VSTRFIXED|VUNSET)) == VUNSET) goto out_free; /* not found */ @@ -298,6 +298,9 @@ struct var *setvareq(char *s, int flags) vp->text = s; vp->flags = flags; + if (vp->func && (flags & VNOFUNC) == 0) + (*vp->func)(varnull(s)); + out: return vp; } @@ -311,7 +314,7 @@ lookupvar(const char *name) { struct var *v; - if ((v = *findvar(hashvar(name), name)) && 
!(v->flags & VUNSET)) { + if ((v = *findvar(name)) && !(v->flags & VUNSET)) { #ifdef WITH_LINENO if (v == &vlineno && v->text == linenovar) { fmtstr(linenovar+7, sizeof(linenovar)-7, "%d", lineno); @@ -324,7 +327,7 @@ lookupvar(const char *name) intmax_t lookupvarint(const char *name) { - return atomax(lookupvar(name) ?: nullstr, 0); + return atomax(lookupvar(name) ?: "0", 0); } @@ -418,7 +421,7 @@ exportcmd(int argc, char **argv) if ((p = strchr(name, '=')) != NULL) { p++; } else { - if ((vp = *findvar(hashvar(name), name))) { + if ((vp = *findvar(name))) { vp->flags |= flag; continue; } @@ -462,7 +465,6 @@ localcmd(int argc, char **argv) void mklocal(char *name, int flags) { struct localvar *lvp; - struct var **vpp; struct var *vp; INTOFF; @@ -475,8 +477,7 @@ void mklocal(char *name, int flags) } else { char *eq; - vpp = hashvar(name); - vp = *findvar(vpp, name); + vp = *findvar(name); eq = strchr(name, '='); if (vp == NULL) { if (eq) @@ -504,8 +505,8 @@ void mklocal(char *name, int flags) * Interrupts must be off. */ -void -poplocalvars(int keep) +static void +poplocalvars(void) { struct localvar_list *ll; struct localvar *lvp, *next; @@ -522,23 +523,7 @@ poplocalvars(int keep) next = lvp->next; vp = lvp->vp; TRACE(("poplocalvar %s\n", vp ? 
vp->text : "-")); - if (keep) { - int bits = VSTRFIXED; - - if (lvp->flags != VUNSET) { - if (vp->text == lvp->text) - bits |= VTEXTFIXED; - else if (!(lvp->flags & (VTEXTFIXED|VSTACK))) - ckfree(lvp->text); - } - - vp->flags &= ~bits; - vp->flags |= (lvp->flags & bits); - - if ((vp->flags & - (VEXPORT|VREADONLY|VSTRFIXED|VUNSET)) == VUNSET) - unsetvar(vp->text); - } else if (vp == NULL) { /* $- saved */ + if (vp == NULL) { /* $- saved */ memcpy(optlist, lvp->text, sizeof(optlist)); ckfree(lvp->text); optschanged(); @@ -546,12 +531,12 @@ poplocalvars(int keep) vp->flags &= ~(VSTRFIXED|VREADONLY); unsetvar(vp->text); } else { - if (vp->func) - (*vp->func)(strchrnul(lvp->text, '=') + 1); if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0) ckfree(vp->text); vp->flags = lvp->flags; vp->text = lvp->text; + if (vp->func && !(vp->flags & VNOFUNC)) + (*vp->func)(varnull(vp->text)); } ckfree(lvp); } @@ -586,7 +571,7 @@ struct localvar_list *pushlocalvars(int push) void unwindlocalvars(struct localvar_list *stop) { while (localvar_stack != stop) - poplocalvars(0); + poplocalvars(); } @@ -637,12 +622,7 @@ void unsetvar(const char *s) STATIC struct var ** hashvar(const char *p) { - unsigned int hashval; - - hashval = ((unsigned char) *p) << 4; - while (*p && *p != '=') - hashval += (unsigned char) *p++; - return &vartab[hashval % VTABSIZE]; + return &vartab[hashval(p) % VTABSIZE]; } @@ -656,19 +636,19 @@ hashvar(const char *p) int varcmp(const char *p, const char *q) { - int c, d; - - while ((c = *p) == (d = *q)) { - if (!c || c == '=') - goto out; + int c = *p, d = *q; + while (c == d) { + if (!c) + break; p++; q++; + c = *p; + d = *q; + if (c == '=') + c = '\0'; + if (d == '=') + d = '\0'; } - if (c == '=') - c = 0; - if (d == '=') - d = 0; -out: return c - d; } @@ -679,9 +659,11 @@ vpcmp(const void *a, const void *b) } STATIC struct var ** -findvar(struct var **vpp, const char *name) +findvar(const char *name) { - for (; *vpp; vpp = &(*vpp)->next) { + struct var **vpp; + + for (vpp 
= hashvar(name); *vpp; vpp = &(*vpp)->next) { if (varequal((*vpp)->text, name)) { break; } diff --git a/src/var.h b/src/var.h index cd0477f..f6fb320 100644 --- a/src/var.h +++ b/src/var.h @@ -48,7 +48,7 @@ #define VSTACK 0x10 /* text is allocated on the stack */ #define VUNSET 0x20 /* the variable is not set */ #define VNOFUNC 0x40 /* don't call the callback function */ -#define VNOSET 0x80 /* do not set variable - just readonly test */ +/* #define VNOSET 0x80 do not set variable - just readonly test */ #define VNOSAVE 0x100 /* when text is on the heap before setvareq */ @@ -148,12 +148,26 @@ int exportcmd(int, char **); int localcmd(int, char **); void mklocal(char *name, int flags); struct localvar_list *pushlocalvars(int push); -void poplocalvars(int); void unwindlocalvars(struct localvar_list *stop); int unsetcmd(int, char **); void unsetvar(const char *); int varcmp(const char *, const char *); +static inline unsigned int hashval(const char *p) +{ + unsigned int hashval; + + hashval = ((unsigned char) *p) << 4; + while (*p) { + hashval += (unsigned char) *p++; + if (*p == '=') + break; + } + + return hashval; +} + + static inline int varequal(const char *a, const char *b) { return !varcmp(a, b); } diff --git a/test/.gitignore b/test/.gitignore new file mode 100644 index 0000000..a352601 --- /dev/null +++ b/test/.gitignore @@ -0,0 +1,12 @@ +*.a +*.cmxa +test +*.native +*~ +*.o +*.cmx +*.cmi +test.err +test.byte +test.cmo +ocaml_python.log diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..2a642e8 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,17 @@ +SCRIPTS_DIR=$(PASH_TOP) + +PYTHON_FILES=../python/rt.py $(addprefix ../libdash/,__init__.py _dash.py ast.py parser.py printer.py) +OCAML_FILES=../ocaml/rt.sh + +.PHONY : test clean + +test: test_ocaml_python.sh $(PYTHON_FILES) $(OCAML_FILES) + @echo "LOCAL TESTS" + @find tests -type f | while read f; do ./test_ocaml_python.sh "$$f"; done | tee ocaml_python.log + + @echo "PASH TESTS" + @find 
pash_tests -type f | while read f; do ./test_ocaml_python.sh "$$f"; done | tee -a ocaml_python.log + @cat ocaml_python.log | egrep '^[A-Z0-9_]+:' | cut -d ':' -f 1 | sort | uniq -c + +clean : + rm -f ocaml_python.log diff --git a/test/README.md b/test/README.md new file mode 100644 index 0000000..c3b7f08 --- /dev/null +++ b/test/README.md @@ -0,0 +1,7 @@ +There are three directories of tests: + + - `tests` are the original libdash tests, mostly handwritten + - `pash_tests` are shell scripts taken from [`pash`](https://github.com/binpash/pash) + - `failing` are shell scripts that aren't working right now (which is probably a bug) + +Both OCaml and Python bindings use the `round_trip.sh` to test round tripping. The `test_ocaml_python.sh` script compares the output from Python and OCaml. diff --git a/test/failing/.travis-ocaml.sh b/test/failing/.travis-ocaml.sh new file mode 100644 index 0000000..6730871 --- /dev/null +++ b/test/failing/.travis-ocaml.sh @@ -0,0 +1,327 @@ +## basic OCaml and opam installation + +full_apt_version () { + package=$1 + version=$2 + case "${version}" in + latest) echo -n "${package}" ;; + *) echo -n "${package}=" + apt-cache show "$package" \ + | sed -n "s/^Version: \(${version}\)/\1/p" \ + | head -1 + esac +} + +set -uex + +if [ "$TRAVIS_OS_NAME" = freebsd -a "${OPAM_VERSION+x}" = x ]; then + echo OPAM_VERSION not permitted for FreeBSD targets + exit 1 +fi + +OCAML_VERSION=${OCAML_VERSION:-latest} +SYS_OCAML_VERSION=4.05 +# Default opam is the latest release of opam 2 +OPAM_VERSION=${OPAM_VERSION:-2} +OPAM_INIT=${OPAM_INIT:-true} +OCAML_BETA=${OCAML_BETA:-disable} + +OPAM_LATEST_RELEASE=2.0.7 + +case ${TRAVIS_CPU_ARCH:-amd64} in + amd64|notset) OPAM_ARCH=x86_64;; + arm64) OPAM_ARCH=arm64;; + *) echo "'$TRAVIS_CPU_ARCH' architecture not currently supported"; exit 1;; +esac + +case $OPAM_VERSION in + 2|2.0) OPAM_VERSION=$OPAM_LATEST_RELEASE;; + 1.*) echo "Opam version '$OPAM_VERSION' is not supported"; exit 1;; +esac + +if [ 
"$TRAVIS_OS_NAME" = "osx" ] ; then + brew update &> /dev/null + BREW_OPAM_VERSION=$(brew info opam --json=v1 | sed -e 's/.*"versions":{[^}]*"stable":"//' -e 's/".*//') + if [ "$OPAM_VERSION" != "$BREW_OPAM_VERSION" ] ; then + set +x + echo -e "[\e[0;31mWARNING\e[0m] Ignored OPAM_VERSION=$OPAM_VERSION; interpreted as \"$BREW_OPAM_VERSION\"" >&2 + echo -e "[\e[0;31mWARNING\e[0m] opam 2 is installed via Homebrew" >&2 + set -x + fi + OPAM_VERSION="$BREW_OPAM_VERSION" +fi + +if [ "$OPAM_VERSION" != "$OPAM_LATEST_RELEASE" ] ; then + set +x + echo -e "[\e[0;31mWARNING\e[0m] Out-of-date opam $OPAM_VERSION requested" >&2 + echo -e "[\e[0;31mWARNING\e[0m] Latest release is $OPAM_LATEST_RELEASE" >&2 + set -x +fi + +if [ "${INSTALL_LOCAL+x}" = x ] ; then + if [ "$TRAVIS_OS_NAME" = osx -o "$TRAVIS_OS_NAME" = freebsd ] ; then + echo INSTALL_LOCAL not permitted for macOS and FreeBSD targets + exit 1 + fi + + if [ "${OPAM_SWITCH:=ocaml-system}" != ocaml-system ] ; then + echo "INSTALL_LOCAL requires OPAM_SWITCH=ocaml-system (or unset/null)" + exit 1 + fi +fi + +# the base opam repository to use for bootstrapping and catch-all namespace +BASE_REMOTE=${BASE_REMOTE:-git://github.com/ocaml/opam-repository} + +# whether we need a new gcc and binutils +UPDATE_GCC_BINUTILS=${UPDATE_GCC_BINUTILS:-"0"} + +# Install Xenial remotes +UBUNTU_XENIAL=${UBUNTU_XENIAL:-"0"} + +# Install XQuartz on OSX +INSTALL_XQUARTZ=${INSTALL_XQUARTZ:-"false"} + +APT_UPDATED=0 + +add_ppa () { + if [ "$TRAVIS_OS_NAME" = "linux" ] ; then + APT_UPDATED=0 + sudo add-apt-repository --yes ppa:$1 + fi +} + +apt_install () { + if [ "$TRAVIS_OS_NAME" = "linux" ] ; then + if [ "$APT_UPDATED" -eq 0 ] ; then + APT_UPDATED=1 + sudo apt-get update -qq + fi + sudo apt-get install --no-install-recommends -y "$@" + fi +} + +install_ocaml () { + apt_install \ + ocaml ocaml-base ocaml-native-compilers ocaml-compiler-libs \ + ocaml-interp ocaml-base-nox ocaml-nox +} + +install_opam2 () { + case $TRAVIS_OS_NAME in + freebsd) + # 
Opam does not have any ready to use binaries for FreeBSD + sudo pkg install -qy ocaml-opam ;; + linux) + case $TRAVIS_DIST in + precise|trusty|xenial) + # Required for bubblewrap (supports arm64 & amd64) + add_ppa avsm/ppa ;; + esac + if [ "${INSTALL_LOCAL:=0}" = 0 ] ; then + install_ocaml + fi + apt_install bubblewrap + sudo wget https://github.com/ocaml/opam/releases/download/$OPAM_VERSION/opam-$OPAM_VERSION-$OPAM_ARCH-linux -O /usr/local/bin/opam + sudo chmod +x /usr/local/bin/opam ;; + osx) + if [ "${INSTALL_LOCAL:=0}" = 0 ] ; then + brew install ocaml + fi + sudo curl -fsSL https://github.com/ocaml/opam/releases/download/$OPAM_VERSION/opam-$OPAM_VERSION-$OPAM_ARCH-macos -o /usr/local/bin/opam + sudo chmod +x /usr/local/bin/opam ;; + esac +} + +install_ppa () { + add_ppa $1 + if [ "${INSTALL_LOCAL:=0}" = 0 ] ; then + sudo apt-get -qq update + APT_UPDATED=1 + apt_install \ + "$(full_apt_version ocaml $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-base $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-native-compilers $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-compiler-libs $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-interp $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-base-nox $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-nox $SYS_OCAML_VERSION)" + fi + apt_install opam +} + +install_on_freebsd () { + case "$OCAML_VERSION" in + 3.12) OCAML_FULL_VERSION=3.12.1; install_opam2 ;; + 4.00) OCAML_FULL_VERSION=4.00.1; install_opam2 ;; + 4.01) OCAML_FULL_VERSION=4.01.0; install_opam2 ;; + 4.02) OCAML_FULL_VERSION=4.02.3; install_opam2 ;; + 4.03) OCAML_FULL_VERSION=4.03.0; install_opam2 ;; + 4.04) OCAML_FULL_VERSION=4.04.2; install_opam2 ;; + 4.05) OCAML_FULL_VERSION=4.05.0; install_opam2 ;; + 4.06) OCAML_FULL_VERSION=4.06.1; install_opam2 ;; + 4.07) OCAML_FULL_VERSION=4.07.1; install_opam2 ;; + 4.08) OCAML_FULL_VERSION=4.08.1; install_opam2 ;; + 4.09) OCAML_FULL_VERSION=4.09.1; install_opam2 ;; + 4.10) OCAML_FULL_VERSION=4.10.1; 
install_opam2 ;; + 4.11) OCAML_FULL_VERSION=4.11.0; install_opam2 ;; + 4.12) OCAML_FULL_VERSION=4.12.0+trunk; OCAML_BETA=enable; install_opam2 ;; + *) + if [ "$OCAML_BETA" != "enable" ]; then + echo "Unknown OCAML_VERSION=$OCAML_VERSION" + echo "(An unset OCAML_VERSION used to default to \"latest\", but you must now specify it." + echo "Try something like \"OCAML_VERSION=3.12\", \"OCAML_VERSION=4.10\", or see README-travis.md at https://github.com/ocaml/ocaml-ci-scripts )" + exit 1 + fi + OCAML_FULL_VERSION="${OCAML_VERSION}" + install_opam2 ;; + esac +} + +install_on_linux () { + case "$OCAML_VERSION" in + 3.12) OCAML_FULL_VERSION=3.12.1; install_opam2 ;; + 4.00) OCAML_FULL_VERSION=4.00.1; install_opam2 ;; + 4.01) OCAML_FULL_VERSION=4.01.0; install_opam2 ;; + 4.02) OCAML_FULL_VERSION=4.02.3; install_opam2 ;; + 4.03) OCAML_FULL_VERSION=4.03.0; install_opam2 ;; + 4.04) OCAML_FULL_VERSION=4.04.2; install_opam2 ;; + 4.05) OCAML_FULL_VERSION=4.05.0; install_opam2 ;; + 4.06) OCAML_FULL_VERSION=4.06.1; install_opam2 ;; + 4.07) OCAML_FULL_VERSION=4.07.1; install_opam2 ;; + 4.08) OCAML_FULL_VERSION=4.08.1; install_opam2 ;; + 4.09) OCAML_FULL_VERSION=4.09.1; install_opam2 ;; + 4.10) OCAML_FULL_VERSION=4.10.1; install_opam2 ;; + 4.11) OCAML_FULL_VERSION=4.11.0; install_opam2 ;; + 4.12) OCAML_FULL_VERSION=4.12.0+trunk; OCAML_BETA=enable; install_opam2 ;; + *) + if [ "$OCAML_BETA" != "enable" ]; then + echo "Unknown OCAML_VERSION=$OCAML_VERSION" + echo "(An unset OCAML_VERSION used to default to \"latest\", but you must now specify it." 
+ echo "Try something like \"OCAML_VERSION=3.12\", \"OCAML_VERSION=4.10\", or see README-travis.md at https://github.com/ocaml/ocaml-ci-scripts )" + exit 1 + fi + OCAML_FULL_VERSION="${OCAML_VERSION}" + install_opam2 ;; + esac + + XENIAL="deb mirror://mirrors.ubuntu.com/mirrors.txt xenial main restricted universe" + + if [ "$UPDATE_GCC_BINUTILS" != "0" ] ; then + echo "installing a recent gcc and binutils (mainly to get mirage-entropy-xen working!)" + sudo add-apt-repository "${XENIAL}" + sudo add-apt-repository --yes ppa:ubuntu-toolchain-r/test + sudo apt-get -qq update + sudo apt-get install -y gcc-5 + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 90 + sudo add-apt-repository -r "${XENIAL}" + fi + + if [ "$UBUNTU_XENIAL" != "0" ] ; then + echo "Adding Ubuntu Xenial mirrors" + sudo add-apt-repository "${XENIAL}" + sudo apt-get -qq update + APT_UPDATED=1 + fi + + if [ "${INSTALL_LOCAL:=0}" != 0 ] ; then + ( set +x; echo -en "travis_fold:start:build.ocaml\r" ) 2>/dev/null + echo "Building a local OCaml; this may take a few minutes..." + wget "http://caml.inria.fr/pub/distrib/ocaml-${OCAML_FULL_VERSION%.*}/ocaml-$OCAML_FULL_VERSION.tar.gz" + tar -xzf "ocaml-$OCAML_FULL_VERSION.tar.gz" + cd "ocaml-$OCAML_FULL_VERSION" + ./configure -prefix /usr/local ${OCAML_CONFIGURE_ARGS:=--with-debug-runtime} + make world.opt + sudo make install + cd .. 
+ rm -rf "ocaml-$OCAML_FULL_VERSION" + ( set +x; echo -en "travis_fold:end:build.ocaml\r" ) 2>/dev/null + fi +} + +install_on_osx () { + case $INSTALL_XQUARTZ in + true) + curl -OL "http://xquartz.macosforge.org/downloads/SL/XQuartz-2.7.6.dmg" + sudo hdiutil attach XQuartz-2.7.6.dmg + sudo installer -verbose -pkg /Volumes/XQuartz-2.7.6/XQuartz.pkg -target / + ;; + esac + case "$OCAML_VERSION" in + 3.12) OCAML_FULL_VERSION=3.12.1; install_opam2 ;; + 4.00) OCAML_FULL_VERSION=4.00.1; install_opam2 ;; + 4.01) OCAML_FULL_VERSION=4.01.0; install_opam2 ;; + 4.02) OCAML_FULL_VERSION=4.02.3; install_opam2 ;; + 4.03) OCAML_FULL_VERSION=4.03.0; install_opam2 ;; + 4.04) OCAML_FULL_VERSION=4.04.2; install_opam2 ;; + 4.05) OCAML_FULL_VERSION=4.05.0; install_opam2 ;; + 4.06) OCAML_FULL_VERSION=4.06.1; install_opam2 ;; + 4.07) OCAML_FULL_VERSION=4.07.1; install_opam2 ;; + 4.08) OCAML_FULL_VERSION=4.08.1; install_opam2 ;; + 4.09) OCAML_FULL_VERSION=4.09.0; + OPAM_SWITCH=${OPAM_SWITCH:-ocaml-system}; + brew install ocaml; + install_opam2 ;; + 4.10) OCAML_FULL_VERSION=4.10.1; install_opam2 ;; + 4.11) OCAML_FULL_VERSION=4.11.0; install_opam2 ;; + 4.12) OCAML_FULL_VERSION=4.12.0+trunk; OCAML_BETA=enable; install_opam2 ;; + *) + if [ "$OCAML_BETA" != "enable" ]; then + echo "Unknown OCAML_VERSION=$OCAML_VERSION" + exit 1 + fi + OCAML_FULL_VERSION="${OCAML_VERSION}" + install_opam2 ;; + esac +} + +case $TRAVIS_OS_NAME in + freebsd) install_on_freebsd ;; + osx) install_on_osx ;; + linux) install_on_linux ;; +esac + +ocaml_package=ocaml-base-compiler +if [ "$OCAML_BETA" = "enable" ]; then + ocaml_package=ocaml-variants +fi + +OPAM_SWITCH=${OPAM_SWITCH:-$ocaml_package.$OCAML_FULL_VERSION} + +PACKAGES="$OPAM_SWITCH" +case "$OCAML_VERSION" in + 3.12|4.00|4.01|4.02|4.03|4.04|4.05|4.06) + PACKAGES="$PACKAGES,ocaml-secondary-compiler";; +esac + +export OPAMYES=1 + +case $OPAM_INIT in + true) + opam init -a --bare "$BASE_REMOTE" + opam_repo_selection= + if [ "$OCAML_BETA" = "enable" ]; then + 
opam repo add --dont-select beta git://github.com/ocaml/ocaml-beta-repository.git + opam_repo_selection="--repo=default,beta" + fi + opam switch "$OPAM_SWITCH" || opam switch create $opam_repo_selection "$OPAM_SWITCH" --packages="$PACKAGES" + eval $(opam config env) + ;; +esac + +echo OCAML_VERSION=$OCAML_VERSION > .travis-ocaml.env +echo OPAM_SWITCH=$OPAM_SWITCH >> .travis-ocaml.env + +# Temporary fix an issue with opam-depext < 1.1.3 on FreeBSD. +# See https://github.com/ocaml/opam-depext/pull/123 +echo export ASSUME_ALWAYS_YES=YES >> .travis-ocaml.env + +if [ -x "$(command -v ocaml)" ]; then + # "|| true" is a temp fix for OCaml 4.12: https://github.com/ocaml/ocaml/pull/9798 + ocaml -version || true +else + echo "OCaml is not yet installed" +fi + +opam --version +opam --git-version diff --git a/test/failing/1.tomp3.sh b/test/failing/1.tomp3.sh new file mode 100755 index 0000000..9752159 --- /dev/null +++ b/test/failing/1.tomp3.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# tag: wav-to-mp3 +set -e + +IN=${WAV:-$PASH_TOP/evaluation/benchmarks/aliases/input/wav} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/aliases/input/out} + +find $IN -name '*.wav' | + xargs -n1 basename | + sed "s;\(.*\);-i $IN/\1 -ab 192000 $OUT/\1.mp3;" | + xargs -L1 ffmpeg -y -loglevel quiet -hide_banner diff --git a/test/failing/3.resiz.sh b/test/failing/3.resiz.sh new file mode 100755 index 0000000..42c0354 --- /dev/null +++ b/test/failing/3.resiz.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# tag: resize image +set -e + + +IN=${JPG:-$PASH_TOP/evaluation/benchmarks/aliases/input/jpg} +OUT=${OUT:-PASH_TOP/evaluation/benchmarks/aliases/input/out} + +find $IN -name "*.jpg" | + xargs -n1 basename | + sed "s;\(.*\);-resize 70% $IN/\1 $OUT/\1.70;" | + xargs -L1 convert diff --git a/test/failing/append_nl_if_not.sh b/test/failing/append_nl_if_not.sh new file mode 100755 index 0000000..f8142c0 --- /dev/null +++ b/test/failing/append_nl_if_not.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +## Adds a newline at the end of a file 
if it doesn't already end in a newline. +## Used to prepare inputs for PaSh. + +if [ -z "$1" ]; then + echo "No file argument given!" + exit 1 +else + if [ ! -f "$1" ]; then + echo "File $1 doesn't exist!" + exit 1 + else + tail -c 1 "$1" | od -ta | grep -q nl + if [ $? -eq 1 ] + then + echo >> "$1" + fi + fi +fi diff --git a/test/failing/array.sh b/test/failing/array.sh new file mode 100644 index 0000000..06647a1 --- /dev/null +++ b/test/failing/array.sh @@ -0,0 +1 @@ +p=${cmd_array[$i]} diff --git a/test/failing/async.sh b/test/failing/async.sh new file mode 100755 index 0000000..0e1d484 --- /dev/null +++ b/test/failing/async.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# A script that showcases truly async pipes (via fs) +# Note to self: remember | { lambda } + +fz () { sleep $0; echo "1-"$0; } + +export -f fz + +: > f1 + +tail -f ./f1 | xargs -n 1 bash -c 'fz "$@"' & + +# {seq 5; echo 'yay!' >&2 ; } > ./f1 +seq 5 > ./f1 +echo 'yay!' diff --git a/test/failing/auto-split.sh b/test/failing/auto-split.sh new file mode 100755 index 0000000..d6b5b53 --- /dev/null +++ b/test/failing/auto-split.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +input="$1" +shift +outputs=("$@") +n_outputs="$#" + +# Set a default DISH_TOP in this directory if it doesn't exist +PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel)} +# generate a temp file +temp="$(mktemp -u /tmp/pash_XXXXXXXXXX)" + +cat "$input" > "$temp" +total_lines=$(wc -l "$temp" | cut -f 1 -d ' ') +batch_size=$((total_lines / n_outputs)) +# echo "Input: $input" +# echo "Ouputs: $outputs" +# echo "Number of outputs: $n_outputs" +# echo "Total Lines: $total_lines" +# echo "Batch Size: $batch_size" + +cleanup() +{ + kill -SIGPIPE "$split_pid" > /dev/null 2>&1 +} +trap cleanup EXIT + + +# echo "$PASH_TOP/evaluation/tools/split $input $batch_size $outputs" +"$PASH_TOP"/runtime/split "$temp" "$batch_size" "${outputs[@]}" & +split_pid=$! 
+wait "$split_pid" +rm -f "$temp" diff --git a/test/failing/bio.sh b/test/failing/bio.sh new file mode 100755 index 0000000..7c615bc --- /dev/null +++ b/test/failing/bio.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# https://www.biostars.org/p/43677/ +# https://github.com/h3abionet/h3agatk +# https://docs.google.com/document/d/1siCZrequI4plggz3ho351NnX57CoyCJl9GWp3azlxfU/edit# +bwa mem -M -p -t [num_threads] \ + -R "@RG\tID:1\tPL:ILLUMINA\tPU:pu\tLB:group1\tSM:SAMPLEID" \ + [reference_fasta] \ + [input_fastq] > [output] + +bwa mem genome.fa reads.fastq | samtools sort -o output.bam - + +# https://www.biostars.org/p/43677/ +bwa aln -t 4 ./hg19.fasta ./s1_1.fastq > ./s1_1.sai +bwa aln -t 4 ./hg19.fasta ./s1_2.fastq > ./s1_2.sai +bwa sampe ./hg19.fasta ./s1_1.sai ./s1_2.sai ./s1_1.fastq ./s1_2.fastq | + samtools view -Shu - | + samtools sort - - | + samtools rmdup -s - - | + tee s1_sorted_nodup.bam | + bamToBed > s1_sorted_nodup.bed + +# 4 cores, -M is for Picard compatibility +bwa mem -M -t 4 ./hg19.fasta ./s1_1.fastq ./s1_2.fastq > s1.sam + +samtools merge - *.bam | +# tee merged.bam | + samtools rmdup - - | +# tee rmdup.bam | + samtools mpileup - uf ./hg19.fasta - | + bcftools view -bvcg - | gzip > var.raw.bcf.gz + +bwa sampe ./hg19.fasta <(bwa aln -t 4 ./hg19.fasta ./s1_1.fastq) <(bwa aln -t 4 ./hg19.fasta ./s1_2.fastq) ./s1_1.fastq ./s1_2.fastq | samtools view -Shb /dev/stdin > s1.bam diff --git a/test/failing/bio2.sh b/test/failing/bio2.sh new file mode 100644 index 0000000..fb00a1b --- /dev/null +++ b/test/failing/bio2.sh @@ -0,0 +1,73 @@ +#### Ported #### +# https://dfzljdn9uc3pi.cloudfront.net/2013/203/1/Supplement_S2.pdf +set -e +cd $PASH_TOP/evaluation/benchmarks/bio/bio1/input/ +ls *.R1.fq > namelist +sed -i 's/.R1.fq//g' namelist +NAMES=( `cat "namelist" `) +mkdir -p assembly +# Trims raw files two different ways. +# First way removes any reads with substantial amounts of adapter, but does no +# quality trimming. 
These reads are used for assembly and must be uniform lengths +# Second way removes adapters and does quality trimming. These reads will be +# used for mapping. +for i in "${NAMES[@]}" +do + echo $i + Trim/trim_galore --paired -q 0 --length 90 -a GATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATATCGTATGCCGTCTTCTGCTTG -a2 GATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCG --stringency 20 ${i}.R1.fq ${i}.R2.fq --output_dir ./assembly + Trim/trim_galore --paired -q 20 --length 20 -a GATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATATCGTATGCCGTCTTCTGCTTG -a2 GATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCG --stringency 10 $i.R1.fq $i.R2.fq +done + +# Renaming trimmed files to simpler names +for i in "${NAMES[@]}" +do + mv $i.R1_val_1.fq $i.1.fq + mv $i.R2_val_2.fq $i.2.fq +done + +### Assembly ### +# These parameters could be further optimized for particular taxa +# First step concatenates reads into one forward and one reverse fastq file +cat ./assembly/*.R1_val_1.fq > forward +cat ./assembly/*.R2_val_2.fq > reverse +# Rainbow now clusters and assembles +rainbow/rainbow cluster -1 forward -2 reverse > cat.rbcluster.out 2> log +# we can add -f $1 but im not good with maths +rainbow/rainbow div -i cat.rbcluster.out -o cat.rbdiv.out +rainbow/rainbow merge -a -i cat.rbdiv.out -o cat.rbasm.out -N 1000 +perl rainbow/select_best_rbcontig.pl cat.rbasm.out > rainbowf +# Renames contigs to sequential numbers for simplicity +fastx_renamer -n COUNT -i rainbowf -o reference +## Mapping +# Use BWA to index reference +bwa-0.7.17/bwa index -a bwtsw reference +# Use BWA to map reads to reference. 
+### These parameters could be further optimized for particular taxa +for i in "${NAMES[@]}" +do + bwa-0.7.17/bwa mem reference $i.1.fq $i.2.fq -t 32 -a -T 10 > $i.sam +done +#Convert Sam to Bam and remove low quality, ambiguous mapping +for i in "${NAMES[@]}" +do + samtools view -bS -q15 $i.sam > $i.bam + samtools sort $i.bam -o $i +done +# Index reference for SAMtools +samtools faidx reference +# sort the Sample1.bam cause it sucks. The file needs to be sorted in that +# way before index is called +samtools sort -m 2G -@ 4 Sample1.bam -o lala +mv lala Sample1.bam +# index the bamfile +samtools index Sample1.bam +samtools mpileup -D -f reference *.bam >mpileup +# VarScan calls all sites with at least 5X coverage, a variant frequency above +# 10%, and 95% probability of being a SNP. Need varscan 2.3.5 version +java -jar VarScan.jar mpileup2snp mpileup --output-vcf --min-coverage 5 --strand-filter 0 --min-var-freq 0.1 --p-value 0.05 >SNPS.vcf +# VCFtools to filter raw SNPs and create a filtered vcf file (Final.recode.vcf) +# with SNPs that are present in every individual and that are not INDels +# can also work with --geno 0.99 flag but it needs vcftools 0.1.10 version +vcftools --vcf SNPS.vcf --out Final --recode --non-ref-af 0.001 --remove-indels +# VCFtools again to filter for SNPs that are present at an average of 10X coverage +vcftools --vcf Final.recode.vcf --out Final10X --recode --min-meanDP 10 diff --git a/test/failing/bio3.sh b/test/failing/bio3.sh new file mode 100644 index 0000000..3f2c5a1 --- /dev/null +++ b/test/failing/bio3.sh @@ -0,0 +1,10 @@ +# **Create the bowtie2 alignment database for the Arabidopsis genome** +# https://bioinformaticsworkbook.org/Appendix/GNUparallel/GNU_parallel_examples.html#gsc.tab=0 +cd $PASH_TOP/evaluation/bio/input/bio3 +bowtie2-build TAIR10_chr_all.fas tair +#theirs +time parallel -j2 "bowtie2 --threads 4 -x tair -k1 -q -1 {1} -2 {2} -S {1/.}.sam >& {1/.}.log" ::: fastqfiles/*_1.fastq.gz :::+ fastqfiles/*_2.fastq.gz +#ours 
+paste <(find . -name "*_1.fastq.gz") <(find . -name "*_2.fastq.gz") | xargs -n \ +2 sh -c 'bowtie2 --threads 4 -x tair -k1 -q -1 "$1" -2 "$2" -S fifth_R1.sam' argv0 + diff --git a/test/failing/buggy_comm_script.sh b/test/failing/buggy_comm_script.sh new file mode 100755 index 0000000..3cf15c9 --- /dev/null +++ b/test/failing/buggy_comm_script.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +mkfifo s1 s2 s3 s4 s5 + +PREV_IN=../evaluation/scripts/input/1M.txt +IN=/tmp/1M.txt + +cat $PREV_IN > $IN +echo "end" >> $IN + +cat $IN | grep "king" | tee s4 >s3 & +comm -23 $IN s3 > s1 & +comm -23 $IN s4 > s2 & +{ ../runtime/eager s2 s5 "/tmp/eager_intermediate_#file1" & } +cat s1 s5 > /tmp/buggy.out + +comm -23 <(cat $IN $IN) <(cat $IN | grep "king") > /tmp/seq.out + +rm s1 s2 s3 s4 s5 + +diff /tmp/buggy.out /tmp/seq.out diff --git a/test/failing/build_lib.sh b/test/failing/build_lib.sh new file mode 100644 index 0000000..136fede --- /dev/null +++ b/test/failing/build_lib.sh @@ -0,0 +1,75 @@ +## +## A library of shell functions that can be used to +## easily create a building/dependency installing/input +## downloading scripts. +## + + + +## +## This function checks if all the files in the arguments exist +## It returns 0 if all files exist, or 1 otherwise +## +files_exist_done_check() +{ + for file in "$@"; do + if [ ! -f "$file" ]; then + return 1 + fi + done + return 0 +} + +## +## This function checks if number of files in a sequence of directories +## is correct. 
+## Returns 0 if number is correct, or 1 otherwise +## +number_of_files_in_dir() +{ + local expected_number=$1 + local actual_number=$(ls "${@:2}" | wc -l) + if [ $expected_number -eq $actual_number ]; then + return 0 + else + return 1 + fi +} + +## +## This function executes a single idempotent step only if its check fails +## +## Requirements: +## - The step needs to be idempotent +## - The check needs to also check file sizes if there is concern of non-idempotence or failed download +## +execute_step() +{ + local step_fun=$1 + local step_done_check_fun=$2 + local step_desc=${3:-"Execution step"} + + # shellcheck disable=SC2086 + if ! eval $step_done_check_fun; then + echo "$step_desc is not done, executing..." + # shellcheck disable=SC2086 + eval $step_fun + # shellcheck disable=SC2086 + eval $step_done_check_fun || { echo "ERROR: $step_desc failed!"; exit 1; } + fi + echo "$step_desc completed." +} + +## Issues: +## +## - An overarching problem is that these take time in general, +## and therefore testing them out is not really feasible. +## - Another problem is that by doing that manually, +## we cannot get completely fine-grained. For example, we could +## only copy the missing file _a la_ Rattle, instead of running +## the whole step. +## - Another problem is that idempotence checking is hard to do manually. +## - Another issue is that generating the checks is cumbersome and error-prone. +## Users need to think whether they need file_exists/number_of_files/size checks, +## and if they are downloading, they need to first download and then determine the check. +## \ No newline at end of file diff --git a/test/failing/ci-perf.sh b/test/failing/ci-perf.sh new file mode 100755 index 0000000..e613263 --- /dev/null +++ b/test/failing/ci-perf.sh @@ -0,0 +1,163 @@ +#! 
/usr/bin/env bash + +# Run performance tests + +main() { + set -Eex; + + local pash_d="$(get_pash_dir)"; + + cd "$pash_d"; + git fetch; + local initial_revision="$(get_revision HEAD)"; + local latest_main_revision="$(get_revision main)"; + local revision="${1:-$latest_main_revision}"; + + local output_dir="${2:-/tmp/results}"; + local output_revision_directory="${output_dir}/$revision"; + echo "Will write to $output_revision_directory"; + + # For reproducibility. + trap "git checkout '$initial_revision'" EXIT + + # Use subshell for new working directory and + # visual distinction in `set -e` + echo "Running performance tests for $revision" + (git checkout "$revision" && \ + build_pash_runtime && \ + run_performance_test_suites); + + mkdir -p "$output_revision_directory"; + cp -r "$pash_d/evaluation/results/." "$output_revision_directory/" + + # The code to build the summary file might not be in the commit + # used to run the tests. + git checkout "$latest_main_revision"; + + echo "Summarizing results"; + local eurosys_tests='bigrams,diff,minimal_grep,minimal_sort,set-diff,spell,topn,wf' + summarize_perf_suite "EuroSys One-liners" \ + "$revision" \ + "${output_revision_directory}/eurosys_small" \ + "$eurosys_tests" \ + "2" \ + "distr_auto_split" \ + "${output_dir}/summary_eurosys_small" + + # Generate index page so others can review available summaries + # through web server. 
+ cd "${output_dir}" + ls summary_* > index; + cd - +} + + +build_pash_runtime() { + make -C "$(get_pash_dir)/runtime"; +} + +get_pash_dir() { + local here="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"; + git -C "$here" rev-parse --show-toplevel; +} + +get_revision() { + git rev-parse --short "${1:-HEAD}"; +} + +run_performance_test_suites() { + local pash_d=$(get_pash_dir); + cd "$pash_d/evaluation/eurosys"; + ./execute_eurosys_one_liners.sh -s + # ./execute_unix_benchmarks.sh -l + # ./execute_baseline_sort.sh + # ./execute_max_temp_dish_evaluation.sh + # ./execute_web_index_dish_evaluation.sh +} + +summarize_perf_suite() { + local heading="$1"; + local revision="$2"; + local input_dir="$3"; + local tests="$4"; + local width="$5"; + local variant="$6"; + local summary_file="$7"; + local cell_fmt='%-20s'; + + IFS=',' read -ra test_array <<< "$tests"; + + # When starting a summary file, include a header. + if [[ ! -f "$summary_file" ]]; then + ( + printf "$heading (width=$width variant=$variant)\n"; + printf "$cell_fmt" 'revision'; + for t in "${test_array[@]}"; do + printf "$cell_fmt" "$t"; + done; + printf '\n'; + ) > "$summary_file"; + fi + + # Add a row of test data. 
+ printf "$cell_fmt" "$revision" >> "$summary_file"; + for t in "${test_array[@]}"; do + local perf_file="${input_dir}/${t}_${width}_${variant}.time"; + echo "Summarizing $perf_file"; + printf "$cell_fmt" $(summarize_perf_file "$perf_file") >> "$summary_file"; + done + printf '\n' >> "$summary_file"; +} + +print_pash_execution_time() { + LC_NUMERIC='C' \ + cat "$1" | \ + grep 'Execution time: ' | \ + sed 's/[^0-9\.]//g' | \ + awk '{s+=sprintf("%f", $1)}END{printf "%.4f",s}'; +} + +print_user_time() { + local time_string="$(egrep 'user[^m]+m[0-9\.]+s' "$1" | sed 's/^[^0-9]+//g')"; + local seconds="$(echo "$time_string" | sed -nr 's/.*m([^s]+)s/\1/p')" + local minutes="$(echo "$time_string" | sed -nr 's/^[^0-9]+([0-9\.]+)m.*/\1/p')"; + echo "scale=4; ($minutes * 60) + $seconds" | bc; +} + +summarize_perf_file() { + local perf_file="$1"; + read -a data < <(split_perf_file_name "$perf_file"); + + local test="${data[0]}"; + local width="${data[1]}"; + local variant="${data[2]}"; + + if [[ "$variant" == 'seq' ]]; then + printf "%ss" "$(print_user_time "$1")"; + elif [[ -f "$(make_perf_file_name "$test" "$width" "seq")" ]]; then + local ptime="$(print_pash_execution_time "$1")"; + local utime="$(print_user_time "$1")"; + printf "%ss,x%s" "$ptime" "$(echo "scale=4; $utime / $ptime" | bc)"; + else + print_pash_execution_time "$1"; + fi +} + +split_perf_file_name() { + if [[ "$(basename $1)" =~ (.*)_([0-9]+)_(.*).time$ ]]; then + echo "${BASH_REMATCH[@]:1}"; + return 0 + else + return 1 + fi +} + +make_perf_file_name() { + local name="$1"; + local width="$2"; + local variant="$3"; + echo "${name}_${width}_${variant}.time"; +} + + +(return 0 2>/dev/null) || main "$@" diff --git a/test/failing/ci.sh b/test/failing/ci.sh new file mode 100755 index 0000000..6454fd4 --- /dev/null +++ b/test/failing/ci.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +## +# This runs the majority of the core CI job, including packaging the repo +# running tests. 
Do not add environment installation in this script, this should +# be done manually (both for security and convenience). No two process of this +# script can execute in parallel, nor can this process be safely interleaved +# with any process running `git` (such as pkg). The program webhook.js serves as +# a synchronization point; do not start this script if webhook.js is running on +# the same computer and accepting requests, but rather use webhook.js (as a +# daemon) to launch this script. Otherwise you run the risk of running into +# concurrency issues. See additional notes on webhook.js. +## + +set -ex + +# Placeholder for CI +REPORT_DIR=../../reports +C=5 +cd .. +PASH_TOP="$PWD" +cd - + +SMOOSH_RESULTS="" + +trim() { + tr -d '\n' | awk 'length > 40{$0 = substr($0, 1, 37) "..."} {print $0}' +} + +build_runtime() { + cd ../runtime + make + cd $PASH_TOP/scripts +} + +pash_tests() { + cd ../compiler + ./test_evaluation_scripts.sh | tee >(grep '^Summary' | cut -d ' ' -f2 > pash_tests.sum) + PASH_RESULTS=$(cat pash_tests.sum) + cd $PASH_TOP/scripts +} + +smoosh_tests() { + cd ../../smoosh + TEST_SHELL="$PASH_TOP/pa.sh --width 2 --log_file /tmp/log_file" make -C tests veryclean + TEST_SHELL="$PASH_TOP/pa.sh --width 2 --log_file /tmp/log_file" make -C tests | tee >(grep 'tests passed' | cut -d ' ' -f2 > smoosh_tests.sum) + SMOOSH_RESULTS=$(cat smoosh_tests.sum) + cd $PASH_TOP/scripts +} + +git pull + +# Vars used in report summary +REV=$(git rev-parse --short HEAD) +MSG="$(git log -1 --pretty=%B | trim | head -n 1)" +RES="fail" +TIME="0s" + +# Two report files +RF=$REPORT_DIR/$REV +SF=$REPORT_DIR/summary +ISF=$REPORT_DIR/summary.inv + +err_report() { + echo "Error on line $1" + FORMAT="%s %s %-40s %s %s\n" + SUM="$(printf "$FORMAT" "$(date '+%F;%T')" "$REV" "$MSG" "$RES" "$TIME")" + echo "$SUM" >> $SF +} + +stage() { + echo $(date '+%F %T') $REV $1 >> $RF +} + +cleanup() { + git clean -f +} + +trap 'err_report $LINENO' ERR +trap 'cleanup' EXIT + +# To respect 
invariants of stages +mkdir -p $REPORT_DIR ../../get + +echo $(date '+%F %T') $REV "Starting" > $RF +START_TIME=$(date +%s); +stage "Packaging PaSh" +./pkg.sh +stage "Building Runtime" +build_runtime >> $RF +stage "Running PaSh Tests" +pash_tests >> $RF +stage "Running Smoosh Tests" +smoosh_tests >> $RF +stage "Completing CI" +END_TIME=$(date +%s); + +RES="$(echo $PASH_RESULTS '|' $SMOOSH_RESULTS)" +TIME=$(echo $((END_TIME-START_TIME)) | awk '{print int($1/60)":"int($1%60)}') + +FORMAT="%s %s %-40s %s %ss\n" +SUM="$(printf "$FORMAT" "$(date '+%F;%T')" "$REV" "$MSG" "$RES" "$TIME")" +cat $SF | awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }' > $ISF +echo "$SUM" >> $ISF +cat $ISF | awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }' > $SF + + diff --git a/test/failing/dgsh_tee.sh b/test/failing/dgsh_tee.sh new file mode 100755 index 0000000..7fc992a --- /dev/null +++ b/test/failing/dgsh_tee.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +input=${1?"ERROR: dgsh-tee: No input file given"} +output=${2?"ERROR: dgsh-tee: No output file given"} +args=("${@:3}") + +# Set a default DISH_TOP in this directory if it doesn't exist +PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel)} + +# TODO: Doable check if this is still needed. Turned off for distributed exection. +# PR https://github.com/binpash/pash/pull/495 might've resolved it. +# cleanup() +# { +# kill -SIGTERM $dgsh_tee_pid > /dev/null 2>&1 +# } +# trap cleanup EXIT + +# $PASH_TOP/runtime/dgsh-tee -i "$input" -o "$output" $args & +# dgsh_tee_pid=$! 
+# wait $dgsh_tee_pid +"$PASH_TOP"/runtime/dgsh-tee -i "$input" -o "$output" "${args[@]}" diff --git a/test/failing/driver.sh b/test/failing/driver.sh new file mode 100755 index 0000000..7f65a81 --- /dev/null +++ b/test/failing/driver.sh @@ -0,0 +1,86 @@ +#!/bin/bash +# Generates an executable script for running all parallel experiments +# ./driver.sh [parallelism_factor] + +set -e + +if [[ $(hostname) =~ '.*star$' ]]; +then + # {death,live,mem}star servers: + DIFF_STAT="diffstat" + PARTIAL_DIR=/dev/shm + # TODO: Try Disk vs. Memory-mapped FS + # PARTIAL_DIR="." + IN=${IN:-../scripts/input/100M.txt} +else + DIFF_STAT="wc -l" + PARTIAL_DIR="." + IN=${IN:-../scripts/input/10M.txt} +fi + +PRE="dish" +CREATE="touch" # or mkfifo +CPUs=${1:-$(nproc)} +OUT1=${OUT:-./out1.txt} +OUT2=${OUT:-./out2.txt} +SEQ=${2:-"./seq-grep"} # grep has 3 levels + +# divide by number of chunks in AWK +echo dividing input to $CPUs chunks +total_size=$(wc -l $IN | awk -F " " '{print $1}') +chunk_size=$((total_size / CPUs)) +split -l $chunk_size $IN $PRE-chunk- + +find . 
-maxdepth 1 -type p -delete + +echo '#!/bin/bash' > $PRE-execute.sh +chmod +x ./$PRE-execute.sh +echo "# This script is auto-generated by driver.sh" >> $PRE-execute.sh +echo "#seq script: time (cat $IN | $SEQ > $OUT)" >> $PRE-execute.sh + +# echo "set -x" >> $PRE-execute.sh + +echo creating $CPUs channels +counter=0 +for chunk in $PRE-chunk-*; do + # echo "mkfifo $PRE-channel-$((counter++))" >> $PRE-execute.sh + if [[ $CREATE == 'mkfifo' ]]; then + $CREATE $PARTIAL_DIR/$PRE-channel-$((counter++)) + fi +done + +counter=0 +for chunk in $PRE-chunk-*; do + if [[ $CREATE == 'touch' ]]; then + # echo 'Channel is persistent file, using `>` to create it' + echo "cat $chunk | $SEQ > $PARTIAL_DIR/$PRE-channel-$((counter++)) &" >> $PRE-execute.sh + else + # echo 'Channel is FIFO, using `>>` to append to it' + echo "cat $chunk | $SEQ >> $PARTIAL_DIR/$PRE-channel-$((counter++)) &" >> $PRE-execute.sh + fi +done + +# #FIXME: bash doesn't expand `*` in _numberic_ order (1, 10, 2..) affecting cat +# echo cat '$PARTIAL_DIR/$PRE-channel-* >>' $OUT2 >> $PRE-execute.sh +# # echo 'wait' >> $PRE-execute.sh + +echo 'wait' >> $PRE-execute.sh + +counter=0 +args="" +for chunk in $PRE-chunk-*; do + args="$args $PARTIAL_DIR/$PRE-channel-$((counter++))" +done +echo cat $args '>' $OUT2 >> $PRE-execute.sh + +echo Sequential Timing: +time (cat $IN | $SEQ > $OUT1) + +echo Parallel Timing: +time ./$PRE-execute.sh + +echo Result Diff: +diff $OUT1 $OUT2 | $DIFF_STAT + +find . -maxdepth 1 -type p -delete +rm $PARTIAL_DIR/$PRE-channel-* diff --git a/test/failing/execute_baseline_sort.sh b/test/failing/execute_baseline_sort.sh new file mode 100755 index 0000000..afa6275 --- /dev/null +++ b/test/failing/execute_baseline_sort.sh @@ -0,0 +1,95 @@ +#!/bin/bash + +## Necessary to set PASH_TOP +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +## This sets up to what extent we run the evaluation. +## There are 2 levels: +## 1. Small input | --width 2, 16 +## 2. 
Big input | -- width 2, 4, 8, 16, 32, 64 +evaluation_level=1 + +while getopts 'slh' opt; do + case $opt in + s) evaluation_level=1 ;; + l) evaluation_level=2 ;; + h) echo "There are two possible execution levels:" + echo "option -s: Small input | --width 2, 16" + echo "option -l: Big input | -- width 2, 4, 8, 16, 32, 64" + exit 0 ;; + *) echo 'Error in command line parsing' >&2 + exit 1 + esac +done +shift "$(( OPTIND - 1 ))" + +if [ "$evaluation_level" -eq 1 ]; then + echo "Executing small baseline sort evaluation..." + n_inputs=( + 2 + 16 + ) + env_suffix="small" + intermediary_prefix="small_" +elif [ "$evaluation_level" -eq 2 ]; then + echo "Executing large baseline sort evaluation..." + n_inputs=( + 2 + 4 + 8 + 16 + 32 + 64 + ) + env_suffix="" + intermediary_prefix="" +else + echo "Unrecognizable execution level: $evaluation_level" + exit 1 +fi + +eval_directory="$PASH_TOP/evaluation/" +intermediary_dir="$PASH_TOP/evaluation/intermediary/" +script_dir="${eval_directory}scripts/" +microbenchmarks_dir="$PASH_TOP/evaluation/microbenchmarks/" +results="${eval_directory}results/baseline_sort/" + +mkdir -p $results + +for n_in in "${n_inputs[@]}"; do + experiment="baseline_sort_${n_in}" + sort_parallel_script="${intermediary_dir}sort_${n_in}_seq.sh" + env_file="${intermediary_dir}sort_${n_in}_env.sh" + + echo "Generating input and intermediary scripts... be patient..." + python3 "$PASH_TOP/evaluation/generate_microbenchmark_intermediary_scripts.py" \ + $script_dir "sort" $n_in $intermediary_dir $env_suffix + + . $env_file + export $(cut -d= -f1 $env_file) + + p_n_in="$(( $n_in * 2 ))" + experiment="baseline_sort_${intermediary_prefix}${p_n_in}" + echo "Executing sort with parallel flag for parallelism: ${p_n_in}" + { time /bin/bash $sort_parallel_script "${p_n_in}" > /tmp/seq_output ; } 2> >(tee "${results}${experiment}_parallel.time" >&2) + + echo "Generating input and intermediary scripts... be patient..." 
+ python3 "$PASH_TOP/evaluation/generate_microbenchmark_intermediary_scripts.py" \ + $microbenchmarks_dir "sort" $n_in $intermediary_dir $env_suffix + + exec_script="${intermediary_dir}sort_${n_in}_seq.sh" + experiment="baseline_sort_${intermediary_prefix}${n_in}" + + if [ "$n_in" -eq 2 ]; then + echo "Executing sort with bash" + { time /bin/bash $exec_script ; } 1> /tmp/bash_output 2> >(tee "${results}${experiment}_seq.time" >&2) + fi + + echo "Executing pash (no eager) on sort with --width ${n_in}" + { time $PASH_TOP/pa.sh -w "${n_in}" --log_file /tmp/pash_log --output_time --no_eager $exec_script ; } 1> /tmp/pash_output 2> >(tee "${results}${experiment}_pash_no_eager.time" >&2) + diff -s /tmp/seq_output /tmp/pash_output | head + + echo "Executing pash on sort with --width ${n_in}" + { time $PASH_TOP/pa.sh -w "${n_in}" --log_file /tmp/pash_log --output_time $exec_script ; } 1> /tmp/pash_output 2> >(tee "${results}${experiment}_pash.time" >&2) + diff -s /tmp/seq_output /tmp/pash_output | head +done diff --git a/test/failing/execute_compile_evaluation_script.sh b/test/failing/execute_compile_evaluation_script.sh new file mode 100755 index 0000000..f1e18d7 --- /dev/null +++ b/test/failing/execute_compile_evaluation_script.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +## Necessary to set PASH_TOP +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +execute_seq_flag=0 +eager_flag=0 +no_task_par_eager_flag=0 +auto_split_flag=0 +assert_compiler_success="" + +while getopts 'senpac' opt; do + case $opt in + s) execute_seq_flag=1 ;; + e) eager_flag=1 ;; + n) no_task_par_eager_flag=1 ;; + a) auto_split_flag=1 ;; + c) assert_compiler_success="--assert_compiler_success" ;; + *) echo 'Error in command line parsing' >&2 + exit 1 + esac +done +shift "$(( OPTIND - 1 ))" + +## We assume that each evaluation script has a sequential, a +## distributed, and an environment +microbenchmark=$1 +n_in=$2 +results_subdir=$3 +intermediary_prefix=$4 + 
+experiment="${microbenchmark}_${n_in}" + +eval_directory="$PASH_TOP/evaluation/" +directory="${eval_directory}/${4}intermediary/" +results="${eval_directory}results/${results_subdir}/" +prefix="${directory}${experiment}" +env_file="${prefix}_env.sh" +funs_file="${prefix}_funs.sh" +seq_script="${prefix}_seq.sh" +input_file="${prefix}.in" + +seq_output="${directory}/${microbenchmark}_seq_output" +pash_output="${directory}/${microbenchmark}_pash_output" + +echo "Environment:" +# cat $env_file +. $env_file +vars_to_export=$(cut -d= -f1 $env_file) +if [ ! -z "$vars_to_export" ]; then + export $vars_to_export +fi + +## Export necessary functions +if [ -f "$funs_file" ]; then + source $funs_file +fi + +## Redirect the input if there is an input file +stdin_redir="/dev/null" +if [ -f "$input_file" ]; then + stdin_redir="$(cat "$input_file")" + echo "Has input file: $stdin_redir" +fi + +## TODO: Extend this script to give input to some arguments from stdin. + +if [ "$execute_seq_flag" -eq 1 ]; then + echo "Sequential:" + cat $seq_script + cat $stdin_redir | { time /bin/bash $seq_script > $seq_output ; } 2> >(tee "${results}${experiment}_seq.time" >&2) +else + echo "Not executing sequential..." 
+fi + +## Save the configuration to restore it afterwards +auto_split_opt="--width 1" +config_path_opt="" + +if [ "$auto_split_flag" -eq 1 ]; then + echo "Distributed with auto-split:" + eager_opt="" + auto_split_opt="--width ${n_in}" + distr_result_filename="${results}${experiment}_distr_auto_split.time" +elif [ "$eager_flag" -eq 1 ]; then + echo "Distributed:" + eager_opt="" + distr_result_filename="${results}${experiment}_distr.time" +elif [ "$no_task_par_eager_flag" -eq 1 ]; then + echo "Distributed with naive (no-task-par) eager:" + eager_opt="" + distr_result_filename="${results}${experiment}_distr_no_task_par_eager.time" + + ## Change the configuration + config_path="/tmp/new-config.yaml" + config_path_opt="--config_path ${config_path}" + cat "$PASH_TOP/compiler/config.yaml" > ${config_path} + sed -i 's/runtime\/eager.sh/runtime\/eager-no-task-par.sh/g' "${config_path}" +else + echo "Distributed without eager:" + eager_opt="--no_eager" + distr_result_filename="${results}${experiment}_distr_no_eager.time" +fi + +cat $stdin_redir | { time python3 $PASH_TOP/compiler/pash.py -d 1 --speculation no_spec $assert_compiler_success $eager_opt $auto_split_opt $config_path_opt --output_time $seq_script ; } 1> $pash_output 2> >(tee "${distr_result_filename}" >&2) && +echo "Checking for equivalence..." && +diff -s $seq_output $pash_output | head | tee -a "${distr_result_filename}" >&2 diff --git a/test/failing/execute_eurosys_one_liners.sh b/test/failing/execute_eurosys_one_liners.sh new file mode 100755 index 0000000..4df30f8 --- /dev/null +++ b/test/failing/execute_eurosys_one_liners.sh @@ -0,0 +1,135 @@ +#!/bin/bash + +## Necessary to set PASH_TOP +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +## This sets up to what extent we run the evaluation. +## There are 3 levels: +## 1. Small inputs | --width 2, 16 | Only full PaSh config +## 2. Small inputs | --width 2, 16 | All PaSh configs +## 3. 
Big inputs | -- width 2, 4, 8, 16, 32, 64 | All PaSh configs +## +## Note that for the small inputs there could be some variance with the results +## (especially with higher widths). +evaluation_level=1 + +while getopts 'smlh' opt; do + case $opt in + s) evaluation_level=1 ;; + m) evaluation_level=2 ;; + l) evaluation_level=3 ;; + h) echo "There are three possible execution levels:" + echo "option -s: Small inputs | --width 2, 16 | Only full PaSh config" + echo "option -m: Small inputs | --width 2, 16 | All PaSh configs" + echo "option -l: Big inputs | -- width 2, 4, 8, 16, 32, 64 | All PaSh configs" + exit 0 ;; + *) echo 'Error in command line parsing' >&2 + exit 1 + esac +done +shift "$(( OPTIND - 1 ))" + +## TODO: Add a script that runs the parallel sort evaluation + +if [ "$evaluation_level" -eq 1 ]; then + echo "Executing small evaluation..." + n_inputs=( + 2 + 16 + ) + result_subdir="eurosys_small" + env_suffix="small" + intermediary_prefix="small_" + microbenchmarks=( + 'minimal_grep;-a' # EuroSys: nfa-regex + 'minimal_sort;-a' # EuroSys: sort + 'topn;-a' # EuroSys: top-n + 'wf;-a' # EuroSys: wf + 'spell;-a' # EuroSys: spell + 'diff;-a' # EuroSys: difference + 'bigrams;-a' # EuroSys: bi-grams + 'set-diff;-a' # EuroSys: set-difference + 'double_sort;-a' # EuroSys: sort-sort + 'shortest_scripts;-a' # EuroSys: shortest-scripts + ) +elif [ "$evaluation_level" -eq 2 ]; then + echo "Executing medium evaluation..." 
+ n_inputs=( + 2 + 16 + ) + result_subdir="eurosys_small" + env_suffix="small" + intermediary_prefix="small_" + microbenchmarks=( + 'minimal_grep;-n;-a' # EuroSys: nfa-regex + 'minimal_sort;;-n;-a' # EuroSys: sort + 'topn;;-n;-a' # EuroSys: top-n + 'wf;;-n;-a' # EuroSys: wf + 'spell;-e;-a' # EuroSys: spell + 'diff;;-n;-a' # EuroSys: difference + 'bigrams;-e;-a' # EuroSys: bi-grams + 'set-diff;;-n;-a' # EuroSys: set-difference + 'double_sort;;-n;-e;-a' # EuroSys: sort-sort + 'shortest_scripts;;-n;-a' # EuroSys: shortest-scripts + ) +elif [ "$evaluation_level" -eq 3 ]; then + echo "Executing standard evaluation..." + n_inputs=( + 2 + 4 + 8 + 16 + 32 + 64 + ) + ## TODO: Maybe change the result_subdir for the full evaluation + result_subdir="eurosys_standard" + env_suffix="" + intermediary_prefix="" + microbenchmarks=( + 'minimal_grep;-n;-a' # EuroSys: nfa-regex + 'minimal_sort;;-n;-a' # EuroSys: sort + 'topn;;-n;-a' # EuroSys: top-n + 'wf;;-n;-a' # EuroSys: wf + 'spell;-e;-a' # EuroSys: spell + 'diff;;-n;-a' # EuroSys: difference + 'bigrams;-e;-a' # EuroSys: bi-grams + 'set-diff;;-n;-a' # EuroSys: set-difference + 'double_sort;;-n;-e;-a' # EuroSys: sort-sort + 'shortest_scripts;;-n;-a' # EuroSys: shortest-scripts + ) +else + echo "Unrecognizable execution level: $evaluation_level" + exit 1 +fi + +microbenchmarks_dir="$PASH_TOP/evaluation/microbenchmarks/" +intermediary_dir="$PASH_TOP/evaluation/${intermediary_prefix}intermediary/" +mkdir -p $intermediary_dir +mkdir -p "$PASH_TOP/evaluation/results/$result_subdir/" + +for microbenchmark_config in "${microbenchmarks[@]}"; do + IFS=";" read -r -a flags <<< "${microbenchmark_config}" + microbenchmark=${flags[0]} + echo "Executing: $microbenchmark" + # Execute the sequential script on the first run only + exec_seq="-s" + for n_in in "${n_inputs[@]}"; do + + ## Generate the intermediary script + echo "Generating input and intermediary scripts... be patient..." 
+ python3 "$PASH_TOP/evaluation/generate_microbenchmark_intermediary_scripts.py" \ + $microbenchmarks_dir $microbenchmark $n_in $intermediary_dir $env_suffix + + for flag in "${flags[@]:1}"; do + echo "Flag: ${flag}" + + ## Execute the intermediary script + "$PASH_TOP/evaluation/execute_compile_evaluation_script.sh" $exec_seq $flag "${microbenchmark}" "${n_in}" $result_subdir $intermediary_prefix > /dev/null 2>&1 + + ## Only run the sequential the first time around + exec_seq="" + done + done +done diff --git a/test/failing/execute_gnu_parallel_script.sh b/test/failing/execute_gnu_parallel_script.sh new file mode 100755 index 0000000..9a90a51 --- /dev/null +++ b/test/failing/execute_gnu_parallel_script.sh @@ -0,0 +1,52 @@ +#!/bin/bash + + +microbenchmark=$1 +n_in=$2 +results_subdir="gnu_parallel" + +experiment="${microbenchmark}_${n_in}" + +DISH_TOP=${DISH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +eval_directory="../evaluation/" +intermediary_directory="${eval_directory}/intermediary/" +results="${eval_directory}results/${results_subdir}/" +prefix="${intermediary_directory}${experiment}_gnu_parallel" + +mkdir -p "$results" # create the directory that receives the .time result file (was a literal ./results) + +env_file="${prefix}_env.sh" +funs_file="${prefix}_funs.sh" +gnu_parallel_script="${prefix}.sh" + +gnu_parallel_scripts_dir="${eval_directory}/gnu_parallel_benchmarks/" +microbenchmarks_dir="${eval_directory}/microbenchmarks/" + +## Generate the intermediary gnu parallel scripts +python3 generate_gnu_parallel_intermediary_script.py "${gnu_parallel_scripts_dir}" "${microbenchmarks_dir}" \ + "${microbenchmark}" "${n_in}" "${intermediary_directory}" || +{ echo 'GNU parallel script generation failed' ; exit 1; } + +seq_output="${intermediary_directory}/${microbenchmark}_seq_output" +gnu_parallel_output="${intermediary_directory}/${microbenchmark}_gnu_parallel_output" + +echo "Environment:" +cat "$env_file" +. 
"$env_file" +while IFS= read -r env_var_name; do [ -z "$env_var_name" ] || export "$env_var_name"; done < <(cut -d= -f1 "$env_file") # export every variable named in the env file, one name per line (a single quoted word breaks with more than one variable) + +## Export necessary functions +if [ -f "$funs_file" ]; then + source "$funs_file" +fi + +gnu_parallel_result_filename="${results}${experiment}_gnu_parallel.time" + +echo "GNU Parallel:" +cat "$gnu_parallel_script" +{ time /bin/bash "$gnu_parallel_script" > "$gnu_parallel_output" ; } 2> >(tee "$gnu_parallel_result_filename" >&2) + +echo "Checking for equivalence..." +diff -s "$seq_output" "$gnu_parallel_output" | tee -a "$gnu_parallel_result_filename" + diff --git a/test/failing/execute_max_temp_dish_evaluation.sh b/test/failing/execute_max_temp_dish_evaluation.sh new file mode 100755 index 0000000..0eefbc1 --- /dev/null +++ b/test/failing/execute_max_temp_dish_evaluation.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +## There are two possible execution levels (-s, -l) and an optional extended mode (-e): +## option -s: end_year=2000 +## option -l: end_year=2004 (The EuroSys evaluation) +## option -e: Run extended (separate preprocessing from processing) (The EuroSys evaluation) +start_year=2000 +end_year=2000 # For the small evaluation +execute_separate_flag=0 # Whether to execute processing and preprocessing separately + +while getopts 'sleh' opt; do + case $opt in + s) end_year=2000 ;; + l) end_year=2004 ;; + e) execute_separate_flag=1 ;; + h) echo "There are three possible execution levels:" + echo "option -s: end_year=2000" + echo "option -l: end_year=2004 (The EuroSys evaluation)" + echo "option -e: Run extended (separate preprocessing from processing) (The EuroSys evaluation)" + exit 0 ;; + *) echo 'Error in command line parsing' >&2 + exit 1 + esac +done +shift "$(( OPTIND - 1 ))" + + +eval_dir="$PASH_TOP/evaluation/" +results_dir="${eval_dir}/results/" + + +echo "Running max-temp evaluation for years: $start_year-$end_year" + +max_temp_complete_script="${eval_dir}/scripts/max-temp-complete.sh" + +temp_dir=max_temp_tmp_results +mkdir -p $temp_dir + 
+seq_output="${temp_dir}/max_temp_seq_output" +pash_width_16_output="${temp_dir}/max_temp_pash_16_output" +seq_time="$results_dir/max-temp-complete-$start_year-$end_year-seq.time" +pash_width_16_time="$results_dir/max-temp-complete-$start_year-$end_year-16-pash.time" + +echo "Executing the complete max-temp script with bash..." +seq "$start_year" "$end_year" | { time /bin/bash $max_temp_complete_script > $seq_output ; } 2> >(tee "${seq_time}" >&2) + +echo "Executing the complete max-temp script with pash -w 16 (log in: ${temp_dir}/max_temp_pash_16_log)" +seq "$start_year" "$end_year" | { time $PASH_TOP/pa.sh -w 16 --log_file "${temp_dir}/max_temp_pash_16_log" --output_time $max_temp_complete_script ; } 1> "$pash_width_16_output" 2> >(tee "$pash_width_16_time" >&2) +echo "Checking for output equivalence..." +diff -s $seq_output $pash_width_16_output | head + +if [ "$execute_separate_flag" -eq 1 ]; then + echo "Extended: Executing preprocessing and processing separately" + + max_temp_preprocess_script="${eval_dir}/scripts/max-temp-preprocess.sh" + max_temp_process_script="${eval_dir}/scripts/max-temp-process.sh" + + seq_preprocess_time="$results_dir/max-temp-preprocess-$start_year-$end_year-seq.time" + pash_width_16_preprocess_time="$results_dir/max-temp-preprocess-$start_year-$end_year-16-pash.time" + seq_process_time="$results_dir/max-temp-process-$start_year-$end_year-seq.time" + pash_width_16_process_time="$results_dir/max-temp-process-$start_year-$end_year-16-pash.time" + preprocess_output="${temp_dir}/max-temp-preprocess-output" + + echo "Executing the preprocessing max-temp script with bash..." 
+ seq "$start_year" "$end_year" | { time /bin/bash $max_temp_preprocess_script > $seq_output ; } 2> >(tee "${seq_preprocess_time}" >&2) + + echo "Executing the preprocessing max-temp script with pash -w 16 (log in: ${temp_dir}/pash_16_log)" + seq "$start_year" "$end_year" | { time $PASH_TOP/pa.sh -w 16 --log_file "${temp_dir}/pash_16_log" --output_time $max_temp_preprocess_script ; } 1> "$pash_width_16_output" 2> >(tee "${pash_width_16_preprocess_time}" >&2) + ## This equivalence takes a very long time to check (uncomment with caution) + # echo "Checking for output equivalence..." + # diff -s $seq_output $pash_width_16_output | head + + ## Copy the sequential preprocess output to another file so that it doesn't get overwritten + echo "Copying intermediate file..." + split -n l/16 -d "$seq_output" ${preprocess_output}_16_ + + ## Export the input variable for the process script + export IN="${preprocess_output}_16_*" + + echo "Executing the processing max-temp script with bash..." + { time /bin/bash $max_temp_process_script > $seq_output ; } 2> >(tee "${seq_process_time}" >&2) + + echo "Executing the processing max-temp script with pash -w 16 (log in: ${temp_dir}/pash_16_log)" + { time $PASH_TOP/pa.sh -w 16 --log_file "${temp_dir}/pash_16_log" --output_time $max_temp_process_script ; } 1> "$pash_width_16_output" 2> >(tee "${pash_width_16_process_time}" >&2) + echo "Checking for output equivalence..." 
+ diff -s $seq_output $pash_width_16_output | head +fi diff --git a/test/failing/execute_web_index_dish_evaluation.sh b/test/failing/execute_web_index_dish_evaluation.sh new file mode 100755 index 0000000..921fd8b --- /dev/null +++ b/test/failing/execute_web_index_dish_evaluation.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +## There are two possible execution levels: +## options -s: 1,000 urls (about 1.5 minutes in bash) +## options -l: 100,000 urls (a couple hours in bash) +input_number=1000 # About 1.5 minutes in bash +# input_number=100 # About 7 seconds in bash + +while getopts 'slh' opt; do + case $opt in + s) input_number=1000 ;; + l) input_number=100000 ;; + h) echo "There are two possible execution levels:" + echo "option -s: 1,000 urls (about 1.5 minutes in bash)" + echo "option -l: 100,000 urls (a couple hours in bash) (EuroSys evaluation)" + exit 0 ;; + *) echo 'Error in command line parsing' >&2 + exit 1 + esac +done +shift "$(( OPTIND - 1 ))" + +eval_dir="$PASH_TOP/evaluation/" +directory="${eval_dir}/scripts/web-index/" +results_dir="${eval_dir}/results/" +input_dir="${HOME}/wikipedia/" + +export IN="$input_dir/index_h_${input_number}.txt" +export WIKI="${input_dir}" +export WEB_INDEX_DIR="${directory}" + +web_index_script="${eval_dir}/scripts/web-index.sh" + +temp_dir=web_index_tmp_results +mkdir -p "$temp_dir" + +seq_output="${temp_dir}/seq_output" +pash_width_2_output="${temp_dir}/pash_2_output" +pash_width_16_output="${temp_dir}/pash_16_output" +seq_time="$results_dir/web-index-${input_number}-seq.time" +pash_width_2_time="$results_dir/web-index-${input_number}-2-pash.time" +pash_width_16_time="$results_dir/web-index-${input_number}-16-pash.time" + +echo "Executing the script with bash..." 
+{ time /bin/bash $web_index_script > $seq_output ; } 2> >(tee "${seq_time}" >&2) + +echo "Executing the script with pash -w 2 (log in: ${temp_dir}/pash_2_log)" +{ time $PASH_TOP/pa.sh -w 2 --log_file "${temp_dir}/pash_2_log" --output_time $web_index_script ; } 1> "$pash_width_2_output" 2> >(tee "${pash_width_2_time}" >&2) +echo "Checking for output equivalence..." +diff -s $seq_output $pash_width_2_output | head + +echo "Executing the script with pash -w 16 (log in: ${temp_dir}/pash_16_log)" +{ time $PASH_TOP/pa.sh -w 16 --log_file "${temp_dir}/pash_16_log" --output_time $web_index_script ; } 1> "$pash_width_16_output" 2> >(tee "${pash_width_16_time}" >&2) +echo "Checking for output equivalence..." +diff -s $seq_output $pash_width_16_output | head diff --git a/test/failing/exit_error.sh b/test/failing/exit_error.sh new file mode 100644 index 0000000..34c234c --- /dev/null +++ b/test/failing/exit_error.sh @@ -0,0 +1,10 @@ +( ( true ) 3>/dev/null/abc; echo $?; false); echo $? +({ true; } 3>/dev/null/abc; echo $?; false); echo $? +(for i in 1; do true; done 3>/dev/null/abc; echo $?; false); echo $? +(case x in (x) true ;; esac 3>/dev/null/abc; echo $?; false); echo $? +(if true; then true; fi 3>/dev/null/abc; echo $?; false); echo $? +(while false; do true; done 3>/dev/null/abc; echo $?; false); echo $? +(until true; do true; done 3>/dev/null/abc; echo $?; false); echo $? +(func() { true; } 3>/dev/null/abc && func; echo $?; false); echo $? +func() { true; }; (func 3>/dev/null/abc; echo $?; false); echo $? +(name_of_a_command_that_will_not_be_found; echo $?; false); echo $? 
\ No newline at end of file diff --git a/test/failing/genome-diff.sh b/test/failing/genome-diff.sh new file mode 100755 index 0000000..a269f9e --- /dev/null +++ b/test/failing/genome-diff.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# Find differences between two genome sequences---a a paired Illumina sequencing +# read (FASTQ files) and an assembled reference genome from GenBank (e.g., +# Pasteurella multocida). The reads are aligned to the reference, and sorted by +# coordinate. Instead of saving the BAM file, we pipe it directly to a series of +# BCF tool steps. Note the use of -l 0 and -Ou to keep the piped data in an +# uncompressed form, to avoid repeated compression/decompression steps. The +# --min-MQ 60 ensures only uniquely mapped reads are used. The final filter step +# removes low quality variant calls, heterozygous calls (this is haploid +# bacteria), and any regions with less than 10 supporting reads. + +# Requires: samtools, minimap2, bcftools +# Data: http://ndr.md/data/bio/R1.fastq.gz http://ndr.md/data/bio/R2.fastq.gz http://ndr.md/data/bio/ref.fa + +# https://github.com/samtools/samtools/releases/latest +# https://github.com/lh3/minimap2 +# http://thegenomefactory.blogspot.com/2018/10/a-unix-one-liner-to-call-bacterial.html + +CPUS=1 +REF=./input/ref.fa +R1=./input/R1.fastq.gz +R2=./input/R2.fastq.gz +OUT=/dev/shm/out.txt + +BIO_TOOLS=~/biotools + +# These should be added to every script +export PATH="$PATH:$BIO_TOOLS/bcftools-1.9" +export PATH="$PATH:$BIO_TOOLS/samtools-1.9" +export PATH="$PATH:$BIO_TOOLS/htslib-1.9" +export PATH="$PATH:$BIO_TOOLS/minimap2-2.17_x64-linux" + +minimap2 -a -x sr -t "$CPUS" "$REF" "$R1" "$R2" | # align reads to the reference + samtools sort -l 0 --threads "$CPUS" | # sort reads by coordinate + bcftools mpileup -Ou -B --min-MQ 60 -f "$REF" - | # multi-way pileup producing genotype likelihoods + bcftools call -Ou -v -m - | # SNP/indel calling + bcftools norm -Ou -f "$REF" -d all - | # left-align and normalize indels + bcftools 
filter -Ov -e 'QUAL<40 || DP<10 || GT!="1/1"' | # removes low-quality variant calls, etc + bcftools stats | # produce VCF/BCF stats + grep '^SN' | # look for a starting pattern + cut -f3- > $OUT # only write third column diff --git a/test/failing/heredoc2.sh b/test/failing/heredoc2.sh new file mode 100644 index 0000000..115f361 --- /dev/null +++ b/test/failing/heredoc2.sh @@ -0,0 +1,6 @@ +for i in '#' +do + cat << EOF + x=\`printf '%s' \\$i\`; printf '%s\\n' "\$x" +EOF +done diff --git a/test/failing/incomplete-arith.sh b/test/failing/incomplete-arith.sh new file mode 100644 index 0000000..04be620 --- /dev/null +++ b/test/failing/incomplete-arith.sh @@ -0,0 +1,10 @@ +cat=1 +EOH=2 +echo $((cat <(true) + +echo "start"; ls -l . | grep '.sh' | wc -l; echo "..scripts found here" > $OUT +{ echo "start"; + echo $(ls -l .) | grep '.sh' | wc -l; + echo "..scripts found here" +} > $OUT + +{ ls -R ../ | sort -rn | uniq | head; } > /dev/null 2>&1 & + +tee >(wc -l >&2) < $( echo $OUT ) | gzip > $OUT.gz + +# "optional" AND and OR composition operators +[ -f 'pizza.123' ] && ( echo 'exists' >$OUT ) || { echo 'does not' >$OUT; } + +wait + diff --git a/test/failing/mk_meta.sh b/test/failing/mk_meta.sh new file mode 100755 index 0000000..22d1d18 --- /dev/null +++ b/test/failing/mk_meta.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +set -e + +LIB="$1" +: ${LIB:=$(opam var lib)/libdash} + +cat >META <&2 + exit 1 + esac +done +shift "$(( OPTIND - 1 ))" + +# echo "Mult by: $multiply_factor" + +temp_file="$(mktemp)" # private scratch file that buffers stdin so it can be replayed + +cat > "$temp_file" + +for (( i = 0; i < $multiply_factor; i++ )); do + cat "$temp_file" +done + +rm "$temp_file" diff --git a/test/failing/pash_init_setup.sh b/test/failing/pash_init_setup.sh new file mode 100644 index 0000000..a2d41c9 --- /dev/null +++ b/test/failing/pash_init_setup.sh @@ -0,0 +1,238 @@ +# source the local pash config +source ~/.pash_init +## File directory +export RUNTIME_DIR=$(dirname "${BASH_SOURCE[0]}") +## TODO: Is there a better way to do this? 
+export RUNTIME_LIBRARY_DIR="$RUNTIME_DIR/../runtime/" +export PASH_REDIR="&2" +export PASH_DEBUG_LEVEL=0 + +## Check flags +export pash_output_time_flag=1 +export pash_execute_flag=1 +export pash_speculation_flag=0 # By default there is no speculation +export pash_dry_run_compiler_flag=0 +export pash_assert_compiler_success_flag=0 +export pash_checking_speculation=0 +export pash_checking_log_file=0 +export pash_checking_debug_level=0 +export pash_avoid_pash_runtime_completion_flag=0 +export pash_profile_driven_flag=1 +export pash_daemon=1 +export pash_parallel_pipelines=0 +export pash_daemon_communicates_through_unix_pipes_flag=0 +export show_version=0 +export distributed_exec=0 + +for item in "$@" +do + if [ "$pash_checking_speculation" -eq 1 ]; then + export pash_checking_speculation=0 + if [ "no_spec" == "$item" ]; then + export pash_speculation_flag=0 + elif [ "quick_abort" == "$item" ]; then + ## TODO: Fix how speculation interacts with dry_run, assert_compiler_success + export pash_speculation_flag=1 + echo "$$: Error: Speculation quick-abort is currently unmaintained!" 1>&2 + echo "Exiting..." 
1>&2 + exit 1 + else + echo "$$: Unknown value for option --speculation" 1>&2 + exit 1 + fi + fi + + if [ "$pash_checking_log_file" -eq 1 ]; then + export pash_checking_log_file=0 + export PASH_REDIR="$item" + fi + + if [ "$pash_checking_debug_level" -eq 1 ]; then + export pash_checking_debug_level=0 + export PASH_DEBUG_LEVEL=$item + fi + + # We output time always + # if [ "--output_time" == "$item" ]; then + # pash_output_time_flag=1 + # fi + if [ "--version" == "$item" ]; then + export show_version=1 + fi + if [ "--dry_run_compiler" == "$item" ]; then + export pash_dry_run_compiler_flag=1 + fi + + if [ "--assert_compiler_success" == "$item" ]; then + export pash_assert_compiler_success_flag=1 + fi + + if [ "--speculation" == "$item" ]; then + pash_checking_speculation=1 + fi + + if [ "--log_file" == "$item" ]; then + pash_checking_log_file=1 + fi + + if [ "--avoid_pash_runtime_completion" == "$item" ]; then + export pash_avoid_pash_runtime_completion_flag=1 + fi + + if [ "--profile_driven" == "$item" ]; then + export pash_profile_driven_flag=1 + fi + + if [ "-d" == "$item" ] || [ "--debug" == "$item" ]; then + pash_checking_debug_level=1 + fi + + if [ "--no_daemon" == "$item" ]; then + export pash_daemon=0 + fi + + if [ "--parallel_pipelines" == "$item" ]; then + export pash_parallel_pipelines=1 + fi + + if [ "--daemon_communicates_through_unix_pipes" == "$item" ]; then + export pash_daemon_communicates_through_unix_pipes_flag=1 + fi + + if [ "--distributed_exec" == "$item" ]; then + export distributed_exec=1 + fi +done + +## `pash_redir_output` and `pash_redir_all_output` are strictly for logging. +## +## They do not execute their arguments if there is no debugging. 
+if [ "$PASH_DEBUG_LEVEL" -eq 0 ]; then + pash_redir_output() + { + : + } + + pash_redir_all_output() + { + : + } + + pash_redir_all_output_always_execute() + { + > /dev/null 2>&1 "$@" + } + +else + if [ "$PASH_REDIR" == '&2' ]; then + pash_redir_output() + { + >&2 "$@" + } + + pash_redir_all_output() + { + >&2 "$@" + } + + pash_redir_all_output_always_execute() + { + >&2 "$@" + } + else + pash_redir_output() + { + >>"$PASH_REDIR" "$@" + } + + pash_redir_all_output() + { + >>"$PASH_REDIR" 2>&1 "$@" + } + + pash_redir_all_output_always_execute() + { + >>"$PASH_REDIR" 2>&1 "$@" + } + fi +fi + +export -f pash_redir_output +export -f pash_redir_all_output +export -f pash_redir_all_output_always_execute + + +if [ "$pash_daemon_communicates_through_unix_pipes_flag" -eq 1 ]; then + pash_communicate_daemon() + { + local message=$1 + pash_redir_output echo "Sending msg to daemon: $message" + echo "$message" > "$RUNTIME_IN_FIFO" + daemon_response=$(cat "$RUNTIME_OUT_FIFO") + pash_redir_output echo "Got response from daemon: $daemon_response" + echo "$daemon_response" + } + + pash_communicate_daemon_just_send() + { + local message=$1 + pash_redir_output echo "Sending msg to daemon: $message" + echo "$message" > "$RUNTIME_IN_FIFO" + } + + pash_wait_until_daemon_listening() + { + : + } +else + pash_communicate_daemon() + { + local message=$1 + pash_redir_output echo "Sending msg to daemon: $message" + daemon_response=$(echo "$message" | nc -U "$DAEMON_SOCKET") + pash_redir_output echo "Got response from daemon: $daemon_response" + echo "$daemon_response" + } + + pash_communicate_daemon_just_send() + { + pash_communicate_daemon "$1" + } + + pash_wait_until_daemon_listening() + { + ## Only wait for a limited amount of time. + ## If the daemon cannot start listening in ~ 1 second, + ## then it must have crashed or so. 
+ i=0 + ## This is a magic number to make sure that we wait enough + maximum_retries=100 + ## For some reason, `nc -z` doesn't work on livestar (it always returns error) + ## and therefore we need to send something. + until echo "Daemon Start" 2> /dev/null | nc -U "$DAEMON_SOCKET" >/dev/null 2>&1 ; + do + ## TODO: Can we wait for the daemon in a better way? + sleep 0.01 + i=$((i+1)) + if [ $i -eq $maximum_retries ]; then + echo "Error: Maximum retries: $maximum_retries exceeded when waiting for daemon to bind to socket!" 1>&2 + echo "Exiting..." 1>&2 + exit 1 + fi + done + } +fi + +if [ "$distributed_exec" -eq 1 ]; then + pash_communicate_worker_manager() + { + local message=$1 + pash_redir_output echo "Sending msg to worker manager: $message" + manager_response=$(echo "$message" | nc -U "$DSPASH_SOCKET") + pash_redir_output echo "Got response from worker manager: $manager_response" + echo "$manager_response" + } +fi +export -f pash_communicate_daemon +export -f pash_communicate_daemon_just_send +export -f pash_wait_until_daemon_listening diff --git a/test/failing/pash_runtime.sh b/test/failing/pash_runtime.sh new file mode 100755 index 0000000..14ee3c1 --- /dev/null +++ b/test/failing/pash_runtime.sh @@ -0,0 +1,297 @@ +#!/bin/bash + +## +## High level design. +## +## (1) The `pash_runtime` should behave as a wrapper, saving all the necessary state: +## - previous exit code +## - previous set status +## - previous variables +## and then reverting to PaSh internal state +## +## (2) Then it should perform pash-internal work. +## +## (3) Then it should make sure to revert the exit code and `set` state to the saved values. +## +## (4) Then it should execute the inside script (either original or parallel) +## TODO: Figure out what could be different before (1), during (4), and after (7) +## +## (5) Then it save all necessary state and revert to pash-internal state. +## (At the moment this happens automatically because the script is ran in a subshell.) 
+## +## (6) Then it should do all remaining PaSh-internal work. +## +## (7) Before exiting it should revert all exit state. +## +## Visually: +## +## -- bash -- | -- pash -- +## ... | +## \----(1)----\ +## | ... +## | (2) +## | ... +## /----(3)----/ +## ... | +## (4) | +## ... | +## +## (The rest of the steps happen only in debug mode) +## ... +## \----(5)----\ +## | ... +## | (6) +## | ... +## /----(7)----/ +## ... | + +## TODO: Make a list/properly define what needs to be saved at (1), (3), (5), (7) +## +## Necessary for pash: +## - PATH important for PaSh but might be changed in bash +## - IFS has to be kept default for PaSh to work +## +## Necessary for bash: +## - Last PID $! (TODO) +## - Last exit code $? +## - set state $- +## - File descriptors (TODO) +## - Loop state (?) Maybe `source` is adequate for this (TODO) +## - Traps (TODO) +## +## (maybe) TODO: After that, maybe we can create cleaner functions for (1), (3), (5), (7). +## E.g. we can have a correspondence between variable names and revert them using them + +## +## (1) +## + +## Store the previous exit status to propagate to the compiler +## export pash_previous_exit_status=$? +## The assignment now happens outside +export pash_previous_exit_status + +## Store the current `set` status to pass to the inside script +export pash_previous_set_status=$- + +pash_redir_output echo "$$: (1) Previous exit status: $pash_previous_exit_status" +pash_redir_output echo "$$: (1) Previous set state: $pash_previous_set_status" + +## Prepare a file with all shell variables +## +## This is only needed by PaSh to expand. +## +## TODO: Maybe we can get rid of it since PaSh has access to the environment anyway? +## TODO: Remove this call to pash_ptempfile_name.sh. Actually remove this file in general. 
+## PaSh should only generate temp files using $RANDOM$RANDOM$RANDOM +# pash_runtime_shell_variables_file="$($RUNTIME_DIR/pash_ptempfile_name.sh $distro)" +pash_runtime_shell_variables_file="${PASH_TMP_PREFIX}/pash_$RANDOM$RANDOM$RANDOM" +source "$RUNTIME_DIR/pash_declare_vars.sh" "$pash_runtime_shell_variables_file" +pash_redir_output echo "$$: (1) Bash variables saved in: $pash_runtime_shell_variables_file" + +## Abort script if variable is unset +pash_default_set_state="huB" + +## Revert the `set` state to not have spurious failures +pash_redir_output echo "$$: (1) Bash set state at start of execution: $pash_previous_set_status" +source "$RUNTIME_DIR/pash_set_from_to.sh" "$pash_previous_set_status" "$pash_default_set_state" +pash_redir_output echo "$$: (1) Set state reverted to PaSh-internal set state: $-" + +## +## (2) +## + +## The first argument contains the sequential script. Just running it should work for all tests. +pash_sequential_script_file=$1 + +## The second argument SHOULD be the file that contains the IR to be compiled +pash_input_ir_file=$2 + +## The parallel script will be saved in the following file if compilation is successful. +# pash_compiled_script_file="$($RUNTIME_DIR/pash_ptempfile_name.sh $distro)" +pash_compiled_script_file="${PASH_TMP_PREFIX}/pash_$RANDOM$RANDOM$RANDOM" + + +if [ "$pash_speculation_flag" -eq 1 ]; then + ## Count the execution time + pash_exec_time_start=$(date +"%s%N") + source "$RUNTIME_DIR/pash_runtime_quick_abort.sh" + pash_runtime_final_status=$? + ## For now this will fail!!! + exit 1 +else + + if [ "$pash_daemon" -eq 1 ]; then + ## TODO: Have a more proper communication protocol + ## TODO: Make a proper client for the daemon + pash_redir_output echo "$$: (2) Before asking the daemon for compilation..." 
+ ## Send and receive from daemon + msg="Compile:${pash_compiled_script_file}| Variable File:${pash_runtime_shell_variables_file}| Input IR File:${pash_input_ir_file}" + daemon_response=$(pash_communicate_daemon "$msg") # Blocking step, daemon will not send response until it's safe to continue + + if [[ "$daemon_response" == *"OK:"* ]]; then + pash_runtime_return_code=0 + elif [ -z "$daemon_response" ]; then + ## Trouble... Daemon crashed, rip + pash_redir_output echo "$$: ERROR: (2) Daemon crashed!" + exit 1 + else + pash_runtime_return_code=1 + fi + + # Get assigned process id + # We need to split the daemon response into elements of an array by + # shell's field splitting. + # shellcheck disable=SC2206 + response_args=($daemon_response) + process_id=${response_args[1]} + else + pash_redir_all_output_always_execute python3 -S "$RUNTIME_DIR//pash_runtime.py" --var_file "${pash_runtime_shell_variables_file}" "${pash_compiled_script_file}" "${pash_input_ir_file}" "$@" + pash_runtime_return_code=$? + fi + + pash_redir_output echo "$$: (2) Compiler exited with code: $pash_runtime_return_code" + if [ "$pash_runtime_return_code" -ne 0 ] && [ "$pash_assert_compiler_success_flag" -eq 1 ]; then + pash_redir_output echo "$$: ERROR: (2) Compiler failed with error code: $pash_runtime_return_code while assert_compiler_success was enabled! Exiting PaSh..." 
+ exit 1 + fi + + # store functions for distributed execution + if [ "$distributed_exec" -eq 1 ]; then + declared_functions="${PASH_TMP_PREFIX}/pash_$RANDOM$RANDOM$RANDOM" + declare -f > "$declared_functions" + export declared_functions + fi + + ## + ## (3) + ## + + ## Count the execution time + pash_exec_time_start=$(date +"%s%N") + + + ## If the compiler failed or if we dry_run the compiler, we have to run the sequential + if [ "$pash_runtime_return_code" -ne 0 ] || [ "$pash_dry_run_compiler_flag" -eq 1 ]; then + pash_script_to_execute="${pash_sequential_script_file}" + else + pash_script_to_execute="${pash_compiled_script_file}" + fi + + # ## + # ## (4) + # ## + + ## TODO: It might make sense to move these functions in pash_init_setup to avoid the cost of redefining them here. + function clean_up () { + if [ "$pash_daemon" -eq 1 ]; then + if [ "$parallel_script_time_start" == "None" ] || [ "$pash_profile_driven_flag" -eq 0 ]; then + exec_time="" + else + parallel_script_time_end=$(date +"%s%N") + parallel_script_time_ms=$(echo "scale = 3; ($parallel_script_time_end-$parallel_script_time_start)/1000000" | bc) + pash_redir_output echo " --- --- Execution time: $parallel_script_time_ms ms" + exec_time=$parallel_script_time_ms + fi + ## Send to daemon + msg="Exit:${process_id}|Time:$exec_time" + daemon_response=$(pash_communicate_daemon_just_send "$msg") + fi + } + + function run_parallel() { + trap clean_up SIGTERM SIGINT EXIT + if [ "$pash_profile_driven_flag" -eq 1 ]; then + parallel_script_time_start=$(date +"%s%N") + fi + source "$RUNTIME_DIR/pash_wrap_vars.sh" "$pash_script_to_execute" + internal_exec_status=$? 
+ final_steps + clean_up + (exit $internal_exec_status) + } + + ## We only want to execute (5) and (6) if we are in debug mode and it is not explicitly avoided + function final_steps() { + if [ "$PASH_DEBUG_LEVEL" -ne 0 ] && [ "$pash_avoid_pash_runtime_completion_flag" -ne 1 ]; then + ## + ## (5) + ## + + ## Prepare a file for the output shell variables to be saved in + pash_output_var_file=$("$RUNTIME_DIR/pash_ptempfile_name.sh" "$distro") + # pash_redir_output echo "$$: Output vars: $pash_output_var_file" + + ## Prepare a file for the `set` state of the inner shell to be output + pash_output_set_file=$("$RUNTIME_DIR/pash_ptempfile_name.sh" "$distro") + + source "$RUNTIME_DIR/pash_runtime_shell_to_pash.sh" "$pash_output_var_file" "$pash_output_set_file" + + ## + ## (6) + ## + source "$RUNTIME_DIR/pash_runtime_complete_execution.sh" + fi + } + + ## TODO: Add a check that `set -e` is not on + + ## Check if there are traps set, and if so do not execute in parallel + ## + ## TODO: This might be an overkill but is conservative + traps_set=$(trap) + pash_redir_output echo "$$: (2) Traps set: $traps_set" + # Don't fork if compilation failed. The script might have effects on the shell state. + if [ "$pash_runtime_return_code" -ne 0 ] || + ## If parallel pipelines is not enabled we shouldn't fork + [ "$pash_parallel_pipelines" -eq 0 ] || + ## If parallel pipelines is explicitly disabled (e.g., due to context), no forking + [ "$pash_disable_parallel_pipelines" -eq 1 ] || + ## If traps are set, no forking + [ ! -z "$traps_set" ] || + [ "$pash_daemon" -eq 0 ]; then + # Early clean up in case the script effects shell like "break" or "exec" + # This is safe because the script is run sequentially and the shell + # won't be able to move forward until this is finished + + ## Needed to clear up any past script time start execution times. + parallel_script_time_start=None + clean_up + source "$RUNTIME_DIR/pash_wrap_vars.sh" "$pash_script_to_execute" + pash_runtime_final_status=$? 
+ final_steps + else + # Should we redirect errors as well? + # TODO: capturing the return state here isn't completely correct. + # Might need more complex design if this ends up being a problem + run_parallel <&0 & + pash_runtime_final_status=$? + pash_redir_output echo "$$: (2) Running pipeline" + + ## Here we need to also revert the state back to bash state + ## since run_parallel will do that in a separate shell + ## + ## This happens right before we exit from pash_runtime! + + ## Recover the `set` state of the previous shell + # pash_redir_output echo "$$: (3) Previous BaSh set state: $pash_previous_set_status" + # pash_redir_output echo "$$: (3) PaSh-internal set state of current shell: $-" + pash_current_set_state=$- + source "$RUNTIME_DIR/pash_set_from_to.sh" "$pash_current_set_state" "$pash_previous_set_status" + pash_redir_output echo "$$: (5) Reverted to BaSh set state: $-" + + ## TODO: This might not be necessary + ## Recover the input arguments of the previous script + ## Note: We don't need to care about wrap_vars arguments because we have stored all of them already. + # + # This variable stores arguments as a space-separated string, so we + # need to unquote it and to split it into multiple strings by shell's + # field splitting. + # shellcheck disable=SC2086 + set -- $pash_input_args + pash_redir_output echo "$$: (5) Reverted to BaSh input arguments: $@" + + ## TODO: We probably need to exit with the exit code here or something! 
+ fi +fi + diff --git a/test/failing/pash_runtime_quick_abort.sh b/test/failing/pash_runtime_quick_abort.sh new file mode 100644 index 0000000..b3b541b --- /dev/null +++ b/test/failing/pash_runtime_quick_abort.sh @@ -0,0 +1,283 @@ +#!/bin/bash + +## File directory +RUNTIME_DIR=$(dirname "${BASH_SOURCE[0]}") + +still_alive() +{ + jobs -p | tr '\n' ' ' +} + +log() +{ + pash_redir_output echo "$$: (QAbort) " "$@" +} + +# Taken from: https://stackoverflow.com/a/20473191 +# list_include_item "10 11 12" "2" +list_include_item() { + local list="$1" + local item="$2" + if [[ $list =~ (^|[[:space:]])"$item"($|[[:space:]]) ]] ; then + # yes, list include item + result=0 + else + result=1 + fi + return $result +} + +## This spawns a buffer command to buffer inputs and outputs +## +## It writes the pid to stdout +spawn_eager() +{ + local name=$1 + local input=$2 + local output=$3 + local eager_file=$("$RUNTIME_DIR/pash_ptempfile_name.sh") + ## Note: Using eager actually leads to some deadlock issues. It must have to do with eagers behavior when + ## its input or output closes. + # "$RUNTIME_DIR/../runtime/eager" "$input" "$output" "$eager_file" /dev/null 2>/dev/null & + "$RUNTIME_DIR/../runtime/dgsh_tee.sh" "$input" "$output" -I -f /dev/null 2>/dev/null & + local eager_pid=$! + log "Spawned $name eager: $eager_pid with:" + log " -- IN: $input" + log " -- OUT: $output" + log " -- INTERM: $eager_file" + echo "$eager_pid" +} + +## Kills the process group that belongs to the given pgid +kill_pg() +{ + local pg_lead_pid=$1 + /bin/kill -15 "-${pg_lead_pid}" 2> /dev/null +} + +## TODO: Make sure that this waits for all processes in the process group to finish executing. 
+## Waits for the process-group leader $1; the status of `wait` is the result and its diagnostics are discarded.
+wait_pg() {
+    wait "$1" 2> /dev/null
+}
+
+## Signals the process group led by $1 through kill_pg, then reaps the leader through wait_pg.
+kill_wait_pg() {
+    local leader_pid=$1
+    kill_pg "$leader_pid"
+    wait_pg "$leader_pid"
+}
+
+## Solution Schematic:
+##
+##  (A)      (B)      (C)       (D)        (E)
+## stdin --- tee --- eager --- seq.sh --- eager --- OUT_SEQ
+##             \      (F)
+##              \--- eager --- PAR_IN
+##
+## (1) If compiler fails, or sequential is done executing:
+##     - cat OUT_SEQ > stdout
+##
+## (2) If compiler succeeds:
+##     - USR1 to reroute so that it redirects to /dev/null
+##     - PAR_IN redirect to par stdin.
+##
+## Simplifying assumptions:
+## - Not worrying about stderr
+## - Not worrying about other inputs at the moment (assuming they are files if compiler succeeds)
+## - Not worrying about other outputs
+##   + assuming that the parallel implementation will overwrite them
+##   + Assuming that the DFG outputs are not appended
+##
+## TODO: A first TODO would be to check them in the compilation process
+##
+## TODO: An alternative TODO would be to let preprocessing give us information about them, allowing us to
+##       have a finer tuned execution plan depending on this information. For example, if we see that script
+##       has append to some file we can be careful and buffer its output using eager.
+
+## NOTE: The intuition about why quick-abort works is that if the compilation succeeds, then the
+##       script is a DFG, meaning that we know exactly how it affects its environment after completing.
+##       Therefore, we can go back and stop the already running script without risking unsafe behavior.
+
+## TODO: We also want to avoid executing the compiled script if it doesn't contain any improvement.
+
+## TODO: Maybe the reroute needs to be put around (C) and not (D)
+
+## TODO: Improve the happy path (very fast sequential) execution time
+
+## TODO: Use reroute around dgsh_tees to make sure that they do not use storage unnecessarily
+##       (if their later command is done).
+
+if [ "$pash_execute_flag" -eq 1 ]; then
+    # set -x
+    ## (A) Redirect stdin to `tee`
+    pash_tee_stdin=$("$RUNTIME_DIR/pash_ptempfile_name.sh")
+    mkfifo "$pash_tee_stdin"
+    ## The redirections below are necessary to ensure that the background `cat` reads from stdin.
+    { setsid cat > "$pash_tee_stdin" <&3 3<&- & } 3<&0
+    pash_input_cat_pid=$!
+    log "Spawned input cat with pid: $pash_input_cat_pid"
+
+    ## (B) A `tee` that duplicates input to both the sequential and parallel
+    pash_tee_stdout1=$("$RUNTIME_DIR/pash_ptempfile_name.sh")
+    pash_tee_stdout2=$("$RUNTIME_DIR/pash_ptempfile_name.sh")
+    mkfifo "$pash_tee_stdout1" "$pash_tee_stdout2"
+    tee "$pash_tee_stdout1" > "$pash_tee_stdout2" < "$pash_tee_stdin" &
+
+    ## (C) The sequential input eager
+    pash_seq_eager_output=$("$RUNTIME_DIR/pash_ptempfile_name.sh")
+    mkfifo "$pash_seq_eager_output"
+    seq_input_eager_pid=$(spawn_eager "sequential input" "$pash_tee_stdout1" "$pash_seq_eager_output")
+
+    ## (D) Sequential command
+    pash_seq_output=$("$RUNTIME_DIR/pash_ptempfile_name.sh")
+    mkfifo "$pash_seq_output"
+    setsid "$RUNTIME_DIR/pash_wrap_vars.sh" \
+        "$pash_runtime_shell_variables_file" \
+        "$pash_output_variables_file" \
+        "$pash_output_set_file" \
+        "$pash_sequential_script_file" \
+        > "$pash_seq_output" < "$pash_seq_eager_output" &
+    pash_seq_pid=$!
+    log "Sequential pid: $pash_seq_pid"
+
+    ## (E) The sequential output eager
+    pash_seq_eager2_output=$("$RUNTIME_DIR/pash_ptempfile_name.sh")
+    mkfifo "$pash_seq_eager2_output"
+    seq_output_eager_pid=$(spawn_eager "sequential output" "$pash_seq_output" "$pash_seq_eager2_output")
+
+    ## (F) Second eager
+    pash_par_eager_output=$("$RUNTIME_DIR/pash_ptempfile_name.sh")
+    mkfifo "$pash_par_eager_output"
+    par_eager_pid=$(spawn_eager "parallel input" "$pash_tee_stdout2" "$pash_par_eager_output")
+
+    ## Run the compiler
+    setsid python3 "$RUNTIME_DIR/pash_runtime.py" "$pash_compiled_script_file" --var_file "$pash_runtime_shell_variables_file" "${@:2}" &
+    pash_compiler_pid=$!
+    log "Compiler pid: $pash_compiler_pid"
+
+    ## Wait until one of the two (original script, or compiler) die
+    alive_pids=$(still_alive)
+    log "Still alive: $alive_pids"
+    while `list_include_item "$alive_pids" "$pash_seq_pid"` && `list_include_item "$alive_pids" "$pash_compiler_pid"` ; do
+        ## Wait for either of the two to complete
+        wait -n "$pash_seq_pid" "$pash_compiler_pid"
+        completed_pid_status=$?
+        log "Process exited with return code: $completed_pid_status"
+        alive_pids=$(still_alive)
+        log "Still alive: $alive_pids"
+    done
+
+    ## If the sequential is still alive we want to see if the compiler succeeded
+    if `list_include_item "$alive_pids" "$pash_seq_pid"` ; then
+        pash_runtime_return_code=$completed_pid_status
+        log "Compilation was done first with return code: $pash_runtime_return_code"
+
+        ## We only want to run the parallel if the compiler succeeded.
+        if [ "$pash_runtime_return_code" -eq 0 ]; then
+            ## TODO: Is this necessary
+            ## Redirect the sequential output to /dev/null
+            cat "$pash_seq_eager2_output" > /dev/null &
+            seq_cat_pid=$!
+            log "seq to /dev/null cat pid: $seq_cat_pid"
+
+            ## Kill the sequential process tree
+            log "Killing sequential pid: $pash_seq_pid..."
+            kill_pg "$pash_seq_pid"
+            kill_status=$?
+            wait_pg "$pash_seq_pid"
+            seq_exit_status=$?
+            log "Sequential pid: $pash_seq_pid was killed successfully returning status $seq_exit_status."
+            log "Still alive: $(still_alive)"
+
+            ## If kill failed it means it was already completed,
+            ## and therefore we do not need to run the parallel.
+            ##
+            ## TODO: Enable this optimization
+            if true || [ "$kill_status" -eq 0 ]; then
+                ## (2) Run the parallel
+                log "Run parallel:"
+                log " -- Runtime vars: $pash_runtime_shell_variables_file"
+                log " -- Output vars: $pash_output_variables_file"
+                log " -- Output set: ${pash_output_set_file}"
+                log " -- Compiled script: ${pash_compiled_script_file}"
+                log " -- Input: $pash_par_eager_output"
+
+                "$RUNTIME_DIR/pash_wrap_vars.sh" \
+                    "$pash_runtime_shell_variables_file" \
+                    "$pash_output_variables_file" \
+                    "$pash_output_set_file" \
+                    "$pash_compiled_script_file" \
+                    < "$pash_par_eager_output" &
+                ## Note: For some reason the above redirection used to create some issues,
+                ##       but no more after we started using dgsh-tee
+
+                pash_par_pid=$!
+                log "Parallel is running with pid: $pash_par_pid..."
+                # strace -p $pash_par_pid 2>> $PASH_REDIR
+                wait "$pash_par_pid"
+                pash_runtime_final_status=$?
+                log "Parallel is done with status: $pash_runtime_final_status"
+            else
+                ## TODO: Handle that case properly by enabling the optimization above.
+                log "ERROR: Shouldn't have reached that"
+                exit 1
+            fi
+        else
+            ## If the compiler failed we just wait until the sequential is done.
+
+            ## (1) Redirect the seq output to stdout
+            cat "$pash_seq_eager2_output" &
+            seq_output_cat_pid=$!
+            log "STDOUT cat pid: $seq_output_cat_pid"
+
+            log "Waiting for sequential: $pash_seq_pid"
+            wait "$pash_seq_pid"
+            pash_runtime_final_status=$?
+            log "DONE Sequential: $pash_seq_pid exited with status: $pash_runtime_final_status"
+
+            ## TODO: It is not clear if we also need to wait for the output cat to end.
+            log "Waiting for sequential output cat: $seq_output_cat_pid"
+            wait "$seq_output_cat_pid"
+            log "DONE Waiting for sequential output cat: $seq_output_cat_pid"
+        fi
+    else
+        pash_runtime_final_status=$completed_pid_status
+        log "Sequential was done first with return code: $pash_runtime_final_status"
+
+        ## (1) Redirect the seq output to stdout
+        cat "$pash_seq_eager2_output" &
+        final_cat_pid=$!
+        log "STDOUT cat pid: $final_cat_pid"
+
+        ## We need to kill the compiler to not get delayed log output
+        ## If this fails (meaning that compilation is done) we do not care
+        kill_wait_pg "$pash_compiler_pid"
+
+        wait "$final_cat_pid"
+    fi
+
+    ## TODO: Not clear if this is needed or if it does indeed kill all the
+    ##       processes and cleans up everything properly
+    ## Kill the input process
+    log "Killing the input cat process: $pash_input_cat_pid"
+    kill_wait_pg "$pash_input_cat_pid"
+    # kill -9 $pash_input_cat_pid 2> /dev/null
+    # wait $pash_input_cat_pid 2> /dev/null
+    log "The input cat: $pash_input_cat_pid died!"
+
+    ## TODO: This (and the above) should not be needed actually, everything should be already done due to
+    ##       sequential and parallel both having exited.
+    ## Kill every spawned process. `still_alive` yields ONE space-separated string of pids, so it is expanded unquoted below: quoted, `kill`/`wait` get the whole list as a single invalid argument.
+    still_alive_pids="$(still_alive)"
+    log "Killing all the still alive: $still_alive_pids"
+    # shellcheck disable=SC2086
+    if [ -n "${still_alive_pids// /}" ]; then
+        kill -15 $still_alive_pids 2> /dev/null
+        wait $still_alive_pids 2> /dev/null
+    fi
+    log "All the alive pids died: $still_alive_pids"
+
+    ## Return the exit code
+    (exit "$pash_runtime_final_status")
+fi
diff --git a/test/failing/pash_set_from_to.sh b/test/failing/pash_set_from_to.sh
new file mode 100644
index 0000000..c27a884
--- /dev/null
+++ b/test/failing/pash_set_from_to.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+from_set=${1?From set not given}
+to_set=${2?To set not given}
+
+## Finds the difference of set variables (removing the c, s one since it cannot be actually set and unset)
+pash_redir_output echo "From set: $from_set"
+pash_redir_output echo "To set: $to_set"
+IFS=',' read -r pash_set_to_remove pash_set_to_add <<<"$("$RUNTIME_LIBRARY_DIR/set-diff" "$from_set" "$to_set")"
+pash_redir_output echo "To add: $pash_set_to_add"
+pash_redir_output echo "To remove: $pash_set_to_remove"
+pash_redir_all_output_always_execute set "-$pash_set_to_add"
+pash_redir_all_output_always_execute set "+$pash_set_to_remove"
diff --git a/test/failing/pash_source_declare_vars.sh b/test/failing/pash_source_declare_vars.sh
new file mode 100755
index 0000000..06d654f
--- /dev/null
+++ b/test/failing/pash_source_declare_vars.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+## This sources variables that were produced from `declare -p`
+
+## TODO: Fix this to not source read only variables
+## TODO: Does this work with arrays
+
+## TODO: Fix this to not source pash variables so as to not invalidate PaSh progress
+
+## TODO: Fix this filtering
+
+filter_vars_file()
+{
+    cat "$1" | grep -v "^declare -\([A-Za-z]\|-\)* \(pash\|BASH\|LINENO\|EUID\|GROUPS\)"
+}
+
+## TODO: Error handling if the argument is empty?
+if [ "$PASH_DEBUG_LEVEL" -eq 0 ]; then  # quiet: source the filtered file, discard anything it prints
+    > /dev/null 2>&1 source <(filter_vars_file "$1")
+else  # debug: same sourcing, output goes to stderr or is appended to $PASH_REDIR
+    if [ "$PASH_REDIR" == '&2' ]; then
+        >&2 source <(filter_vars_file "$1")
+    else
+        >>"$PASH_REDIR" 2>&1 source <(filter_vars_file "$1")
+    fi
+fi
diff --git a/test/failing/pash_wrap_vars.sh b/test/failing/pash_wrap_vars.sh
new file mode 100755
index 0000000..4b2139a
--- /dev/null
+++ b/test/failing/pash_wrap_vars.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+## File directory
+RUNTIME_DIR=$(dirname "${BASH_SOURCE[0]}")
+
+script_source="$1"
+
+#ONLY WAY OUT IS TO TREAT EXEC in special way
+
+## Recover the `set` state of the previous shell
+# pash_redir_output echo "$$: (3) Previous BaSh set state: $pash_previous_set_status"
+# pash_redir_output echo "$$: (3) PaSh-internal set state of current shell: $-"
+export pash_current_set_state=$-
+source "$RUNTIME_DIR/pash_set_from_to.sh" "$pash_current_set_state" "$pash_previous_set_status"
+pash_redir_output echo "$$: (3) Reverted to BaSh set state: $-"
+
+## Recover the input arguments of the previous script
+## Note: We don't need to care about wrap_vars arguments because we have stored all of them already.
+#
+# This variable stores arguments as a space-separated string, so we need to
+# unquote it and to split it into multiple strings by shell's field splitting.
+# shellcheck disable=SC2086
+set -- $pash_input_args
+pash_redir_output echo "$$: (3) Reverted to BaSh input arguments: $@"
+
+## Execute the script
+pash_redir_output echo "$$: (4) Restoring previous exit code: ${pash_previous_exit_status}"
+pash_redir_output echo "$$: (4) Will execute script in ${script_source}:"
+pash_redir_output cat "${script_source}"
+
+## Note: We run the `exit` in a checked position so that we don't simply exit when we are in `set -e`. Both branches below are identical; only the value of `$?` on entry differs.
+if (exit "$pash_previous_exit_status")
+then
+{
+    source "${script_source}"
+    internal_exec_status=$?
+    ## Make sure that any input argument changes are propagated outside
+    export pash_input_args="$@"
+    (exit "$internal_exec_status")
+}
+else
+{
+    source "${script_source}"
+    internal_exec_status=$?
+    ## Make sure that any input argument changes are propagated outside
+    export pash_input_args="$@"
+    (exit "$internal_exec_status")
+}
+fi
diff --git a/test/failing/pay_respects.sh b/test/failing/pay_respects.sh
new file mode 100644
index 0000000..e566c8b
--- /dev/null
+++ b/test/failing/pay_respects.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+
+cat < results.db # 3.2GB
+sqlite3 results.db <(echo $QUERY) | csvcut -c 4 pipelines.csv | awk '{$1=$1};1' | sort | uniq tee >(
+    # Schwartzian transform
+    awk -F'|' '{print NF,$0}' file | sort -nr | cut -d' ' -f2- > likely-longest-pipelines.txt
+    ) >(
+    tr '|' '\n' | awk '{$1=$1};1' | awk '{print $1}' | tr -cs 'A-Za-z' '\n' | sort | uniq -c | sort -rn > freq-commands.txt
+    )
diff --git a/test/failing/quickcheck.sh b/test/failing/quickcheck.sh
new file mode 100755
index 0000000..4b17f27
--- /dev/null
+++ b/test/failing/quickcheck.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+cd $PASH_TOP
+
+echo confirms the necessary components for running the artifact
+echo
+echo Git commit ID: $(git rev-parse --short HEAD)
+echo \$PASH_TOP: $(echo $PASH_TOP)
+echo pash executable: $PASH_TOP/pa.sh
+
+echo
+$PASH_TOP/pa.sh --help
+
+echo "Testing graph generation"
+$PASH_TOP/pa.sh -c 'echo Pash Installation is complete!'
diff --git a/test/failing/remote_exec_graph.sh b/test/failing/remote_exec_graph.sh new file mode 100755 index 0000000..768aa8a --- /dev/null +++ b/test/failing/remote_exec_graph.sh @@ -0,0 +1,10 @@ +ir_file=$1 + +# pash_redir_output echo "Sending msg to worker manager: $message" +response=($(echo "Exec-Graph: $ir_file $declared_functions" | nc -U "$DSPASH_SOCKET")) +# pash_redir_output echo "Got response from worker manager: $response" + +status=${response[0]} #do something if false +script_to_execute=${response[1]} + +source "$script_to_execute" diff --git a/test/failing/run-experiment.sh b/test/failing/run-experiment.sh new file mode 100755 index 0000000..ca0a0c0 --- /dev/null +++ b/test/failing/run-experiment.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +eval_dir="$PASH_TOP/evaluation/buses/" +results_dir="${eval_dir}/results/" + +mkdir -p $results_dir + +for i in 1 2 3 4 +do + script="${eval_dir}/${i}.sh" + echo "Executing $script..." + + seq_output=/tmp/seq_output + pash_width_16_no_cat_split_output=/tmp/pash_16_no_cat_split_output + pash_width_16_output=/tmp/pash_16_output + + seq_time="${results_dir}/${i}_2_seq.time" + pash_width_16_no_cat_split_time="${results_dir}/${i}_16_distr_auto_split_fan_in_fan_out.time" + pash_width_16_time="${results_dir}/${i}_16_distr_auto_split.time" + + echo "Executing the script with bash..." + { time /bin/bash $script > $seq_output ; } 2> >(tee "${seq_time}" >&2) + + echo "Executing the script with pash -w 16 without the cat-split optimization (log in: /tmp/pash_16_log)" + { time $PASH_TOP/pa.sh -w 16 -d 1 --log_file /tmp/pash_16_no_cat_split_log --no_cat_split_vanish --output_time $script ; } 1> "$pash_width_16_no_cat_split_output" 2> >(tee "${pash_width_16_no_cat_split_time}" >&2) + echo "Checking for output equivalence..." 
+ diff -s $seq_output $pash_width_16_no_cat_split_output | head + + echo "Executing the script with pash -w 16 (log in: /tmp/pash_16_log)" + { time $PASH_TOP/pa.sh -w 16 -d 1 --log_file /tmp/pash_16_log --output_time $script ; } 1> "$pash_width_16_output" 2> >(tee "${pash_width_16_time}" >&2) + echo "Checking for output equivalence..." + diff -s $seq_output $pash_width_16_output | head + +done diff --git a/test/failing/run.par.sh b/test/failing/run.par.sh new file mode 100644 index 0000000..7be2127 --- /dev/null +++ b/test/failing/run.par.sh @@ -0,0 +1,601 @@ +#!/bin/bash + +# time: print real in seconds, to simplify parsing +TIMEFORMAT="%3R" # %3U %3S" + +if [[ -z "$PASH_TOP" ]]; then + echo "Need to provide PASH_TOP, possibly $(git rev-parse --show-toplevel)" 1>&2 + exit 1 +fi + +source "$PASH_TOP/scripts/utils.sh" + +oneliners_pash(){ + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "oneliners/$times_file" ]; then + echo "skipping oneliners/$times_file" + return 0 + fi + + cd oneliners/ + + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + scripts_inputs=( + "nfa-regex;100M.txt" + "sort;3G.txt" + "top-n;1G.txt" + "wf;3G.txt" + "spell;1G.txt" + "diff;3G.txt" + "bi-grams;1G.txt" + "set-diff;3G.txt" + "sort-sort;1G.txt" + "shortest-scripts;all_cmdsx100.txt" + ) + + touch "$times_file" + echo executing one-liners with pash $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + for script_input in ${scripts_inputs[@]} + do + IFS=";" read -r -a script_input_parsed <<< "${script_input}" + script="${script_input_parsed[0]}" + input="${script_input_parsed[1]}" + source_var $1 $input + printf -v pad %30s + padded_script="${script}.sh:${pad}" + padded_script=${padded_script:0:30} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + 
single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + + cd .. +} + +unix50_pash(){ + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "unix50/${times_file}" ]; then + echo "skipping unix50/${times_file}" + return 0 + fi + + cd unix50/ + + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + touch "$times_file" + echo executing Unix50 $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + source_var $1 + + for number in `seq 36` + do + script="${number}" + + printf -v pad %20s + padded_script="${script}.sh:${pad}" + padded_script=${padded_script:0:20} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + cd .. 
+} + +web-index_pash(){ + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "web-index/${times_file}" ]; then + echo "skipping web-index/${times_file}" + return 0 + fi + + cd web-index/ + + install_deps_source_setup $1 + + source_var $1 + + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + touch "$times_file" + echo executing web index with pash $(date) | tee -a "$times_file" + outputs_file="${outputs_dir}/web-index.${outputs_suffix}" + pash_log="${pash_logs_dir}/web-index.pash.log" + single_time_file="${outputs_dir}/web-index.${time_suffix}" + + ## FIXME: There is a bug when running with r_split at the moment. r_wrap cannot execute bash_functions + echo -n "web-index.sh:" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" web-index.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + cd .. +} + +max-temp_pash(){ + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "max-temp/${times_file}" ]; then + echo "skipping max-temp/${times_file}" + return 0 + fi + cd max-temp/ + + install_deps_source_setup + + source_var + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + touch "$times_file" + echo executing max temp with pash $(date) | tee -a "$times_file" + outputs_file="${outputs_dir}/temp-analytics.${outputs_suffix}" + pash_log="${pash_logs_dir}/temp-analytics.pash.log" + single_time_file="${outputs_dir}/temp-analytics.${time_suffix}" + + echo -n "temp-analytics.sh:" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" temp-analytics.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + cd .. 
+} + +analytics-mts_pash(){ + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "analytics-mts/${times_file}" ]; then + echo "skipping analytics-mts/${times_file}" + return 0 + fi + + cd analytics-mts/ + + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + touch "$times_file" + echo executing MTS analytics with pash $(date) | tee -a "$times_file" + echo '' >> "$times_file" + ## FIXME 5.sh is not working yet + for number in `seq 4` + do + script="${number}" + + printf -v pad %20s + padded_script="${script}.sh:${pad}" + padded_script=${padded_script:0:20} + source_var $1 + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + cd .. 
+} + +nlp_pash(){ + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "nlp/${times_file}" ]; then + echo "skipping nlp/${times_file}" + return 0 + fi + + cd nlp/ + + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + names_scripts=( + "1syllable_words;6_4" + "2syllable_words;6_5" + "4letter_words;6_2" + "bigrams_appear_twice;8.2_2" + "bigrams;4_3" + "compare_exodus_genesis;8.3_3" + "count_consonant_seq;7_2" + # "count_morphs;7_1" + "count_trigrams;4_3b" + "count_vowel_seq;2_2" + "count_words;1_1" + "find_anagrams;8.3_2" + "merge_upper;2_1" + "sort;3_1" + "sort_words_by_folding;3_2" + "sort_words_by_num_of_syllables;8_1" + "sort_words_by_rhyming;3_3" + # "trigram_rec;6_1" + "uppercase_by_token;6_1_1" + "uppercase_by_type;6_1_2" + "verses_2om_3om_2instances;6_7" + "vowel_sequencies_gr_1K;8.2_1" + "words_no_vowels;6_3" + ) + + touch "$times_file" + echo executing Unix-for-nlp with pash $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + source_var $1 + + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + cd .. 
+} + + + +# everything under this line is WIP + + +dgsh_pash(){ + + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "dgsh/${times_file}" ]; then + echo "skipping dgsh/${times_file}" + return 0 + fi + + cd dgsh/ + + cd input/ + ./setup.sh + cd .. + + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + names_scripts=( + "compressionbench;1" + "gitstats;2" + "cmetrics;3" + "dublicatefiles;4" + "highlightwords;5" + # "wordproperties;6" + # "weatherreport;7" + "textproperties;8" + "staticsymbols;9" + # "hierarchymap;10" + # "plotgit;11" + "parallelword;12" + # "venuauthor;13" + # "2dfourier;14" + # "nuclear;15" + # "fft;16" + "reordercol;17" + "dirlisting;18" + ) + + touch "$times_file" + echo executing DGSH with pash $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + export VOC=/usr/share/dict/words + export IN=$PASH_TOP/evaluation/benchmarks/dgsh/input/ + export FULL=$IN/dblp.xml + export MINI=$IN/mini.xml + export OUT=$PASH_TOP/evaluation/benchmarks/dgsh/input/ + export BIN=/usr/local/bin + + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + export IN="$PASH_TOP/evaluation/benchmarks/dgsh/input/genesis" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + cd .. 
+} + + +aliases_pash(){ + + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "aliases/${times_file}" ]; then + echo "skipping aliases/${times_file}" + return 0 + fi + + cd aliases/ + + cd input/ + ./setup.sh + cd .. + + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + names_scripts=( + "compressionbench;1" + "gitstats;2" + "cmetrics;3" + "dublicatefiles;4" + "highlightwords;5" + # "wordproperties;6" + # "weatherreport;7" + "textproperties;8" + "staticsymbols;9" + # "hierarchymap;10" + # "plotgit;11" + "parallelword;12" + # "venuauthor;13" + # "2dfourier;14" + # "nuclear;15" + # "fft;16" + "reordercol;17" + "dirlisting;18" + ) + + touch "$times_file" + echo executing aliases with pash $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + export IN="$PASH_TOP/evaluation/benchmarks/aliases/input/genesis" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + cd .. +} + + + +posh_pash(){ + + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "posh/${times_file}" ]; then + echo "skipping posh/${times_file}" + return 0 + fi + + cd posh/ + + cd input/ + ./setup.sh + cd .. 
+ + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + names_scripts=( + "discat;1" + "convert;2" + "raytracing;3" + # "zannotate;4" where is zannotate binary + ) + + touch "$times_file" + echo executing posh with pash $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + export OUT=$PASH_TOP/evaluation/benchmarks/posh/input/output + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + cd .. +} + +bio_pash(){ + + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "bio/${times_file}" ]; then + echo "skipping bio/${times_file}" + return 0 + fi + + cd bio/ + + cd input/ + ./setup.sh + cd .. 
+ + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + names_scripts=( + "bio4.sh;bio4" + ) + + touch "$times_file" + echo executing bio with pash $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + export OUT=$PASH_TOP/evaluation/benchmarks/bio/input/output + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + cd .. +} + +dependency_untangling_pash() { + times_file="par.res" + outputs_suffix="par.out" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "dependency_untangling/${times_file}" ]; then + echo "skipping dependency_untangling/${times_file}" + return 0 + fi + + cd dependency_untangling/ + + rm -rf input/output/ + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + names_scripts=( + "MediaConv1;img_convert" + "MediaConv2;to_mp3" + "Program_Inference;proginf" + "LogAnalysis1;nginx" + "LogAnalysis2;pcap" + "Genomics_Computation;genomics" + "AurPkg;pacaur" + "FileEnc1;compress_files" + "FileEnc2;encrypt_files" + ) + + touch "$times_file" + echo executing dependency_untangling with pash $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + source_var + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + 
padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + cd .. +} diff --git a/test/failing/run.seq.sh b/test/failing/run.seq.sh new file mode 100755 index 0000000..a561abd --- /dev/null +++ b/test/failing/run.seq.sh @@ -0,0 +1,498 @@ +#!/bin/bash + +# FIXME: skip running if output file exists (using tee?) + +## FIX: We should not have a set -e in a script that is supposed to be sourced. +# set -e + +# time: print real in seconds, to simplify parsing +TIMEFORMAT="%3R" # %3U %3S" + +if [[ -z "$PASH_TOP" ]]; then + echo "Need to provide PASH_TOP, possibly $(git rev-parse --show-toplevel)" 1>&2 + exit 1 +fi + +source "$PASH_TOP/scripts/utils.sh" + +oneliners(){ + seq_times_file="seq.res" + seq_outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "oneliners/$seq_times_file" ]; then + echo "skipping oneliners/$seq_times_file" + return 0 + fi + + cd oneliners/ + # we need to download the whole dataset to generate the small input as well + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + + scripts_inputs=( + "nfa-regex;100M.txt" + "sort;3G.txt" + "top-n;1G.txt" + "wf;3G.txt" + "spell;1G.txt" + "diff;3G.txt" + "bi-grams;1G.txt" + "set-diff;3G.txt" + "sort-sort;1G.txt" + "shortest-scripts;all_cmdsx100.txt" + ) + + touch "$seq_times_file" + echo executing one-liners $(date) | tee -a "$seq_times_file" + echo '' >> "$seq_times_file" + + for script_input in ${scripts_inputs[@]} + do + IFS=";" read -r -a script_input_parsed <<< "${script_input}" + script="${script_input_parsed[0]}" + input="${script_input_parsed[1]}" + # source the 
required variables from setup.sh + source_var $1 $input + printf -v pad %30s + padded_script="${script}.sh:${pad}" + padded_script=${padded_script:0:30} + + seq_outputs_file="${outputs_dir}/${script}.${seq_outputs_suffix}" + + echo "${padded_script}" $({ time ./${script}.sh > "$seq_outputs_file"; } 2>&1) | tee -a "$seq_times_file" + done + + cd .. +} + +unix50(){ + times_file="seq.res" + outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "unix50/${times_file}" ]; then + echo "skipping unix50/${times_file}" + return 0 + fi + + cd unix50/ + + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + + touch "$times_file" + echo executing Unix50 $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + source_var $1 + + for number in `seq 36` + do + script="${number}" + + printf -v pad %20s + padded_script="${script}.sh:${pad}" + padded_script=${padded_script:0:20} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + + echo "${padded_script}" $({ time ./${script}.sh > "$outputs_file"; } 2>&1) | tee -a "$times_file" + done + cd .. +} + +web-index(){ + times_file="seq.res" + outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "web-index/${times_file}" ]; then + echo "skipping web-index/${times_file}" + return 0 + fi + + cd web-index/ + + install_deps_source_setup $1 + + source_var $1 + + mkdir -p "$outputs_dir" + + touch "$times_file" + echo executing web index $(date) | tee -a "$times_file" + outputs_file="${outputs_dir}/web-index.${outputs_suffix}" + echo web-index.sh: $({ time ./web-index.sh > "${outputs_file}"; } 2>&1) | tee -a "$times_file" + cd .. 
+} + +max-temp(){ + times_file="seq.res" + outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "max-temp/${times_file}" ]; then + echo "skipping max-temp/${times_file}" + return 0 + fi + cd max-temp/ + + install_deps_source_setup + + source_var + mkdir -p "$outputs_dir" + touch "$times_file" + echo executing max temp $(date) | tee -a "$times_file" + outputs_file="${outputs_dir}/temp-analytics.${outputs_suffix}" + echo max-temp.sh: $({ time ./temp-analytics.sh > "${outputs_file}"; } 2>&1) | tee -a "$times_file" + cd .. +} + +analytics-mts(){ + times_file="seq.res" + outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "analytics-mts/${times_file}" ]; then + echo "skipping analytics-mts/${times_file}" + return 0 + fi + + cd analytics-mts/ + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + + touch "$times_file" + echo executing MTS analytics $(date) | tee -a "$times_file" + echo '' >> "$times_file" + ## FIXME 5.sh is not working yet + for number in `seq 4` + do + script="${number}" + + printf -v pad %20s + padded_script="${script}.sh:${pad}" + padded_script=${padded_script:0:20} + # select the respective input + source_var $1 + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + + echo "${padded_script}" $({ time ./${script}.sh > "$outputs_file"; } 2>&1) | tee -a "$times_file" + done + cd .. 
+} + +nlp(){ + times_file="seq.res" + outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "nlp/${times_file}" ]; then + echo "skipping nlp/${times_file}" + return 0 + fi + + cd nlp/ + + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + + names_scripts=( + "1syllable_words;6_4" + "2syllable_words;6_5" + "4letter_words;6_2" + "bigrams_appear_twice;8.2_2" + "bigrams;4_3" + "compare_exodus_genesis;8.3_3" + "count_consonant_seq;7_2" + # "count_morphs;7_1" + "count_trigrams;4_3b" + "count_vowel_seq;2_2" + "count_words;1_1" + "find_anagrams;8.3_2" + "merge_upper;2_1" + "sort;3_1" + "sort_words_by_folding;3_2" + "sort_words_by_num_of_syllables;8_1" + "sort_words_by_rhyming;3_3" + # "trigram_rec;6_1" + "uppercase_by_token;6_1_1" + "uppercase_by_type;6_1_2" + "verses_2om_3om_2instances;6_7" + "vowel_sequencies_gr_1K;8.2_1" + "words_no_vowels;6_3" + ) + + touch "$times_file" + echo executing Unix-for-nlp $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + source_var $1 + + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + + echo "${padded_script}" $({ time ./${script}.sh > "$outputs_file"; } 2>&1) | tee -a "$times_file" + done + cd .. +} + +aliases(){ + seq_times_file="seq.res" + outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "aliases/${seq_times_file}" ]; then + echo "skipping aliases/${seq_times_file}" + return 0 + fi + + cd aliases/ + + cd input/ + ./setup.sh + ./install-deps.sh + cd .. 
+ + mkdir -p "$outputs_dir" + + names_scripts=( + #"tomp3;1.tomp3" + #"unrtf;2.unrtf" + #"convertjpg;3.resiz" + # "gitkernel;4.gitkernel" # needs complex grep command + "apachelog;5.apachelog" + "msg;6.msg" + "nginx;7.nginx" + "varlog;8.varlog" + ) + + touch "$seq_times_file" + echo executing aliases $(date) | tee -a "$seq_times_file" + echo '' >> "$seq_times_file" + + export WAV=$PASH_TOP/evaluation/benchmarks/aliases/input/wav + export JPG=$PASH_TOP/evaluation/benchmarks/aliases/input/jpg + export RTF=$PASH_TOP/evaluation/benchmarks/aliases/input/rtf + export GIT=$PASH_TOP/evaluation/benchmarks/aliases/input/linux + export IN=$PASH_TOP/evaluation/benchmarks/aliases/input/ + export OUT=$PASH_TOP/evaluation/benchmarks/aliases/input/out + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + + echo "${padded_script}" $({ time ./${script}.sh > "$outputs_file"; } 2>&1) | tee -a "$seq_times_file" + done + cd .. +} + +bio() { + seq_times_file="seq.res" + outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "aliases/${seq_times_file}" ]; then + echo "skipping aliases/${seq_times_file}" + return 0 + fi + + cd bio/ + + cd input/ + ./setup.sh + cd .. 
+ + mkdir -p "$outputs_dir" + + names_scripts=( + "bio4.sh;bio4" + ) + + touch "$seq_times_file" + echo executing bio $(date) | tee -a "$seq_times_file" + echo '' >> "$seq_times_file" + + export IN=$PASH_TOP/evaluation/benchmarks/bio/ + # takes too many files to download + export IN_N=input_all.txt + export OUT=$PASH_TOP/evaluation/benchmarks/bio/output + + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + echo "${padded_script}" $({ time ./${script}.sh > "$outputs_file"; } 2>&1) | tee -a "$seq_times_file" + done + cd .. +} + +# everything under this line is WIP + +dgsh() { + seq_times_file="seq.res" + seq_outpus_suffix="seq.out" + outputs_dir="outputs" + if [ -e "dgsh/$seq_times_file" ]; then + echo "skipping dgsh/$seq_times_file" + return 0 + fi + + cd dgsh + + cd ./input/ + ./setup.sh -full + cd .. 
+ + mkdir -p "$outputs_dir" + + names_scripts=( + "compressionbench;1" + "gitstats;2" + "cmetrics;3" + "dublicatefiles;4" + "highlightwords;5" + # "wordproperties;6" + # "weatherreport;7" + "textproperties;8" + "staticsymbols;9" + # "hierarchymap;10" + # "plotgit;11" + "parallelword;12" + # "venuauthor;13" + # "2dfourier;14" + # "nuclear;15" + # "fft;16" + "reordercol;17" + "dirlisting;18" + ) + + + touch "$seq_times_file" + echo executing DGSH $(date) | tee -a "$seq_times_file" + echo '' >> "$seq_times_file" + + export VOC=/usr/share/dict/words + export IN=$PASH_TOP/evaluation/benchmarks/dgsh/input + export FULL=$IN/dblp.xml + export MINI=$IN/mini.xml + export OUT=$PASH_TOP/evaluation/benchmarks/dgsh/input + export BIN=/usr/local/bin + + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + echo "${padded_script}" $({ time ./${script}.sh > "$outputs_file"; } 2>&1) | tee -a "$seq_times_file" + done + cd .. +} + +posh() { + seq_times_file="seq.res" + seq_outpus_suffix="seq.out" + outputs_dir="outputs" + if [ -e "posh/$seq_times_file" ]; then + echo "skipping posh/$seq_times_file" + return 0 + fi + + cd posh + + cd ./input/ + ./setup.sh -full + cd .. 
+ + mkdir -p "$outputs_dir" + + names_scripts=( + "discat;1" + "convert;2" + "raytracing;3" + # "zannotate;4" where is zannotate binary + ) + + touch "$seq_times_file" + echo executing posh $(date) | tee -a "$seq_times_file" + echo '' >> "$seq_times_file" + + export OUT=$PASH_TOP/evaluation/benchmarks/posh/input/output + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + echo "${padded_script}" $({ time ./${script}.sh > "$outputs_file"; } 2>&1) | tee -a "$seq_times_file" + done + cd .. +} + +dependency_untangling() { + seq_times_file="seq.res" + outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "dependency_untangling/${seq_times_file}" ]; then + echo "skipping dependency_untangling/${seq_times_file}" + return 0 + fi + + cd dependency_untangling/ + + rm -rf input/output + install_deps_source_setup $1 + mkdir -p "$outputs_dir" + + names_scripts=( + "MediaConv1;img_convert" + "MediaConv2;to_mp3" + "Program_Inference;proginf" + "LogAnalysis1;nginx" + "LogAnalysis2;pcap" + "Genomics_Computation;genomics" + "AurPkg;pacaur" + "FileEnc1;compress_files" + "FileEnc2;encrypt_files" + ) + + touch "$seq_times_file" + echo executing dependency_untangling $(date) | tee -a "$seq_times_file" + echo '' >> "$seq_times_file" + source_var + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + echo "${padded_script}" $({ time ./${script}.sh > "$outputs_file"; } 2>&1) | tee -a "$seq_times_file" + done + cd .. 
+} diff --git a/test/failing/run.sh b/test/failing/run.sh new file mode 100755 index 0000000..718a604 --- /dev/null +++ b/test/failing/run.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +# time: print real in seconds, to simplify parsing +TIMEFORMAT="%3R" # %3U %3S" + +if [[ -z "$PASH_TOP" ]]; then + echo "Need to provide PASH_TOP, possibly $(git rev-parse --show-toplevel)" 1>&2 + exit 1 +fi + +eval_dir="$PASH_TOP/evaluation/benchmarks/runtime-overhead/" + +bash_outputs_suffix="bash.out" +par_outputs_suffix="par.out" +outputs_dir="$eval_dir/outputs" +pash_logs_dir="$eval_dir/pash_logs" + +mkdir -p "$outputs_dir" +mkdir -p "$pash_logs_dir" + +times_file="$eval_dir/time.res" + +script_name="for-echo" +script="${script_name}.sh" + +# The number of loop iterations +export N=100 + +printf -v pad %40s + +## Bash +bash_outputs_file="${outputs_dir}/${script_name}.${bash_outputs_suffix}" +config="bash:${pad}" +config=${config:0:40} +echo "${config}" $({ time bash ${script} > "$bash_outputs_file" ; } 2>&1) | tee "$times_file" + +run_pash() +{ + local config="$1" + local PASH_FLAGS="$2" + config_padded="$config:${pad}" + config_padded=${config_padded:0:40} + par_outputs_file="${outputs_dir}/${script_name}.${config}.${par_outputs_suffix}" + pash_log="${pash_logs_dir}/${script_name}.${config}.pash.log" + + ## We don't want -d 1 since it adds overhead! 
+ echo "${config_padded}" $({ time "$PASH_TOP/pa.sh" $PASH_FLAGS --log_file "${pash_log}" ${script} > "$par_outputs_file"; } 2>&1) | tee -a "$times_file" + diff -q "$bash_outputs_file" "$par_outputs_file" +} + +config="PaSh_no_daemon" +PASH_FLAGS="--no_daemon" + +run_pash "$config" "$PASH_FLAGS" + +config="PaSh_daemon_bash_mirror" +PASH_FLAGS="--expand_using_bash_mirror" + +run_pash "$config" "$PASH_FLAGS" + +config="PaSh_daemon" +PASH_FLAGS="" + +run_pash "$config" "$PASH_FLAGS" + +config="PaSh_daemon_fifos" +PASH_FLAGS="--daemon_communicates_through_unix_pipes" + +run_pash "$config" "$PASH_FLAGS" + +config="PaSh_daemon_par_pipelines" +PASH_FLAGS="--parallel_pipelines" + +run_pash "$config" "$PASH_FLAGS" + +config="PaSh_daemon_par_pipelines_fifos" +PASH_FLAGS="--parallel_pipelines --daemon_communicates_through_unix_pipes" + +run_pash "$config" "$PASH_FLAGS" diff --git a/test/failing/run_alias.sh b/test/failing/run_alias.sh new file mode 100644 index 0000000..7e2b037 --- /dev/null +++ b/test/failing/run_alias.sh @@ -0,0 +1,25 @@ +# parses the generated.file, and creates a log of the commands that were executed +# successfully (succ.txt) and the failed ones (err.txt) + +cd $PASH_TOP/evaluation/scripts/input/ +# we could read the file iteratively with IFS, but the environment was affected +IFS=$'\r\n' GLOBIGNORE='*' command eval 'cmd_array=($(cat generated.file))' +lc=$(cat generated.file | wc -l) +for i in $(seq 0 $lc) +do + # get the entry from the array + p=${cmd_array[$i]} + # add a timeout to our script + timeout --signal=SIGINT 50s /bin/bash -e $p >> /dev/null 2>&1 #./cmd.sh #eval "bash ./cmd.sh" + ## get status ## + status=$? + if [ $status -eq 0 ]; then + echo $p >> $PASH_TOP/evaluation/scripts/input/succ.txt + else + echo $p >> $PASH_TOP/evaluation/scripts/input/err.txt + fi + if ! 
((i % 100)); then + echo $i + fi +done +echo "Done" diff --git a/test/failing/run_all.sh b/test/failing/run_all.sh new file mode 100755 index 0000000..d6d26f0 --- /dev/null +++ b/test/failing/run_all.sh @@ -0,0 +1,109 @@ +#!/bin/bash +RES_FOLDER=${PWD}/eval_results/run +# go to benchmark directory +cd ${PASH_TOP}/evaluation/benchmarks +# use the small input for the benchmarks +setup_flags='--small' +if [ "$1" = "--full" ]; then + setup_flags="--full" + echo "Using full input" +elif [ "$1" = "--small" ] || [ "$#" -eq "0" ]; then + echo "Using small input" +fi + +# run all the scripts using bash +run_bash() { + ## This script is necessary to ensure that sourcing happens with bash + source run.seq.sh + bench_len=$((${#PASH_BENCHMARK[@]} -1)) + array_len=$((${#PASH_ALL_FLAGS[@]} -1)) + for i in $(seq 0 $bench_len) + do + export IN= + export IN_PRE= + bench=${PASH_BENCHMARK[$i]} + echo 'Running bash:' ${bench} + bdir=${RES_FOLDER}/bash/${bench} + mkdir -p ${bdir} + # run the benchmark + ${bench} ${setup_flags} + # copy the time file + mv ${bench}/seq.res ${bdir}/ + done +} + +# run all the scripts using different configurations of PaSh JIT/PaSh AOT +run_bench() { + ## This script is necessary to ensure that sourcing happens with bash + source run.par.sh + bench_len=$((${#PASH_BENCHMARK[@]} -1)) + array_len=$((${#PASH_ALL_FLAGS[@]} -1)) + for i in $(seq 0 $bench_len) + do + bench=${PASH_BENCHMARK[$i]} + # remove all the time files + for j in $(seq 0 $array_len) + do + export IN= + export IN_PRE= + export mode=${PASH_MODE[$j]} + export PASH_FLAGS=${PASH_ALL_FLAGS[$j]} + pdir=${RES_FOLDER}/${mode}/${bench} + ${bench}_pash ${setup_flags} + mkdir -p ${pdir} + # move the folder to our dest + rm -rf ${bench}/outputs + # copy the time file + mv ${bench}/par.res ${pdir}/ + done + done +} + +run_all_benchmarks() { + # generate output folder for each run + export RES_FOLDER=$1 + # clean previous runs + rm -rf ${RES_FOLDER} + mkdir -p ${RES_FOLDER} + cd 
${PASH_TOP}/evaluation/benchmarks + # remove all res files from previous runs + find . -type d -name "outputs" 2> /dev/null | xargs rm -rf + # do not remove any input from the node_modules dataset + find . -type d -not -path "*/node_modules/*" -name "output" 2> /dev/null | xargs rm -rf + find . -type d -name "pash_logs" 2> /dev/null | xargs rm -rf + find . -type f -name "*.res" 2> /dev/null | xargs rm -f + # start preparing from execution + export PASH_ALL_FLAGS=(" " + "--r_split --dgsh_tee --r_split_batch_size 1000000 --parallel_pipelines --profile_driven") + export PASH_BENCHMARK=("oneliners" "unix50" "analytics-mts" "nlp" "max-temp" "web-index" "dependency_untangling") + export PASH_MODE=("pash_aot" + "pash_jit") + + echo 'Running all bash benchmarks' + time run_bash + echo 'Running PaSh JIT/PaSh AOT benchmarks' + time run_bench + + ##### Figure 6 + export PASH_ALL_FLAGS=("--r_split --dgsh_tee --r_split_batch_size 1000000" + "--r_split --dgsh_tee --r_split_batch_size 1000000 --parallel_pipelines" ) + export PASH_BENCHMARK=("nlp" "max-temp" "dependency_untangling") + export PASH_MODE=("pash_jit_no_prof_no_du" + "pash_jit_no_prof") + + time run_bench + + ##### Figure 7 + export PASH_ALL_FLAGS=( + #"--dgsh_tee # omitted until it's fixed + "--parallel_pipelines --profile_driven" ) + export PASH_BENCHMARK=("oneliners" "unix50" "analytics-mts" "max-temp" "web-index") + export PASH_MODE=("pash_jit_no_comm") + + time run_bench + + # kill the hanging processes + pkill -f cat +} +# run all the tests and store the results $RES_FOLDER +run_all_benchmarks ${RES_FOLDER} diff --git a/test/failing/run_all_benchmarks_ci.sh b/test/failing/run_all_benchmarks_ci.sh new file mode 100755 index 0000000..99d7f8f --- /dev/null +++ b/test/failing/run_all_benchmarks_ci.sh @@ -0,0 +1,88 @@ +#!/bin/bash +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} +## This script is necessary to ensure that sourcing happens with bash +source run.seq.sh 
+source run.par.sh +# total tests +total=0 +# number of tests that passed +passed=0 +compare_outputs(){ + dir=$1 + outputs=$(ls $dir | grep "seq" | sed 's/.seq.out$//') + for out in $outputs; + do + seq_output="${dir}/${out}.seq.out" + pash_output="${dir}/${out}.par.out" + res=$(diff -q "$seq_output" "$pash_output") + if [[ "${res}" -eq "" ]]; then + passed=$((passed + 1)) + fi + total=$((total + 1)) + done +} + +EXPERIMENTAL=1 +if [ "$EXPERIMENTAL" -eq 1 ]; then + configurations=( + # "" # Commenting this out since the tests take a lot of time to finish + "--r_split" + "--dgsh_tee" + "--r_split --dgsh_tee" + # "--speculation quick_abort" + ) +else + configurations=( + "" + ) +fi + + +n_inputs=( + 2 + 8 + 16 +) +# Array to store all the execution results +EXEC=() +# cleanup +rm -f $1/*.res +# run bash +$1 > /dev/null +# execute the bash script and fetch the script_name, time +b=$(cat $1/seq.res | awk '{if (NR>2) {print $1","$2}}' | sed 's\.sh:\\g') +labels="group,Bash" +for conf in "${configurations[@]}"; do + for n_in in "${n_inputs[@]}"; do + # cleanup all the files generated by pash + trash=$(find /tmp/ -group dkarnikis | grep sg | xargs -n1 rm -f 2> /dev/null) + # on each run, clean all the res files + rm -f $1/par.res + # re-export the new config + export PASH_FLAGS="${conf} -w ${n_in}" + # append the new labels for the plot + labels="${labels},${conf}_${n_in}" + # execute the pash with the new config + $1_pash > /dev/null + res=$(awk '{if (NR>2) {print $2}}' $1/par.res) + # store the results + EXEC+=("${res}") + done +done +# concat all the results and merge them to create the final data for plotting +labels=$(echo $labels | sed 's\--\\g' | sed -e 's/ /_/g') +res="$b" +for i in "${EXEC[@]}" +do + res=$(paste -d'@' <(echo "$res") <(echo "$i")) +done +# write the labels to the file +echo "$labels" > results.time +# write the data formatted +echo -e "$res" | sed 's\@\,\g' >> results.time +# compare the results +compare_outputs "$1/outputs" +# this is going to be 
written on the UI output log +cat results.time +# this is going to be written on the UI output log / CLI output +echo "Summary: ${passed}/${total} tests passed." diff --git a/test/failing/run_parser_on_scripts.sh b/test/failing/run_parser_on_scripts.sh new file mode 100755 index 0000000..42996ee --- /dev/null +++ b/test/failing/run_parser_on_scripts.sh @@ -0,0 +1,10 @@ +#! /bin/bash + +SCRIPTS_DIR="../scripts/" + +for script in "$SCRIPTS_DIR"*.sh +do + echo "Parsing $script..." + output=${script/"scripts"/"scripts/json"}.json + ./parse_to_json.native "$script" > "$output" +done diff --git a/test/failing/safe6.sh b/test/failing/safe6.sh new file mode 100644 index 0000000..6518f90 --- /dev/null +++ b/test/failing/safe6.sh @@ -0,0 +1 @@ +x=5 ; { x=6 ; echo $x; } | { x=7; echo $x; } diff --git a/test/failing/setup-pash.sh b/test/failing/setup-pash.sh new file mode 100755 index 0000000..7194507 --- /dev/null +++ b/test/failing/setup-pash.sh @@ -0,0 +1,152 @@ +#!/usr/bin/env bash + +set -e + +cd "$(dirname "$0")" +# check the git status of the project +if git rev-parse --git-dir > /dev/null 2>&1; then + # we have cloned from the git repo, so all the .git related files/metadata are available + git submodule init + git submodule update + # set PASH_TOP + PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel)} +else + # set PASH_TOP to the root folder of the project if it is not available + PASH_TOP=${PASH_TOP:-$PWD/..} + # remove previous installation if it exists + rm -rf $PASH_TOP/compiler/parser/libdash + # we are in package mode, no .git information is available + git clone https://github.com/angelhof/libdash/ $PASH_TOP/compiler/parser/libdash +fi +cd $PASH_TOP +. "$PASH_TOP/scripts/utils.sh" +read_cmd_args $@ + +LOG_DIR=$PASH_TOP/install_logs +mkdir -p $LOG_DIR +PYTHON_PKG_DIR=$PASH_TOP/python_pkgs +# remove the folder in case it exists +rm -rf $PYTHON_PKG_DIR +# create the new folder +mkdir -p $PYTHON_PKG_DIR + +echo "Building parser..." 
+cd compiler/parser + +if type lsb_release >/dev/null 2>&1 ; then + distro=$(lsb_release -i -s) +elif [ -e /etc/os-release ] ; then + distro=$(awk -F= '$1 == "ID" {print $2}' /etc/os-release) +fi + +echo "|-- making libdash..." +# convert to lowercase +distro=$(printf '%s\n' "$distro" | LC_ALL=C tr '[:upper:]' '[:lower:]') +# save distro in the init file +echo "export distro=$distro" > ~/.pash_init +# now do different things depending on distro +case "$distro" in + freebsd*) + gsed -i 's/ make/ gmake/g' Makefile + gmake libdash &> $LOG_DIR/make_libdash.log + echo "Building runtime..." + # Build runtime tools: eager, split + cd ../../runtime/ + gmake &> $LOG_DIR/make.log + ;; + *) + make libdash &> $LOG_DIR/make_libdash.log + echo "Building runtime..." + # Build runtime tools: eager, split + cd ../../runtime/ + make &> $LOG_DIR/make.log + if [ -f /.dockerenv ]; then + # issue with docker only + python3 -m pip install -U --force-reinstall pip + cp "$PASH_TOP"/pa.sh /usr/bin/ + fi + ;; +esac + +## This was the old parser installation that required opam. +# # Build the parser (requires libtool, m4, automake, opam) +# echo "Building parser..." +# eval $(opam config env) +# cd compiler/parser +# echo "|-- installing opam dependencies..." +# make opam-dependencies &> $LOG_DIR/make_opam_dependencies.log +# echo "|-- making libdash... (requires sudo)" +# ## TODO: How can we get rid of that `sudo make install` in here? +# make libdash &> $LOG_DIR/make_libdash.log +# make libdash-ocaml &>> $LOG_DIR/make_libdash.log +# echo "|-- making parser..." +# make &> $LOG_DIR/make.log +# cd ../../ + +cd ../ + +echo "Installing python dependencies..." 
+ +python3 -m pip install jsonpickle --root $PYTHON_PKG_DIR --ignore-installed #&> $LOG_DIR/pip_install_jsonpickle.log +python3 -m pip install pexpect --root $PYTHON_PKG_DIR --ignore-installed #&> $LOG_DIR/pip_install_pexpect.log +python3 -m pip install graphviz --root $PYTHON_PKG_DIR --ignore-installed #&> $LOG_DIR/pip_install_graphviz.log +python3 -m pip install numpy --root $PYTHON_PKG_DIR --ignore-installed #&> $LOG_DIR/pip_install_numpy.log +python3 -m pip install matplotlib --root $PYTHON_PKG_DIR --ignore-installed #&> $LOG_DIR/pip_install_matplotlib.log + +# clean the python packages +cd $PYTHON_PKG_DIR +# can we find a better alternative to that +pkg_path=$(find . \( -name "site-packages" -or -name "dist-packages" \) -type d) +mv ${pkg_path}/* ${PYTHON_PKG_DIR}/ + +echo "Generating input files..." +$PASH_TOP/evaluation/tests/input/setup.sh + +# export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/" +echo " * * * " +echo "Do not forget to export PASH_TOP before using pash: \`export PASH_TOP=$PASH_TOP\`" +echo '(optionally, you can update PATH to include it: `export PATH=$PATH:$PASH_TOP`)' +echo " * * * " +# in case we are running on docker or CI, installation is complete at this moment +if [[ -f /.dockerenv || -f /.githubenv ]]; then + exit 0 +fi +## append PASH Configuration paths to the respective rc files +rc_configs=(~/.shrc ~/.bashrc ~/.zshrc ~/.cshrc ~/.kshrc) # add more shell configs here +for config in "${rc_configs[@]}" +do + ## if the config exists + ## check if it contains an old entry of Pash + if [ -e "$config" ]; then + # get the shell name + shell_name=$(echo $(basename $config) | sed 's/rc//g' | sed 's/\.//g') + echo "Do you want to append \$PASH_TOP to $shell_name ($config) (y/n)?" 
+ read answer + if [ "$answer" != "${answer#[Yy]}" ] ;then + tmpfile=$(mktemp -u /tmp/tmp.XXXXXX) + # create a backup of the shell config + cp $config ${config}.backup + # remove all the entries pointing to PASH_TOP and PATH + grep -ve "export PASH_TOP" $config > $tmpfile + mv $tmpfile $config + path_ans=0 + # check if PATH contains PASH_TOP reference + # we need to store it in a variable otherwise is messes up with the + # existing environment + var=$(grep -e "export PATH" $config | grep -e '$PASH_TOP') || path_ans=$? + # if the return code is 0 -> there is a reference of $PASH_TOP in + # PATH, remove it + if [ "$path_ans" == 0 ]; then + # remove previous references to PASH_TOP from PATH + grep -v 'export PATH=$PATH:$PASH_TOP' $config > $tmpfile + mv $tmpfile $config + fi + ## there isn't a previous Pash installation, append the configuration + echo "export PASH_TOP="$PASH_TOP >> $config + echo 'export PATH=$PATH:$PASH_TOP' >> $config + fi + fi +done + +# running simple test that everything installed fine +$PASH_TOP/pa.sh -c 'echo PaSh installation complete!' 
diff --git a/test/failing/sieve.sh b/test/failing/sieve.sh new file mode 100755 index 0000000..5781bcf --- /dev/null +++ b/test/failing/sieve.sh @@ -0,0 +1,20 @@ +#!/bin/bash + + +# Doug McIlroy's implementation of Sieve of Eratosthenes + +# A combination of: +# https://swtch.com/~rsc/thread/ +# https://stackoverflow.com/questions/14927895/sieve-of-eratosthenes-unix-script + +OUT=./output/out.txt + +limit=10000 +sieve="$(seq 2 $limit | sort)" + +for n in 2 $(seq 3 2 $limit) +do + sieve="$(comm -23 <(echo "$sieve") <(seq $(($n * $n)) $n $limit|sort))" +done + +echo "$sieve" | sort -n > $OUT diff --git a/test/failing/split-unix50.sh b/test/failing/split-unix50.sh new file mode 100755 index 0000000..a0afe14 --- /dev/null +++ b/test/failing/split-unix50.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +awk -v RS= '{print > (NR ".txt")}' unix50.sh + +for file in *.txt; do + fname=$(basename -- "$file") + fscript="${fname%.*}".sh + echo $fscript + echo '#!/bin/bash' > $fscript + + echo 'export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}' >> $fscript + input=$(grep -o 'IN..' 
$file) + grep "^$(echo $input | xargs)=" unix50.sh >> $fscript + cat $file >> $fscript + echo '' >> $fscript +done + diff --git a/test/failing/split_pipe.sh b/test/failing/split_pipe.sh new file mode 100644 index 0000000..aebef9d --- /dev/null +++ b/test/failing/split_pipe.sh @@ -0,0 +1,11 @@ +BATCH_SIZE=$1 +VIRTUAL_DIR=$2 +OUTPUT1=$3 +OUTPUT2=$4 + +tee >( + head -n "$BATCH_SIZE" > "${VIRTUAL_DIR}/${OUTPUT1}"; + "$PASH_TOP"/evaluation/tools/drain_stream.sh & + cat "${VIRTUAL_DIR}/${OUTPUT1}" > "${OUTPUT1}") | + ( tail -n $((BATCH_SIZE+1)) > "${OUTPUT2}"; + "$PASH_TOP"/evaluation/tools/drain_stream.sh) diff --git a/test/failing/sq.sh b/test/failing/sq.sh new file mode 100755 index 0000000..bce2a72 --- /dev/null +++ b/test/failing/sq.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Clever trick that uses the /dev/fd/xx pseudo-file system +# https://stackoverflow.com/questions/40244/how-to-make-a-pipe-loop-in-bash + +# MMG 2022-06-30 the `function` kw is a bash-ism; leaving it in to not disrupt what gets optimized in previous evaluations +function calc() { + # calculate sum of squares of numbers 0,..,10 + + sum=0 + for ((i=0; i<10; i++)); do + echo $i # "request" the square of i + + read ii # read the square of i + echo "got $ii" >&2 # debug message + + let sum=$sum+$ii + done + + echo "sum $sum" >&2 # output result to stderr +} + +function square() { + # square numbers + + read j # receive first "request" + while [ "$j" != "" ]; do + let jj=$j*$j + echo "square($j) = $jj" >&2 # debug message + + echo $jj # send square + + read j # receive next "request" + done +} + +read | { calc | square; } >/dev/fd/0 diff --git a/test/failing/statistics.sh b/test/failing/statistics.sh new file mode 100755 index 0000000..4bf6faf --- /dev/null +++ b/test/failing/statistics.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# This classification is wrt to their operation, not its input---i.e., whether +# input contains the use of fs identifiers (identifiers can be fs or not fs) + +# We need to think about how to 
translate DFS commands +# What is a distributed fs? Directories are simply keys? + +# everything else (i.e., side-effectful) just needs to be converted to location independent commands + +p="../c_stats/" +A=${1:-${p}posix.txt} +B=${2:-${p}coreutils.txt} + +# Take commands that are shared and use existing distributability descriptions +comm -12 <(cat $A | grep 'Mandatory' | cut -d ' ' -f 1 | sort ) <( cut -d ' ' -f 1 $B | sort) | + sed s/^/\^/ | + xargs -n 1 -I {} grep -w {} ./coreutils.txt | + sort -b -k2,2 -k1,1 # > posix_mandatory1.txt # commenting out this redirection will overwrite! + +# Analyze mandatory commands not in the second, and not built-ins +comm -23 <(cat $A | grep 'Mandatory' | cut -d ' ' -f 1 | sort ) <( cut -d ' ' -f 1 $B | sort) | + comm -23 - <(cat ../c_stats/builtins.txt | sed 's/ */ /g' | cut -d ' ' -f 1 | sort) | + sed s/^/\^/ | + xargs -n 1 -I {} grep -w {} $A | + sed s/Mandatory// | + sort -b -k2,2 -k1,1 # > posix_mandatory2.txt # commenting out this redirection will overwrite! diff --git a/test/failing/superoptimize.sh b/test/failing/superoptimize.sh new file mode 100755 index 0000000..dc31c91 --- /dev/null +++ b/test/failing/superoptimize.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +echo "Superotmizer run!" 
+ diff --git a/test/failing/test-bsd.sh b/test/failing/test-bsd.sh new file mode 100755 index 0000000..6ed04a9 --- /dev/null +++ b/test/failing/test-bsd.sh @@ -0,0 +1,53 @@ +./test-common.sh grep "'\.'" ../bin/grep +./test-common.sh grep "'[A-Z]'" ../bin/grep +./test-common.sh grep "'x'" ../bin/grep +./test-common.sh grep "'Bell'" ../bin/grep +./test-common.sh grep "-c '^[A-Z]'" ../bin/grep +./test-common.sh grep "-c '^....$'" ../bin/grep +./test-common.sh grep "gz" ../bin/grep +./test-common.sh grep "1969" ../bin/grep +./test-common.sh grep "-vi '[aeiou]'" ../bin/grep +./test-common.sh grep "-vc 'light.\*light.\*light'" ../bin/grep +./test-common.sh grep "-v '^0$'" ../bin/grep +./test-common.sh grep "-v '[KQRBN]'" ../bin/grep +./test-common.sh grep "-i '^[^aeiou]*[aeiou][^aeiou]*[aeiou][^aeiou]$'" ../bin/grep +./test-common.sh grep "-i '^[^aeiou]*[aeiou][^aeiou]*$'" ../bin/grep +./test-common.sh grep "-c 'light.\*light.\*light'" ../bin/grep +./test-common.sh grep "-c 'light.\*light'" ../bin/grep +./test-common.sh grep "'print'" ../bin/grep +./test-common.sh grep "'light.\*light'" ../bin/grep +./test-common.sh grep "'\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4'" ../bin/grep +./test-common.sh grep "'[KQRBN]'" ../bin/grep +./test-common.sh grep "'UNIX'" ../bin/grep +./test-common.sh grep "'AT&T'" ../bin/grep + +./test-common.sh tr "'[a-z]' '\n'" ../bin/tr +./test-common.sh tr "A-Z a-z" ../bin/tr +./test-common.sh tr "-cud A-Z" ../bin/tr +./test-common.sh tr "-c '[A-Z]' '\n'" ../bin/tr +./test-common.sh tr "-d '\n'" ../bin/tr + +./test-common.sh wc "" ../bin/wc +./test-common.sh wc "-l" ../bin/wc +./test-common.sh wc "-w" ../bin/wc +./test-common.sh wc "-c" ../bin/wc +./test-common.sh wc "-m" ../bin/wc +./test-common.sh wc "-L" ../bin/wc +./test-common.sh wc "-lcm" ../bin/wc +./test-common.sh wc "-mlw" ../bin/wc +./test-common.sh wc "-mLc" ../bin/wc +./test-common.sh wc "-L -mc -w" ../bin/wc + +./test-common.sh uniq "" ../bin/uniq +./test-common.sh uniq "-c" ../bin/uniq 
+./test-common.sh uniq "--count" ../bin/uniq + +# These tests are run during PASH_TOP/scripts/run_tests.sh +# Make sure to build the aggregators using PASH_TOP/scripts/setup-pash.sh first +# +# More tests can be added like this: +# ./test-common.sh cmd args agg +# where +# cmd - is a shell command like uniq +# args - are arguements like -c +# agg - is an aggregator like ./uniq-c diff --git a/test/failing/test-exclam.sh b/test/failing/test-exclam.sh new file mode 100755 index 0000000..8fb0eee --- /dev/null +++ b/test/failing/test-exclam.sh @@ -0,0 +1,3 @@ +#!/bin/sh +echo "!" + diff --git a/test/failing/test-linux.sh b/test/failing/test-linux.sh new file mode 100755 index 0000000..f7bd59b --- /dev/null +++ b/test/failing/test-linux.sh @@ -0,0 +1,57 @@ +./test-common.sh grep "'\.'" ../bin/grep +./test-common.sh grep "'[A-Z]'" ../bin/grep +./test-common.sh grep "'x'" ../bin/grep +./test-common.sh grep "'Bell'" ../bin/grep +./test-common.sh grep "-c '^[A-Z]'" ../bin/grep +./test-common.sh grep "-c '^....$'" ../bin/grep +./test-common.sh grep "gz" ../bin/grep +./test-common.sh grep "1969" ../bin/grep +./test-common.sh grep "-vi '[aeiou]'" ../bin/grep +./test-common.sh grep "-vc 'light.\*light.\*light'" ../bin/grep +./test-common.sh grep "-v '^0$'" ../bin/grep +./test-common.sh grep "-v '[KQRBN]'" ../bin/grep +./test-common.sh grep "-i '^[^aeiou]*[aeiou][^aeiou]*[aeiou][^aeiou]$'" ../bin/grep +./test-common.sh grep "-i '^[^aeiou]*[aeiou][^aeiou]*$'" ../bin/grep +./test-common.sh grep "-c 'light.\*light.\*light'" ../bin/grep +./test-common.sh grep "-c 'light.\*light'" ../bin/grep +./test-common.sh grep "'print'" ../bin/grep +./test-common.sh grep "'light.\*light'" ../bin/grep +./test-common.sh grep "'\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4'" ../bin/grep +./test-common.sh grep "'[KQRBN]'" ../bin/grep +./test-common.sh grep "'UNIX'" ../bin/grep +./test-common.sh grep "'AT&T'" ../bin/grep + +./test-common.sh tr "'[a-z]' '\n'" ../bin/tr +./test-common.sh tr "A-Z a-z" ../bin/tr 
+./test-common.sh tr "-c '[A-Z]' '\n'" ../bin/tr +./test-common.sh tr "--complement '[1-9]\n*' '[a-z][A-Z]" ../bin/tr +./test-common.sh tr "--complement -t '[1-9]\n*' '[a-z][A-Z]" ../bin/tr +./test-common.sh tr "-d '\n'" ../bin/tr +./test-common.sh tr "-tcd '[1-9][a-z][A-Z]\n'" ../bin/tr + +./test-common.sh wc "" ../bin/wc +./test-common.sh wc "-l" ../bin/wc +./test-common.sh wc "-w" ../bin/wc +./test-common.sh wc "-c" ../bin/wc +./test-common.sh wc "-m" ../bin/wc +./test-common.sh wc "-L" ../bin/wc +./test-common.sh wc "-lcm" ../bin/wc +./test-common.sh wc "-mlw" ../bin/wc +./test-common.sh wc "-mLc" ../bin/wc +./test-common.sh wc "-L -mc -w" ../bin/wc +./test-common.sh wc "--bytes -c --chars -L" ../bin/wc +./test-common.sh wc "-L --lines --words" ../bin/wc + +./test-common.sh uniq "" ../bin/uniq +./test-common.sh uniq "-c" ../bin/uniq +./test-common.sh uniq "--count" ../bin/uniq + +# These tests are run during PASH_TOP/scripts/run_tests.sh +# Make sure to build the aggregators using PASH_TOP/scripts/setup-pash.sh first +# +# More tests can be added like this: +# ./test-common.sh cmd args agg +# where +# cmd - is a shell command like uniq +# args - are arguements like -c +# agg - is an aggregator like ./uniq-c diff --git a/test/failing/test-shlex-aux.sh b/test/failing/test-shlex-aux.sh new file mode 100644 index 0000000..50857ed --- /dev/null +++ b/test/failing/test-shlex-aux.sh @@ -0,0 +1,9 @@ +comment_fun() +{ + cat > /dev/null #Consume data from pipe so writers don't get SIGPIPE +} + +bad_quote_fun() +{ + echo ${asf"asd} +} diff --git a/test/failing/test_JSON_to_shell2.sh b/test/failing/test_JSON_to_shell2.sh new file mode 100644 index 0000000..45c99f9 --- /dev/null +++ b/test/failing/test_JSON_to_shell2.sh @@ -0,0 +1,62 @@ +#!/bin/sh + + +SHELL_TO_JSON_OCAML=/pash/compiler/parser/parse_to_json.native +JSON_TO_SHELL_OCAML=/pash/compiler/parser/json_to_shell.native +JSON_TO_SHELL_C=./json_to_shell2 + + +if [ $# -ne 1 ] +then + echo "Usage: $0 testFile" + echo + 
exit 1 +fi + + +testFile="$1" + + +if [ ! -f "$testFile" ] +then + echo "Error: cannot read '$testFile'!" + echo + exit 1 +fi + + +"$SHELL_TO_JSON_OCAML" < "$testFile" > /tmp/json.$$ +if [ $? -ne 0 ] +then + echo "INVALID_INPUT_1: '$testFile' | Unable to run '$SHELL_TO_JSON_OCAML' on '$testFile'" + exit 1 +fi + +"$JSON_TO_SHELL_OCAML" < /tmp/json.$$ > /tmp/rt_ocaml.$$ +if [ $? -ne 0 ] +then + echo "INVALID_INPUT_2: '$testFile' | Unable to run '$JSON_TO_SHELL_OCAML' on '/tmp/json.$$'" + exit 1 +fi + +"$JSON_TO_SHELL_C" < /tmp/json.$$ > /tmp/rt_c.$$ +if [ $? -ne 0 ] +then + echo "ABORT: '$testFile' | Unable to run '$JSON_TO_SHELL_C' on '/tmp/json.$$'" + exit 1 +fi + +diff /tmp/rt_ocaml.$$ /tmp/rt_c.$$ +if [ $? -ne 0 ] +then + diff -w /tmp/rt_ocaml.$$ /tmp/rt_c.$$ + if [ $? -ne 0 ] + then + echo "FAIL: '$testFile' | /tmp/json.$$ /tmp/rt_ocaml.$$ /tmp/rt_c.$$" + else + echo "FAIL_WHITESPACE: '$testFile' | /tmp/json.$$ /tmp/rt_ocaml.$$ /tmp/rt_c.$$" + fi + exit 1 +fi + +echo "PASS: '$testFile' | /tmp/json.$$ /tmp/rt_ocaml.$$ /tmp/rt_c.$$" diff --git a/test/failing/test_ast2shell_py.sh b/test/failing/test_ast2shell_py.sh new file mode 100644 index 0000000..a765aec --- /dev/null +++ b/test/failing/test_ast2shell_py.sh @@ -0,0 +1,66 @@ +#!/bin/sh + + +SHELL_TO_JSON_OCAML=/pash/compiler/parser/parse_to_json.native +JSON_TO_SHELL_OCAML=/pash/compiler/parser/json_to_shell.native + +RT_PY="rt.py" + + +if [ $# -ne 1 ] +then + echo "Usage: $0 testFile" + echo + exit 1 +fi + + +testFile="$1" + + +if [ ! -f "$testFile" ] +then + echo "Error: cannot read '$testFile'!" + echo + exit 1 +fi + + +"$SHELL_TO_JSON_OCAML" < "$testFile" > /tmp/json_ocaml.$$ +if [ $? -ne 0 ] +then + echo "REF_ABORT_1: '$testFile'" + exit 1 +fi + +"$JSON_TO_SHELL_OCAML" < /tmp/json_ocaml.$$ > /tmp/rt_ocaml.$$ +if [ $? -ne 0 ] +then + echo "REF_ABORT_2: '$testFile' | /tmp/json_ocaml.$$" + exit 1 +fi + +# python3 "$RT_PY" < "$testFile" > /tmp/rt_py.$$ +python3 "$RT_PY" "$testFile" > /tmp/rt_py.$$ +if [ $? 
-ne 0 ] +then + echo "ABORT: '$testFile'" + exit 1 +fi + +diff /tmp/rt_ocaml.$$ /tmp/rt_py.$$ > /dev/null +if [ $? -ne 0 ] +then + diff -w /tmp/rt_ocaml.$$ /tmp/rt_py.$$ > /dev/null + if [ $? -ne 0 ] + then + diff -w /tmp/rt_ocaml.$$ /tmp/rt_py.$$ + echo "FAIL: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_py.$$" + else + diff /tmp/rt_ocaml.$$ /tmp/rt_py.$$ + echo "FAIL_WHITESPACE: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_py.$$" + fi + exit 1 +fi + +echo "PASS: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_py.$$" diff --git a/test/failing/test_evaluation_scripts.sh b/test/failing/test_evaluation_scripts.sh new file mode 100755 index 0000000..18fa408 --- /dev/null +++ b/test/failing/test_evaluation_scripts.sh @@ -0,0 +1,233 @@ +#!/bin/bash +# time: print real in seconds, to simplify parsing +## Necessary to set PASH_TOP +cd $(dirname $0) +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} +export DEBUG=0 +export PASH_LOG=1 +# export DEBUG=1 # Uncomment to print pash output +## Determines whether the experimental pash flags will be tested. +## By default they are not. +export EXPERIMENTAL=0 +for item in $@ +do + if [ "--debug" == "$item" ] || [ "-d" == "$item" ]; then + export DEBUG=1 + fi + if [ "--no-pash-log" == "$item" ]; then + export PASH_LOG=0 + fi + if [ "--experimental" == "$item" ]; then + export EXPERIMENTAL=1 + fi +done + +microbenchmarks_dir="${PASH_TOP}/evaluation/tests" +intermediary_dir="${PASH_TOP}/evaluation/tests/test_intermediary" +test_results_dir="${PASH_TOP}/evaluation/tests/results" +results_time="$test_results_dir/results.time" +results_time_bash=${results_time}_bash +results_time_pash=${results_time}_pash + +echo "Deleting eager intermediate files..." +rm -rf "$test_results_dir" +rm -rf "$intermediary_dir" +mkdir -p $intermediary_dir +mkdir -p "$test_results_dir" + +echo "Generating inputs..." 
+cd "$microbenchmarks_dir/input" +./setup.sh +cd - + +n_inputs=( + 2 + 8 +) + +if [ "$EXPERIMENTAL" -eq 1 ]; then + configurations=( + # "" # Commenting this out since the tests take a lot of time to finish + "--r_split" + "--dgsh_tee" + # "--r_split --dgsh_tee" + # "--speculation quick_abort" + "--parallel_pipelines" + ) +else + configurations=( + "--r_split --dgsh_tee --parallel_pipelines --profile_driven" + ) +fi + + +## Tests where the compiler will not always succeed (e.g. because they have mkfifo) +script_microbenchmarks=( + diff # (quick-abort) BUG: Might have to do with the named pipes, and the fact that they are reused for parallel and sequential script. + set-diff # TODO: Handle redirection after reduce + export_var_script # Tests whether exported variables in the scripts that are processed by PaSh runtime are visible to the rest of the script. + comm-par-test # Small comm test to ensure non-parallelizability + comm-par-test2 # Small comm test with input redirection and hyphen + tee_web_index_bug # Tests a tee bug from web index + fun-def # Tests whether PaSh can handle a simple function definition + bigrams # One-liner + spell-grep # Spell variant with `grep -f` instead of `comm` +) + +pipeline_microbenchmarks=( + grep # One-liner + minimal_sort # One-liner + minimal_grep # One-liner + topn # One-liner + wf # One-liner + spell # One-liner + shortest_scripts # One-liner + alt_bigrams # One-liner + deadlock_test # Test to check deadlock prevention using drain_stream + double_sort # Checks maximum peformance gains from split + no_in_script # Tests whether a script can be executed by our infrastructure without having its input in a file called $IN + for_loop_simple # Tests whether PaSh can handle a for loop where the body is parallelizable + minimal_grep_stdin # Tests whether PaSh can handle a script that reads from stdin + micro_10 # A small version of the pipeline above for debugging. 
+ sed-test # Tests all sed occurences in our evaluation to make sure that they work + tr-test # Tests all possible behaviors of tr that exist in our evaluation + grep-test # Tests some interesting grep invocations + ann-agg # Tests custom aggregators in annotations + # # # # micro_1000 # Not being run anymore, as it is very slow. Tests whether the compiler is fast enough. It is a huge pipeline without any computation. +) + + + +execute_pash_and_check_diff() { + TIMEFORMAT="%3R" # %3U %3S" + if [ "$DEBUG" -eq 1 ]; then + { time "$PASH_TOP/pa.sh" $@ ; } 1> "$pash_output" 2> >(tee -a "${pash_time}" >&2) && + diff -s "$seq_output" "$pash_output" | head | tee -a "${pash_time}" >&2 + else + + { time "$PASH_TOP/pa.sh" $@ ; } 1> "$pash_output" 2>> "${pash_time}" && + b=$(cat "$pash_time"); + test_diff_ec=$(cmp -s "$seq_output" "$pash_output" && echo 0 || echo 1) + # differ + script=$(basename $script_to_execute) + if [ $test_diff_ec -ne 0 ]; then + c=$(diff -s "$seq_output" "$pash_output" | head) + echo "$c$b" > "${pash_time}" + echo "$script are not identical" >> $test_results_dir/result_status + else + echo "Files $seq_output and $pash_output are identical" > "${pash_time}" + echo "$script are identical" >> $test_results_dir/result_status + fi + + fi +} + +execute_tests() { + assert_correctness="$1" + microbenchmarks=("${@:2}") + + microbenchmark_configs=( ) + for i in "${!microbenchmarks[@]}"; do + all_flags=${test_flags[@]} + microbenchmark_configs[$i]="${microbenchmarks[$i]};${all_flags// /;}" + done + + ## This is almost the same loop as the one in execute_evaluation_scripts + for microbenchmark_config in "${microbenchmark_configs[@]}"; do + IFS=";" read -r -a flags <<< "${microbenchmark_config}" + microbenchmark=${flags[0]} + echo "Executing test: $microbenchmark" + # Execute the sequential script on the first run only + + prefix="${microbenchmarks_dir}/${microbenchmark}" + + export seq_output="${intermediary_dir}/${microbenchmark}_seq_output" + 
seq_time="$test_results_dir/${microbenchmark}_seq.time" + + export script_to_execute="${prefix}.sh" + env_file="${prefix}_env_test.sh" + funs_file="${prefix}_funs.sh" + input_file="${prefix}_test.in" + + if [ -f "$env_file" ]; then + . $env_file + vars_to_export=$(cut -d= -f1 $env_file) + if [ ! -z "$vars_to_export" ]; then + export $vars_to_export + fi + else + echo "|-- Does not have env file" + fi + + ## Export necessary functions + if [ -f "$funs_file" ]; then + source $funs_file + fi + + ## Redirect the input if there is an input file + stdin_redir="/dev/null" + if [ -f "$input_file" ]; then + stdin_redir="$(cat "$input_file")" + echo "|-- Has input file: $stdin_redir" + fi + + TIMEFORMAT="${microbenchmark%%.*}:%3R" # %3U %3S" + echo -n "|-- Executing the script with bash..." + { time /bin/bash "$script_to_execute" > $seq_output ; } \ + < "$stdin_redir" 2>> "${seq_time}" + echo " exited with $?" + tail -n1 ${seq_time} >> ${results_time_bash} + for conf in "${configurations[@]}"; do + for n_in in "${n_inputs[@]}"; do + echo "|-- Executing with pash --width ${n_in} ${conf}..." + export pash_time="${test_results_dir}/${microbenchmark}_${n_in}_distr_$(echo ${conf} | tr -d ' ').time" + export pash_output="${intermediary_dir}/${microbenchmark}_${n_in}_pash_output" + export script_conf=${microbenchmark}_${n_in} + echo '' > "${pash_time}" + # do we need to write the PaSh output ? 
+ cat $stdin_redir | + execute_pash_and_check_diff -d $PASH_LOG $assert_correctness ${conf} --width "${n_in}" --output_time $script_to_execute + tail -n1 "${pash_time}" >> "${results_time_pash}_${n_in}" + done + done + done +} + +execute_tests "" "${script_microbenchmarks[@]}" +execute_tests "--assert_compiler_success" "${pipeline_microbenchmarks[@]}" + +#cat ${results_time} | sed 's/,/./' > /tmp/a +#cat /tmp/a | sed 's/@/,/' > ${results_time} + + +if type lsb_release >/dev/null 2>&1 ; then + distro=$(lsb_release -i -s) +elif [ -e /etc/os-release ] ; then + distro=$(awk -F= '$1 == "ID" {print $2}' /etc/os-release) +fi + +distro=$(printf '%s\n' "$distro" | LC_ALL=C tr '[:upper:]' '[:lower:]') +# now do different things depending on distro +case "$distro" in + freebsd*) + # change sed to gsed + sed () { + gsed $@ + } + ;; + *) + ;; +esac + +echo "group,Bash,Pash2,Pash8" > ${results_time} +paste -d'@' $test_results_dir/results.time_* | sed 's\,\.\g' | sed 's\:\,\g' | sed 's\@\,\g' >> ${results_time} + +#echo "Below follow the identical outputs:" +#grep "are identical" "$test_results_dir"/result_status | awk '{print $1}' + +echo "Below follow the non-identical outputs:" +grep "are not identical" "$test_results_dir"/result_status | awk '{print $1}' + +TOTAL_TESTS=$(cat "$test_results_dir"/result_status | wc -l) +PASSED_TESTS=$(grep -c "are identical" "$test_results_dir"/result_status) +echo "Summary: ${PASSED_TESTS}/${TOTAL_TESTS} tests passed." diff --git a/test/failing/test_parse_to_JSON2.sh b/test/failing/test_parse_to_JSON2.sh new file mode 100644 index 0000000..5edc87f --- /dev/null +++ b/test/failing/test_parse_to_JSON2.sh @@ -0,0 +1,75 @@ +#!/bin/sh + + +SHELL_TO_JSON_OCAML=../parse_to_json.native + +PRETTYPRINT_JSON=./prettyprint_json + +SHELL_TO_JSON_C=./parse_to_json2 + + +if [ $# -ne 1 ] +then + echo "Usage: $0 testFile" + echo + exit 1 +fi + + +testFile="$1" + + +if [ ! -f "$testFile" ] +then + echo "Error: cannot read '$testFile'!" 
+ echo + exit 1 +fi + + +json_ocaml="/tmp/json_ocaml.$$" +json_ocaml_pretty="/tmp/json_ocaml_pretty.$$" +json_c="/tmp/json_c.$$" + + +"$SHELL_TO_JSON_OCAML" < "$testFile" > "${json_ocaml}" +if [ $? -ne 0 ] +then + echo "INVALID_INPUT: '$testFile' | Unable to run '$SHELL_TO_JSON_OCAML' on '$testFile'" + exit 1 +fi + +"$SHELL_TO_JSON_C" < "$testFile" > "${json_c}" +if [ $? -ne 0 ] +then + echo "ABORT: '$testFile' | Unable to run '$SHELL_TO_JSON_C' on '$testFile'" + exit 1 +fi + + +diff "${json_ocaml}" "${json_c}" > /dev/null +if [ $? -ne 0 ] +then + for f in "${json_ocaml}" "${json_c}" + do + "$PRETTYPRINT_JSON" < "${f}" > "${f}.pretty" + if [ $? -ne 0 ] + then + echo "PRETTYPRINT_FAIL: '$testFile' | Unable to run '$PRETTYPRINT_JSON' on '${f}'" + exit 1 + fi + done + + diff -w "${json_ocaml}.pretty" "${json_c}.pretty" > /dev/null + if [ $? -ne 0 ] + then + diff -w "${json_ocaml}.pretty" "${json_c}.pretty" + echo "FAIL: '$testFile' | ${json_ocaml} ${json_c} ${json_ocaml}.pretty ${json_c}.pretty" + else + diff "${json_ocaml}" "${json_c}" + echo "FAIL_WHITESPACE: '$testFile' | ${json_ocaml} ${json_c} ${json_ocaml}.pretty ${json_c}.pretty" + fi + exit 1 +fi + +echo "PASS: '$testFile' | ${json_ocaml} ${json_c} ${json_ocaml}.pretty ${json_c}.pretty" diff --git a/test/failing/test_rt.sh b/test/failing/test_rt.sh new file mode 100644 index 0000000..81f67f3 --- /dev/null +++ b/test/failing/test_rt.sh @@ -0,0 +1,71 @@ +#!/bin/sh + + +SHELL_TO_JSON_OCAML=../parse_to_json.native +JSON_TO_SHELL_OCAML=../json_to_shell.native + +SHELL_TO_JSON_C=./parse_to_json2 +JSON_TO_SHELL_C=./json_to_shell2 + + +if [ $# -ne 1 ] +then + echo "Usage: $0 testFile" + echo + exit 1 +fi + + +testFile="$1" + + +if [ ! -f "$testFile" ] +then + echo "Error: cannot read '$testFile'!" + echo + exit 1 +fi + + +"$SHELL_TO_JSON_OCAML" < "$testFile" > /tmp/json_ocaml.$$ +if [ $? 
-ne 0 ] +then + echo "REF_ABORT_1: '$testFile'" + exit 1 +fi + +"$JSON_TO_SHELL_OCAML" < /tmp/json_ocaml.$$ > /tmp/rt_ocaml.$$ +if [ $? -ne 0 ] +then + echo "REF_ABORT_2: '$testFile' | /tmp/json_ocaml.$$" + exit 1 +fi + +"$SHELL_TO_JSON_C" < "$testFile" > /tmp/json_c.$$ +if [ $? -ne 0 ] +then + echo "ABORT_1: '$testFile'" + exit 1 +fi + +"$JSON_TO_SHELL_C" < /tmp/json_c.$$ > /tmp/rt_c.$$ +if [ $? -ne 0 ] +then + echo "ABORT_2: '$testFile' | /tmp/json_c.$$" + exit 1 +fi + +diff /tmp/rt_ocaml.$$ /tmp/rt_c.$$ +if [ $? -ne 0 ] +then + diff -w /tmp/rt_ocaml.$$ /tmp/rt_c.$$ + if [ $? -ne 0 ] + then + echo "FAIL: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_c.$$" + else + echo "FAIL_WHITESPACE: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_c.$$" + fi + exit 1 +fi + +echo "PASS: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_c.$$" diff --git a/test/failing/test_rt_py.sh b/test/failing/test_rt_py.sh new file mode 100644 index 0000000..a95e0b3 --- /dev/null +++ b/test/failing/test_rt_py.sh @@ -0,0 +1,65 @@ +#!/bin/sh + + +SHELL_TO_JSON_OCAML=../parse_to_json.native +JSON_TO_SHELL_OCAML=../json_to_shell.native + +RT_PYTHON=./ceda_rt.py + + +if [ $# -ne 1 ] +then + echo "Usage: $0 testFile" + echo + exit 1 +fi + + +testFile="$1" + + +if [ ! -f "$testFile" ] +then + echo "Error: cannot read '$testFile'!" + echo + exit 1 +fi + + +"$SHELL_TO_JSON_OCAML" < "$testFile" > /tmp/json_ocaml.$$ +if [ $? -ne 0 ] +then + echo "REF_ABORT_1: '$testFile'" + exit 1 +fi + +"$JSON_TO_SHELL_OCAML" < /tmp/json_ocaml.$$ > /tmp/rt_ocaml.$$ +if [ $? -ne 0 ] +then + echo "REF_ABORT_2: '$testFile' | /tmp/json_ocaml.$$" + exit 1 +fi + +python3 "$RT_PYTHON" < "$testFile" > /tmp/rt_python.$$ +if [ $? -ne 0 ] +then + echo "ABORT_1: '$testFile'" + exit 1 +fi + +diff /tmp/rt_ocaml.$$ /tmp/rt_python.$$ > /dev/null +if [ $? -ne 0 ] +then + diff -w /tmp/rt_ocaml.$$ /tmp/rt_python.$$ > /dev/null + if [ $? 
-ne 0 ] + then + diff -w /tmp/rt_ocaml.$$ /tmp/rt_python.$$ + echo "FAIL: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_python.$$" + else + diff /tmp/rt_ocaml.$$ /tmp/rt_python.$$ + echo "FAIL_WHITESPACE: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_python.$$" + fi + exit 1 +fi + +echo "PASS: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_python.$$" diff --git a/test/failing/timing-JSON.sh b/test/failing/timing-JSON.sh new file mode 100644 index 0000000..00485e3 --- /dev/null +++ b/test/failing/timing-JSON.sh @@ -0,0 +1,27 @@ +#!/bin/sh + + +input_script='/pash/compiler/parser/libdash/ltmain.sh' + + +if [ $# -eq 1 ] +then + input_script="$1" +fi + + +echo "Input script: $input_script" +echo + +echo "OCaml:" +time (../parse_to_json.native "$input_script" | tee /tmp/json.$$ | md5sum) +echo + +echo "C:" +time (./parse_to_json2 "$input_script" | tee /tmp/json.$$ | md5sum) +echo + +echo "Python (ROUND-TRIP):" +time (python3 ceda_rt.py "$input_script" | md5sum) +echo + diff --git a/test/failing/timing.sh b/test/failing/timing.sh new file mode 100644 index 0000000..1d79364 --- /dev/null +++ b/test/failing/timing.sh @@ -0,0 +1,31 @@ +#!/bin/sh + + +input_script='/pash/compiler/parser/libdash/ltmain.sh' + + +if [ $# -eq 1 ] +then + input_script="$1" +fi + + +echo "Input script: $input_script" +echo + +echo "OCaml (dash C AST -> libdash OCaml AST -> JSON -> Pash Python AST -> JSON -> shell:" +time (../parse_to_json.native "$input_script" > /tmp/json.$$; cat /tmp/json.$$ | ../json_to_shell.native | md5sum) +echo + +echo "C (dash C AST -> libdash C AST -> JSON -> Pash Python AST -> JSON -> shell):" +time (./parse_to_json2 "$input_script" > /tmp/json.$$ 2>/dev/null; cat /tmp/json.$$ | ./json_to_shell2 | md5sum) +echo + +echo "Python (dash C AST -> libdash C AST -> JSON -> Pash Python AST -> JSON -> shell):" +time (python3 ./parse_to_json2.py "$input_script" > /tmp/json.$$ 2>/dev/null; cat /tmp/json.$$ | python3 ./json_to_shell2.py | md5sum) +echo + +echo "Python (dash C AST -> Pash Python AST -> 
shell):" +time (python3 ceda_rt.py "$input_script" | md5sum) +echo + diff --git a/test/failing/unzip-1.sh b/test/failing/unzip-1.sh new file mode 100644 index 0000000..df6ae8f --- /dev/null +++ b/test/failing/unzip-1.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# https://gist.github.com/noamross/86fba413e0769069e3955d1c9bc530ae +funzip $1| # uncompress first file in zip +tr -d '\000' | #remove null characters +sed "/^\s*$/d; s/ \{1,\}\t/\t/g; s/\t \{1,\}/\t/g; s/\r//" | #removes empty lines, whitespace around tabs, extra newlines +cut -s -f 1,3,4,5,6,8,12,13,14,15,16,17,18,19,20,21,23,24,25,26,34,35,36,38,40,42,44,45,46,85,86,87,88,89 #| #only select certain columns +pv -N Process -c | +gzip -9 | +pv -N Compress -c > $1.gz diff --git a/test/failing/up.sh b/test/failing/up.sh new file mode 100755 index 0000000..b817491 --- /dev/null +++ b/test/failing/up.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env sh + +# clone and setup pash +# N.b. This is a .sh script + +set -e + +# will install dependencies locally. +PLATFORM=$(uname | tr '[:upper:]' '[:lower:]') +URL='https://github.com/binpash/pash/archive/refs/heads/main.zip' +VERSION='latest' +DL=$(command -v curl >/dev/null 2>&1 && echo curl || echo 'wget -qO-') + +cmd_exists () { + command -v $1 >/dev/null 2>&1 && echo 'true' || echo 'false'; +} + +if [ "$PLATFORM" = "darwin" ]; then + echo 'PaSh is not yet well supported on OS X' + exit 1 +fi + +set +e +git clone git@github.com:binpash/pash.git +if [ $? 
-ne 0 ]; then + echo 'SSH clone failed; attempting HTTPS' + git clone https://github.com/andromeda/pash.git +fi +set -e + +cd pash/scripts +# git checkout s3 # FIXME only for testing while PR is up + +if [ $(groups $(whoami) | grep -c "sudo\|root\|admin") -ge 1 ]; then + # only run this if we are in the sudo group (or it's doomed to fail) + bash distro-deps.sh +fi +bash setup-pash.sh diff --git a/test/failing/utils.sh b/test/failing/utils.sh new file mode 100755 index 0000000..4c5974b --- /dev/null +++ b/test/failing/utils.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash + +# #Check that we are in the appropriate directory where setup.sh is +# #https://stackoverflow.com/a/246128 +# DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +# echo "changing to $DIR to run setup.sh" +# cd $DIR + +# another solution for capturing HTTP status code +# https://superuser.com/a/590170 + +eexit(){ + echo $1 'please email pash-devs@googlegroups.com' + exit 1 +} + +nargs(){ + echo $# $1 $2 +} + +rm-files(){ + echo "${@}" + rm -r "${@}" + exit 0 +} + +append_nl_if_not(){ + ## Adds a newline at the end of a file if it doesn't already end in a newline. + ## Used to prepare inputs for PaSh. + if [ -z "$1" ]; then + echo "No file argument given!" + exit 1 + else + if [ ! -f "$1" ]; then + echo "File $1 doesn't exist!" + exit 1 + else + tail -c 1 "$1" | od -ta | grep -q nl + if [ $? -eq 1 ]; then + echo >> "$1" + fi + fi + fi +} + +install_deps_source_setup() { + # move to the input directory + cd input/ + # check if there are dependencies + if [ -e install-deps.sh ]; then + echo "Installing dependencies" + bash install-deps.sh + fi + # source the setup file + # it contains the fetch dataset function + # and the export variable function for IN, IN_PRE + source setup.sh + # fetch the dataset + setup_dataset $1 > /dev/null + cd .. +} +######################### +# The command line help # +######################### +usage() { + echo "Usage: `basename $0` [option...] 
-- shell script to build PaSh" + echo + echo " -h, --help Show this help message" + echo " -o, --opt-agg Install g++-10 and switch to it as main compiler. Build the optimized c++ aggregators (run with sudo)" + echo " -s, --show-deps Show all the required dependencies (does not setup/deploy PaSh nor its dependencies)" + echo " -e, --install-eval Install all the dependencies needed for reproducing the evaluation figures (uses sudo, only for Ubuntu/Debian currently)" + echo + exit 1 +} + +########################################## +# Install all the required libraries and # +# dependencies for PaSh evaluation # +########################################## +install_eval_deps() { + echo "Installing evaluation dependencies (needs sudo)" + # needed for majority of the benchmarks (not available in docker instances) + sudo apt-get install unzip + paths="$(find $PASH_TOP/evaluation/benchmarks -name install-deps.sh)" + for f in $(echo $paths); do + path=$(dirname $(readlink -f $f)) + cd $path + bash install-deps.sh + cd - > /dev/null + done + echo "Generating PDF plots of the evaluation results is optional and requires R-packages" + echo "Follow Installation Guide from: $PASH_TOP/evaluation/eval_script/README.md" +} + +########################################## +# parse and read the command line args # +########################################## +read_cmd_args() { + # Transform long options to short ones + for arg in "$@"; do + shift + case "$arg" in + "--opt-agg") set -- "$@" "-o" ;; + "--show-deps") set -- "$@" "-s" ;; + "--install-eval") set -- "$@" "-e" ;; + "--help") set -- "$@" "-h" ;; + *) set -- "$@" "$arg" + esac + done + + while getopts 'opsreh' opt; do + case $opt in + # passthrough the variable to the Makefile for libdash + o) export optimized_agg_flag=1 ;; + s) export show_deps=1 ;; + r) export show_eval_deps=1 ;; + e) export install_eval=1 ;; + h) usage >&2 ;; + *) echo 'Error in command line parsing' >&2 + exit 1 + esac + done +} diff --git 
a/test/failing/wc.2.sh b/test/failing/wc.2.sh new file mode 100755 index 0000000..deb0db2 --- /dev/null +++ b/test/failing/wc.2.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +# Part of a distributed-`wc` wrapper, merging two `wc` results +# FIXME needs correct padding + +paste -d '+' + <(cat "$1" | + wc | + tr -s ' ' '\n' | + tail -n +2) + <(cat "$2" | + wc | + tr -s ' ' '\n' | + tail -n +2) | + bc | + tr -s '\n' ' ' | + sed 's/^/ /' | + sed 's/$/\ /' diff --git a/test/failing/web-log-stats.sh b/test/failing/web-log-stats.sh new file mode 100755 index 0000000..a746610 --- /dev/null +++ b/test/failing/web-log-stats.sh @@ -0,0 +1,147 @@ +#!/bin/sh +# Automatically generated file +# Source file example/web-log-stats.sh +#!/usr/bin/env sgsh -s /bin/bash +# +# SYNOPSIS Web log statistics +# DESCRIPTION +# Provides continuous statistics over web log stream data. +# Demonstrates stream processing. +# Provide as an argument either the name of a growing web log file +# or -s and a static web log file, which will be processed at a rate +# of about 10 lines per second. +# +# Copyright 2013 Diomidis Spinellis +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Size of the window to report in seconds +WINDOW=10 +WINDOW_OLD=$(expr $WINDOW \* 2) + +# Update interval in seconds +UPDATE=2 + +# Print the sum of the numbers read from the standard input +sum() +{ + awk '{ sum += $1 } END {print sum}' +} + +# Print the rate of change as a percentage +# between the first (old) and second (new) value +change() +{ + # Can't use bc, because we have numbers in scientific notation + awk "END {OFMT=\"%.2f%%\"; print ($2 - $1) * 100 / $1}" &2 + + # Stop key-value stores + + # Kill processes we have launched in the background + kill $SGPID 2>/dev/null + + # Remove temporary directory + rm -rf "$SGDIR" + + # Propagate real signals and exit with non-0 + if [ $SIGNAL != EXIT ] + then + trap - $SIGNAL EXIT + kill -s $SIGNAL $$ + fi + + # Exit with the original exit value + exit + + } + + for sig in HUP INT QUIT TERM EXIT + do + trap "cleanup $sig" $sig + done + + mkdir $SGDIR + cat <&3 3<&- >$SGDIR/npi-0.0.0 +ln $SGDIR/npi-0.0.0 $SGDIR/npi-0.1.0 +page=$( { awk -Winteractive '{print $7}' +} <$SGDIR/npi-0.0.0 ) + { awk -Winteractive '{print $10}' +} <$SGDIR/npi-0.1.0 >$SGDIR/npi-1.0.0 +ln $SGDIR/npi-1.0.0 $SGDIR/npi-1.1.0 +ln $SGDIR/npi-1.0.0 $SGDIR/npi-1.2.0 +ln $SGDIR/npi-1.0.0 $SGDIR/npi-1.3.0 +total_bytes=$( { awk -Winteractive '{ s += $1; print s}' +} <$SGDIR/npi-1.0.0 ) +total_pages=$( { awk -Winteractive '{print ++n}' +} <$SGDIR/npi-1.1.0 ) +bytes=$( { +} <$SGDIR/npi-1.2.0 ) +bytes_old=$( { +} <$SGDIR/npi-1.3.0 ) + +# Gather the results + # Produce periodic reports + while : + do + WINDOW_PAGES=$(echo ${bytes} -c | wc -l) + WINDOW_BYTES=$(echo ${bytes} -c | sum ) + WINDOW_PAGES_OLD=$(echo ${bytes_old} -c | wc -l) + WINDOW_BYTES_OLD=$(echo ${bytes_old} -c | sum) + clear + cat </dev/null) || call-with-active-ec2 "$@" diff --git a/test/pash_tests/1.sh b/test/pash_tests/1.sh new file mode 100755 index 0000000..5cfe0b9 --- /dev/null +++ b/test/pash_tests/1.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export 
IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN1=$IN_PRE/1.txt +# 1.0: extract the last name +cat $IN1 | cut -d ' ' -f 2 + diff --git a/test/pash_tests/10.sh b/test/pash_tests/10.sh new file mode 100755 index 0000000..30d1f6c --- /dev/null +++ b/test/pash_tests/10.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN4=$IN_PRE/4.txt +# 4.4: histogram of Belle's captures (-pawns) by each type of piece +cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | cut -d '.' -f 2 | grep '[KQRBN]' | cut -c 1-1 | sort | uniq -c | sort -nr + diff --git a/test/pash_tests/11.sh b/test/pash_tests/11.sh new file mode 100755 index 0000000..46954d8 --- /dev/null +++ b/test/pash_tests/11.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN4=$IN_PRE/4.txt +# 4.5: 4.4 + pawns +cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | cut -d '.' -f 2 | cut -c 1-1 | tr '[a-z]' 'P' | sort | uniq -c | sort -nr + diff --git a/test/pash_tests/12.sh b/test/pash_tests/12.sh new file mode 100755 index 0000000..8bbb75d --- /dev/null +++ b/test/pash_tests/12.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN4=$IN_PRE/4.txt +# 4.6: piece used the most by Belle +cat $IN4 | tr ' ' '\n' | grep '\.' | cut -d '.' 
-f 2 | cut -c 1-1 | tr '[a-z]' 'P' | sort -r | uniq | head -n 3 | tail -n 1
+
diff --git a/test/pash_tests/13.sh b/test/pash_tests/13.sh
new file mode 100755
index 0000000..6ba69f7
--- /dev/null
+++ b/test/pash_tests/13.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN5=$IN_PRE/5.txt
+# 5.1: extract hello world
+cat $IN5 | grep 'print' | cut -d "\"" -f 2 | cut -c 1-12
+
diff --git a/test/pash_tests/14.sh b/test/pash_tests/14.sh
new file mode 100755
index 0000000..b7b54a8
--- /dev/null
+++ b/test/pash_tests/14.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN6=$IN_PRE/6.txt
+# 6.1: order the bodies by how easy it would be to land on them in Thompson's Space Travel game when playing at the highest simulation scale
+cat $IN6 | awk "{print \$2, \$0}" | sort -nr | cut -d ' ' -f 2
+
diff --git a/test/pash_tests/15.sh b/test/pash_tests/15.sh
new file mode 100755
index 0000000..b23c044
--- /dev/null
+++ b/test/pash_tests/15.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN7=$IN_PRE/7.txt
+# 7.1: identify number of AT&T unix versions
+cat $IN7 | cut -f 1 | grep 'AT&T' | wc -l
+
diff --git a/test/pash_tests/16.sh b/test/pash_tests/16.sh
new file mode 100755
index 0000000..bbcebc2
--- /dev/null
+++ b/test/pash_tests/16.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN7=$IN_PRE/7.txt
+# 7.2: find most frequently occurring machine
+cat $IN7 | cut -f 2 | sort -n | uniq -c | sort -nr | head -n 1 | tr -s ' ' '\n' | tail -n 1
+
diff --git a/test/pash_tests/17.sh b/test/pash_tests/17.sh
new file mode 100755
index 0000000..289baff
--- /dev/null
+++ b/test/pash_tests/17.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN7=$IN_PRE/7.txt
+# 7.3: all the decades in which a unix version was released
+cat $IN7 |
cut -f 4 | sort -n | cut -c 3-3 | uniq | sed s/\$/'0s'/
+
diff --git a/test/pash_tests/18.sh b/test/pash_tests/18.sh
new file mode 100755
index 0000000..260ef13
--- /dev/null
+++ b/test/pash_tests/18.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN8=$IN_PRE/8.txt
+# 8.1: count unix birth-year
+cat $IN8 | tr ' ' '\n' | grep 1969 | wc -l
+
diff --git a/test/pash_tests/19.sh b/test/pash_tests/19.sh
new file mode 100755
index 0000000..f36dafe
--- /dev/null
+++ b/test/pash_tests/19.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN8=$IN_PRE/8.txt
+# 8.2: find Bell Labs location where Dennis Ritchie had his office
+cat $IN8 | grep 'Bell' | awk 'length <= 45' | cut -d ',' -f 2 | awk "{\$1=\$1};1"
+
diff --git a/test/pash_tests/1_1.sh b/test/pash_tests/1_1.sh
new file mode 100755
index 0000000..a4eadbe
--- /dev/null
+++ b/test/pash_tests/1_1.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# tag: count_words
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/1_1/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort | uniq -c > ${OUT}/${input}.out
+done
+
+echo 'done';
+rm -rf "${OUT}"
diff --git a/test/pash_tests/2.sh b/test/pash_tests/2.sh
new file mode 100755
index 0000000..2f95466
--- /dev/null
+++ b/test/pash_tests/2.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN1=$IN_PRE/1.txt
+# 1.1: extract names and sort
+cat $IN1 | cut -d ' ' -f 2 | sort
+
diff --git a/test/pash_tests/2.unrtf.sh b/test/pash_tests/2.unrtf.sh
new file mode 100755
index 0000000..ae19227
--- /dev/null
+++ b/test/pash_tests/2.unrtf.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+#tag: rtf-to-txt
+set -e
+IN=${RTF:-$PASH_TOP/evaluation/benchmarks/aliases/input/rtf}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/aliases/input/out}  # default is rooted at $PASH_TOP, like IN above
+find $IN -name '*.rtf' | xargs -I {} unrtf {} --text > /dev/null
diff --git a/test/pash_tests/20.sh b/test/pash_tests/20.sh
new file mode 100755
index 0000000..50cf615
--- /dev/null
+++ b/test/pash_tests/20.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN8=$IN_PRE/8.txt
+# 8.3: find names of the four people most involved with unix
+cat $IN8 | grep '(' | cut -d '(' -f 2 | cut -d ')' -f 1 | head -n 1
+
diff --git a/test/pash_tests/21.sh b/test/pash_tests/21.sh
new file mode 100755
index 0000000..c0fc2c5
--- /dev/null
+++ b/test/pash_tests/21.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN8=$IN_PRE/8.txt
+# 8.4: find longest words without hyphens
+cat $IN8 | tr -c "[a-z][A-Z]" '\n' | sort | awk "length >= 16"
+
diff --git a/test/pash_tests/22.sh b/test/pash_tests/22.sh
new file mode 100755
index 0000000..6a93fa9
--- /dev/null
+++ b/test/pash_tests/22.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN8=$IN_PRE/8.txt
+# # 8.5: Find second-most-freq 8-character word(s) without hyphens
+# cat $IN8 > /dev/null
+
diff --git a/test/pash_tests/23.sh b/test/pash_tests/23.sh
new file mode 100755
index 0000000..d23500d
--- /dev/null
+++ b/test/pash_tests/23.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN91=$IN_PRE/9.1.txt
+# 9.1: extract the word PORT
+cat $IN91 | tr ' ' '\n' | grep '[A-Z]' | tr '[a-z]' '\n' | grep '[A-Z]' | tr -d '\n' | cut -c 1-4
+
diff --git a/test/pash_tests/24.sh b/test/pash_tests/24.sh
new file mode 100755
index 0000000..94d8229
--- /dev/null
+++ b/test/pash_tests/24.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN92=$IN_PRE/9.2.txt
+# 9.2: extract the word BELL
+cat $IN92 | cut -c 1-1 | tr -d '\n'
+
diff
--git a/test/pash_tests/25.sh b/test/pash_tests/25.sh
new file mode 100755
index 0000000..4da223c
--- /dev/null
+++ b/test/pash_tests/25.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN93=$IN_PRE/9.3.txt
+# 9.3: animal that used to decorate the Unix room
+cat $IN93 | cut -c 1-2 | tr -d '\n'
+
diff --git a/test/pash_tests/26.sh b/test/pash_tests/26.sh
new file mode 100755
index 0000000..dd3aff0
--- /dev/null
+++ b/test/pash_tests/26.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN94=$IN_PRE/9.4.txt
+# 9.4: four corners with E centered, for an "X" configuration
+cat $IN94 | tr ' ' '\n' | grep "\"" | sed 4d | cut -d "\"" -f 2 | tr -d '\n'
+
diff --git a/test/pash_tests/27.sh b/test/pash_tests/27.sh
new file mode 100755
index 0000000..99a34c6
--- /dev/null
+++ b/test/pash_tests/27.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN95=$IN_PRE/9.5.txt
+# # 9.5: backwards running clock, in a backwards poem
+# cat $IN95 > /dev/null
+
diff --git a/test/pash_tests/28.sh b/test/pash_tests/28.sh
new file mode 100755
index 0000000..89798a7
--- /dev/null
+++ b/test/pash_tests/28.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN96=$IN_PRE/9.6.txt
+# 9.6: Follow the directions for grep
+cat $IN96 | tr ' ' '\n' | grep '[A-Z]' | sed 1d | sed 3d | sed 3d | tr '[a-z]' '\n' | grep '[A-Z]' | sed 3d | tr -c '[A-Z]' '\n' | tr -d '\n'
+
diff --git a/test/pash_tests/29.sh b/test/pash_tests/29.sh
new file mode 100755
index 0000000..2ecf13a
--- /dev/null
+++ b/test/pash_tests/29.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN97=$IN_PRE/9.7.txt
+# 9.7: Four corners
+cat $IN97 | sed 2d | sed 2d | tr -c '[A-Z]' '\n' | tr -d '\n'
+
diff --git a/test/pash_tests/2_1.sh b/test/pash_tests/2_1.sh
new file mode 100755
index 0000000..1c0f399
--- /dev/null
+++ b/test/pash_tests/2_1.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# tag: merge_upper
+# set -e
+
+# Merge upper and lower counts
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/2_1/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat $IN/$input | tr '[a-z]' '[A-Z]' | tr -sc '[A-Z]' '[\012*]' | sort | uniq -c > ${OUT}/${input}.out
+done
+
+echo 'done';
+rm -rf "${OUT}"
diff --git a/test/pash_tests/2_2.sh b/test/pash_tests/2_2.sh
new file mode 100755
index 0000000..95cf055
--- /dev/null
+++ b/test/pash_tests/2_2.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# tag: count_vowel_seq
+# set -e
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/2_2/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat $IN/$input | tr '[a-z]' '[A-Z]' | tr -sc 'AEIOU' '[\012*]'| sort | uniq -c > ${OUT}/${input}.out  # both tr sets are bracketed so letters line up one-to-one
+done
+
+echo 'done';
+rm -rf "${OUT}"
diff --git a/test/pash_tests/3.sh b/test/pash_tests/3.sh
new file mode 100755
index 0000000..1c53bca
--- /dev/null
+++ b/test/pash_tests/3.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN1=$IN_PRE/1.txt
+# 1.2: extract the second field of the first two lines
+cat $IN1 | head -n 2 | cut -d ' ' -f 2
+
diff --git a/test/pash_tests/30.sh b/test/pash_tests/30.sh
new file mode 100755
index 0000000..c6f6ccf
--- /dev/null
+++ b/test/pash_tests/30.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN98=$IN_PRE/9.8.txt
+# 9.8: TELE-communications
+cat $IN98 | tr -c '[a-z][A-Z]' '\n' | grep '[A-Z]' | sed 1d | sed 2d | sed 3d | sed 4d | tr -c '[A-Z]' '\n' | tr -d '\n'
+
diff --git a/test/pash_tests/31.sh b/test/pash_tests/31.sh
new file mode 100755
index 0000000..a564879
--- /dev/null
+++ b/test/pash_tests/31.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN99=$IN_PRE/9.9.txt
+# 9.9:
+cat $IN99 | tr -c '[a-z][A-Z]' '\n' | grep '[A-Z]' | sed 1d | sed 1d | sed 2d | sed 3d | sed 5d | tr -c '[A-Z]' '\n' | tr -d '\n'
+
diff --git a/test/pash_tests/32.sh b/test/pash_tests/32.sh
new file mode 100755
index 0000000..dd041df
--- /dev/null
+++ b/test/pash_tests/32.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN10=$IN_PRE/10.txt
+# 10.1: count Turing award recipients while working at Bell Labs
+cat $IN10 | sed 1d | grep 'Bell' | cut -f 2 | wc -l
+
diff --git a/test/pash_tests/33.sh b/test/pash_tests/33.sh
new file mode 100755
index 0000000..07f0fe9
--- /dev/null
+++ b/test/pash_tests/33.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN10=$IN_PRE/10.txt
+# 10.2: list Turing award recipients while working at Bell Labs
+cat $IN10 | sed 1d | grep 'Bell' | cut -f 2
+
diff --git a/test/pash_tests/34.sh b/test/pash_tests/34.sh
new file mode 100755
index 0000000..55067fc
--- /dev/null
+++ b/test/pash_tests/34.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN10=$IN_PRE/10.txt
+# 10.3: extract Ritchie's username
+cat $IN10 | grep 'Bell' | cut -f 2 | head -n 1 | fmt -w1 | cut -c 1-1 | tr -d '\n' | tr '[A-Z]' '[a-z]'
+
diff --git a/test/pash_tests/35.sh b/test/pash_tests/35.sh
new file mode 100755
index 0000000..421267a
--- /dev/null
+++ b/test/pash_tests/35.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN11=$IN_PRE/11.txt
+# 11.1: year Ritchie and Thompson receive the Hamming medal
+cat $IN11 | grep 'UNIX' | cut -f 1
+
diff --git a/test/pash_tests/36.sh b/test/pash_tests/36.sh
new file mode 100755
index 0000000..cdc3fa8
--- /dev/null
+++ b/test/pash_tests/36.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export
IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN11=$IN_PRE/11.txt
+# 11.2: most repeated first name in the list?
+cat $IN11 | cut -f 2 | cut -d ' ' -f 1 | sort | uniq -c | sort -nr | head -n 1 | fmt -w1 | sed 1d
+
diff --git a/test/pash_tests/3_1.sh b/test/pash_tests/3_1.sh
new file mode 100755
index 0000000..9d32b82
--- /dev/null
+++ b/test/pash_tests/3_1.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# tag: sort
+# set -e
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/3_1/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort | uniq -c | sort -nr > ${OUT}/${input}.out
+done
+
+echo 'done';
+rm -rf "${OUT}"
diff --git a/test/pash_tests/3_2.sh b/test/pash_tests/3_2.sh
new file mode 100755
index 0000000..0ce3011
--- /dev/null
+++ b/test/pash_tests/3_2.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# tag: sort_words_by_folding
+# set -e
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/3_2/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort | uniq -c | sort -f > ${OUT}/${input}
+done
+
+echo 'done';
+rm -rf "${OUT}"  # quoted so a path with spaces or globs is removed as one argument
diff --git a/test/pash_tests/3_3.sh b/test/pash_tests/3_3.sh
new file mode 100755
index 0000000..f24f2d9
--- /dev/null
+++ b/test/pash_tests/3_3.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# tag: sort_words_by_rhyming.sh
+# set -e
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/3_3/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort | uniq -c | rev | sort | rev > ${OUT}/${input}.out
+done
+
+echo 'done';
+rm -rf "${OUT}"
diff --git a/test/pash_tests/4.gitkernel.sh
b/test/pash_tests/4.gitkernel.sh
new file mode 100755
index 0000000..89cb678
--- /dev/null
+++ b/test/pash_tests/4.gitkernel.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# First command is almost always a generator
+set -e
+IN=${GIT:-$PASH_TOP/evaluation/benchmarks/aliases/input/linux}
+
+#FIXME define a complex expression
+COMPLEX=""
+# linux git
+cd ${IN}/linux
+git ls-tree --name-only -z -r HEAD | grep -z -Z -E '\.(cc|h|cpp|hpp|c|txt|java)$' | xargs -0 -n1 git blame --line-porcelain | grep "${COMPLEX}"  # quoted: an empty pattern is still passed as grep's pattern argument
+
diff --git a/test/pash_tests/4.sh b/test/pash_tests/4.sh
new file mode 100755
index 0000000..da460c0
--- /dev/null
+++ b/test/pash_tests/4.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN1=$IN_PRE/1.txt
+# 1.3: sort top first names
+cat $IN1 | cut -d ' ' -f 1 | sort | uniq -c | sort -r
+
diff --git a/test/pash_tests/4_3.sh b/test/pash_tests/4_3.sh
new file mode 100755
index 0000000..0ea61f2
--- /dev/null
+++ b/test/pash_tests/4_3.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# tag: bigrams.sh
+# set -e
+
+# Bigrams (contrary to our version, this uses intermediary files)
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/4_3/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' > ${OUT}/${input}.input.words
+    tail -n +2 ${OUT}/${input}.input.words > ${OUT}/${input}.input.nextwords  # word list shifted by one line
+    paste ${OUT}/${input}.input.words ${OUT}/${input}.input.nextwords | sort | uniq -c > ${OUT}/${input}.input.bigrams
+done
+
+echo 'done';
+rm -rf "${OUT}"  # quoted so the path is removed as one argument
diff --git a/test/pash_tests/4_3b.sh b/test/pash_tests/4_3b.sh
new file mode 100755
index 0000000..36fcbdf
--- /dev/null
+++ b/test/pash_tests/4_3b.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#tag: count_trigrams.sh
+# set -e
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/4_3b/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+run_tests() {
+    cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' > ${OUT}/${input}.words
+    tail -n +2 ${OUT}/${input}.words > ${OUT}/${input}.nextwords   # word list shifted by one line
+    tail -n +3 ${OUT}/${input}.words > ${OUT}/${input}.nextwords2  # word list shifted by two lines
+    paste ${OUT}/${input}.words ${OUT}/${input}.nextwords ${OUT}/${input}.nextwords2 |
+    sort | uniq -c
+}
+export -f run_tests
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    run_tests $input > ${OUT}/${input}.trigrams
+done
+
+echo 'done';
+rm -rf "${OUT}"  # quoted so the path is removed as one argument
diff --git a/test/pash_tests/5.apachelog.sh b/test/pash_tests/5.apachelog.sh
new file mode 100755
index 0000000..64247e6
--- /dev/null
+++ b/test/pash_tests/5.apachelog.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# fetch hit count for each ip ?
+set -e
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/aliases/input/}
+
+# 405 original
+# cat ${IN}/apache.log | grep "\->" | grep -o "from [^ ]*" | cut -d ' ' -f2 | sort | uniq -c | sort -nr | less
+# FIXME need apache error logs ..
+cat ${IN}apache.log | grep -o "from [^ ]*" | cut -d ' ' -f2 | sort | uniq -c | sort -nr
diff --git a/test/pash_tests/5.sh b/test/pash_tests/5.sh
new file mode 100755
index 0000000..015f384
--- /dev/null
+++ b/test/pash_tests/5.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN2=$IN_PRE/2.txt
+# 2.1: get all Unix utilities
+cat $IN2 | cut -d ' ' -f 4 | tr -d ','
+
diff --git a/test/pash_tests/6.msg.sh b/test/pash_tests/6.msg.sh
new file mode 100755
index 0000000..36b15a5
--- /dev/null
+++ b/test/pash_tests/6.msg.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# First command is almost always a generator
+set -e
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/aliases/input/out}
+
+
+# grep -iv ': starting\|kernel: .*: Power Button\|watching system buttons\|Stopped Cleaning Up\|Started Crash recovery kernel' /var/log/messages /var/log/syslog /var/log/* 2> /dev/null | grep -iw 'recover[a-z]*\|power[a-z]*\|shut[a-z ]*down\|rsyslogd\|ups' > /tmp/__shutdown.log && echo
'File written to /tmp__shutdown.log'
+# doesn't do much :/
+grep -iv ': starting\|kernel: .*: Power Button\|watching system buttons\|Stopped Cleaning Up\|Started Crash recovery kernel' /var/log/messages /var/log/syslog /var/log/* 2> /dev/null |
+    grep --regex 'recover[a-z]*\|power[a-z]*\|shut[a-z ]*down\|rsyslogd\|ups' > ${OUT}/shutdown.log
diff --git a/test/pash_tests/6.sh b/test/pash_tests/6.sh
new file mode 100755
index 0000000..9e2ba9d
--- /dev/null
+++ b/test/pash_tests/6.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN3=$IN_PRE/3.txt
+# 3.1: get lowercase first letter of last names (awk)
+cat $IN3 | cut -d ' ' -f 2 | cut -c 1-1 | tr -d '\n' | tr '[A-Z]' '[a-z]'
+
diff --git a/test/pash_tests/6_1.sh b/test/pash_tests/6_1.sh
new file mode 100755
index 0000000..1ea70e4
--- /dev/null
+++ b/test/pash_tests/6_1.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# tag: trigram_rec
+# set -e
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_1/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+trigrams() {
+    input=$1  # name used for the scratch files created under ${OUT}
+    tr -sc '[A-Z][a-z]' '[\012*]' > ${OUT}/${input}.words
+    tail -n +2 ${OUT}/${input}.words > ${OUT}/${input}.nextwords   # word list shifted by one line
+    tail -n +3 ${OUT}/${input}.words > ${OUT}/${input}.nextwords2  # word list shifted by two lines
+    paste ${OUT}/${input}.words ${OUT}/${input}.nextwords ${OUT}/${input}.nextwords2 | sort | uniq -c
+    rm -f ${OUT}/${input}.words ${OUT}/${input}.nextwords ${OUT}/${input}.nextwords2
+}
+export -f trigrams
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat $IN"/"$input | grep 'the land of' | trigrams ${input} | sort -nr | sed 5q > ${OUT}/${input}.out0  # pass the bare file name: trigrams prefixes it with ${OUT}/
+    cat $IN"/"$input | grep 'And he said' | trigrams ${input} | sort -nr | sed 5q > ${OUT}/${input}.out1
+done
+
+echo 'done';
+rm -rf "${OUT}"
diff --git a/test/pash_tests/6_1_1.sh b/test/pash_tests/6_1_1.sh
new file mode 100755
index 0000000..d62bec1
--- /dev/null
+++ b/test/pash_tests/6_1_1.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# tag: uppercase_by_token
+# set -e
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_1_1/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | grep -c '^[A-Z]' > ${OUT}/${input}.out
+done
+
+echo 'done';
+rm -rf "${OUT}"  # quoted so the path is removed as one argument
diff --git a/test/pash_tests/6_1_2.sh b/test/pash_tests/6_1_2.sh
new file mode 100755
index 0000000..2e60990
--- /dev/null
+++ b/test/pash_tests/6_1_2.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# tag: uppercase_by_type
+# set -e
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_1_2/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort -u | grep -c '^[A-Z]' > ${OUT}/${input}.out
+done
+
+echo 'done';
+rm -rf "${OUT}"  # quoted so the path is removed as one argument
diff --git a/test/pash_tests/6_2.sh b/test/pash_tests/6_2.sh
new file mode 100755
index 0000000..09f145d
--- /dev/null
+++ b/test/pash_tests/6_2.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# tag: four-letter words
+# set -e
+
+# the original script has both versions
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_2/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | grep -c '^....$' > ${OUT}/${input}.out0
+    cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort -u | grep -c '^....$' > ${OUT}/${input}.out1
+done
+
+echo 'done';
+rm -rf "${OUT}"
diff --git a/test/pash_tests/6_3.sh b/test/pash_tests/6_3.sh
new file mode 100755
index 0000000..4430419
--- /dev/null
+++ b/test/pash_tests/6_3.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# tag: words_no_vowels
+# set -e
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_3/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | grep -vi '[aeiou]' | sort | uniq -c > ${OUT}/${input}.out
+done
+
+echo 'done';
+rm -rf "${OUT}"
diff --git a/test/pash_tests/6_4.sh b/test/pash_tests/6_4.sh
new file mode 100755
index 0000000..bfd38a3
--- /dev/null
+++ b/test/pash_tests/6_4.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# tag: 1-syllable words
+# set -e
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_4/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat ${IN}/${input} | tr -sc '[A-Z][a-z]' '[\012*]' | grep -i '^[^aeiou]*[aeiou][^aeiou]*$' | sort | uniq -c | sed 5q > ${OUT}/${input}.out
+done
+
+echo 'done';
+rm -rf "${OUT}"
diff --git a/test/pash_tests/6_5.sh b/test/pash_tests/6_5.sh
new file mode 100755
index 0000000..d4f8a6f
--- /dev/null
+++ b/test/pash_tests/6_5.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# tag: 2-syllable words
+# set -e
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_5/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | grep -i '^[^aeiou]*[aeiou][^aeiou]*[aeiou][^aeiou]*$' | sort | uniq -c | sed 5q > ${OUT}/${input}.out  # exactly two vowels, each with optional consonants around it
+done
+
+echo 'done';
+rm -rf "${OUT}"
diff --git a/test/pash_tests/6_7.sh b/test/pash_tests/6_7.sh
new file mode 100755
index 0000000..fa93b9a
--- /dev/null
+++ b/test/pash_tests/6_7.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# tag: verse_2om_3om_2instances
+# set -e
+# verses with 2 or more, 3 or more, exactly 2 instances of light.
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_7/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat $IN/$input | grep -c 'light.*light' > ${OUT}/${input}.out0  # lines with 2 or more
+    cat $IN/$input | grep -c 'light.*light.*light' > ${OUT}/${input}.out1  # lines with 3 or more
+    cat $IN/$input | grep 'light.*light' | grep -vc 'light.*light.*light' > ${OUT}/${input}.out2  # lines with exactly 2
+done
+
+echo 'done';
+rm -rf "${OUT}"  # quoted so the path is removed as one argument
diff --git a/test/pash_tests/7.nginx.sh b/test/pash_tests/7.nginx.sh
new file mode 100755
index 0000000..7711fd6
--- /dev/null
+++ b/test/pash_tests/7.nginx.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# fetch hit count for each ip ?
+set -e
+IN=${IN:-${PASH_TOP}/evaluation/benchmarks/aliases/input}
+# original command tail -10000 /var/log/nginx/access.log | cut -d "" "" -f1 | sort | uniq -c | sort -n | tail -n 30 | sort -nrk 1 | awk
+cat ${IN}/access.log | cut -d ' ' -f1 | sort | uniq -c | sort -n | tail -n 30 | sort -nrk 1
diff --git a/test/pash_tests/7.sh b/test/pash_tests/7.sh
new file mode 100755
index 0000000..7ad1b97
--- /dev/null
+++ b/test/pash_tests/7.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN4=$IN_PRE/4.txt
+# 4.1: find number of rounds
+cat $IN4 | tr ' ' '\n' | grep '\.'
| wc -l
+
diff --git a/test/pash_tests/7_1.sh b/test/pash_tests/7_1.sh
new file mode 100755
index 0000000..147eac6
--- /dev/null
+++ b/test/pash_tests/7_1.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# tag: count_morphs
+# set -e
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/7_1/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat $IN/$input | sed 's/ly$/-ly/g' | sed 's/ .*//g' | sort | uniq -c > ${OUT}/${input}.out
+done
+
+echo 'done';
+rm -rf "${OUT}"  # OUT is a directory created by mkdir -p above, so -r is required
diff --git a/test/pash_tests/7_2.sh b/test/pash_tests/7_2.sh
new file mode 100755
index 0000000..a0baff3
--- /dev/null
+++ b/test/pash_tests/7_2.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# set -e
+# tag: count_consonant_sequences
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/7_2/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat $IN/$input | tr '[a-z]' '[A-Z]' | tr -sc 'BCDFGHJKLMNPQRSTVWXYZ' '[\012*]' | sort | uniq -c > ${OUT}/${input}.out
+done
+
+echo 'done';
+rm -rf "${OUT}"  # quoted so the path is removed as one argument
diff --git a/test/pash_tests/8.2_1.sh b/test/pash_tests/8.2_1.sh
new file mode 100755
index 0000000..b9519c7
--- /dev/null
+++ b/test/pash_tests/8.2_1.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# tag: vowel_sequences_gr_1K.sh
+# set -e
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/8.2_1/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | tr -sc 'AEIOUaeiou' '[\012*]' | sort | uniq -c | awk "\$1 >= 1000" > ${OUT}/${input}.out
+done
+
+echo 'done';
+rm -rf "${OUT}"
diff --git a/test/pash_tests/8.2_2.sh b/test/pash_tests/8.2_2.sh
new file mode 100755
index 0000000..169a0fb
--- /dev/null
+++ b/test/pash_tests/8.2_2.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# tag: bigrams_appear_twice.sh
+#
set -e
+
+# Calculate the bigrams (based on 4_3.sh script)
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/8.2_2/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+run_tests() {
+    input=$1
+    cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' > ${OUT}/${input}.input.words
+    tail -n +2 ${OUT}/${input}.input.words > ${OUT}/${input}.input.nextwords  # word list shifted by one line
+    paste ${OUT}/${input}.input.words ${OUT}/${input}.input.nextwords | sort | uniq -c > ${OUT}/${input}.input.bigrams
+    awk "\$1 == 2 {print \$2, \$3}" ${OUT}/${input}.input.bigrams
+}
+
+export -f run_tests
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    run_tests $input > ${OUT}/${input}.out
+done
+
+echo 'done';
+rm -rf "${OUT}"
diff --git a/test/pash_tests/8.3_2.sh b/test/pash_tests/8.3_2.sh
new file mode 100755
index 0000000..3450bd7
--- /dev/null
+++ b/test/pash_tests/8.3_2.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# tag: find_anagrams.sh
+# set -e
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/8.3_2/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+run_tests() {
+    input=$1
+    cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort -u > ${OUT}/${input}.types
+    rev < ${OUT}/${input}.types > ${OUT}/${input}.types.rev
+    sort ${OUT}/${input}.types ${OUT}/${input}.types.rev | uniq -c | awk "\$1 >= 2 {print \$2}"
+}
+
+export -f run_tests
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    run_tests $input > ${OUT}/${input}.out
+done
+
+echo 'done';
+rm -rf "${OUT}"
diff --git a/test/pash_tests/8.3_3.sh b/test/pash_tests/8.3_3.sh
new file mode 100755
index 0000000..f774d83
--- /dev/null
+++ b/test/pash_tests/8.3_3.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# tag: compare_exodus_genesis.sh
+# set -e
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+INPUT2=${INPUT2:-$PASH_TOP/evaluation/benchmarks/nlp/input/exodus}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/8.3_3/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p
$OUT
+
+run_tests() {
+    cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort -u > ${OUT}/${input}1.types
+    tr -sc '[A-Z][a-z]' '[\012*]' < ${INPUT2} | sort -u > ${OUT}/${input}2.types
+    sort $OUT/${input}1.types ${OUT}/${input}2.types ${OUT}/${input}2.types | uniq -c | head
+
+}
+export -f run_tests
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    run_tests $input > ${OUT}/${input}.out
+done
+
+echo 'done';
+rm -rf "${OUT}"
diff --git a/test/pash_tests/8.sh b/test/pash_tests/8.sh
new file mode 100755
index 0000000..9dde970
--- /dev/null
+++ b/test/pash_tests/8.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN4=$IN_PRE/4.txt
+# 4.2: find pieces captured by Belle
+cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | wc -l
+
diff --git a/test/pash_tests/8.varlog.sh b/test/pash_tests/8.varlog.sh
new file mode 100755
index 0000000..4b808f3
--- /dev/null
+++ b/test/pash_tests/8.varlog.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+set -e
+# 1308; or line above, w/ -vE
+# Doesn't do much
+find /var/log -type f -exec file {} \; | grep 'text' | cut -d' ' -f1 | sed -e's/:$//g' | grep -v '[0-9]$' | xargs tail
diff --git a/test/pash_tests/8_1.sh b/test/pash_tests/8_1.sh
new file mode 100755
index 0000000..518e5ed
--- /dev/null
+++ b/test/pash_tests/8_1.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# tag: sort_words_by_num_of_syllables
+# set -e
+
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/}
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/8.1/}
+ENTRIES=${ENTRIES:-1060}
+mkdir -p "$OUT"
+
+run_tests() {
+    cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort -u > ${OUT}/${input}.words
+    tr -sc '[AEIOUaeiou\012]' ' ' < ${OUT}/${input}.words | awk '{print NF}' > ${OUT}/${input}.syl
+    paste ${OUT}/${input}.syl ${OUT}/${input}.words | sort -nr | sed 5q
+}
+export -f run_tests
+for input in $(ls ${IN} | head -n ${ENTRIES})
+do
+    run_tests $input > ${OUT}/${input}.out
+done
+
+echo 'done';
+rm -rf "${OUT}"
diff --git
a/test/pash_tests/9.sh b/test/pash_tests/9.sh
new file mode 100755
index 0000000..d6d1f07
--- /dev/null
+++ b/test/pash_tests/9.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}
+IN4=$IN_PRE/4.txt
+# 4.3: find pieces captured by Belle with a pawn
+cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | cut -d '.' -f 2 | grep -v '[KQRBN]' | wc -l
+
diff --git a/test/pash_tests/add.sh b/test/pash_tests/add.sh
new file mode 100755
index 0000000..d0ecaa4
--- /dev/null
+++ b/test/pash_tests/add.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+paste -d+ "$@" | bc
diff --git a/test/pash_tests/alt_bigrams.sh b/test/pash_tests/alt_bigrams.sh
new file mode 100644
index 0000000..121abe8
--- /dev/null
+++ b/test/pash_tests/alt_bigrams.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Find all 2-grams in a piece of text
+# FIXME: does not calculate frequencies
+
+cat $IN |
+    tr -cs A-Za-z '\n' |
+    tr A-Z a-z |
+    alt_bigrams_aux
+
diff --git a/test/pash_tests/alt_bigrams_env_test.sh b/test/pash_tests/alt_bigrams_env_test.sh
new file mode 100644
index 0000000..805e069
--- /dev/null
+++ b/test/pash_tests/alt_bigrams_env_test.sh
@@ -0,0 +1 @@
+IN=$PASH_TOP/evaluation/tests/input/10M.txt
diff --git a/test/pash_tests/alt_bigrams_funs.sh b/test/pash_tests/alt_bigrams_funs.sh
new file mode 100644
index 0000000..5e9bbed
--- /dev/null
+++ b/test/pash_tests/alt_bigrams_funs.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+alt_bigrams_aux()
+{
+    s2=$(mktemp -u)
+    ( mkfifo $s2 > /dev/null ) ;
+
+    tee $s2 |
+        tail -n +2 |
+        paste $s2 - |
+        sed '$d' |
+        sort |
+        uniq
+    rm $s2
+}
+
+alt_bigram_aux_reduce()
+{
+    IN1=$1
+    IN2=$2
+
+    sort -m $IN1 $IN2 |
+        uniq
+}
+
+export -f alt_bigrams_aux
+export -f alt_bigram_aux_reduce
diff --git a/test/pash_tests/ann-agg-2.sh b/test/pash_tests/ann-agg-2.sh
new file mode 100755
index 0000000..94a604a
--- /dev/null
+++ b/test/pash_tests/ann-agg-2.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+## Test contains command aliases with annotations that point to
custom aggregators + +FILE="$PASH_TOP/evaluation/tests/input/ab.txt" + +test_uniq_1() { + uniq +} + +test_uniq_2() { + uniq -c +} + +cat $FILE | sort | test_uniq_1 | tr 'a' 'b' | test_uniq_2 + diff --git a/test/pash_tests/ann-agg.sh b/test/pash_tests/ann-agg.sh new file mode 100644 index 0000000..b8030ba --- /dev/null +++ b/test/pash_tests/ann-agg.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +## Test contains command aliases with annotations that point to custom aggregators + +FILE="$PASH_TOP/evaluation/tests/input/1M.txt" + +test_one() { + cat +} + +test_two() { + cat +} + +cat $FILE | test_one | test_two diff --git a/test/pash_tests/archive.sh b/test/pash_tests/archive.sh new file mode 100644 index 0000000..dbfe627 --- /dev/null +++ b/test/pash_tests/archive.sh @@ -0,0 +1,6 @@ +timestamp=`date +"%Y%m%d%H%M%S"` + +tar cf cdash.tar \ + *.c *.h *.sh *.py Makefile + +cp -p cdash.tar "cdash-${timestamp}.tar" diff --git a/test/pash_tests/args_with_spaces.sh b/test/pash_tests/args_with_spaces.sh new file mode 100644 index 0000000..283fd07 --- /dev/null +++ b/test/pash_tests/args_with_spaces.sh @@ -0,0 +1,2 @@ +echo $1 +echo $2 diff --git a/test/pash_tests/autogen.sh b/test/pash_tests/autogen.sh new file mode 100755 index 0000000..bbc5667 --- /dev/null +++ b/test/pash_tests/autogen.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +libtoolize \ +&& aclocal \ +&& autoheader \ +&& automake --add-missing \ +&& autoconf diff --git a/test/pash_tests/bam_to_sam.sh b/test/pash_tests/bam_to_sam.sh new file mode 100644 index 0000000..fec09f6 --- /dev/null +++ b/test/pash_tests/bam_to_sam.sh @@ -0,0 +1,4 @@ +INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/bam} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/bio/output} +cd ${INPUT} +find . 
-name "*.bam" | xargs -I {} samtools view -h -o ${OUTPUT} {} diff --git a/test/pash_tests/bell_grep.sh b/test/pash_tests/bell_grep.sh new file mode 100755 index 0000000..031370a --- /dev/null +++ b/test/pash_tests/bell_grep.sh @@ -0,0 +1,54 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out +file9=9.out +rm -f *.out + +testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" +batchSize=10000000 +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 + +# mkfifo $file7 +# mkfifo $file8 +# mkfifo $file9 + + +$PASH_TOP/runtime/r_split $testFile $batchSize $file1 $file2 & + + +$PASH_TOP/runtime/dgsh-tee -I -i $file1 -o $file5 -b 10M & +$PASH_TOP/runtime/dgsh-tee -I -i $file2 -o $file6 -b 10M & + +$PASH_TOP/runtime/r_wrap grep 'Bell' < $file5 > $file3 & +$PASH_TOP/runtime/r_wrap grep 'Bell' < $file6 > $file4 & +# ../r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file7 > $file8 & + +$PASH_TOP/runtime/r_merge $file3 $file4 + +# cat $testFile | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > $file6 +# if cmp -s "$file6" "$file5"; then +# printf 'The file "%s" is the same as "%s"\n' "$file6" "$file5" +# else +# printf 'The file "%s" is different from "%s"\n' "$file6" "$file5" +# fi + +rm -rf *out diff --git a/test/pash_tests/bi-gram.aux.sh b/test/pash_tests/bi-gram.aux.sh new file mode 100755 index 0000000..5f66058 --- /dev/null +++ b/test/pash_tests/bi-gram.aux.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# Auxiliary functions for bi-grams + +bigrams_aux() +{ + s2=$(mktemp -u) + mkfifo $s2 + tee $s2 | + tail -n +2 | + paste $s2 - | + sed '$d' + rm $s2 +} + +bigram_aux_map() +{ + IN=$1 + OUT=$2 + AUX_HEAD=$3 + AUX_TAIL=$4 + + s2=$(mktemp -u) + aux1=$(mktemp -u) + aux2=$(mktemp -u) + aux3=$(mktemp -u) + temp=$(mktemp -u) + + mkfifo $s2 + mkfifo $aux1 + mkfifo $aux2 + mkfifo $aux3 + + ## New 
way of doing it using an intermediate file. This is slow + ## but doesn't deadlock + cat $IN > $temp + + sed '$d' $temp > $aux3 & + cat $temp | head -n 1 > $AUX_HEAD & + cat $temp | tail -n 1 > $AUX_TAIL & + cat $temp | tail -n +2 | paste $aux3 - > $OUT & + + # ## Old way of doing it + # cat $IN | + # tee $s2 $aux1 $aux2 | + # tail -n +2 | + # paste $s2 - > $OUT & + + # ## The goal of this is to write the first line of $IN in the $AUX_HEAD + # ## stream and the last line of $IN in $AUX_TAIL + + # cat $aux1 | ( head -n 1 > $AUX_HEAD; $PASH_TOP/evaluation/tools/drain_stream.sh ) & + # # while IFS= read -r line + # # do + # # old_line=$line + # # done < $aux2 + # # echo "$old_line" > $AUX_TAIL + # ( tail -n 1 $aux2 > $AUX_TAIL; $PASH_TOP/evaluation/tools/drain_stream.sh ) & + + wait + + rm $temp + rm $s2 + rm $aux1 + rm $aux2 + rm $aux3 +} + +bigram_aux_reduce() +{ + IN1=$1 + AUX_HEAD1=$2 + AUX_TAIL1=$3 + IN2=$4 + AUX_HEAD2=$5 + AUX_TAIL2=$6 + OUT=$7 + AUX_HEAD_OUT=$8 + AUX_TAIL_OUT=$9 + + temp=$(mktemp -u) + + mkfifo $temp + + cat $AUX_HEAD1 > $AUX_HEAD_OUT & + cat $AUX_TAIL2 > $AUX_TAIL_OUT & + paste $AUX_TAIL1 $AUX_HEAD2 > $temp & + cat $IN1 $temp $IN2 > $OUT & + + wait + + rm $temp +} + +export -f bigrams_aux +export -f bigram_aux_map +export -f bigram_aux_reduce diff --git a/test/pash_tests/bi-grams.sh b/test/pash_tests/bi-grams.sh new file mode 100755 index 0000000..c98f583 --- /dev/null +++ b/test/pash_tests/bi-grams.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# Find all 2-grams in a piece of text + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} + +. 
bi-gram.aux.sh + +cat $IN | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + bigrams_aux | + sort | + uniq + + diff --git a/test/pash_tests/bigrams.sh b/test/pash_tests/bigrams.sh new file mode 100755 index 0000000..a3990e1 --- /dev/null +++ b/test/pash_tests/bigrams.sh @@ -0,0 +1,85 @@ +#!/bin/bash + +bigrams_aux() +{ + s2=$(mktemp -u) + mkfifo $s2 + tee $s2 | + tail -n +2 | + paste $s2 - | + sed "\$d" + rm $s2 +} + +bigram_aux_map() +{ + IN=$1 + OUT=$2 + AUX_HEAD=$3 + AUX_TAIL=$4 + + s2=$(mktemp -u) + aux1=$(mktemp -u) + aux2=$(mktemp -u) + aux3=$(mktemp -u) + temp=$(mktemp -u) + + mkfifo $s2 + mkfifo $aux1 + mkfifo $aux2 + mkfifo $aux3 + + cat $IN > $temp + + sed "\$d" $temp > $aux3 & + cat $temp | head -n 1 > $AUX_HEAD & + cat $temp | tail -n 1 > $AUX_TAIL & + cat $temp | tail -n +2 | paste $aux3 - > $OUT & + + wait + + rm $temp + rm $s2 + rm $aux1 + rm $aux2 + rm $aux3 +} + +bigram_aux_reduce() +{ + IN1=$1 + AUX_HEAD1=$2 + AUX_TAIL1=$3 + IN2=$4 + AUX_HEAD2=$5 + AUX_TAIL2=$6 + OUT=$7 + AUX_HEAD_OUT=$8 + AUX_TAIL_OUT=$9 + + temp=$(mktemp -u) + + mkfifo $temp + + cat $AUX_HEAD1 > $AUX_HEAD_OUT & + cat $AUX_TAIL2 > $AUX_TAIL_OUT & + paste $AUX_TAIL1 $AUX_HEAD2 > $temp & + cat $IN1 $temp $IN2 > $OUT & + + wait + + rm $temp +} + +export -f bigrams_aux +export -f bigram_aux_map +export -f bigram_aux_reduce + +cat $IN | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + bigrams_aux | + sort | + uniq + + diff --git a/test/pash_tests/bigrams_aux_map.sh b/test/pash_tests/bigrams_aux_map.sh new file mode 100755 index 0000000..bead44f --- /dev/null +++ b/test/pash_tests/bigrams_aux_map.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash + +## By making tee | tail | paste its own function, we can implement it +## as a pure command separated into a generalized map and a +## reduce. Following the ParSynt work, a generalized map also keeps +## some auxiliary variables (in our case streams) to enable parallelization. 
+ +## +## Map +## + +bigram_aux_map() +{ + IN=$1 + OUT=$2 + AUX_HEAD=$3 + AUX_TAIL=$4 + + s2=$(mktemp -u) + aux1=$(mktemp -u) + aux2=$(mktemp -u) + + mkfifo "$s2" + mkfifo "$aux1" + mkfifo "$aux2" + cat "$IN" | + tee "$s2" "$aux1" "$aux2" | + tail +2 | + paste "$s2" - > "$OUT" & + + ## The goal of this is to write the first line of $IN in the $AUX_HEAD + ## stream and the last line of $IN in $AUX_TAIL + + ## TODO: I am not sure if using head/tail like this works or breaks + ## the pipes + cat "$aux1" | ( head -n 1 > "$AUX_HEAD"; dd of=/dev/null > /dev/null 2>&1 ) & + tail -n 1 "$aux2" > "$AUX_TAIL" & + + wait + + rm "$s2" + rm "$aux1" + rm "$aux2" +} + +## +## Reduce: +## + +bigram_aux_reduce() +{ + IN1=$1 + AUX_HEAD1=$2 + AUX_TAIL1=$3 + IN2=$4 + AUX_HEAD2=$5 + AUX_TAIL2=$6 + OUT=$7 + AUX_HEAD_OUT=$8 + AUX_TAIL_OUT=$9 + + temp=$(mktemp -u) + + mkfifo "$temp" + + cat "$AUX_HEAD1" > "$AUX_HEAD_OUT" & + cat "$AUX_TAIL2" > "$AUX_TAIL_OUT" & + paste "$AUX_TAIL1" "$AUX_HEAD2" > "$temp" & + cat "$IN1" "$temp" "$IN2" > "$OUT" & + + wait + + rm "$temp" +} + +##VTODO: Deplete the aux outputs of the last reduce diff --git a/test/pash_tests/bigrams_env_test.sh b/test/pash_tests/bigrams_env_test.sh new file mode 100755 index 0000000..805e069 --- /dev/null +++ b/test/pash_tests/bigrams_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt diff --git a/test/pash_tests/bio4.sh b/test/pash_tests/bio4.sh new file mode 100755 index 0000000..9b3413b --- /dev/null +++ b/test/pash_tests/bio4.sh @@ -0,0 +1,30 @@ +# create bam files with regions +################### 1KG SAMPLES +IN=${INPUT:-$PASH_TOP/evaluation/benchmarks/bio} +IN_NAME=${IN_N:-input_all.txt} +OUT=${OUTPUT:-$PASH_TOP/evaluation/benchmarks/bio/output} +cat ${IN}/${IN_NAME}|while read s_line; + do + sample=$(echo $s_line |cut -d " " -f 2); + pop=$(echo $s_line |cut -f 1 -d " "); + link=$(echo $s_line |cut -f 3 -d " "); + ### correcting labeling of chromosomes so that all are 1,2,3.. 
instead of chr1,chr2 or chromosome1 etc + echo 'Processing Sample '${IN}/input/$sample' '; + # uniform the chromosomes in the file due to inconsistencies + samtools view -H "${IN}/input/$sample".bam | sed -e 's/SN:\([0-9XY]\)/SN:chr\1/' -e 's/SN:MT/SN:chrM/' \ + | samtools reheader - "${IN}/input/$sample".bam > "${OUT}/$sample"_corrected.bam ; + # create bai file + samtools index -b "${OUT}/$sample"_corrected.bam ; + ### Isolating each relevant chromosome based on Gen_locs + cut -f 2 ./Gene_locs.txt |sort |uniq |while read chr; + do + echo 'Isolating Chromosome '$chr' from sample '${OUT}/$sample', '; + samtools view -b "${OUT}/$sample"_corrected.bam chr"$chr" > "${OUT}/$pop"_"$sample"_"$chr".bam ; + echo 'Indexing Sample '$pop'_'${OUT}/$sample' '; + samtools index -b "${OUT}/$pop"_"$sample"_"$chr".bam; + #sleep 2 + done; + #rm "${OUT}/$sample"_corrected.bam; + #rm "${OUT}/$sample"_corrected.bam.bai; + #rm "${OUT}/$sample".bam +done; diff --git a/test/pash_tests/braces_amp.sh b/test/pash_tests/braces_amp.sh new file mode 100644 index 0000000..fc82664 --- /dev/null +++ b/test/pash_tests/braces_amp.sh @@ -0,0 +1,5 @@ +for x in foo; do a & b & c & d & done +echo a & echo b +for y in foo; do a & b & done +while false; do a & b & done +until true; do forever & ever & ever & done diff --git a/test/pash_tests/buggy_non_newline_input.sh b/test/pash_tests/buggy_non_newline_input.sh new file mode 100755 index 0000000..a97b6bc --- /dev/null +++ b/test/pash_tests/buggy_non_newline_input.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +## No newline before EOF bug +echo -n "popo" > /tmp/in +IN=/tmp/in + +cat $IN $IN | grep "popopopo" > /tmp/seq.out + +rm -f s1 s2 +mkfifo s1 s2 + +cat $IN | grep "popopopo" > s1 & +cat $IN | grep "popopopo" > s2 & +cat s1 s2 > /tmp/buggy.out + +rm -f s1 s2 + +diff /tmp/seq.out /tmp/buggy.out \ No newline at end of file diff --git a/test/pash_tests/call_distrib_planner_example.sh b/test/pash_tests/call_distrib_planner_example.sh new file mode 100644 index 
0000000..694bc60 --- /dev/null +++ b/test/pash_tests/call_distrib_planner_example.sh @@ -0,0 +1 @@ +python3 distr_plan.py "/tmp/dish_temp_ir_file0" diff --git a/test/pash_tests/cat-redir-fail.sh b/test/pash_tests/cat-redir-fail.sh new file mode 100644 index 0000000..fc6116b --- /dev/null +++ b/test/pash_tests/cat-redir-fail.sh @@ -0,0 +1 @@ +cat < no.such.file diff --git a/test/pash_tests/cat_output_files.sh b/test/pash_tests/cat_output_files.sh new file mode 100755 index 0000000..506ab1a --- /dev/null +++ b/test/pash_tests/cat_output_files.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +distr_output_dir=$1 + +cat "$distr_output_dir"/* diff --git a/test/pash_tests/circus.sh b/test/pash_tests/circus.sh new file mode 100755 index 0000000..30eed07 --- /dev/null +++ b/test/pash_tests/circus.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# https://unix.stackexchange.com/questions/193441/how-can-i-implement-a-circular-flow-of-data-among-interconnected-commands + +echo 1 >file + +rm s1 +mkfifo s1 +tail -f file | + sed -u 's/^/1 + /' | + tee -a s1 > /dev/null & + +cat s1 | + xargs -0 -n 1 -d '\n' expr | + tee -a file diff --git a/test/pash_tests/clone_compress_repo.sh b/test/pash_tests/clone_compress_repo.sh new file mode 100755 index 0000000..ecbfa9d --- /dev/null +++ b/test/pash_tests/clone_compress_repo.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +OUTPUT=${1:="pash.tar.gz"} +BRANCH=${2:="master"} + +# TODO: Make the temp_repo_dir be variable and random named + +mkdir temp_repo_dir +cd temp_repo_dir +git clone --recursive git@github.com:andromeda/pash.git +cd pash +git checkout $BRANCH +cd ../ +tar -czf ../$OUTPUT pash +cd ../ +rm -rf temp_repo_dir diff --git a/test/pash_tests/cmd_sbst.sh b/test/pash_tests/cmd_sbst.sh new file mode 100644 index 0000000..e3d8234 --- /dev/null +++ b/test/pash_tests/cmd_sbst.sh @@ -0,0 +1,6 @@ +echo $(Testvar=set + unset Testvar + echo $Testvar${Testvar-sh_352.10}${Testvar+set} + ) +x=$(set one two three; echo sh_352.11 $1 $2 $3 $# $* "$@"); echo "$x" +x=$(set one 
"twoA twoB"; echo sh_352.12 $1 "$2" $3 $# $* "$@"); echo "$x" \ No newline at end of file diff --git a/test/pash_tests/cmd_sbst_subscript.sh b/test/pash_tests/cmd_sbst_subscript.sh new file mode 100644 index 0000000..f78ef72 --- /dev/null +++ b/test/pash_tests/cmd_sbst_subscript.sh @@ -0,0 +1,2 @@ +echo 'eval echo $?' +exit 123 \ No newline at end of file diff --git a/test/pash_tests/comm-par-test.sh b/test/pash_tests/comm-par-test.sh new file mode 100644 index 0000000..6ec878b --- /dev/null +++ b/test/pash_tests/comm-par-test.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Tests the parallelization of comm with a configuration input and a stream input. +mkfifo s1 s2 + +cat $IN > s1 & +cat $IN | grep "king" > s2 & +comm -23 s1 s2 + +rm s1 s2 diff --git a/test/pash_tests/comm-par-test2.sh b/test/pash_tests/comm-par-test2.sh new file mode 100644 index 0000000..6481251 --- /dev/null +++ b/test/pash_tests/comm-par-test2.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Tests the parallelization of comm with a configuration input and a stream input. 
+mkfifo s1 s2 + +cat $IN > s1 & +cat $IN | grep "king" > s2 & +comm -23 - s2 < s1 + +rm s1 s2 diff --git a/test/pash_tests/comm-par-test2_env_test.sh b/test/pash_tests/comm-par-test2_env_test.sh new file mode 100644 index 0000000..3e777b2 --- /dev/null +++ b/test/pash_tests/comm-par-test2_env_test.sh @@ -0,0 +1,2 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt + diff --git a/test/pash_tests/comm-par-test_env_test.sh b/test/pash_tests/comm-par-test_env_test.sh new file mode 100644 index 0000000..3e777b2 --- /dev/null +++ b/test/pash_tests/comm-par-test_env_test.sh @@ -0,0 +1,2 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt + diff --git a/test/pash_tests/compile.sh b/test/pash_tests/compile.sh new file mode 100755 index 0000000..e53312a --- /dev/null +++ b/test/pash_tests/compile.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# Find markdown files in the current directory tree, compile them to HTML, and +# serve them over the network + +# Requires: pandoc + +IN=./input/ +OUT=./output/out.txt + +find $IN -name '*.md' | # Parallelizable, given a distributed FS + xargs pandoc | # xargs is higher-order, pandoc is third-party + gzip > $OUT # Compress the result +# nc -l 80 # netcat could default-but-configurably parallelizable + + diff --git a/test/pash_tests/compress_files.sh b/test/pash_tests/compress_files.sh new file mode 100755 index 0000000..e3b178c --- /dev/null +++ b/test/pash_tests/compress_files.sh @@ -0,0 +1,6 @@ +# compress all the files in a directory using dd and tar +INPUT=${INPUT:-$PASH_TOP/evaluation/aliases/input/rtf} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/aliases/output} +cd $INPUT +# get all rtf and compress them +find . 
-name "*.rtf" | xargs -P16 -I {} sh -c "dd if={} bs=1 status=none > '{}f'; tar -zcf {}.tar.gz {}f; rm {}f; mv {}.tar.gz $OUTPUT" sh {} diff --git a/test/pash_tests/concat.sh b/test/pash_tests/concat.sh new file mode 100755 index 0000000..3c0bf6d --- /dev/null +++ b/test/pash_tests/concat.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +cat "$@" diff --git a/test/pash_tests/convert_to_fast.sh b/test/pash_tests/convert_to_fast.sh new file mode 100644 index 0000000..e66a954 --- /dev/null +++ b/test/pash_tests/convert_to_fast.sh @@ -0,0 +1,6 @@ +# convert fastq to fasta format +# It recognizes the extension .fasta and it converts the input to fasta.gz format +INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/bio/output} +cd ${INPUT} +find . -maxdepth 1 -name "*.fastq" | xargs -I {} cutadapt -o ${OUTPUT}/{}.fasta.gz {} diff --git a/test/pash_tests/count.sh b/test/pash_tests/count.sh new file mode 100755 index 0000000..d1d0ef4 --- /dev/null +++ b/test/pash_tests/count.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +awk '{ count[$2] += $1 } END { for(e in count) print count[e], e }' "$@" diff --git a/test/pash_tests/count_packets.sh b/test/pash_tests/count_packets.sh new file mode 100644 index 0000000..7cba598 --- /dev/null +++ b/test/pash_tests/count_packets.sh @@ -0,0 +1,3 @@ +# count the packet number in a pcap file +INPUT=${INPUT:-$PASH_TOP/evaluation/scripts/input/201011271400.dump} +tcpdump -nn -r ${INPUT} | wc -l diff --git a/test/pash_tests/deadlock_test.sh b/test/pash_tests/deadlock_test.sh new file mode 100644 index 0000000..edbc09b --- /dev/null +++ b/test/pash_tests/deadlock_test.sh @@ -0,0 +1 @@ +cat $IN | tr A-Z a-z | head -n 1 diff --git a/test/pash_tests/deadlock_test_env_test.sh b/test/pash_tests/deadlock_test_env_test.sh new file mode 100644 index 0000000..805e069 --- /dev/null +++ b/test/pash_tests/deadlock_test_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt diff --git a/test/pash_tests/demo-spell.sh 
b/test/pash_tests/demo-spell.sh new file mode 100755 index 0000000..9883c82 --- /dev/null +++ b/test/pash_tests/demo-spell.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +cd "$(dirname $0)" + +[ -z $PASH_TOP ] && { + echo "PASH_TOP not set, maybe $(git rev-parse --show-toplevel)?" + exit +} +FILE="input/100M.txt" +DICT="input/sorted_words" + +cat "$FILE" | tr A-Z a-z | tr -cs A-Za-z '\n' | sort | uniq | comm -13 $DICT - diff --git a/test/pash_tests/dfs_split_reader.sh b/test/pash_tests/dfs_split_reader.sh new file mode 100755 index 0000000..339e948 --- /dev/null +++ b/test/pash_tests/dfs_split_reader.sh @@ -0,0 +1 @@ +"$PASH_TOP/runtime/dspash/file_reader/dfs_split_reader" --config "$@" diff --git a/test/pash_tests/dgsh-raw-sort.sh b/test/pash_tests/dgsh-raw-sort.sh new file mode 100755 index 0000000..4fd349c --- /dev/null +++ b/test/pash_tests/dgsh-raw-sort.sh @@ -0,0 +1,49 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out + +rm -f *.out + +batchSize=10000000 +testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 +mkfifo $file7 +mkfifo $file8 + +$PASH_TOP/runtime/r_split -r $testFile $batchSize $file1 $file2 & + +$PASH_TOP/runtime/dgsh-tee -I -f -i $file1 -o $file5 & +$PASH_TOP/runtime/dgsh-tee -I -f -i $file2 -o $file6 & + +sort < $file5 > $file7 & +sort < $file6 > $file8 & + +sort -m $file7 $file8 + +# cat $testFile | sort > $file8 +# if cmp -s "$file7" "$file8"; then +# printf 'The file "%s" is the same as "%s"\n' "$file7" "$file8" +# else +# printf 'The file "%s" is different from "%s"\n' "$file7" "$file8" +# fi + +rm -rf *out \ No newline at end of file diff --git a/test/pash_tests/dgsh-sort.sh b/test/pash_tests/dgsh-sort.sh new file mode 100755 index 0000000..984fb71 --- /dev/null +++ b/test/pash_tests/dgsh-sort.sh @@ -0,0 
+1,52 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out + +rm -f *.out + +batchSize=10000000 +testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 +mkfifo $file7 +mkfifo $file8 + +$PASH_TOP/runtime/r_split $testFile $batchSize $file1 $file2 & + +$PASH_TOP/runtime/dgsh-tee -I -f -i $file1 -o $file3 -b 64K & +$PASH_TOP/runtime/dgsh-tee -I -f -i $file2 -o $file4 -b 64K & + +$PASH_TOP/runtime/r_unwrap < $file3 > $file5 & +$PASH_TOP/runtime/r_unwrap < $file4 > $file6 & + +sort < $file5 > $file7 & +sort < $file6 > $file8 & + +sort -m $file7 $file8 + +# cat $testFile | sort > $file8 +# if cmp -s "$file7" "$file8"; then +# printf 'The file "%s" is the same as "%s"\n' "$file7" "$file8" +# else +# printf 'The file "%s" is different from "%s"\n' "$file7" "$file8" +# fi + +rm -rf *out \ No newline at end of file diff --git a/test/pash_tests/dgsh-wc.sh b/test/pash_tests/dgsh-wc.sh new file mode 100755 index 0000000..dbfc6f5 --- /dev/null +++ b/test/pash_tests/dgsh-wc.sh @@ -0,0 +1,45 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out + +rm -f *.out + +testFile=$PASH_TOP/evaluation/scripts/input/1G.txt +batchSize=10000000 +testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 + + +$PASH_TOP/runtime/r_split -r $testFile $batchSize $file1 $file2 & + +$PASH_TOP/runtime/dgsh-tee -I -f -i $file1 -o $file3 -b 10M & +$PASH_TOP/runtime/dgsh-tee -I -f -i $file2 -o $file4 -b 10M & +# $PASH_TOP/runtime/r_unwrap < $file1 > $file3 & +# $PASH_TOP/runtime/r_unwrap < $file2 > $file4 & + +wc $file3 
> $file5 & +wc $file4 > $file6 & + +./merge-wc.sh $file5 $file6 + + +rm -rf *out \ No newline at end of file diff --git a/test/pash_tests/diff.sh b/test/pash_tests/diff.sh new file mode 100644 index 0000000..27caa33 --- /dev/null +++ b/test/pash_tests/diff.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# Compares two streams element by element +# Taken from https://crashingdaily.wordpress.com/2008/03/06/diff-two-stdout-streams/ +# shuf() { awk 'BEGIN {srand(); OFMT="%.17f"} {print rand(), $0}' "$@" | sort -k1,1n | cut -d ' ' -f2-; } + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} + +mkfifo s1 s2 + +cat $IN | + # shuf | + tr [:lower:] [:upper:] | + sort > s1 & + +cat $IN | + # shuf | + tr [:upper:] [:lower:] | + sort > s2 & + +diff -B s1 s2 +rm s1 s2 diff --git a/test/pash_tests/diff_env_test.sh b/test/pash_tests/diff_env_test.sh new file mode 100644 index 0000000..3e777b2 --- /dev/null +++ b/test/pash_tests/diff_env_test.sh @@ -0,0 +1,2 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt + diff --git a/test/pash_tests/distributed.sh b/test/pash_tests/distributed.sh new file mode 100755 index 0000000..a57ebe6 --- /dev/null +++ b/test/pash_tests/distributed.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +# Schematic for distributing computations +# To try server client: + +./client.js local 'ls' +./client.js local | jq . +./client.js local 'sleep 5; echo "yo"' +./client.js local | jq . +sleep 5 +./client.js local | jq . 
+ +# The format is as follows: +# cat FIFOs > $OUT & +# nc -l -p STAR_RESULT_PORT > FIFO & +# ./client.js WORKER 'nc -l WORKER_DATA_PORT | PROGRAM | nc -C 158.130.4.212 STAR_RESULT_PORT' +# cat $IN | nc -N WORKER WORKER_DATA_PORT + + +cat fifo5555 fifo5556 > "$OUT" +nc -l -p 5555 > fifo5555 & +nc -l -p 5556 > fifo5556 & +# beta runs: `nc -l 5000 | grep -v "onetwo" | tr '[:lower:]' '[:upper:]' | nc -C 158.130.4.212 5555` +./client.js beta 'nc -l 5000 | grep -v "onetwo" | tr "[:lower:]" "[:upper:]" | nc -C 158.130.4.212 5555' +# gamma runs: `nc -l 5000 | grep -v "onetwo" | tr '[:lower:]' '[:upper:]' | nc -C 158.130.4.212 5555` +./client.js gamma 'nc -l 5000 | grep -v "onetwo" | tr "[:lower:]" "[:upper:]" | nc -C 158.130.4.212 5556' +cat "$IN" | nc -N beta.ndr.md 5000 +cat "$IN" | nc -N gamma.ndr.md 5000 + + +# Collect results +# Implement: `socat` can listen for multiple connections +nc -l -p 5555 > s1 & +nc -l -p 5556 > s2 & +nc -l -p 5557 > r3 & +nc -l -p 5558 > r4 & + +# Things are complicated by the fact that machines default to different +# versions of the BSD netcat (not sure why Debian and Ubuntu default to +# the BSD version of `nc`) +# -N stops after EOF + +# Receiver should run +nc -l -p 5000 | tr '[:lower:]' '[:upper:]' | nc "$DSTAR" 5555 + +# Distribute load +cat ./a/b | tr 'x' 'x' | nc "$B" 5000 +cat ./a/b | tr 'x' 'x' | nc "$C" 5000 +cat ./a/b | tr 'x' 'x' | nc "$D" 5000 diff --git a/test/pash_tests/distro-deps.sh b/test/pash_tests/distro-deps.sh new file mode 100755 index 0000000..65e1cb4 --- /dev/null +++ b/test/pash_tests/distro-deps.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash + +set -e +cd $(dirname $0) +PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel)} +. "$PASH_TOP/scripts/utils.sh" + +if [[ $(uname) == 'Darwin' ]]; then + echo 'Currently pash can run only on Linux' + exit 1 +fi + +read_cmd_args $@ +cd $PASH_TOP + +LOG_DIR=install_logs +mkdir -p $LOG_DIR + +# if we aren't running in docker, use sudo to install packages +if [ ! 
-f /.dockerenv ]; then + export SUDO="sudo" +fi + +if type lsb_release >/dev/null 2>&1 ; then + distro=$(lsb_release -i -s) +elif [ -e /etc/os-release ] ; then + distro=$(awk -F= '$1 == "ID" {print $2}' /etc/os-release) +fi + +# convert to lowercase +distro=$(printf '%s\n' "$distro" | LC_ALL=C tr '[:upper:]' '[:lower:]') +# compile the list of the shared required packages +pkgs="automake bc curl gcc git graphviz libtool m4 python sudo wget" +# now do different things depending on distro +case "$distro" in + ubuntu*) + pkgs="$pkgs bsdmainutils libffi-dev locales locales-all netcat-openbsd pkg-config python3 python3-pip python3-setuptools python3-testresources wamerican-insane" + if [[ "$show_deps" == 1 ]]; then + echo "$pkgs" | sort + exit 0 + fi + echo "Running preparation apt install:" + echo "|-- running apt update..." + $SUDO apt-get update &> $LOG_DIR/apt_update.log + echo "|-- running apt install..." + $SUDO apt-get install -y $pkgs &>> $LOG_DIR/apt_install.log + if [[ "$optimized_agg_flag" == 1 ]]; then + echo "|-- installing g++-10..." + $SUDO apt-get install software-properties-common -y &> $LOG_DIR/apt_install.log + $SUDO add-apt-repository ppa:ubuntu-toolchain-r/test -y &> $LOG_DIR/apt_install.log + $SUDO apt-get install g++-10 -y &> $LOG_DIR/apt_install.log + $SUDO update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 100 &> $LOG_DIR/apt_install.log + $SUDO update-alternatives --set g++ /usr/bin/g++-10 &> $LOG_DIR/apt_install.log + fi + ;; + debian*) + pkgs="$pkgs bsdmainutils libffi-dev locales locales-all netcat-openbsd pkg-config procps python3 python3-pip python3-setuptools python3-testresources wamerican-insane" + if [[ "$show_deps" == 1 ]]; then + echo "$pkgs" | sort + exit 0 + fi + echo "Running preparation apt install:" + echo "|-- running apt update..." + $SUDO apt-get update &> $LOG_DIR/apt_update.log + echo "|-- running apt install..." 
+ $SUDO apt-get install -y $pkgs &> $LOG_DIR/apt_install.log + ;; + fedora*) + pkgs="$pkgs autoconf diffutils gcc-c++ glibc-langpack-en hostname libjpeg-devel make nc pip procps python-devel python3-pip python3-setuptools python3-setuptools python3-testresources zlib-devel" + if [[ "$show_deps" == 1 ]]; then + echo "$pkgs" | sort + exit 0 + fi + echo "|-- running dnf install...." + $SUDO dnf install -y $pkgs &> $LOG_DIR/dnf_install.log + ;; + arch*) + pkgs="$pkgs autoconf inetutils libffi make openbsd-netcat pkg-config python-pip" + if [[ "$show_deps" == 1 ]]; then + echo "$pkgs" | sort + exit 0 + fi + echo "Updating mirrors" + $SUDO pacman -Sy &> $LOG_DIR/pacman_update.log + echo "|-- running pacman install...." + yes | $SUDO pacman -S $pkgs &> $LOG_DIR/pacman_install.log + ;; + freebsd*) + pkgs="$pkgs autoconf gmake gsed libffi py38-pip" + if [[ "$show_deps" == 1 ]]; then + echo "$pkgs" | sort + exit 0 + fi + echo "Updating mirrors" + $SUDO pkg update &> $LOG_DIR/pkg_update.log + echo "|-- running pkg install...." + # TODO add python3-testresources dep + yes | $SUDO pkg install $pkgs + ;; + *) echo "unknown distro: '$distro'" ; exit 1 ;; +esac diff --git a/test/pash_tests/distrotest.sh b/test/pash_tests/distrotest.sh new file mode 100644 index 0000000..5f0440a --- /dev/null +++ b/test/pash_tests/distrotest.sh @@ -0,0 +1,56 @@ +#!/bin/bash +# This script runs 'buildtest' on each of several distros +# via Docker. +set -o pipefail + +exec 3>&1 4>&2 + +[ -e "${SHELLCHECK_DIR}/ShellCheck.cabal" ] || die "ShellCheck.cabal not in this dir" + +# [ "$1" = "--run" ] || { +# cat << EOF +# This script pulls multiple distros via Docker and compiles +# ShellCheck and dependencies for each one. It takes hours, +# and is still highly experimental. + +# Make sure you're plugged in and have screen/tmux in place, +# then re-run with $0 --run to continue. + +# Also note that dist* will be deleted. +# EOF +# exit 0 +# } + +echo "Deleting 'dist' and 'dist-newstyle'..." 
+rm -rf dist dist-newstyle + +log=$(mktemp) || die "Can't create temp file" +date >> "$log" || die "Can't write to log" + +echo "Logging to $log" >&3 +## If I keep this on, the script output (together with Dish output) is +## redirected +# exec >> "$log" 2>&1 + +final=0 + +cat $IN | distrotest_loop + +# distrotest_loop << EOF +# # Docker tag Setup command +# debian:stable apt-get update && apt-get install -y cabal-install +# debian:testing apt-get update && apt-get install -y cabal-install +# ubuntu:latest apt-get update && apt-get install -y cabal-install +# haskell:latest true +# opensuse/leap:latest zypper install -y cabal-install ghc +# fedora:latest dnf install -y cabal-install ghc-template-haskell-devel findutils +# archlinux/base:latest pacman -S -y --noconfirm cabal-install ghc-static base-devel + +# # Other versions we want to support +# ubuntu:18.04 apt-get update && apt-get install -y cabal-install + +# # Misc Haskell including current and latest Stack build +# ubuntu:18.04 set -e; apt-get update && apt-get install -y curl && curl -sSL https://get.haskellstack.org/ | sh -s - -f && cd /mnt && exec test/stacktest +# EOF + +exit "$final" diff --git a/test/pash_tests/distrotest_env.sh b/test/pash_tests/distrotest_env.sh new file mode 100644 index 0000000..098b025 --- /dev/null +++ b/test/pash_tests/distrotest_env.sh @@ -0,0 +1,2 @@ +SHELLCHECK_DIR=/home/nikos/shellcheck/ +IN=../evaluation/usecases/shellcheck/temp_input.txt diff --git a/test/pash_tests/distrotest_funs.sh b/test/pash_tests/distrotest_funs.sh new file mode 100644 index 0000000..2849273 --- /dev/null +++ b/test/pash_tests/distrotest_funs.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +die() +{ + echo "$*" >&4; + exit 1; +} + +distrotest_loop() +{ + while read -r distro setup + do + [[ "$distro" = "#"* || -z "$distro" ]] && continue + + printf '%s ' "$distro" # >&3 + docker pull "$distro" || die "Can't pull $distro" + printf 'pulled. 
' # >&3 + + tmp=$(mktemp -d) || die "Can't make temp dir" + cp -r "${SHELLCHECK_DIR}" "$tmp/" || die "Can't populate test dir" + printf 'Result: ' # >&3 + < /dev/null docker run -v "$tmp:/mnt" "$distro" sh -c " + $setup + cd /mnt/shellcheck || exit 1 + test/buildtest + " + ret=$? + if [ "$ret" = 0 ] + then + echo "OK" # >&3 + else + echo "FAIL with $ret. See $log" # >&3 + final=1 + fi + rm -rf "$tmp" + done +} + +export -f die +export -f distrotest_loop diff --git a/test/pash_tests/diverge.sh b/test/pash_tests/diverge.sh new file mode 100755 index 0000000..1cf8ce5 --- /dev/null +++ b/test/pash_tests/diverge.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +while true; do true; done diff --git a/test/pash_tests/double_sort.sh b/test/pash_tests/double_sort.sh new file mode 100644 index 0000000..16b4c81 --- /dev/null +++ b/test/pash_tests/double_sort.sh @@ -0,0 +1,2 @@ +#!/bin/bash +cat $IN | tr A-Z a-z | sort | sort -r diff --git a/test/pash_tests/double_sort_env_test.sh b/test/pash_tests/double_sort_env_test.sh new file mode 100644 index 0000000..3e777b2 --- /dev/null +++ b/test/pash_tests/double_sort_env_test.sh @@ -0,0 +1,2 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt + diff --git a/test/pash_tests/drain_stream.sh b/test/pash_tests/drain_stream.sh new file mode 100755 index 0000000..84c05ae --- /dev/null +++ b/test/pash_tests/drain_stream.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +## This command drains a stream. It is used if we want a prefix of a +## stream that was written by tee. Since tee writes in both streams +## "almost" in lockstep, if we get a prefix on one side, the other +## side cannot progress. 
+dd of=/dev/null > /dev/null 2>&1 diff --git a/test/pash_tests/eager-no-task-par.sh b/test/pash_tests/eager-no-task-par.sh new file mode 100755 index 0000000..9c8858a --- /dev/null +++ b/test/pash_tests/eager-no-task-par.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +# $input="${1}" +# $output="${2}" +# $temp="${3}" + +touch "$3" + +cat "$1" > "$3" +cat "$3" > "$2" +rm "$3" diff --git a/test/pash_tests/eager.sh b/test/pash_tests/eager.sh new file mode 100755 index 0000000..d1a3772 --- /dev/null +++ b/test/pash_tests/eager.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +input=${1?"ERROR: Eager: No input file given"} +output=${2?"ERROR: Eager: No output file given"} +intermediate_file=${3?"ERROR: Eager: No intermediate file given"} + +# Set a default DISH_TOP in this directory if it doesn't exist +PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel)} +# TODO: Doable check if this is still needed. Turned off for distributed exection. +# PR https://github.com/binpash/pash/pull/495 might've resolved it. +# cleanup() +# { +# kill -SIGPIPE $eager_pid > /dev/null 2>&1 +# } +# trap cleanup EXIT + +# $PASH_TOP/runtime/eager "$input" "$output" "$intermediate_file" & +# eager_pid=$! 
+# wait $eager_pid +"$PASH_TOP"/runtime/eager "$input" "$output" "$intermediate_file" +rm "$intermediate_file" diff --git a/test/pash_tests/eager_test.sh b/test/pash_tests/eager_test.sh new file mode 100755 index 0000000..f62b732 --- /dev/null +++ b/test/pash_tests/eager_test.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +mkfifo s1 s2 + +# IN=test_in.txt +IN=../scripts/input/1G.txt + +cat "$IN" > s1 & +cat s2 > test_out.txt & +./eager s1 s2 intermediate & + +wait + +rm s1 s2 + +# diff -s $IN test_out.txt diff --git a/test/pash_tests/echo_args.sh b/test/pash_tests/echo_args.sh new file mode 100644 index 0000000..5938c62 --- /dev/null +++ b/test/pash_tests/echo_args.sh @@ -0,0 +1,2 @@ +echo "$# $@" +echo $0 diff --git a/test/pash_tests/encrypt_files.sh b/test/pash_tests/encrypt_files.sh new file mode 100755 index 0000000..a2b123c --- /dev/null +++ b/test/pash_tests/encrypt_files.sh @@ -0,0 +1,5 @@ +# compress and encrypt all files in a directory +INPUT=${INPUT:-$PASH_TOP/evaluation/aliases/input/rtf} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/aliases/output} +cd $INPUT +find . 
-name "*.rtf" | xargs -I {} sh -c "tar -czf - {} | openssl enc -e -pbkdf2 -out {}.enc; mv {}.enc $OUTPUT" sh {} diff --git a/test/pash_tests/escape-madness.sh b/test/pash_tests/escape-madness.sh new file mode 100644 index 0000000..80cf253 --- /dev/null +++ b/test/pash_tests/escape-madness.sh @@ -0,0 +1,3 @@ +echo "$(echo *)" +echo "$(echo "*")" +echo "${unset-*}" diff --git a/test/pash_tests/exec-redirections.sh b/test/pash_tests/exec-redirections.sh new file mode 100644 index 0000000..114e89c --- /dev/null +++ b/test/pash_tests/exec-redirections.sh @@ -0,0 +1,3 @@ +exec < exec-redirections.in > exec-redirections.out 2> exec-redirections.err +touch +cat diff --git a/test/pash_tests/execute_unix_benchmarks.sh b/test/pash_tests/execute_unix_benchmarks.sh new file mode 100755 index 0000000..e76b7db --- /dev/null +++ b/test/pash_tests/execute_unix_benchmarks.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +## Necessary to set PASH_TOP +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +## This sets up to what extent we run the evaluation. +## There are 2 levels: +## 1. Small inputs (1GB) | --width 4 +## 2. 
Big inputs (10GB) | --width 16 (EuroSys evaluation) +evaluation_level=1 + +while getopts 'slh' opt; do + case $opt in + s) evaluation_level=1 ;; + l) evaluation_level=2 ;; + h) echo "There are two possible execution levels:" + echo "option -s: Small inputs (1GB) | --width 4" + echo "option -l: Big inputs (10GB) | --width 16 (EuroSys evaluation)" + exit 0 ;; + *) echo 'Error in command line parsing' >&2 + exit 1 + esac +done +shift "$(( OPTIND - 1 ))" + + +unix50_dir="$PASH_TOP/evaluation/unix50/" +unix50_intermediary="${unix50_dir}/intermediary/" +intermediary_dir="$PASH_TOP/evaluation/intermediary/" +results_subdir_prefix="unix50" + +if [ "$evaluation_level" -eq 1 ]; then + echo "Executing Unix50 scripts with 1GB inputs and --width 4" + maximum_input_size="$((1024 * 1024 * 1024))" # 1 GB + n_in=4 +elif [ "$evaluation_level" -eq 2 ]; then + echo "Executing Unix50 scripts with 10GB inputs and --width 16" + maximum_input_size="$((10 * 1024 * 1024 * 1024))" # 10 GB + n_in=16 +else + echo "Unrecognizable execution level: $evaluation_level" + exit 1 +fi + +results_subdir="${results_subdir_prefix}_${n_in}_${maximum_input_size}" + +rm -r $unix50_intermediary +mkdir -p $unix50_intermediary +mkdir -p $intermediary_dir +mkdir -p "$PASH_TOP/evaluation/results/${results_subdir}/" + +## Make inputs larger and generate scripts and their envs +python3 generate_unix50_scripts.py $unix50_dir $unix50_intermediary $maximum_input_size + +for unix50_pipeline in $(ls ${unix50_intermediary} | grep -v "_env" | cut -f 1 -d '.' | sort); do + echo $unix50_pipeline + + echo "Generating input and intermediary scripts... be patient..." + python3 "$PASH_TOP/evaluation/generate_microbenchmark_intermediary_scripts.py" \ + $unix50_intermediary $unix50_pipeline $n_in $intermediary_dir + + echo "Executing script with bash and pash..." 
+ "$PASH_TOP/evaluation/execute_compile_evaluation_script.sh" -s -a "${unix50_pipeline}" "${n_in}" "${results_subdir}" > /dev/null 2>&1 + rm -f /tmp/eager* +done diff --git a/test/pash_tests/exit_code.sh b/test/pash_tests/exit_code.sh new file mode 100644 index 0000000..898a080 --- /dev/null +++ b/test/pash_tests/exit_code.sh @@ -0,0 +1,4 @@ +if read -r && read -r +then + exit 1 +fi < log_results/failed.log diff --git a/test/pash_tests/expand-u-positional.sh b/test/pash_tests/expand-u-positional.sh new file mode 100644 index 0000000..5b6a69d --- /dev/null +++ b/test/pash_tests/expand-u-positional.sh @@ -0,0 +1,2 @@ +set -u +echo $1 diff --git a/test/pash_tests/expand-u.sh b/test/pash_tests/expand-u.sh new file mode 100644 index 0000000..f01ee88 --- /dev/null +++ b/test/pash_tests/expand-u.sh @@ -0,0 +1,3 @@ +unset foobar +set -u +echo ${foobar} \ No newline at end of file diff --git a/test/pash_tests/export_var_script.sh b/test/pash_tests/export_var_script.sh new file mode 100755 index 0000000..5e738f6 --- /dev/null +++ b/test/pash_tests/export_var_script.sh @@ -0,0 +1,2 @@ +export N=1000 +seq 1 $N | sort -rn diff --git a/test/pash_tests/for-echo.sh b/test/pash_tests/for-echo.sh new file mode 100644 index 0000000..a8aebe3 --- /dev/null +++ b/test/pash_tests/for-echo.sh @@ -0,0 +1,6 @@ +N=${N:-100} +for i in $(seq $N) +do + echo $i +done +echo "end" diff --git a/test/pash_tests/for-loop.sh b/test/pash_tests/for-loop.sh new file mode 100644 index 0000000..090539f --- /dev/null +++ b/test/pash_tests/for-loop.sh @@ -0,0 +1,8 @@ +mkdir -p temp-out + +for i in $(seq 100) +do + cat $PASH_TOP/README.md | grep pash | grep pash > temp-out/$i.out +done + +echo done diff --git a/test/pash_tests/for_loop_simple.sh b/test/pash_tests/for_loop_simple.sh new file mode 100644 index 0000000..c267cd6 --- /dev/null +++ b/test/pash_tests/for_loop_simple.sh @@ -0,0 +1,3 @@ +for _ in $times; do + cat $IN | tr A-Z a-z | sort +done diff --git a/test/pash_tests/for_loop_simple_env_test.sh 
b/test/pash_tests/for_loop_simple_env_test.sh new file mode 100644 index 0000000..a60ab38 --- /dev/null +++ b/test/pash_tests/for_loop_simple_env_test.sh @@ -0,0 +1,2 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt +times="1 2 3" diff --git a/test/pash_tests/for_spaces.sh b/test/pash_tests/for_spaces.sh new file mode 100755 index 0000000..ed36e7a --- /dev/null +++ b/test/pash_tests/for_spaces.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +oldifs=$IFS +IFS=$(echo -e "\t") +for f in `ls *`; do + echo $f +done +IFS=$oldifs diff --git a/test/pash_tests/fun-def.sh b/test/pash_tests/fun-def.sh new file mode 100644 index 0000000..999ef74 --- /dev/null +++ b/test/pash_tests/fun-def.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +custom_sort() { + sort $@ +} + +custom_tr() { + tr A-Z a-z +} + +export -f custom_tr + +FILES="$PASH_TOP/evaluation/tests/input/1M.txt ../evaluation/tests/input/1M.txt" + +cat $FILES | custom_tr | custom_sort diff --git a/test/pash_tests/gen_data.sh b/test/pash_tests/gen_data.sh new file mode 100644 index 0000000..9101fd2 --- /dev/null +++ b/test/pash_tests/gen_data.sh @@ -0,0 +1,126 @@ +DATA=final.csv +rm -f $DATA +replace_string() { + sed -i -e 's/'$1'/'"$2"'/g' .tmp +} + +prepare_run_data() { + DATA_FILE=$1.data.csv + FILES=$1.log.dat + echo $1.tmp + rm -f $DATA_FILE $FILES + + # gather all results + find $1 -name "*.res" -type f > $FILES + + # read each line of the file + while read p; do + # echo "Fixing: $p" + python3 ../prep_temp.py $p > .tmp + # break + # lines=$(cat $p | wc -l) + # if [[ $lines -gt 3 ]]; then + # echo "Head1:" + # tail -n +2 $p | head + # file="$(tail -n +2 $p)" + # else + # echo "Head2:" + # tail -n +2 $p | head + # file="$(tail -n +2 $p)" + # fi + # file="$(tail -n +2 $p)" + # echo "file: $file" + # echo "$file" > .tmp + bench=$(echo $p | awk -F '/' '{print $3}') + mode=$(echo $p | awk -F'/' '{print $2}') + # echo "Bench: $bench, mode: $mode" + # if [[ $bench == max-temp ]]; then + # cat .tmp | sed -E 
's/^([a-zA-Z_0-9\-]+):.*([0-9]+.[0-9]+\n)$/\1\t\2/g' #| cut -f 1 + # fi + # read the contents of each execution file + while read l; do + #l=$(echo $l | sed 's/ //g') + echo $l | grep --quiet : + res=$? + if [[ $res == 1 ]]; then + perf=$(echo $l | grep -Eo '[0-9]+.[0-9]+$') + # echo "Perf 1: $perf" + script=$(echo $l | sed -e 's/'$perf'//g') + else + # get script name and performance + # strip the .sh and get fetch the script name + script=$(echo $l | awk -F ':' '{print $1}' | sed 's/...$//') + # get the execution time + perf=$(echo $l | awk -F ':' '{print $2}') + # echo "Perf 2: $perf" + fi + echo $bench,$script,$mode,$perf | sed 's/ //g'>> $DATA_FILE + done < .tmp + done < $FILES + sort $DATA_FILE > $1.tmp + rm $DATA_FILE rm -f $FILES +} + +cd eval_results +prepare_run_data run +# merge all the results +cat run.tmp | sed -s 's/,/ /g' | awk '{print $1,$2,$3,$4}' | awk ' {print $1','$2','$3','$4}' | tr ' ' ',' > .tmp +# cleanup +replace_string dependency_untangling for-loops +replace_string nlp NLP +replace_string oneliners Classics +replace_string unix50 Unix50 +replace_string analytics-mts COVID-mts +replace_string web-index WebIndex +replace_string max-temp AvgTemp +replace_string temp-analytics AvgTemp +replace_string Genomics_Computation Genomics +replace_string Program_Inference ProgInf +replace_string pash_jit_no_prof_no_du 'pash_jit -prof -par_pipe' +replace_string pash_jit_no_prof 'pash_jit -prof' +replace_string blish pash_jit +perf='' +# calculate the ratios +while read p; do + # is this the bash entry + echo $p | grep --quiet bash + res=$? 
+ # fetch the performance + if [[ $res == 0 ]]; then + perf=$(echo $p | awk -F ',' '{print $4}') + fi + # get the bench + bench=$(echo $p | awk -F ',' '{print $1}') + # get the script + script=$(echo $p | awk -F ',' '{print $2}') + # get the mode + mode=$(echo $p | awk -F ',' '{print $3}') + # get the time of the pash/blish configs + current_perf=$(echo $p | awk -F ',' '{print $4}') + # calculate the ratio + if [[ $res == 0 ]]; then + ratio=$perf + else + ratio=$(echo "$perf $current_perf" | awk '{print $1/$2}' ) + fi + # replace the pash/blish time with the ratio + echo $bench,$script,$mode,$ratio >> $DATA +done < .tmp +rm -f .tmp +mv $DATA .. + +# in docker container, we are running with the CI +if [ -f /.dockerenv ]; then + exit 0 +fi +cd .. +# replace all the lines that are not needed in figure5 +sed 's/for-loops,AurPkg,pash_aot,.*/for-loops,AurPkg,pash_aot,0/g' $DATA > data_final.csv +sed -i 's/for-loops,FileEnc1,pash_aot,.*/for-loops,FileEnc1,pash_aot,0/g' data_final.csv +sed -i 's/for-loops,FileEnc2,pash_aot,.*/for-loops,FileEnc2,pash_aot,0/g' data_final.csv +sed -i 's/for-loops,LogAnalysis1,pash_aot,.*/for-loops,LogAnalysis1,pash_aot,0/g' data_final.csv +sed -i 's/for-loops,LogAnalysis2,pash_aot,.*/for-loops,LogAnalysis2,pash_aot,0/g' data_final.csv +sed -i 's/for-loops,MediaConv1,pash_aot,.*/for-loops,MediaConv1,pash_aot,0/g' data_final.csv +sed -i 's/for-loops,MediaConv2,pash_aot,.*/for-loops,MediaConv2,pash_aot,0/g' data_final.csv +sed -i 's/for-loops,ProgInf,pash_aot,.*/for-loops,ProgInf,pash_aot,0/g' data_final.csv +sed -i 's/for-loops,Genomics,pash_aot,.*/for-loops,Genomics,pash_aot,0/g' data_final.csv diff --git a/test/pash_tests/gen_pl.sh b/test/pash_tests/gen_pl.sh new file mode 100644 index 0000000..7773f24 --- /dev/null +++ b/test/pash_tests/gen_pl.sh @@ -0,0 +1,4 @@ +# generate a playlist +INPUT=${INPUT:-$PASH_TOP/evaluation/aliases/input/} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/aliases/output} +find $1 -type f -name *.mp3 -o -name *.wav | sort 
> $OUTPUT/playlist.pls diff --git a/test/pash_tests/generate_single_chrom.sh b/test/pash_tests/generate_single_chrom.sh new file mode 100644 index 0000000..364c28a --- /dev/null +++ b/test/pash_tests/generate_single_chrom.sh @@ -0,0 +1,19 @@ +# Here are sample steps to generate a single paired read from hg19: +# https://www.biostars.org/p/150010/ +INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/bio/output} +cd ${INPUT} +# filter out a single chromosome and index it, e.g. +samtools faidx ${INPUT}/human_g1k_v37.fasta 20 > ${OUTPUT}/human_g1k_v37_chr20.fasta +bowtie2-build ${OUTPUT}/human_g1k_v37_chr20.fasta ${OUTPUT}/homo_chr20 +#simulate a single read sample, e.g. here is for a single (-N 1) paired read: +${INPUT}/wgsim/wgsim -N 1 ${OUTPUT}/human_g1k_v37_chr20.fasta ${OUTPUT}/single.read1.fq ${OUTPUT}/single.read2.fq > ${OUTPUT}/wgsim.out +#generate the sam, e.g. +bowtie2 -x ${OUTPUT}/homo_chr20 -1 ${OUTPUT}/single.read1.fq -2 ${OUTPUT}/single.read2.fq -S ${OUTPUT}/single_pair.sam +#generate a bam +samtools view -b -S -o ${OUTPUT}/single_pair.bam ${OUTPUT}/single_pair.sam +#sort and index it +samtools sort ${OUTPUT}/single_pair.bam -o ${OUTPUT}/single_pair.sorted.bam +# this seems to not affect the file, but in other cases, its indeed needed +samtools index ${OUTPUT}/single_pair.sorted.bam + diff --git a/test/pash_tests/genomics.sh b/test/pash_tests/genomics.sh new file mode 100755 index 0000000..71833c3 --- /dev/null +++ b/test/pash_tests/genomics.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# create bam files with regions +################### 1KG SAMPLES +IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input} +SAMTOOLS_BIN=${IN}/deps/samtools-1.7/samtools +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/bio} +LOGS=${OUT}/logs +IN_NAME=${IN}/bio/100G.txt +GENE_LOCS=${IN}/bio/Gene_locs.txt +mkdir -p ${LOGS} +run_tests() { + s_line=$(echo $1 | tr '@' ' ') + pop=$(echo $s_line |cut -f 1 -d " "); 
+ sample=$(echo $s_line |cut -d " " -f 2); + link=$(echo $s_line |cut -f 3 -d " "); + ### correcting labeling of chromosomes so that all are 1,2,3.. instead of chr1,chr2 or chromosome1 etc + echo 'Processing Sample '${IN}/bio/$sample' '; + # uniform the chromosomes in the file due to inconsistencies + $SAMTOOLS_BIN view -H "${IN}/bio/$sample".bam | sed -e 's/SN:\([0-9XY]\)/SN:chr\1/' -e 's/SN:MT/SN:chrM/' \ + | $SAMTOOLS_BIN reheader - "${IN}/bio/$sample".bam > "${OUT}/$sample"_corrected.bam 2> /dev/null + # create bai file + $SAMTOOLS_BIN index -b "${OUT}/$sample"_corrected.bam 2> /dev/null + ### Isolating each relevant chromosome based on Gen_locs + cut -f 2 ${IN}/bio/Gene_locs.txt |sort |uniq |while read chr; + do + echo 'Isolating Chromosome '$chr' from sample '${OUT}/$sample', '; + $SAMTOOLS_BIN view -b "${OUT}/$sample"_corrected.bam chr"$chr" > "${OUT}/$pop"_"$sample"_"$chr".bam 2> /dev/null + echo 'Indexing Sample '$pop'_'${OUT}/$sample' '; + $SAMTOOLS_BIN index -b "${OUT}/$pop"_"$sample"_"$chr".bam 2> /dev/null + done; +} + +export -f run_tests +data=$(cat ${IN_NAME} | tr ' ' '@') +pkg_count=0 +for item in $data; +do + pkg_count=$((pkg_count + 1)); + run_tests $item > "${LOGS}"/"${pkg_count}.log" +done + +echo 'done'; diff --git a/test/pash_tests/genquality.sh b/test/pash_tests/genquality.sh new file mode 100755 index 0000000..64c777f --- /dev/null +++ b/test/pash_tests/genquality.sh @@ -0,0 +1,75 @@ +#!/bin/bash + +# Identify the top 10 reasons why genome assemblies don't make it into GenBank +# -- NIH's genetic sequence database, an annotated collection of all publicly +# available DNA sequences +# http://thegenomefactory.blogspot.com/2019/09/25-reasons-assemblies-dont-make-it-into.html + +# Require: csvkit +# Data: http://ndr.md/data/bio/genbank.txt + +IN=./input/genbank.txt +OUT=./output/out.txt + +cat $IN | + csvcut -t -K 1 -c 'excluded_from_refseq' | + tail -n +2 | tr ";" "\n" | + sed -e 's/^ //' -e 's/ $//' | + grep -v '""' | + sort | + uniq -c | + 
sort -nr | + head -n 10 | + nl > $OUT + +# More bio pipelines for +# # Strains with Complete Genome +# cat assembly_summary.tsv \ +# | csvtk grep -t -f assembly_level -i -p "Complete Genome" \ +# | wc -l +# +# # Most sequenced species with Complete Genome +# cat assembly_summary.tsv \ +# | csvtk grep -t -f assembly_level -i -p "Complete Genome" \ +# | csvtk cut -t -f organism_name \ +# | cut -d ' ' -f 1,2 \ +# | csvtk freq -t -n -r | head -n 20 | csvtk pretty -t +# +# # Number of species, by organism name +# +# # Filter by species (organism_name) +# cat assembly_summary.tsv \ +# | csvtk grep -t -f organism_name -i -r -p "Mycobacterium tuberculosis" \ +# | csvtk grep -t -f assembly_level -i -p "Complete Genome" \ +# > mt.tsv +# +# # Filter (complete genome) by species_taxid +# cat assembly_summary.tsv \ +# | csvtk grep -t -f species_taxid -p 239935,1280 \ +# | csvtk grep -t -f assembly_level -i -p "Complete Genome" \ +# > bytaxid.tsv +# +# # Download genome sequence and annotation files +# cat mt.tsv | csvtk cut -t -f ftp_path | sed 1d \ +# | rush -v prefix='{}/{%}' \ +# ' \ +# wget -c {prefix}_genomic.fna.gz; \ +# wget -c {prefix}_genomic.gbff.gz; \ +# wget -c {prefix}_genomic.gff.gz; \ +# wget -c {prefix}_cds_from_genomic.fna.gz \ +# wget -c {prefix}_protein.faa.gz; \ +# ' \ +# -j 10 -c -C download.rush +# +# #Get GenBank assembly summary file +# wget ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/assembly_summary_genbank.txt +# +# #Get all lines that have "Mycobacter", if 12th field is "Complete Genome", print the 20th field (url to file). +# #But the actual filename ends _genomic.fna.gz so include that too.. 
+# grep Mycobacter assembly_summary_genbank.txt \ +# | awk 'BEGIN{FS="\t"}{if($12=="Complete Genome"){print $20}}' \ +# | awk 'BEGIN{OFS=FS="/"}{print $0,$NF"_genomic.fna.gz"}' \ +# > urls.txt +# +# #Now you can go through your urls file +# IFS=$'\n'; for NEXT in $(cat urls.txt); do wget "$NEXT"; done diff --git a/test/pash_tests/get-summary.sh b/test/pash_tests/get-summary.sh new file mode 100755 index 0000000..2d24856 --- /dev/null +++ b/test/pash_tests/get-summary.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +echo 'GNU Coreutils ('$(cat coreutils-summary.txt | wc -l | awk '{$1=$1};1') 'commands):' +echo ' S:' $(cat coreutils-summary.txt | grep ' S ' | wc -l) +echo ' P:' $(cat coreutils-summary.txt | grep ' P ' | wc -l) +echo ' N:' $(cat coreutils-summary.txt | grep ' N ' | wc -l) +echo ' E:' $(cat coreutils-summary.txt | grep ' E ' | wc -l) + +echo 'POSIX ('$(( $(cat posix-summary.txt | wc -l | awk '{$1=$1};1') + $(cat ../c_stats/posix.txt | grep -v Mandatory | wc -l) )) 'commands):' +echo ' S:' $(cat posix-summary.txt | grep ' S ' | wc -l) +echo ' P:' $(cat posix-summary.txt | grep ' P ' | wc -l) +echo ' N:' $(cat posix-summary.txt | grep ' N ' | wc -l) +echo ' E:' $(( $(cat posix-summary.txt | grep ' E ' | wc -l) + $(cat ../c_stats/posix.txt | grep -v Mandatory | wc -l) )) + diff --git a/test/pash_tests/get_hash.sh b/test/pash_tests/get_hash.sh new file mode 100644 index 0000000..1ecbf21 --- /dev/null +++ b/test/pash_tests/get_hash.sh @@ -0,0 +1,2 @@ +# calculate a hash? can we change it to calculate hashes for all the files? 
+head -c32 /dev/urandom | openssl dgst -sha256 -binary -hmac $(xxd -p -l32 -c32 /dev/urandom) | base64 | cut -b-32 diff --git a/test/pash_tests/get_results.sh b/test/pash_tests/get_results.sh new file mode 100755 index 0000000..591bbd9 --- /dev/null +++ b/test/pash_tests/get_results.sh @@ -0,0 +1,38 @@ +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} +rm -rf log_results +mkdir log_results + +stats() ( + test_results_dir=$1 + grep "are identical" "$test_results_dir"/result_status | + sed "s,^$PASH_TOP/,," > log_results/$2_passed.log + cat log_results/$2_passed.log >> log_results/passed.log + grep "are not identical" "$test_results_dir"/result_status | + sed "s,^$PASH_TOP/,," > log_results/$2_failed.log + # if the file has data, append it + if [ -s log_results/$2_failed.log ] + then + cat log_results/$2_failed.log >> log_results/failed.log + else + # remove since it's empty + rm log_results/$2_failed.log + fi + TOTAL_TESTS=$(cat "$test_results_dir"/result_status | wc -l) + PASSED_TESTS=$(grep "are identical" "$test_results_dir"/result_status | wc -l) + echo "$2: ${PASSED_TESTS}/${TOTAL_TESTS} tests passed." 
+) + +echo "Below follow the identical outputs:" > log_results/passed.log +echo "Below follow the non-identical outputs:" > log_results/failed.log +# +## intro tests +stats "$PASH_TOP/evaluation/intro/output" intro +# +## Interface Tests +stats "$PASH_TOP/evaluation/tests/interface_tests/output" interface +# +## compiler Tests +stats "${PASH_TOP}/evaluation/tests/results" compiler +# +## aggregator tests +stats "${PASH_TOP}/evaluation/tests/agg/output" agg diff --git a/test/pash_tests/get_type_count.sh b/test/pash_tests/get_type_count.sh new file mode 100644 index 0000000..54899fe --- /dev/null +++ b/test/pash_tests/get_type_count.sh @@ -0,0 +1,4 @@ +# count how many times each file type exist in a directory +INPUT=${INPUT:-$PASH_TOP/evaluation/aliases/input/} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/aliases/output} +find $INPUT -type f | while read f; do echo ""${f##*.}""; done | sed ""/^\s*$/d"" | sort | uniq -c | sort -rn > $OUTPUT/get_type_count_res diff --git a/test/pash_tests/grab_submissions.sh b/test/pash_tests/grab_submissions.sh new file mode 100755 index 0000000..3d2370c --- /dev/null +++ b/test/pash_tests/grab_submissions.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +if [ "$#" != "1" ]; then + echo "Usage: $0 [hwXX]" + exit 1 +fi + +if [ -d "$1" ]; then + echo "Grading directory already exists" + exit 2 +fi + +mkdir $1 +mkdir $1/submissions +cp ../dropbox/$1/* $1/submissions diff --git a/test/pash_tests/grade.sh b/test/pash_tests/grade.sh new file mode 100755 index 0000000..a0f2ad2 --- /dev/null +++ b/test/pash_tests/grade.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +score=0 +total=0 + +if [ -d output ]; then + echo "output directory already exists, aborting" + exit 1 +fi + +mkdir output + +echo "LEXER/PARSER AUTOGRADER RESULTS" +echo + +# check success cases +for i in right/*.lc; do + file=$(basename $i) + output=$(mktemp output/$file.XXXX) + + echo -n "$file: " + + ./Main $i >$output 2>&1 + if [ $? 
-eq 0 ] + then + let score+=1 + echo "1/1" + else + echo "0/1" + fi + + let total+=1 +done + +# check failure cases +for i in wrong/*.lc; do + file=$(basename $i) + output=$(mktemp output/$file.XXXX) + + echo -n "$file: " + + ./Main $i >$output 2>&1 + if [ $? -eq 1 ] + then + let score+=1 + echo "1/1" + else + echo "0/1" + fi + + let total+=1 +done + +echo +echo "TOTAL: $score / $total" +echo +echo "PROBLEM 1: XXX / 5" +echo +let total=total+5 +echo "FINAL GRADE: $score + XXX / $total" diff --git a/test/pash_tests/grep-test.sh b/test/pash_tests/grep-test.sh new file mode 100644 index 0000000..34efeae --- /dev/null +++ b/test/pash_tests/grep-test.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +## This test contains all occurences of tr (to test the annotation) + +FILE="$PASH_TOP/evaluation/tests/input/1M.txt" + +cat $FILE | grep "the" +cat $FILE | grep -c "the" diff --git a/test/pash_tests/grep.sh b/test/pash_tests/grep.sh new file mode 100755 index 0000000..5699e84 --- /dev/null +++ b/test/pash_tests/grep.sh @@ -0,0 +1,2 @@ +#!/bin/bash +cat $IN | grep 'the' diff --git a/test/pash_tests/grep_env_test.sh b/test/pash_tests/grep_env_test.sh new file mode 100755 index 0000000..805e069 --- /dev/null +++ b/test/pash_tests/grep_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt diff --git a/test/pash_tests/grep_f_script.sh b/test/pash_tests/grep_f_script.sh new file mode 100755 index 0000000..2a29820 --- /dev/null +++ b/test/pash_tests/grep_f_script.sh @@ -0,0 +1,28 @@ +mkfifo s1 s2 s3 s4 s5 + +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +IN="$PASH_TOP/evaluation/tests/input/1M.txt" + +sorted_in="/tmp/sorted.in" + +sort $IN > $sorted_in + +echo " king" | tee s4 >s3 & +grep -vx -f s3 - > s1 < $sorted_in & +grep -vx -f s4 - > s2 < $sorted_in & +## The eager is essential here or after tee to ensure non-deadlocks +{ "$PASH_TOP/runtime/eager.sh" s2 s5 "/tmp/eager_intermediate_#file1" & } +cat s1 s5 > grep-f.out + 
+echo " king" | tee s4 >s3 & +comm -13 s3 - > s1 < $sorted_in & +comm -13 s4 - > s2 < $sorted_in & +## The eager is essential here or after tee to ensure non-deadlocks +{ "$PASH_TOP/runtime/eager.sh" s2 s5 "/tmp/eager_intermediate_#file1" & } +cat s1 s5 > comm.out + +rm s1 s2 s3 s4 s5 + +diff grep-f.out comm.out + diff --git a/test/pash_tests/head.sh b/test/pash_tests/head.sh new file mode 100755 index 0000000..6f217eb --- /dev/null +++ b/test/pash_tests/head.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +# FIXME missing head parameters + +cat "${1}" diff --git a/test/pash_tests/head_deadlock.sh b/test/pash_tests/head_deadlock.sh new file mode 100755 index 0000000..a42fe20 --- /dev/null +++ b/test/pash_tests/head_deadlock.sh @@ -0,0 +1,9 @@ +mkfifo s1 s2 + +cat ../evaluation/scripts/input/1M.txt > s1 & +cat ../evaluation/scripts/input/1M.txt > s2 & +cat s1 s2 | head -n 1 & + +wait + +rm s1 s2 diff --git a/test/pash_tests/head_deadlock_fixed.sh b/test/pash_tests/head_deadlock_fixed.sh new file mode 100755 index 0000000..05a74de --- /dev/null +++ b/test/pash_tests/head_deadlock_fixed.sh @@ -0,0 +1,9 @@ +mkfifo s1 s2 + +cat ../evaluation/scripts/input/1M.txt > s1 & +cat ../evaluation/scripts/input/1M.txt > s2 & +cat s1 s2 | (head -n 1; ../evaluation/tools/drain_stream.sh) & + +wait + +rm s1 s2 diff --git a/test/pash_tests/head_deadlock_fixed3.sh b/test/pash_tests/head_deadlock_fixed3.sh new file mode 100755 index 0000000..e914c18 --- /dev/null +++ b/test/pash_tests/head_deadlock_fixed3.sh @@ -0,0 +1,39 @@ +mkfifo s1 s2 + +## This way of fixing the problem suffers from some issues. +## +## - First of all, gathering the children after the end of the graph +## seems to gather more than just the alive nodes. This could lead +## to killing some random pid in the system. This could potentially +## be solved by gathering all pids incrementally. +## +## - In addition, this way of getting the last pid does not work if +## there is more than one output. 
(This is never the case in our +## tests, but could be. +## +## - Finally, it is not local, since all of the monitoring happens +## globally. Ideally, it should be done by a wrapper in each - +## node. The wrapper should monitor if the node dies, and if so it - +## should send SIGPIPE to all its producers. + +cat ../evaluation/scripts/input/1M.txt > s1 & +echo "Current node: $!" +cat ../evaluation/scripts/input/1M.txt > s2 & +echo "Current node: $!" +cat s1 s2 | head -n 1 & + +last=$! + +echo "Children pids" +ps --ppid $$ | awk '{print $1}' | grep -E '[0-9]' + +echo "Alternative children pids" +jobs -l | awk '{print $1}' + +wait $last + +echo "Last pid: $last" + +ps --ppid $$ | awk '{print $1}' | grep -E '[0-9]' | xargs -n 1 kill -SIGPIPE + +rm s1 s2 diff --git a/test/pash_tests/head_deadlock_fixed_2.sh b/test/pash_tests/head_deadlock_fixed_2.sh new file mode 100755 index 0000000..fa66f85 --- /dev/null +++ b/test/pash_tests/head_deadlock_fixed_2.sh @@ -0,0 +1,9 @@ +mkfifo s1 s2 + +cat ../evaluation/scripts/input/1M.txt > s1 & +cat ../evaluation/scripts/input/1M.txt > s2 & +(cat s1 s2; head -n 1 s2 > /dev/null) | head -n 1 & + +wait + +rm s1 s2 diff --git a/test/pash_tests/hello-world.sh b/test/pash_tests/hello-world.sh new file mode 100755 index 0000000..21498d3 --- /dev/null +++ b/test/pash_tests/hello-world.sh @@ -0,0 +1,8 @@ +[ $(uname) = 'Darwin' ] && a=/usr/share/dict/web2 || a=/usr/share/dict/words + +if [ -f $a ]; then + cat $a $a $a $a $a $a $a $a | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' | wc -l +else + echo "Dictionary file $a not found.." 
+fi + diff --git a/test/pash_tests/heredoc1.sh b/test/pash_tests/heredoc1.sh new file mode 100644 index 0000000..fe5ecb3 --- /dev/null +++ b/test/pash_tests/heredoc1.sh @@ -0,0 +1,3 @@ +cat << foo +line one +foo \ No newline at end of file diff --git a/test/pash_tests/identity.sh b/test/pash_tests/identity.sh new file mode 100755 index 0000000..5e61350 --- /dev/null +++ b/test/pash_tests/identity.sh @@ -0,0 +1,36 @@ +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out + + +batchSize=10000 +testFile="/home/ubuntu/pash/evaluation/scripts/input/100M.txt" +if ![ $1 -eq 0 ]; then + testFile=@1 +fi +if ![ $2 -eq 0 ]; then + testFile=@2 +fi + + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 + +../r_split $testFile $batchSize $file1 $file2 & + +../r_wrap cat < $file1 > $file3 & +../r_wrap cat < $file2 > $file4 & + +../r_merge $file3 $file4 > $file5 + +if cmp -s "$testFile" "$file5"; then + printf 'The file "%s" is the same as "%s"\n' "$testFile" "$file5" +else + printf 'The file "%s" is different from "%s"\n' "$testFile" "$file5" +fi + +rm -rf *.out \ No newline at end of file diff --git a/test/pash_tests/img_convert.sh b/test/pash_tests/img_convert.sh new file mode 100755 index 0000000..ae908f4 --- /dev/null +++ b/test/pash_tests/img_convert.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# tag: resize image +IN=${JPG:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/jpg} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/jpg} +mkdir -p ${OUT} +for i in $IN/*.jpg; +do + out=$OUT/$(basename -- $i) + convert -resize 70% "$i" "$out"; +done + +echo 'done'; diff --git a/test/pash_tests/incr.sh b/test/pash_tests/incr.sh new file mode 100755 index 0000000..c2654e6 --- /dev/null +++ b/test/pash_tests/incr.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# https://unix.stackexchange.com/questions/193441/how-can-i-implement-a-circular-flow-of-data-among-interconnected-commands +F="temp.txt" +[ -f $F ] && (rm $F && echo 1 >$F ) +tail -f $F | while 
read n; do echo $((n+1)); sleep 1; done | tee -a $F diff --git a/test/pash_tests/innefficient_auto_split.sh b/test/pash_tests/innefficient_auto_split.sh new file mode 100755 index 0000000..f806c7d --- /dev/null +++ b/test/pash_tests/innefficient_auto_split.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +## Running it with PaSh: +## time ./pa.sh -w 4 -d 1 --output_time evaluation/scripts/innefficient_auto_split.sh +## +## is slower than running it with bash: +## time ./evaluation/scripts/innefficient_auto_split.sh +## +## because the script doesn't do a lot of processing so + +FILE="$PASH_TOP/evaluation/scripts/input/1G.txt" +cat $FILE | sed 1d | grep 'Bell' | cut -f 2 | wc -l + +## If instead we run the following, we get the expected results +# cat $FILE $FILE | grep 'Bell' | cut -f 2 | wc -l diff --git a/test/pash_tests/install-deps.sh b/test/pash_tests/install-deps.sh new file mode 100755 index 0000000..dc1a9a7 --- /dev/null +++ b/test/pash_tests/install-deps.sh @@ -0,0 +1,6 @@ +# install dependencies +pkgs='ffmpeg unrtf imagemagick' +if ! 
dpkg -s $pkgs >/dev/null 2>&1; then + sudo apt-get install $pkgs -y +fi + diff --git a/test/pash_tests/ldconfig.sh b/test/pash_tests/ldconfig.sh new file mode 100755 index 0000000..334ac7b --- /dev/null +++ b/test/pash_tests/ldconfig.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +set -e + +cd _build/lib + +trylink() { + [ -f "$2" ] || ln -sf $1 $2 +} + +trylink dlldash.so.0.0.0 dlldash.so +trylink dlldash.so.0.0.0 dlldash.so.0 + +trylink libdash.so.0.0.0 libdash.so +trylink libdash.so.0.0.0 libdash.so.0 + diff --git a/test/pash_tests/longest-man.sh b/test/pash_tests/longest-man.sh new file mode 100755 index 0000000..6adc986 --- /dev/null +++ b/test/pash_tests/longest-man.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +# Find the 10 largest man pages + +find /usr/share/man -type f | xargs du -scb | sort -rn | head -n 10 diff --git a/test/pash_tests/loop1.sh b/test/pash_tests/loop1.sh new file mode 100644 index 0000000..6e8693b --- /dev/null +++ b/test/pash_tests/loop1.sh @@ -0,0 +1,3 @@ +for idFor1 in A B ; do + echo $idFor1 +done diff --git a/test/pash_tests/make-ec2.sh b/test/pash_tests/make-ec2.sh new file mode 100755 index 0000000..f1b59bc --- /dev/null +++ b/test/pash_tests/make-ec2.sh @@ -0,0 +1,21 @@ +#! 
/bin/bash + +# Pair with ./suggest-ec2.sh + +main() { + set -x + aws ec2 run-instances \ + --output text \ + --query "Instances[0].InstanceId" \ + --image-id "$PASH_AWS_EC2_AMI" \ + --instance-type "$PASH_AWS_EC2_INSTANCE_TYPE" \ + --key-name "$PASH_AWS_EC2_KEY_NAME" \ + --security-group-ids "$PASH_AWS_EC2_SECURITY_GROUP" \ + --monitoring "Enabled=false" \ + --subnet-id "$PASH_AWS_EC2_SUBNET" \ + --query 'Instances[0].InstanceId' \ + --block-device-mappings "DeviceName=/dev/sda1,Ebs={VolumeSize=$PASH_AWS_EC2_DISK_SIZE_GB}" \ + --output text +} + +main diff --git a/test/pash_tests/max-temp-preprocess.sh b/test/pash_tests/max-temp-preprocess.sh new file mode 100755 index 0000000..e3d4b98 --- /dev/null +++ b/test/pash_tests/max-temp-preprocess.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +sed 's;^;http://ndr.md/data/noaa/;' | + sed 's;$;/;' | + xargs -r -n 1 curl -s | + grep gz | + tr -s ' \n' | + cut -d ' ' -f9 | + sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' | + sed 's;^;http://ndr.md/data/noaa/;' | + xargs -n1 curl -s | + gunzip diff --git a/test/pash_tests/max-temp-process.sh b/test/pash_tests/max-temp-process.sh new file mode 100755 index 0000000..510bb1d --- /dev/null +++ b/test/pash_tests/max-temp-process.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +## Processing +cat $IN | + cut -c 89-92 | + grep -v 999 | + sort -rn | + head -n1 diff --git a/test/pash_tests/max-temp.sh b/test/pash_tests/max-temp.sh new file mode 100755 index 0000000..b0c18aa --- /dev/null +++ b/test/pash_tests/max-temp.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +FROM=${FROM:-2015} +TO=${TO:-2015} +IN=${IN:-'http://ndr.md/data/noaa/'} +fetch=${fetch:-"curl -s"} + +seq $FROM $TO | + sed "s;^;$IN;" | + sed 's;$;/;' | + xargs -r -n 1 $fetch | + grep gz | + tr -s ' \n' | + cut -d ' ' -f9 | + sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' | + sed "s;^;$IN;" | + xargs -n1 curl -s | + gunzip | + ## Processing + cut -c 89-92 | + grep -v 999 | + sort -rn | + head -n1 diff --git a/test/pash_tests/merge-uniq.sh 
b/test/pash_tests/merge-uniq.sh new file mode 100755 index 0000000..1e53c00 --- /dev/null +++ b/test/pash_tests/merge-uniq.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +# This is how to merge results of `uniq -c`, contained in {1,2,3}.txt +# I am using 3 inputs to stress it works with more than just pairs:-) + +A=${1:-1.txt} +B=${1:-2.txt} +C=${1:-3.txt} +awk '{ count[$2] += $1 } END { for(e in count) print count[e], e }' "$A" "$B" "$C" diff --git a/test/pash_tests/merge-wc.sh b/test/pash_tests/merge-wc.sh new file mode 100755 index 0000000..1ce6779 --- /dev/null +++ b/test/pash_tests/merge-wc.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +A="paste -d '+' " +for i in "$@"; do + # cat "$i" | tr -s ' ' '\n' | tail -n +2 + A="$A <(cat $i | tr -s ' ' '\n' | tail -n +2) " +done +A="$A | head -n +3 | bc | tr -s '\n' ' ' | sed 's/$/\ /'" + +eval $A + diff --git a/test/pash_tests/micro_10.sh b/test/pash_tests/micro_10.sh new file mode 100644 index 0000000..8d70d87 --- /dev/null +++ b/test/pash_tests/micro_10.sh @@ -0,0 +1,11 @@ +cat $IN | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " diff --git a/test/pash_tests/micro_1000.sh b/test/pash_tests/micro_1000.sh new file mode 100644 index 0000000..7278a5a --- /dev/null +++ b/test/pash_tests/micro_1000.sh @@ -0,0 +1,1002 @@ +cat $IN | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | 
+tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " 
| +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " 
" | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " 
" " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " 
" " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr 
" " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | 
+tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " diff --git a/test/pash_tests/micro_1000_env_test.sh b/test/pash_tests/micro_1000_env_test.sh new file mode 100644 index 0000000..b648f0f --- /dev/null +++ b/test/pash_tests/micro_1000_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/1M.txt diff --git a/test/pash_tests/micro_10_env_test.sh b/test/pash_tests/micro_10_env_test.sh new file mode 100644 index 0000000..b648f0f --- /dev/null +++ b/test/pash_tests/micro_10_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/1M.txt diff --git a/test/pash_tests/minimal_grep.sh b/test/pash_tests/minimal_grep.sh new file mode 100644 index 
0000000..2a65106 --- /dev/null +++ b/test/pash_tests/minimal_grep.sh @@ -0,0 +1,54 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out +file9=9.out +rm -f *.out + +testFile="$PASH_TOP/evaluation/scripts/input/10M.txt" +batchSize=100000 +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 + +# mkfifo $file7 +# mkfifo $file8 +# mkfifo $file9 + + +$PASH_TOP/runtime/r_split $testFile $batchSize $file1 $file2 & + + +$PASH_TOP/runtime/dgsh-tee -I -i $file1 -o $file5 -b 10M & +$PASH_TOP/runtime/dgsh-tee -I -i $file2 -o $file6 -b 10M & + +$PASH_TOP/runtime/r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file5 > $file3 & +$PASH_TOP/runtime/r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file6 > $file4 & +# ../r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file7 > $file8 & + +$PASH_TOP/runtime/r_merge $file3 $file4 + +# cat $testFile | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > $file6 +# if cmp -s "$file6" "$file5"; then +# printf 'The file "%s" is the same as "%s"\n' "$file6" "$file5" +# else +# printf 'The file "%s" is different from "%s"\n' "$file6" "$file5" +# fi + +rm -rf *out diff --git a/test/pash_tests/minimal_grep_env_test.sh b/test/pash_tests/minimal_grep_env_test.sh new file mode 100755 index 0000000..b648f0f --- /dev/null +++ b/test/pash_tests/minimal_grep_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/1M.txt diff --git a/test/pash_tests/minimal_grep_stdin.sh b/test/pash_tests/minimal_grep_stdin.sh new file mode 100644 index 0000000..7dee616 --- /dev/null +++ b/test/pash_tests/minimal_grep_stdin.sh @@ -0,0 +1 @@ +tr A-Z a-z | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' diff --git a/test/pash_tests/minimal_sort.sh b/test/pash_tests/minimal_sort.sh new file mode 100644 index 0000000..1f4cb65 --- /dev/null +++ 
b/test/pash_tests/minimal_sort.sh @@ -0,0 +1,2 @@ +#!/bin/bash +cat $IN | tr A-Z a-z | sort diff --git a/test/pash_tests/minimal_sort_env_test.sh b/test/pash_tests/minimal_sort_env_test.sh new file mode 100755 index 0000000..805e069 --- /dev/null +++ b/test/pash_tests/minimal_sort_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt diff --git a/test/pash_tests/mk_dot_install.sh b/test/pash_tests/mk_dot_install.sh new file mode 100755 index 0000000..0ac9473 --- /dev/null +++ b/test/pash_tests/mk_dot_install.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +set -e + +libdash_files=$(ls _build/lib) +bindings_files="META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx" + +files= +for f in ${libdash_files} +do + files="${files} \"_build/lib/${f}\"" +done + +for f in ${bindings_files} +do + files="${files} \"ocaml/${f}\"" +done + +cat >libdash.install < sh_352.18tmp && echo sh_352.18 line 2 >> sh_352.18tmp && cat sh_352.18tmp ); echo "$x" \ No newline at end of file diff --git a/test/pash_tests/nfa-regex.sh b/test/pash_tests/nfa-regex.sh new file mode 100755 index 0000000..6431aa5 --- /dev/null +++ b/test/pash_tests/nfa-regex.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Match complex regular-expression over input + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} + +cat $IN | tr A-Z a-z | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' diff --git a/test/pash_tests/nginx.sh b/test/pash_tests/nginx.sh new file mode 100755 index 0000000..2a4e68e --- /dev/null +++ b/test/pash_tests/nginx.sh @@ -0,0 +1,22 @@ +############################### +### awk not working on pash ### +############################### +# sort by reponse codes +#pash 36 sec, bash 7 sec +INPUT=${PASH_TOP}/evaluation/scripts/input/access.log +cat ${INPUT} | cut -d "\"" -f3 | cut -d ' ' -f2 | sort | uniq -c | sort -rn > /dev/null +# awk alternative, too slow +awk '{print $9}' ${INPUT} | sort | uniq -c | sort -rn > /dev/null +# find broken links broken links 
+awk '($9 ~ /404/)' ${INPUT} | awk '{print $7}' | sort | uniq -c | sort -rn > /dev/null +# for 502 (bad-gateway) we can run following command: +awk '($9 ~ /502/)' ${INPUT} | awk '{print $7}' | sort | uniq -c | sort -r > /dev/null +# Who are requesting broken links (or URLs resulting in 502) +awk -F\" '($2 ~ "/wp-admin/install.php"){print $1}' ${INPUT} | awk '{print $1}' | sort | uniq -c | sort -r > /dev/null +# 404 for php files -mostly hacking attempts +awk '($9 ~ /404/)' ${INPUT} | awk -F\" '($2 ~ "^GET .*\.php")' | awk '{print $7}' | sort | uniq -c | sort -r | head -n 20 > /dev/null +############################## +# Most requested URLs ######## +awk -F\" '{print $2}' ${INPUT} | awk '{print $2}' | sort | uniq -c | sort -r > /dev/null +# Most requested URLs containing XYZ +awk -F\" '($2 ~ "ref"){print $2}' ${INPUT} | awk '{print $2}' | sort | uniq -c | sort -r > /dev/null diff --git a/test/pash_tests/no_in_script.sh b/test/pash_tests/no_in_script.sh new file mode 100755 index 0000000..f1357a9 --- /dev/null +++ b/test/pash_tests/no_in_script.sh @@ -0,0 +1,2 @@ +N=100 +seq 1 $N | sort -rn diff --git a/test/pash_tests/p1.sh b/test/pash_tests/p1.sh new file mode 100644 index 0000000..73daeb3 --- /dev/null +++ b/test/pash_tests/p1.sh @@ -0,0 +1,16 @@ +#!/bin/bash +PROXY=$([ "$(hostname)" == "deathstar" ] && echo "gamma.ndr.md" || echo "localhost") +WIKI="$HOME/wikipedia/" +export WIKI +# Squash all HTML for each URL into a single line, streaming fashion +# It also prefixes with the URL + +page_per_line () { + cat "$WIKI/$0" | tr -d "\n\r" | tr -d '\n' | sed -e '/.$/a\' +} + +export -f page_per_line + +# xargs: +# add `-t` for debugging +cat $WIKI/index_h_100.txt | xargs -0 -d '\n' -n 1 bash -c 'page_per_line "$@"' diff --git a/test/pash_tests/p2.sh b/test/pash_tests/p2.sh new file mode 100644 index 0000000..3075b98 --- /dev/null +++ b/test/pash_tests/p2.sh @@ -0,0 +1,8 @@ + sed "s#^#$HOME/wikipedia/#" | + xargs cat | + iconv -c -t ascii//TRANSLIT | + pandoc +RTS -K64m 
-RTS --from html --to plain --quiet | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + grep -vwFf ../evaluation/scripts/web-index/stopwords.txt | + ../evaluation/scripts/web-index/stem-words.js diff --git a/test/pash_tests/pa.sh b/test/pash_tests/pa.sh new file mode 100755 index 0000000..d031d73 --- /dev/null +++ b/test/pash_tests/pa.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash + +export PASH_TOP=${PASH_TOP:-${BASH_SOURCE%/*}} +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/" +# point to the local downloaded folders +export PYTHONPATH=${PASH_TOP}/python_pkgs/ +## Register the signal handlers, we can add more signals here +trap kill_all SIGTERM SIGINT + +## kill all the pending processes that are spawned by this shell +kill_all() { + # kill all my subprocesses only + kill -s SIGKILL 0 + # kill pash_daemon + kill -s SIGKILL "$daemon_pid" +} +## Save the umask to first create some files and then revert it +old_umask=$(umask) + +## Restore the umask to create files etc +umask u=rwx,g=rx,o=rx + +if [ "$#" -eq 1 ] && [ "$1" = "--init" ]; then + "$PASH_TOP"/compiler/superoptimize.sh + exit +fi + +if ! 
command -v python3 &> /dev/null +then + echo "Python >=3 could not be found" + exit +fi + +## Create a temporary directory where PaSh can use for temporary files and logs +export PASH_TMP_PREFIX="$(mktemp -d /tmp/pash_XXXXXXX)/" + +## Create a timestamp that PaSh can use for log directories +## (should not be used to create critical directories/files, only logs/monitors/etc, +## all the cricial pash temp files should go in PASH_TMP_PREFIX) +export PASH_TIMESTAMP="$(date +"%y-%m-%d-%T")" + +## Create the input and output fifo that the runtime will use for communication +export RUNTIME_IN_FIFO="${PASH_TMP_PREFIX}/runtime_in_fifo" +export RUNTIME_OUT_FIFO="${PASH_TMP_PREFIX}/runtime_out_fifo" +## TODO: Get rid of these two commands if possible +rm -f "$RUNTIME_IN_FIFO" "$RUNTIME_OUT_FIFO" +mkfifo "$RUNTIME_IN_FIFO" "$RUNTIME_OUT_FIFO" +export DAEMON_SOCKET="${PASH_TMP_PREFIX}/daemon_socket" +export DSPASH_SOCKET="${PASH_TMP_PREFIX}/dspash_socket" + +## Initialize all things necessary for pash to execute (logging/functions/etc) +source "$PASH_TOP/compiler/pash_init_setup.sh" "$@" + +if [ "$pash_daemon" -eq 1 ] && [ "$show_version" -eq 0 ]; then + ## TODO: If possible, move the daemon start as easly as possible to reduce waiting + python3 -S "$PASH_TOP/compiler/pash_runtime_daemon.py" "$@" & + daemon_pid=$! + ## Wait until daemon has established connection + ## + ## TODO: Can we get rid of the `sleep` in this wait? + pash_wait_until_daemon_listening +fi + +## Restore the umask before executing +umask "$old_umask" +PASH_FROM_SH="pa.sh" python3 -S "$PASH_TOP/compiler/pash.py" "$@" +pash_exit_code=$? 
+if [ "$pash_daemon" -eq 1 ] && [ "$show_version" -eq 0 ]; then + ## Only wait for daemon if it lives (it might be dead, rip) + if ps -p "$daemon_pid" > /dev/null + then + ## Send and receive from daemon + msg="Done" + daemon_response=$(pash_communicate_daemon "$msg") + if [ "$distributed_exec" -eq 1 ]; then + # kill $worker_manager_pid + manager_response=$(pash_communicate_worker_manager "$msg") + fi + wait 2> /dev/null 1>&2 + fi +fi + + + +## Don't delete the temporary directory if we are debugging +if [ "$PASH_DEBUG_LEVEL" -eq 0 ]; then + rm -rf "${PASH_TMP_PREFIX}" +fi + +(exit "$pash_exit_code") diff --git a/test/pash_tests/pacaur.sh b/test/pash_tests/pacaur.sh new file mode 100755 index 0000000..5cde8dd --- /dev/null +++ b/test/pash_tests/pacaur.sh @@ -0,0 +1,40 @@ +#!/bin/bash +IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/packages} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/packages} +LOGS=${OUT}/logs +mkdir -p ${OUT} ${LOGS} + +info() { echo -e "\e[1m--> $@\e[0m"; } +mkcd() { mkdir -p "$1" && cd "$1"; } + +# check if not running as root +# test "$UID" -gt 0 || { info "don't run this as root!"; exit; } + +# set link to plaintext PKGBUILDs +pkgbuild="https://aur.archlinux.org/cgit/aur.git/plain/PKGBUILD?h" + +run_tests() { + pgk=$1 + info "create subdirectory for $pkg" + mkcd "${OUT}/$pkg" + + info "fetch PKGBUILD for $pkg" + curl --insecure -o PKGBUILD "$pkgbuild=$pkg" 2> /dev/null|| echo ' ' + + #info "fetch required pgp keys from PKGBUILD" + #gpg --recv-keys $(sed -n "s:^validpgpkeys=('\([0-9A-Fa-fx]\+\)').*$:\1:p" PKGBUILD) + info "make and install ..." 
+ timeout 100 makedeb-makepkg --format-makedeb -d 2>/dev/null|| echo 'failed' + cd - +} + +export -f run_tests +pkg_count=0 +# loop over required packages +for pkg in $(cat ${IN} | tr '\n' ' ' ); +do + pkg_count=$((pkg_count + 1)) + run_tests $pkg > "${LOGS}"/"$pkg_count.log" +done + +echo 'done'; diff --git a/test/pash_tests/page-count.sh b/test/pash_tests/page-count.sh new file mode 100755 index 0000000..b4a3326 --- /dev/null +++ b/test/pash_tests/page-count.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# A bash script for determining how many pages are in a folder of OpenOffice documents +# From "Wicked Cool Shell Scripts", 2nd Ed., pg. 7 + +# Require: libimage-exiftool-perl, bc +# Data: +# http://ndr.md/data/dummy/large.pdf +# More data: +# https://arxiv.org/help/bulk_data + +IN=./input/large.pdf +OUT=./output/out.txt + +echo "$(exiftool $IN | + grep Page-count | + cut -d ":" -f2 | + tr '\n' '+')""0" | + bc | + sed 's/^/\n/' > $OUT diff --git a/test/pash_tests/page-per-line.sh b/test/pash_tests/page-per-line.sh new file mode 100755 index 0000000..4a0c10b --- /dev/null +++ b/test/pash_tests/page-per-line.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +# Squash all HTML for each URL into a single line, streaming fashion +# It also prefixes with the URL + +page_per_line () { + curl -s "$1" | tr -d "\n\r" | tr -d '\n' | sed "s/^/$0 /" | sed -e '/.$/a\' +} + +export -f page_per_line + +# xargs: +# add `-t` for debugging +cat ./urls.txt | xargs -0 -d '\n' -n 1 bash -c 'page_per_line "$@"' _ diff --git a/test/pash_tests/parse.sh b/test/pash_tests/parse.sh new file mode 100755 index 0000000..48ab720 --- /dev/null +++ b/test/pash_tests/parse.sh @@ -0,0 +1,17 @@ +bash ./get_results.sh > out +mv out log_results +cat log_results/out +while read p; do + PASSED=$(echo $p | awk -F'[^0-9]+' '{ print $2 }') + TOTAL=$(echo $p | awk -F'[^0-9]+' '{ print $3 }') + FAILED=$((passed - failed)) + # failed, print to stdout + if [ $PASSED -ne $TOTAL ]; then + # get the benchmark name + f=${p%% *} + # 
strip the : + f="${f%?}" + # dump the failed tests + cat log_results/${f}_failed.log + fi +done < log_results/out diff --git a/test/pash_tests/pash_declare_vars.sh b/test/pash_tests/pash_declare_vars.sh new file mode 100644 index 0000000..9b1290f --- /dev/null +++ b/test/pash_tests/pash_declare_vars.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +vars_file="${1?File not given}" + +pash_redir_output echo "Writing vars to: $vars_file" + +declare -p > "$vars_file" +## KK 2021-11-23 We don't actually need to export functions in the vars file. +## We never expand them in the compiler +## declare -f >> "$vars_file" diff --git a/test/pash_tests/pash_ptempfile_name.sh b/test/pash_tests/pash_ptempfile_name.sh new file mode 100755 index 0000000..ff815aa --- /dev/null +++ b/test/pash_tests/pash_ptempfile_name.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +distro=${1??Distro not given} +# echo "$PASH_TMP_PREFIX/pash_$RANDOM$RANDOM$RANDOM" +mktemp -u "$PASH_TMP_PREFIX/pash_XXXXXXXXXX" diff --git a/test/pash_tests/pash_runtime_complete_execution.sh b/test/pash_tests/pash_runtime_complete_execution.sh new file mode 100644 index 0000000..10cbad1 --- /dev/null +++ b/test/pash_tests/pash_runtime_complete_execution.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +## +## Completes execution by measuring and logging execution times and restoring state +## + +## +## (6) +## + +pash_exec_time_end=$(date +"%s%N") + +## TODO: Maybe remove the temp file after execution + +## We want the execution time in milliseconds +if [ "$pash_output_time_flag" -eq 1 ]; then + pash_exec_time_ms=$(echo "scale = 3; ($pash_exec_time_end-$pash_exec_time_start)/1000000" | bc) + pash_redir_output echo "Execution time: $pash_exec_time_ms ms" +fi + +## Source back the output variables of the compiled script. 
+## In all cases we should have executed a script +pash_redir_output echo "$$: (7) Recovering BaSh variables from: $pash_output_var_file" +source "$RUNTIME_DIR/pash_source_declare_vars.sh" "$pash_output_var_file" + +## Save the previous `set` state to a variable +pash_redir_output echo "$$: (7) Reading current BaSh set state from: ${pash_output_set_file}" + +pash_redir_output echo "$$: (7) Current BaSh set state: $(cat "$pash_output_set_file")" +## WARNING: This has to happen after sourcing the variables so that it overwrites it +pash_previous_set_status=$(cat "$pash_output_set_file") + +export pash_input_args +pash_redir_output echo "$$: (7) Arguments (might) have been updated to be: $pash_input_args" + +## Propagate the `set` state after running the script to the outer script +## TODO: Maybe move this to the end to avoid spurious failures +pash_redir_output echo "$$: (7) Current PaSh set state: $-" +source "$RUNTIME_DIR/pash_set_from_to.sh" "$-" "$(cat "$pash_output_set_file")" +pash_redir_output echo "$$: (7) Reverted to BaSh set state before exiting: $-" + +pash_redir_output echo "$$: (7) Reverting last BaSh exit code: $pash_runtime_final_status" +(exit "$pash_runtime_final_status") diff --git a/test/pash_tests/pash_runtime_shell_to_pash.sh b/test/pash_tests/pash_runtime_shell_to_pash.sh new file mode 100644 index 0000000..7780a9c --- /dev/null +++ b/test/pash_tests/pash_runtime_shell_to_pash.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +## +## This currently performs (5), i.e., reverting bash state to get back to pash mode. 
+## + +## TODO: Use that for (1) too + +output_vars_file=${1?Output var file not given} +output_set_file=${2?Output set file not given} + +pash_exec_status=${internal_exec_status} +pash_redir_output echo "$$: (5) BaSh script exited with ec: $pash_exec_status" + +## Save the current set options to a file so that they can be recovered +pash_final_set_vars=$- +pash_redir_output echo "$$: (5) Writing current BaSh set state to: $output_set_file" +pash_redir_output echo "$$: (5) Current BaSh shell: $-" +echo "$pash_final_set_vars" > "$output_set_file" + +## Revert to the old set state to avoid spurious fails +source "$RUNTIME_DIR/pash_set_from_to.sh" "$-" "$pash_current_set_state" +pash_redir_output echo "$$: (5) Reverted to PaSh set state to: $-" + + +## Save the current variables +source "$RUNTIME_DIR/pash_declare_vars.sh" "$output_vars_file" +# pash_redir_output echo "$$: (5) Exiting from BaSh with BaSh status: $pash_exec_status" +# (exit "$pash_exec_status") diff --git a/test/pash_tests/pcap.sh b/test/pash_tests/pcap.sh new file mode 100755 index 0000000..cc855c6 --- /dev/null +++ b/test/pash_tests/pcap.sh @@ -0,0 +1,25 @@ +#!/bin/bash +#tag: pcap analysis +IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/pcap_data} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/pcap-analysis} +LOGS=${OUT}/logs +mkdir -p ${LOGS} +run_tests() { + INPUT=$1 + /usr/sbin/tcpdump -nn -r ${INPUT} -A 'port 53' 2> /dev/null | sort | uniq |grep -Ev '(com|net|org|gov|mil|arpa)' 2> /dev/null + # extract URL + /usr/sbin/tcpdump -nn -r ${INPUT} -s 0 -v -n -l 2> /dev/null | egrep -i "POST /|GET /|Host:" 2> /dev/null + # extract passwords + /usr/sbin/tcpdump -nn -r ${INPUT} -s 0 -A -n -l 2> /dev/null | egrep -i "POST /|pwd=|passwd=|password=|Host:" 2> /dev/null +} +export -f run_tests + +pkg_count=0 + +for item in ${IN}/*; +do + pkg_count=$((pkg_count + 1)); + run_tests $item > ${LOGS}/${pkg_count}.log +done + +echo 'done'; diff --git 
a/test/pash_tests/pcap_bench.sh b/test/pash_tests/pcap_bench.sh new file mode 100755 index 0000000..a99790e --- /dev/null +++ b/test/pash_tests/pcap_bench.sh @@ -0,0 +1,8 @@ +INPUT=${INPUT:-$PASH_TOP/evaluation/scripts/input/201011271400.dump} +INPUT2=${INPUT2:-$PASH_TOP/evaluation/scripts/input/2018-07-20-17-31-20-192.168.100.108.pcap} +tcpdump -nn -r ${INPUT} -A 'port 53'| sort | uniq |grep -Ev '(com|net|org|gov|mil|arpa)' > /dev/null +tcpdump -nn -r ${INPUT} -A 'port 53'| sort |uniq |grep -Ev '(com|net|org|gov|mil|arpa)' > /dev/null +# without the pipes, bash takes 11 sec, with pipes, it takes 12 sec, same performance +# with pash +time tcpdump -nn -r ${INPUT2} -A -c 1000000 > /dev/null +time tcpdump -nn -r ${INPUT2} -A -c 1000000 | sort |uniq |grep -Ev '(com|net|org|gov|mil|arpa)' > /dev/null diff --git a/test/pash_tests/pkg.sh b/test/pash_tests/pkg.sh new file mode 100755 index 0000000..19b9028 --- /dev/null +++ b/test/pash_tests/pkg.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +# Package several versions of PaSh: +# * a shallow-clone version for a quick-install from `up` +# * a deep-clone version for other environments TODO +# * a docker image running on ubuntu 18.04 TODO + +set -ex + +echo $(pwd) +REV=0 + +REV=$(git rev-parse --short HEAD) +cd ../../ + +# # Shallow clone --- might not be ideal for development +# git clone --depth 1 git@github.com:andromeda/pash.git +# mv pash pash-shallow +# tar -cvzf pash-shallow.tar.gz pash-shallow/ > /dev/null +# # uncomment the following line to keep all versions +# # mv pash.tar.gz get/pash-${REV}.tar.gz +# # ln -sf ./pash-${REV}.tar.gz get/latest +# mv pash-shallow.tar.gz get/ +# ln -sf ./pash-shallow.tar.gz get/latest +# rm -rf pash-shallow + +cd pash +git pull +cd .. 
+tar -cvzf pash.tar.gz ./pash > /dev/null +mv pash.tar.gz get/ +ln -sf ./pash.tar.gz get/latest + +# in the future, we might want to have versions +# ln -s pash.tar.gz latest + + +# TODO: for a clear release, remove all versioning artifacts +# cp -r pash release +# cd release +# rm -rf .gitignore .gitsubmodules .git +# cd .. + diff --git a/test/pash_tests/pretty_print_json.sh b/test/pash_tests/pretty_print_json.sh new file mode 100755 index 0000000..d9da823 --- /dev/null +++ b/test/pash_tests/pretty_print_json.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +sed 's//}/g' | \ + sed 's/(/[/g' | \ + sed 's/)/]/g' | \ + python -m json.tool diff --git a/test/pash_tests/proginf.sh b/test/pash_tests/proginf.sh new file mode 100755 index 0000000..3c2a80d --- /dev/null +++ b/test/pash_tests/proginf.sh @@ -0,0 +1,18 @@ +#!/bin/bash +IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/node_modules} +MIR_BIN=${MIR_BIN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/mir-sa/.bin/mir-sa} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/mir} +mkdir -p ${OUT}/ +pkg_count=0 +run_tests() { + cd $1; + ${MIR_BIN} -p 2>>${OUT}/error.log +} +export -f run_tests +for item in ${IN}/*; +do + pkg_count=$((pkg_count + 1)); + run_tests $item > ${OUT}/$pkg_count.log +done + +echo 'done'; diff --git a/test/pash_tests/r-bell_grep.sh b/test/pash_tests/r-bell_grep.sh new file mode 100755 index 0000000..fe5f641 --- /dev/null +++ b/test/pash_tests/r-bell_grep.sh @@ -0,0 +1,50 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out +file9=9.out +rm -f *.out + +testFile="$PASH_TOP/evaluation/scripts/input/100M.txt" +batchSize=10000000 +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 + +# mkfifo $file7 +# mkfifo $file8 +# mkfifo $file9 + + 
+$PASH_TOP/runtime/r_split $testFile $batchSize $file1 $file2 & + +$PASH_TOP/runtime/r_wrap grep 'Bell' < $file1 > $file3 & +$PASH_TOP/runtime/r_wrap grep 'Bell' < $file2 > $file4 & +# ../r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file7 > $file8 & + +$PASH_TOP/runtime/r_merge $file3 $file4 + +# cat $testFile | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > $file6 +# if cmp -s "$file6" "$file5"; then +# printf 'The file "%s" is the same as "%s"\n' "$file6" "$file5" +# else +# printf 'The file "%s" is different from "%s"\n' "$file6" "$file5" +# fi + +rm -rf *out diff --git a/test/pash_tests/r-minimal_grep.sh b/test/pash_tests/r-minimal_grep.sh new file mode 100755 index 0000000..2dbc19b --- /dev/null +++ b/test/pash_tests/r-minimal_grep.sh @@ -0,0 +1,46 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out +file9=9.out +rm -f *.out + +testFile="$PASH_TOP/evaluation/scripts/input/10M.txt" +batchSize=1000000 +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 + +$PASH_TOP/runtime/r_split $testFile $batchSize $file1 $file2 & + +$PASH_TOP/runtime/r_wrap tr A-Z a-z < $file1 > $file3 & +$PASH_TOP/runtime/r_wrap tr A-Z a-z < $file2 > $file4 & + +$PASH_TOP/runtime/r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file3 > $file5 & +$PASH_TOP/runtime/r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file4 > $file6 & + +$PASH_TOP/runtime/r_merge $file5 $file6 +# cat $testFile | tr A-Z a-z | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > t2.out +# if cmp -s t1.out t2.out; then +# printf 'The file "%s" is the same as "%s"\n' "$file6" "$file5" +# else +# printf 'The file "%s" is different from "%s"\n' "$file6" "$file5" +# fi + +rm -rf *out \ No newline at end of file diff --git a/test/pash_tests/r-shortest-scripts.sh b/test/pash_tests/r-shortest-scripts.sh new file mode 
100644 index 0000000..2da7a68 --- /dev/null +++ b/test/pash_tests/r-shortest-scripts.sh @@ -0,0 +1,110 @@ +rm -f "#file2" +rm -f "#file4" +rm -f "#file6" +rm -f "#file8" +rm -f "#file10" +rm -f "#file12" +rm -f "#file14" +rm -f "#file17" +rm -f "#file18" +rm -f "#file19" +rm -f "#file20" +rm -f "#file21" +rm -f "#file22" +rm -f "#file23" +rm -f "#file24" +rm -f "#file25" +rm -f "#file26" +rm -f "#file27" +rm -f "#file28" +rm -f "#file29" +rm -f "#file30" +rm -f "#file32" +rm -f "#file31" +rm -f "#file33" +rm -f "#file34" +rm -f "#file35" +rm -f "#file36" +rm -f "#file37" +rm -f "#file38" +mkfifo "#file2" +mkfifo "#file4" +mkfifo "#file6" +mkfifo "#file8" +mkfifo "#file10" +mkfifo "#file12" +mkfifo "#file14" +mkfifo "#file17" +mkfifo "#file18" +mkfifo "#file19" +mkfifo "#file20" +mkfifo "#file21" +mkfifo "#file22" +mkfifo "#file23" +mkfifo "#file24" +mkfifo "#file25" +mkfifo "#file26" +mkfifo "#file27" +mkfifo "#file28" +mkfifo "#file29" +mkfifo "#file30" +mkfifo "#file32" +mkfifo "#file31" +mkfifo "#file33" +mkfifo "#file34" +mkfifo "#file35" +mkfifo "#file36" +mkfifo "#file37" +mkfifo "#file38" +{ cat /home/tamlu/pash/evaluation/scripts/input/all_cmds_x100.txt >"#file2" & } +{ /home/tamlu/pash/runtime/r_split "#file2" 100000 "#file17" "#file18" & } +{ /home/tamlu/pash/runtime/r_wrap xargs file <"#file17" >"#file20" & } +{ /home/tamlu/pash/runtime/r_wrap xargs file <"#file18" >"#file21" & } +{ /home/tamlu/pash/runtime/r_wrap grep "shell script" <"#file20" >"#file22" & } +{ /home/tamlu/pash/runtime/r_wrap grep "shell script" <"#file21" >"#file23" & } +{ /home/tamlu/pash/runtime/r_wrap cut -d: -f1 <"#file22" >"#file24" & } +{ /home/tamlu/pash/runtime/r_wrap cut -d: -f1 <"#file23" >"#file25" & } +{ /home/tamlu/pash/runtime/r_wrap xargs -L 1 wc -l <"#file24" >"#file26" & } +{ /home/tamlu/pash/runtime/r_wrap xargs -L 1 wc -l <"#file25" >"#file27" & } +{ /home/tamlu/pash/runtime/r_wrap grep -v "^0$" <"#file26" >"#file28" & } +{ /home/tamlu/pash/runtime/r_wrap grep -v 
"^0$" <"#file27" >"#file29" & } +{ /home/tamlu/pash/runtime/r_unwrap <"#file28" >"#file32" & } +{ sort -n <"#file35" >"#file30" & } +{ /home/tamlu/pash/runtime/r_unwrap <"#file29" >"#file33" & } +{ sort -n <"#file36" >"#file31" & } +{ sort -n -m "#file37" "#file38" >"#file14" & } +{ /home/tamlu/pash/runtime/eager.sh "#file32" "#file35" "/tmp/pash_eager_intermediate_#file1" & } +{ /home/tamlu/pash/runtime/eager.sh "#file33" "#file36" "/tmp/pash_eager_intermediate_#file2" & } +{ /home/tamlu/pash/runtime/eager.sh "#file30" "#file37" "/tmp/pash_eager_intermediate_#file3" & } +{ /home/tamlu/pash/runtime/eager.sh "#file31" "#file38" "/tmp/pash_eager_intermediate_#file4" & } +{ head -15 <"#file14" & } +source /home/tamlu/pash/runtime/wait_for_output_and_sigpipe_rest.sh ${!} +rm -f "#file2" +rm -f "#file4" +rm -f "#file6" +rm -f "#file8" +rm -f "#file10" +rm -f "#file12" +rm -f "#file14" +rm -f "#file17" +rm -f "#file18" +rm -f "#file19" +rm -f "#file20" +rm -f "#file21" +rm -f "#file22" +rm -f "#file23" +rm -f "#file24" +rm -f "#file25" +rm -f "#file26" +rm -f "#file27" +rm -f "#file28" +rm -f "#file29" +rm -f "#file30" +rm -f "#file32" +rm -f "#file31" +rm -f "#file33" +rm -f "#file34" +rm -f "#file35" +rm -f "#file36" +rm -f "#file37" +rm -f "#file38" \ No newline at end of file diff --git a/test/pash_tests/r-sort.sh b/test/pash_tests/r-sort.sh new file mode 100755 index 0000000..1978189 --- /dev/null +++ b/test/pash_tests/r-sort.sh @@ -0,0 +1,53 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out + +rm -f *.out + +testFile=../../evaluation/scripts/input/100M.txt +batchSize=10000000 +testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 +mkfifo $file7 +mkfifo $file8 + +$PASH_TOP/runtime/r_split $testFile $batchSize $file1 
$file2 & + +$PASH_TOP/runtime/r_unwrap < $file1 > $file3 & +$PASH_TOP/runtime/r_unwrap < $file2 > $file4 & + +$PASH_TOP/runtime/eager.sh $file3 $file5 "/tmp/pash_eager_intermediate_#file1" & +$PASH_TOP/runtime/eager.sh $file4 $file6 "/tmp/pash_eager_intermediate_#file2" & + +sort < $file5 > $file7 & +sort < $file6 > $file8 & + +sort -m $file7 $file8 + +# cat $testFile | sort > $file8 +# if cmp -s "$file7" "$file8"; then +# printf 'The file "%s" is the same as "%s"\n' "$file7" "$file8" +# else +# printf 'The file "%s" is different from "%s"\n' "$file7" "$file8" +# fi + +rm -rf *out \ No newline at end of file diff --git a/test/pash_tests/r-wc.sh b/test/pash_tests/r-wc.sh new file mode 100755 index 0000000..aaf1422 --- /dev/null +++ b/test/pash_tests/r-wc.sh @@ -0,0 +1,44 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out + +rm -f *.out + +testFile=$PASH_TOP/evaluation/scripts/input/1G.txt +batchSize=10000000 +testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 + + +$PASH_TOP/runtime/r_split -r $testFile $batchSize $file3 $file4 & + + +# $PASH_TOP/runtime/r_unwrap < $file1 > $file3 & +# $PASH_TOP/runtime/r_unwrap < $file2 > $file4 & + +wc $file3 > $file5 & +wc $file4 > $file6 & + +./merge-wc.sh $file5 $file6 + + +rm -rf *out \ No newline at end of file diff --git a/test/pash_tests/raw-r-sort.sh b/test/pash_tests/raw-r-sort.sh new file mode 100755 index 0000000..b9a7920 --- /dev/null +++ b/test/pash_tests/raw-r-sort.sh @@ -0,0 +1,49 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out + +rm -f *.out + +batchSize=10000000 +testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + 
batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 +mkfifo $file7 +mkfifo $file8 + +$PASH_TOP/runtime/r_split -r $testFile $batchSize $file1 $file2 & + +$PASH_TOP/runtime/eager.sh $file1 $file5 "/tmp/pash_eager_intermediate_#file1" & +$PASH_TOP/runtime/eager.sh $file2 $file6 "/tmp/pash_eager_intermediate_#file2" & + +sort < $file5 > $file7 & +sort < $file6 > $file8 & + +sort -m $file7 $file8 + +# cat $testFile | sort > $file8 +# if cmp -s "$file7" "$file8"; then +# printf 'The file "%s" is the same as "%s"\n' "$file7" "$file8" +# else +# printf 'The file "%s" is different from "%s"\n' "$file7" "$file8" +# fi + +rm -rf *out \ No newline at end of file diff --git a/test/pash_tests/readonly.sh b/test/pash_tests/readonly.sh new file mode 100644 index 0000000..8a3d950 --- /dev/null +++ b/test/pash_tests/readonly.sh @@ -0,0 +1,8 @@ +var1=value1 +readonly var1 var2=value2 +var1=foo +var2=foo +echo $var1 $var2 +unset var1 +unset var2 +echo $var1 $var2 diff --git a/test/pash_tests/redir-var-test.sh b/test/pash_tests/redir-var-test.sh new file mode 100644 index 0000000..e82ffd7 --- /dev/null +++ b/test/pash_tests/redir-var-test.sh @@ -0,0 +1,10 @@ +#!/bin/sh +func_emit_tests_Makefile_am () +{ + ofd=3 + { + echo hi + } >&$ofd +} +fd=1 +echo hi >&$fd diff --git a/test/pash_tests/redirect.sh b/test/pash_tests/redirect.sh new file mode 100755 index 0000000..0fa3da7 --- /dev/null +++ b/test/pash_tests/redirect.sh @@ -0,0 +1,2 @@ +echo hello 1>&9 +# ls -laL /dev/fd \ No newline at end of file diff --git a/test/pash_tests/redirect_stdin_to.sh b/test/pash_tests/redirect_stdin_to.sh new file mode 100755 index 0000000..f43c9a6 --- /dev/null +++ b/test/pash_tests/redirect_stdin_to.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +## TODO: Is this a hack? 
+{ cat > "${1?No file to redirect to}" <&3 3<&- & } 3<&0 diff --git a/test/pash_tests/redirect_wrapper.sh b/test/pash_tests/redirect_wrapper.sh new file mode 100644 index 0000000..003b9df --- /dev/null +++ b/test/pash_tests/redirect_wrapper.sh @@ -0,0 +1 @@ +exec $1 redirect.sh 9>&1 \ No newline at end of file diff --git a/test/pash_tests/remote_read.sh b/test/pash_tests/remote_read.sh new file mode 100755 index 0000000..bc4577c --- /dev/null +++ b/test/pash_tests/remote_read.sh @@ -0,0 +1 @@ +"$PASH_TOP/runtime/dspash/file_reader/datastream_client" --type read "$@" diff --git a/test/pash_tests/remote_write.sh b/test/pash_tests/remote_write.sh new file mode 100755 index 0000000..3c5e724 --- /dev/null +++ b/test/pash_tests/remote_write.sh @@ -0,0 +1 @@ +"$PASH_TOP"/runtime/dspash/file_reader/datastream_client --type write "$@" diff --git a/test/pash_tests/remove_adapter.sh b/test/pash_tests/remove_adapter.sh new file mode 100644 index 0000000..006de5d --- /dev/null +++ b/test/pash_tests/remove_adapter.sh @@ -0,0 +1,3 @@ +INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/} +# remove adapter +find ${INPUT} -name "*.fastq" | sort | uniq | xargs -I {} cutadapt -a AGATCGGAAGAGCACAC {} > /dev/null diff --git a/test/pash_tests/round_trip.sh b/test/pash_tests/round_trip.sh new file mode 100755 index 0000000..1aa1648 --- /dev/null +++ b/test/pash_tests/round_trip.sh @@ -0,0 +1,29 @@ +#!/bin/sh + +if [ $# -ne 2 ]; then + echo "Usage: ${0##*/} program target" + exit 2 +fi + +p=$1 +tgt=$2 + +orig=$(${p} ${tgt} 2>&1) +if [ "$?" -ne 0 ]; +then echo "${tgt} FAILED, couldn't run (output: ${orig})"; exit 2 +fi + +rt=$(${p} ${tgt} | ${p} 2>&1) +if [ "$?" 
-ne 0 ]; +then echo "${tgt} FAILED round trip, couldn't run (output: $rt)"; exit 3 +fi + +if [ "${orig}" = "${rt}" ]; +then echo ${tgt} OK; exit 0 +else + echo ${tgt} FAILED + echo ${orig} + echo ========== + echo ${rt} + exit 1 +fi diff --git a/test/pash_tests/run_all_benchmarks.sh b/test/pash_tests/run_all_benchmarks.sh new file mode 100755 index 0000000..ef43816 --- /dev/null +++ b/test/pash_tests/run_all_benchmarks.sh @@ -0,0 +1,70 @@ +#!/bin/bash + +## Determines whether the experimental pash flags will be tested. +## By default they are not. +export EXPERIMENTAL=0 +export DEBUG=0 + +for item in $@ +do + if [ "--experimental" == "$item" ]; then + export EXPERIMENTAL=1 + fi + + if [ "--debug" == "$item" ]; then + export DEBUG=1 + fi +done + +## This script is necessary to ensure that sourcing happens with bash +source run.seq.sh +source run.par.sh + +compare_outputs(){ + dir=$1 + outputs=$(ls $dir | grep "seq" | sed 's/.seq.out$//') + for out in $outputs; + do + seq_output="${dir}/${out}.seq.out" + pash_output="${dir}/${out}.par.out" + diff -q "$seq_output" "$pash_output" + done +} + +if [ "$EXPERIMENTAL" -eq 1 ]; then + export PASH_FLAGS="--r_split --dgsh_tee --r_split_batch_size 1000000" + # --speculation quick_abort is not maintained at the moment +else + export PASH_FLAGS="" +fi + +## Add the debug flag +if [ "$DEBUG" -eq 1 ]; then + export PASH_FLAGS="$PASH_FLAGS -d 1" +fi + + +oneliners +oneliners_pash + +compare_outputs "oneliners/outputs" + +unix50 +unix50_pash + +compare_outputs "unix50/outputs" + +nlp +nlp_pash + +compare_outputs "nlp/outputs" + +web-index +web-index_pash + +compare_outputs "web-index/outputs" + +analytics-mts +analytics-mts_pash + +compare_outputs "analytics-mts/outputs" diff --git a/test/pash_tests/run_evaluation.sh b/test/pash_tests/run_evaluation.sh new file mode 100644 index 0000000..bb9ead9 --- /dev/null +++ b/test/pash_tests/run_evaluation.sh @@ -0,0 +1,243 @@ +#!/bin/bash + +## TODO: Set up $PASH_TOP in the beginning or run 
the install script. + + +echo "This script runs the whole EuroSys 2021 PaSh evaluation" + +echo "" +echo "Section 6.1: Common Unix One-liners" + +## TODO: Also save aggregates (avg, etc) in a file + +## Note that input files that are used as inputs for this script are generated +## using the `gen*` scripts in `evaluation/scripts/input/`. +## ``` +## cd $PASH_TOP/evaluation/scripts/input/ +## ./gen.sh +## ./gen.sh # Warning: This requires more than 100GB of space. +## ``` +## +## If you just want to run the scripts with small inputs (the main conclusions still hold) +## you only need to run `./gen.sh`. +## +## The one-liner scripts are included in `evaluation/microbenchmarks` +## The list of scripts (and their correspondence to the names in the paper) are seen below: +## - minimal_grep.sh # EuroSys: nfa-regex +## - minimal_sort.sh # EuroSys: sort +## - topn.sh # EuroSys: top-n +## - wf.sh # EuroSys: wf +## - spell.sh # EuroSys: spell +## - diff.sh # EuroSys: difference +## - bigrams.sh # EuroSys: bi-grams +## - set-diff.sh # EuroSys: set-difference +## - double_sort.sh # EuroSys: sort-sort +## - shortest_scripts.sh # EuroSys: shortest-scripts +## +## The inputs that we are going to run them on are defined in +## - *_env_small.sh (for the small input) +## - *_env.sh (for the large EuroSys eval input, usually 10x larger than the small) +## +## Before running the script we first need to move to the correct directory +## `cd $PASH_TOP/evaluation/eurosys` +## +## The script that runs PaSh on these programs is: `evaluation/eurosys/execute_eurosys_one_liners.sh` +## There are three modes of execution (can be seen by calling the script with the -h flag): +## 1. Small inputs | --width 2, 16 | Only full PaSh config +## 2. Small inputs | --width 2, 16 | All PaSh configs +## 3. 
Big inputs | -- width 2, 4, 8, 16, 32, 64 | All PaSh configs +## +## The script `evaluation/eurosys/execute_eurosys_one_liners.sh` is based on +## `evaluation/execute_compile_evaluation_script.sh` that correctly sets up PaSh for the different configurations. +## +## If you just want to check that PaSh achieves speedups as presented in the paper +## you can just run 1 with option `-s`. +## +## If you are interested in seeing the improvements by PaSh's runtime primitives +## (all lines in Figure 9), you can run 2 with option `-m`. +## This should take a couple hours and should validate the trends between different PaSh +## configurations as shown in Figure 9. +## +## If you want to reproduce the complete results from Figure 9, you need to run 3 with option `-l`. +## Note that this should take more than a day to execute. +## Also this requires several hundred GBs of free space (due to intermediate inputs, outputs, and buffering). +## +## To plot the results from any of the above experiments, do the following: +## ``` +## cd $PASH_TOP/compiler +## python3 gather_results.py +## ``` +## +## This will create plots for all invocations of +## `evaluation/eurosys/execute_eurosys_one_liners.sh`, one for each flag. +## +## The plots are: +## - for `-s`: evaluation/plots/small_tiling_throughput_scaleup.pdf +## - for `-m`: evaluation/plots/medium_tiling_throughput_scaleup.pdf +## - for `-l`: evaluation/plots/tiling_throughput_scaleup.pdf +## +## Note that `-m` supersedes `-s` but `-l` does not supersede any of the two. +## +## Also note that if you run a script partially, it might end up saving partial results, +## therefore having 0 speedups in some points of the plots. + +echo "" +echo "Section 6.2: Unix50 from Bell Labs" + +## TODO: Also save aggregates (avg, etc) in a file + +## All of the Unix50 pipelines are in `evaluation/unix50/unix50.sh`. +## The inputs of the pipelines are in `evaluation/unix50/`. 
+## +## Before running the script we first need to move to the correct directory +## `cd $PASH_TOP/evaluation/eurosys` +## +## The script that runs PaSh on these programs is: `evaluation/eurosys/execute_unix_benchmarks.sh` +## There are two modes of execution (can be seen by calling the script with the -h flag): +## 1. Small inputs (1GB) | --width 4 +## 2. Big inputs (10GB) | --width 16 (EuroSys evaluation) +## +## The first one, called with `-s`, uses pash on the unix50 scripts with 1GB input and width 4 +## and should be done in less than an hour. +## The trend shown in the paper (Fig 10) should be visible in the results from this script. +## +## If you are interested in running the complete evaluation to reproduce Figure 10, +## you need to run the script with `-l`. This should take several hours. +## +## To plot the results from any of the above experiments, do the following: +## ``` +## cd $PASH_TOP/compiler +## python3 gather_results.py +## ``` +## +## This will create plots for both "1GB --width 4" and for "10GB --width 16". +## +## The plots are in: +## - for `-s`: evaluation/plots/unix50_1GB_individual_speedups_4.pdf +## - for `-l`: evaluation/plots/unix50_10GB_individual_speedups_16.pdf +## +## Note that the pipelines in the plot are sorted with respect to speedup, and not by their ID. +## So the first pipeline does not necessarily correspond to the first pipeline in `evaluation/unix50`. +## +## There are two small differences of these plots compared to Figure 10. +## These differences are due to the evolution of PaSh and the refinement of its annotations. +## - First, the first pipeline has higher speedup that 4 and 16 in both cases. This is because +## this pipeline is not very CPU intensive and contains an initial `cat`. PaSh has evolved +## to perform an optimization that removes `cat` occurences that only contain a single file, +## and therefore removes it, improving performance significantly. 
+## - Second, the slowdown in the last 3 scripts is more significant than the one reported in the paper. +## This is because these scripts contain `tr -d '\n'`, the annotation for which was refined recently due to additional testing. +## The initial annotation for `tr` considered this invocation of `tr` to be stateless while it isn't, +## since it removes all lines and therefore cannot be parallelized based on lines. The refinement in the annotation +## leads to additional splits to be added after `tr -d '\n'` (since it is non parallelizable pure). +## The issue with these splits is that they do not manage to split the file (since there is only one line) +## leaving the rest of the script to run sequentially. +## + + +echo "" +echo "Section 6.3: Use Case: NOAA Weather Analysis" + +## Note that input files that are needed by this script +## are `curl`ed from a server in the local network and therefore +## cannot be accessed from elsewhere. +## +## Before running the script we first need to move to the correct directory +## `cd $PASH_TOP/evaluation/eurosys` +## +## The program that we run, described in Section 6.3, can be seen in `evaluation/scripts/max-temp-complete.sh`. +## It takes as input a sequence of lines each containing a year (e.g. using `seq 2000 2004`). +## +## To run the script with a single year of input use: +## `./execute_max_temp_dish_evaluation.sh -s` +## +## These should take less than 10 minutes. +## +## It runs the script on: +## - bash +## - pa.sh --width 16 +## +## The results are saved in: +## - `evaluation/results/max-temp-complete-2000-2000-seq.time` +## - `evaluation/results/max-temp-complete-2000-2000-16-pash.time` +## +## If you want to run the program with 5 years of input (as is done in Section 6.3) +## you need to use the following: +## `./execute_max_temp_dish_evaluation.sh -l` +## +## It should take less than an hour. +## It also runs the script with bash and pash --width 16. 
+## +## The results are saved in: +## - `evaluation/results/max-temp-complete-2000-2004-seq.time` +## - `evaluation/results/max-temp-complete-2000-2004-16-pash.time` +## +## If you want to separate the preprocessing and processing (as done in Section 6.3) +## you need to add the `-e` flag to either 1 or 5 year execution, e.g.: +## `./execute_max_temp_dish_evaluation.sh -l -e` +## +## This runs: +## - `evaluation/scripts/max-temp-preprocess.sh` +## - `evaluation/scripts/max-temp-process.sh` +## +## with bash, and pash --width 16. It saves results in: +## - `evaluation/results/max-temp-preprocess-2000-2000-seq.time` +## - `evaluation/results/max-temp-preprocess-2000-2000-16-pash.time` +## - `evaluation/results/max-temp-process-2000-2000-seq.time` +## - `evaluation/results/max-temp-process-2000-2000-16-pash.time` +## +## and similarly for the large inputs (2000-2004). +## +## Note that PaSh's speedup for the complete script 2000-2004 with width 16 +## is actually higher than what is reported in the paper since it doesn't +## have to write the intermediate files (between preprocessing and processing) to disk. +## + +echo "" +echo "Section 6.4: Use Case: Wikipedia Web Indexing" + +## Note that input files that are needed by this script (complete Wikipedia) +## are saved locally on the server and therefore this program cannot be run from elsewhere. +## +## Before running the script we first need to move to the correct directory +## `cd $PASH_TOP/evaluation/eurosys` +## +## The program that we run, described in Section 6.4, can be seen in `evaluation/scripts/web-index.sh`. +## It requires having set the `$IN`, `$WIKI`, and `$WEB_INDEX_DIR` variables. +## +## To run the script for a 1000 wikipedia links use: +## `./execute_web_index_dish_evaluation.sh -s` +## +## This sets up the required variables and should take less than 5 minutes. +## It runs the script with bash, pash --width 2, pash --width 16. 
+## +## The results are saved in: +## - `evaluation/results/web-index-1000-seq.time` +## - `evaluation/results/web-index-1000-2-pash.time` +## - `evaluation/results/web-index-1000-16-pash.time` +## +## If you want to run with the EuroSys evaluation inputs (100k links), use: +## `./execute_web_index_dish_evaluation.sh -l` +## +## This should take a couple hours and the results are saved in: +## - `evaluation/results/web-index-100000-seq.time` +## - `evaluation/results/web-index-100000-2-pash.time` +## - `evaluation/results/web-index-100000-16-pash.time` + +echo "" +echo "Section 6.5: Further Micro-benchmarks" + +## To run the comparison with sort --parallel, just use `evaluation/eurosys/execute_baseline_sort.sh` +## +## Before running the script we first need to move to the correct directory +## `cd $PASH_TOP/evaluation/eurosys` +## +## There are two modes of execution: +## 1. option: -s Small input | --width 2, 16 +## 2. option: -l Big input | -- width 2, 4, 8, 16, 32, 64 +## +## Note that this script executes sort --parallel with double the value of --width +## since we noticed that it grows slightly slower (as shown in the Figure in Section 6.5). + + +## TODO(@nikos): Run and explain the GNU Parallel diff --git a/test/pash_tests/run_grader.sh b/test/pash_tests/run_grader.sh new file mode 100755 index 0000000..10017b9 --- /dev/null +++ b/test/pash_tests/run_grader.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +if [ "$#" != "1" ]; then + echo "Usage: $0 [hwXX]" + exit 1 +fi + +if [ ! -d "$1/grading" ]; then + echo "Couldn't find grading directory (looked in $1/grading)" + exit 2 +fi + +cd $1/grading + +errors="" +for s in `ls`; do + echo "GRADING $s" + (cd $s; make) + if [ "$?" 
!= "0" ]; then + errors+=" $s" + fi +done + +echo +echo "There were errors for the following students:${errors}" +echo ${errors} >"$1/grading/errors.log" diff --git a/test/pash_tests/run_lda.sh b/test/pash_tests/run_lda.sh new file mode 100755 index 0000000..a2e8698 --- /dev/null +++ b/test/pash_tests/run_lda.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +export PYTHONIOENCODING=utf8 + +if test $# -ne 0; +then + KS="$*"; +else + KS="50 75 100 125 150 175 200" +fi + +DIR=`date "+%Y-%m-%d_%H:%M"` +START=`date "+%Y-%m-%d %H:%M"` + +# TODO error handling + +echo "SETTING UP" +mkdir ${DIR} + +echo "PARSING" +python parse.py + +for dat in abstracts.dat vocab.dat docs.dat; do + mv ${dat} ${DIR} +done + +# we don't want to lose this one! +cp stopwords.dat ${DIR} + +echo "RUNNING LDA" + +ABS=${DIR}/abstracts.dat + +for k in ${KS}; do + lda est 1/50 ${k} settings.txt ${ABS} seeded ${DIR}/lda${k} & + echo lda${k} >>${DIR}/.gitignore +done + +wait +echo "PROCESSING TOPICS" + +for k in ${KS}; do + python debug_topics.py ${DIR} ${k} > ${DIR}/lda${k}_topics.txt +done + +echo "GENERATING CSV" + +for i in ${DIR}/lda*; do + test -d ${i} && python post.py ${i}/final.gamma ${DIR}/docs.dat > ${i}.csv + test -d ${i} && python by_year.py ${i}/final.gamma ${DIR}/docs.dat > ${i}_by_year.csv +done + +echo "MOVING TO OUTPUT DIRECTORY" +mv ${DIR} ../out + +echo "DONE" +echo All done. Started at ${START}, done at `date "+%Y-%m-%d %H:%M"`. diff --git a/test/pash_tests/run_tests.sh b/test/pash_tests/run_tests.sh new file mode 100755 index 0000000..08fec79 --- /dev/null +++ b/test/pash_tests/run_tests.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +set -x e + +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +echo "Running intro tests..." +cd "$PASH_TOP/evaluation/intro" +./test.sh + +echo "Running interface tests..." +cd "$PASH_TOP/evaluation/tests/interface_tests" +./run.sh + +echo "Running compiler tests..." 
+cd "$PASH_TOP/compiler" +./test_evaluation_scripts.sh + +echo "Running aggregator tests..." +cd "$PASH_TOP/evaluation/tests/agg/" +./run.sh + +echo "Running aggregator tests..." +cd "$PASH_TOP/runtime/agg/cpp/tests" +./test.sh diff --git a/test/pash_tests/safe0.sh b/test/pash_tests/safe0.sh new file mode 100644 index 0000000..4a3e55b --- /dev/null +++ b/test/pash_tests/safe0.sh @@ -0,0 +1 @@ +echo nothing to expand diff --git a/test/pash_tests/safe1.sh b/test/pash_tests/safe1.sh new file mode 100644 index 0000000..e213fee --- /dev/null +++ b/test/pash_tests/safe1.sh @@ -0,0 +1 @@ +echo ~ is always safe diff --git a/test/pash_tests/safe2.sh b/test/pash_tests/safe2.sh new file mode 100644 index 0000000..2d66d10 --- /dev/null +++ b/test/pash_tests/safe2.sh @@ -0,0 +1 @@ +echo "quoting safe stuff is safe" diff --git a/test/pash_tests/safe3.sh b/test/pash_tests/safe3.sh new file mode 100644 index 0000000..bfc7420 --- /dev/null +++ b/test/pash_tests/safe3.sh @@ -0,0 +1 @@ +echo $((2 + 2)) = 4, safely diff --git a/test/pash_tests/safe4.sh b/test/pash_tests/safe4.sh new file mode 100644 index 0000000..5f91750 --- /dev/null +++ b/test/pash_tests/safe4.sh @@ -0,0 +1 @@ +echo ${PWD} is fine to show diff --git a/test/pash_tests/safe5.sh b/test/pash_tests/safe5.sh new file mode 100644 index 0000000..50ea6bc --- /dev/null +++ b/test/pash_tests/safe5.sh @@ -0,0 +1 @@ +echo ${#PWD} is also cool, as is ${x-default} and ${x+alt} and and ${x%%a*} ${x%a*} ${x#a*} ${x##a*} diff --git a/test/pash_tests/safe7.sh b/test/pash_tests/safe7.sh new file mode 100644 index 0000000..aa6b8d1 --- /dev/null +++ b/test/pash_tests/safe7.sh @@ -0,0 +1 @@ +echo ${#PWD} is also cool, as is ${x-default} and "${x+alt}" and ${x=set now} and ${x?won\'t run} diff --git a/test/pash_tests/search.sh b/test/pash_tests/search.sh new file mode 100755 index 0000000..12efebe --- /dev/null +++ b/test/pash_tests/search.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# Complicated grep expression + +IN=./input/1G.txt # Change G to 
M for small input +OUT=./output/out.txt + +cat $IN | + grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > $OUT diff --git a/test/pash_tests/sed-test.sh b/test/pash_tests/sed-test.sh new file mode 100644 index 0000000..f5ba0ac --- /dev/null +++ b/test/pash_tests/sed-test.sh @@ -0,0 +1,11 @@ +cat $PASH_TOP/evaluation/tests/input/1M.txt | + sed 's;^d;da;' | + sed 's;^;http://ndr.md/data/noaa/;' | + sed 's;$;/;' | + sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' | + sed 's;^;http://ndr.md/data/noaa/;' | + sed "s#^#$WIKI#" | + sed s/\$/'0s'/ | + sed 1d | + sed 4d | + sed "\$d" \ No newline at end of file diff --git a/test/pash_tests/send_emails.sh b/test/pash_tests/send_emails.sh new file mode 100755 index 0000000..9e3515f --- /dev/null +++ b/test/pash_tests/send_emails.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +if [ "$#" != "1" ]; then + echo "Usage: $0 [hwXX]" + exit 1 +fi + +if [ ! -d "$1/mail" ]; then + echo "Couldn't find mail directory (looked in $1/grading)" + exit 2 +fi + +cd $1/mail + +for s in `ls`; do + ../../mail.scpt "[cs131] $1 grade" $s +done diff --git a/test/pash_tests/set-dash-v-x.sh b/test/pash_tests/set-dash-v-x.sh new file mode 100644 index 0000000..168efba --- /dev/null +++ b/test/pash_tests/set-dash-v-x.sh @@ -0,0 +1,4 @@ +set - +echo hello +echo $# $1 $2 $3 $4 $5 + diff --git a/test/pash_tests/set-diff.sh b/test/pash_tests/set-diff.sh new file mode 100755 index 0000000..2c1afd0 --- /dev/null +++ b/test/pash_tests/set-diff.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Show the set-difference between two streams (i.e., elements in the first that are not in the second). 
+# https://stackoverflow.com/questions/2509533/bash-linux-set-difference-between-two-text-files + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} + +mkfifo s1 s2 + +cat $IN | + cut -d ' ' -f 1 | + tr [:lower:] [:upper:] | + sort > s1 & + +cat $IN | + cut -d ' ' -f 1 | + sort > s2 & + +comm -23 s1 s2 + +rm s1 s2 diff --git a/test/pash_tests/set-diff_env_test.sh b/test/pash_tests/set-diff_env_test.sh new file mode 100644 index 0000000..3e777b2 --- /dev/null +++ b/test/pash_tests/set-diff_env_test.sh @@ -0,0 +1,2 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt + diff --git a/test/pash_tests/set-e-2.sh b/test/pash_tests/set-e-2.sh new file mode 100644 index 0000000..a280386 --- /dev/null +++ b/test/pash_tests/set-e-2.sh @@ -0,0 +1,4 @@ +set -e +( { false; } + { echo one; } ) | cat +echo two diff --git a/test/pash_tests/set-e-3.sh b/test/pash_tests/set-e-3.sh new file mode 100644 index 0000000..54b10e4 --- /dev/null +++ b/test/pash_tests/set-e-3.sh @@ -0,0 +1,21 @@ +set -e +# individual command in a multi-command pipeline +false | : +echo passed pipeline +# part of a compound list of an 'elif' +if false; then :; elif false; then :; fi +echo passed elif +# non-subshell compound command whose exit status was the result +# of a failure while -e was being ignored +{ false && : ; } +echo passed compound-brace +for i in a; do false && : ; done +echo passed compound-for +# case x in x) false && : ;; esac +# echo passed compound-case +if :; then false && : ; fi +echo passed compound-if +cont=y; while [ $cont = y ]; do cont=n; false && : ; done +echo passed compound-while +end=n; until [ $end = y ]; do end=y; false && : ; done +echo passed compound-until \ No newline at end of file diff --git a/test/pash_tests/set-e.sh b/test/pash_tests/set-e.sh new file mode 100644 index 0000000..982c20c --- /dev/null +++ b/test/pash_tests/set-e.sh @@ -0,0 +1,18 @@ +set -e +# part of a compound list of a 'while', 'until' or 'if' +while false; do break; done +echo passed while 
+until false; do break; done +echo passed until +if false; then :; fi +echo passed if +# any command of an AND-OR list other than the last +false && : +echo passed AND list +false || : +echo passed OR list +: && false || false && : +echo passed AND-OR list +# part of a pipeline preceded by the '!' reserved word +! false +echo passed negated pipeline \ No newline at end of file diff --git a/test/pash_tests/set-v.sh b/test/pash_tests/set-v.sh new file mode 100644 index 0000000..f5b3edf --- /dev/null +++ b/test/pash_tests/set-v.sh @@ -0,0 +1,2 @@ +set -v +echo hello diff --git a/test/pash_tests/set.sh b/test/pash_tests/set.sh new file mode 100644 index 0000000..cae4de7 --- /dev/null +++ b/test/pash_tests/set.sh @@ -0,0 +1,7 @@ +dotFile=set.sh.tempfile +variable="value value" + +# the problem is that this returns more things (we have functions that are exported in set) +set | grep variable > $dotFile +. ./$dotFile +# set diff --git a/test/pash_tests/set_bug.sh b/test/pash_tests/set_bug.sh new file mode 100755 index 0000000..55d8d14 --- /dev/null +++ b/test/pash_tests/set_bug.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +f() { + echo "f: $@" +} + +set -- a b c +echo "$@" +f +echo "$@" + diff --git a/test/pash_tests/setup-dspash.sh b/test/pash_tests/setup-dspash.sh new file mode 100755 index 0000000..0a81473 --- /dev/null +++ b/test/pash_tests/setup-dspash.sh @@ -0,0 +1,64 @@ + +# TODO: install any extra needed python debs + +# Get PASH_TOP +if git rev-parse --git-dir > /dev/null 2>&1; then + # set PASH_TOP + PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel)} +else + # set PASH_TOP to the root folder of the project if it is not available + PASH_TOP=${PASH_TOP:-$PWD/..} +fi + +# Install Go +wget https://go.dev/dl/go1.17.7.linux-amd64.tar.gz +rm -rf /usr/local/go && tar -C /usr/local -xzf go1.17.7.linux-amd64.tar.gz +echo -e '\nexport PATH=$PATH:/usr/local/go/bin' >> ~/.bashrc +export PATH=$PATH:/usr/local/go/bin +rm go1.17.7.linux-amd64.tar.gz + +# Install deps +GO111MODULE=on 
go get github.com/urfave/cli/v2 + +# Protobuf +apt-get update && apt-get install -y zip +PB_REL="https://github.com/protocolbuffers/protobuf/releases" +PROTOBUF_VER="3.15.8" +PROTOBUF_PACKAGE="protoc-$PROTOBUF_VER-linux-x86_64.zip" +curl -LO $PB_REL/download/v$PROTOBUF_VER/$PROTOBUF_PACKAGE +unzip $PROTOBUF_PACKAGE -d $HOME/.local +rm $PROTOBUF_PACKAGE +export PATH="$PATH:$HOME/.local/bin" +echo -e "\nPATH=\$PATH:$HOME/.local/bin" >> ~/.bashrc + +# Go protobuf deps +go install google.golang.org/protobuf/cmd/protoc-gen-go@latest +go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest +echo -e "\nexport PATH=\$PATH:$(go env GOPATH)/bin" >> ~/.bashrc +export PATH="$PATH:$(go env GOPATH)/bin" + +# Protobuf +apt-get update && apt-get install -y zip +PB_REL="https://github.com/protocolbuffers/protobuf/releases" +PROTOBUF_VER="3.15.8" +PROTOBUF_PACKAGE="protoc-$PROTOBUF_VER-linux-x86_64.zip" +curl -LO $PB_REL/download/v$PROTOBUF_VER/$PROTOBUF_PACKAGE +unzip $PROTOBUF_PACKAGE -d $HOME/.local +rm $PROTOBUF_PACKAGE +export PATH="$PATH:$HOME/.local/bin" +echo -e "\nPATH=\$PATH:$HOME/.local/bin" >> ~/.bashrc + +# Go protobuf deps +go install google.golang.org/protobuf/cmd/protoc-gen-go@latest +go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest +echo -e "\nexport PATH=\$PATH:$(go env GOPATH)/bin" >> ~/.bashrc +export PATH="$PATH:$(go env GOPATH)/bin" + +# Compile runtime +cd $PASH_TOP/runtime/dspash +go build socket_pipe.go +cd file_reader +go build client/dfs_split_reader.go +go build -o filereader_server server/server.go +go build -o discovery_server server/discovery_server.go +go build -o datastream_client client/datastream.go diff --git a/test/pash_tests/setup.sh b/test/pash_tests/setup.sh new file mode 100755 index 0000000..351e227 --- /dev/null +++ b/test/pash_tests/setup.sh @@ -0,0 +1,10 @@ +#!/bin/bash +setup_dataset() { + echo 'This experiment is expected to fetch data from a remote server' + echo 'To fetch the original dataset, use an FTP client' 
+ echo 'e.g., "lftp ftp://ftp.ncdc.noaa.gov/pub/data/noaa"' +} + +source_var() { + export IN= +} diff --git a/test/pash_tests/shortest-scripts.sh b/test/pash_tests/shortest-scripts.sh new file mode 100755 index 0000000..92c6b87 --- /dev/null +++ b/test/pash_tests/shortest-scripts.sh @@ -0,0 +1,221 @@ +rm -f "#file2" +rm -f "#file4" +rm -f "#file6" +rm -f "#file8" +rm -f "#file10" +rm -f "#file12" +rm -f "#file14" +rm -f "#file17" +rm -f "#file18" +rm -f "#file19" +rm -f "#file20" +rm -f "#file21" +rm -f "#file22" +rm -f "#file23" +rm -f "#file24" +rm -f "#file25" +rm -f "#file26" +rm -f "#file27" +rm -f "#file28" +rm -f "#file29" +rm -f "#file30" +rm -f "#file31" +rm -f "#file32" +rm -f "#file33" +rm -f "#file34" +rm -f "#file35" +rm -f "#file36" +rm -f "#file37" +rm -f "#file38" +rm -f "#file39" +rm -f "#file40" +rm -f "#file41" +rm -f "#file42" +rm -f "#file46" +rm -f "#file43" +rm -f "#file47" +rm -f "#file44" +rm -f "#file48" +rm -f "#file45" +rm -f "#file49" +rm -f "#file50" +rm -f "#file51" +rm -f "#file52" +rm -f "#file53" +rm -f "#file54" +rm -f "#file55" +rm -f "#file56" +rm -f "#file57" +rm -f "#file58" +rm -f "#file59" +rm -f "#file60" +rm -f "#file61" +rm -f "#file62" +mkfifo "#file2" +mkfifo "#file4" +mkfifo "#file6" +mkfifo "#file8" +mkfifo "#file10" +mkfifo "#file12" +mkfifo "#file14" +mkfifo "#file17" +mkfifo "#file18" +mkfifo "#file19" +mkfifo "#file20" +mkfifo "#file21" +mkfifo "#file22" +mkfifo "#file23" +mkfifo "#file24" +mkfifo "#file25" +mkfifo "#file26" +mkfifo "#file27" +mkfifo "#file28" +mkfifo "#file29" +mkfifo "#file30" +mkfifo "#file31" +mkfifo "#file32" +mkfifo "#file33" +mkfifo "#file34" +mkfifo "#file35" +mkfifo "#file36" +mkfifo "#file37" +mkfifo "#file38" +mkfifo "#file39" +mkfifo "#file40" +mkfifo "#file41" +mkfifo "#file42" +mkfifo "#file46" +mkfifo "#file43" +mkfifo "#file47" +mkfifo "#file44" +mkfifo "#file48" +mkfifo "#file45" +mkfifo "#file49" +mkfifo "#file50" +mkfifo "#file51" +mkfifo "#file52" +mkfifo "#file53" +mkfifo 
"#file54" +mkfifo "#file55" +mkfifo "#file56" +mkfifo "#file57" +mkfifo "#file58" +mkfifo "#file59" +mkfifo "#file60" +mkfifo "#file61" +mkfifo "#file62" + +mkfifo "#file63" +mkfifo "#file64" +mkfifo "#file65" +mkfifo "#file66" + +{ cat $PASH_TOP/evaluation/scripts/input/1G.txt >"#file2" & } +{ $PASH_TOP/runtime/r_split "#file2" 10000000 "#file63" "#file64" "#file65" "#file66" & } + +{ $PASH_TOP/runtime/dgsh_tee.sh "#file63" "#file17" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file64" "#file18" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file65" "#file19" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file66" "#file20" -I -f & } + +{ $PASH_TOP/runtime/r_wrap xargs file <"#file17" >"#file22" & } +{ $PASH_TOP/runtime/r_wrap xargs file <"#file18" >"#file23" & } +{ $PASH_TOP/runtime/r_wrap xargs file <"#file19" >"#file24" & } +{ $PASH_TOP/runtime/r_wrap xargs file <"#file20" >"#file25" & } +{ $PASH_TOP/runtime/r_wrap grep "shell script" <"#file22" >"#file26" & } +{ $PASH_TOP/runtime/r_wrap grep "shell script" <"#file23" >"#file27" & } +{ $PASH_TOP/runtime/r_wrap grep "shell script" <"#file24" >"#file28" & } +{ $PASH_TOP/runtime/r_wrap grep "shell script" <"#file25" >"#file29" & } +{ $PASH_TOP/runtime/r_wrap cut -d: -f1 <"#file26" >"#file30" & } +{ $PASH_TOP/runtime/r_wrap cut -d: -f1 <"#file27" >"#file31" & } +{ $PASH_TOP/runtime/r_wrap cut -d: -f1 <"#file28" >"#file32" & } +{ $PASH_TOP/runtime/r_wrap cut -d: -f1 <"#file29" >"#file33" & } +{ $PASH_TOP/runtime/r_wrap xargs -L 1 wc -l <"#file30" >"#file34" & } +{ $PASH_TOP/runtime/r_wrap xargs -L 1 wc -l <"#file31" >"#file35" & } +{ $PASH_TOP/runtime/r_wrap xargs -L 1 wc -l <"#file32" >"#file36" & } +{ $PASH_TOP/runtime/r_wrap xargs -L 1 wc -l <"#file33" >"#file37" & } +{ $PASH_TOP/runtime/r_wrap grep -v "^0$" <"#file34" >"#file38" & } +{ $PASH_TOP/runtime/r_wrap grep -v "^0$" <"#file35" >"#file39" & } +{ $PASH_TOP/runtime/r_wrap grep -v "^0$" <"#file36" >"#file40" & } +{ $PASH_TOP/runtime/r_wrap grep -v "^0$" 
<"#file37" >"#file41" & } +{ $PASH_TOP/runtime/r_unwrap <"#file38" >"#file46" & } +{ sort -n <"#file53" >"#file42" & } +{ $PASH_TOP/runtime/r_unwrap <"#file39" >"#file47" & } +{ sort -n <"#file54" >"#file43" & } +{ $PASH_TOP/runtime/r_unwrap <"#file40" >"#file48" & } +{ sort -n <"#file55" >"#file44" & } +{ $PASH_TOP/runtime/r_unwrap <"#file41" >"#file49" & } +{ sort -n <"#file56" >"#file45" & } +{ sort -n -m "#file57" "#file58" >"#file50" & } +{ sort -n -m "#file59" "#file60" >"#file51" & } +{ sort -n -m "#file61" "#file62" >"#file14" & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file46" "#file53" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file47" "#file54" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file48" "#file55" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file49" "#file56" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file42" "#file57" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file43" "#file58" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file44" "#file59" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file45" "#file60" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file50" "#file61" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file51" "#file62" -I -f & } +{ head -15 <"#file14" & } +source $PASH_TOP/runtime/wait_for_output_and_sigpipe_rest.sh ${!} +rm -f "#file2" +rm -f "#file4" +rm -f "#file6" +rm -f "#file8" +rm -f "#file10" +rm -f "#file12" +rm -f "#file14" +rm -f "#file17" +rm -f "#file18" +rm -f "#file19" +rm -f "#file20" +rm -f "#file21" +rm -f "#file22" +rm -f "#file23" +rm -f "#file24" +rm -f "#file25" +rm -f "#file26" +rm -f "#file27" +rm -f "#file28" +rm -f "#file29" +rm -f "#file30" +rm -f "#file31" +rm -f "#file32" +rm -f "#file33" +rm -f "#file34" +rm -f "#file35" +rm -f "#file36" +rm -f "#file37" +rm -f "#file38" +rm -f "#file39" +rm -f "#file40" +rm -f "#file41" +rm -f "#file42" +rm -f "#file46" +rm -f "#file43" +rm -f "#file47" +rm -f "#file44" +rm -f "#file48" +rm -f "#file45" +rm -f "#file49" +rm -f "#file50" +rm -f "#file51" +rm -f "#file52" +rm 
-f "#file53" +rm -f "#file54" +rm -f "#file55" +rm -f "#file56" +rm -f "#file57" +rm -f "#file58" +rm -f "#file59" +rm -f "#file60" +rm -f "#file61" +rm -f "#file62" + +rm -f "#file63" +rm -f "#file64" +rm -f "#file65" +rm -f "#file66" \ No newline at end of file diff --git a/test/pash_tests/shortest_scripts.sh b/test/pash_tests/shortest_scripts.sh new file mode 100644 index 0000000..0d39131 --- /dev/null +++ b/test/pash_tests/shortest_scripts.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# A bash script for finding the shortest scripts +# From "Wicked Cool Shell Scripts", 2nd Ed., pg. 7 +# +p.95 multiple sed +# +p.XX crawler + +cat $IN | xargs file | grep "shell script" | cut -d: -f1 | xargs -L 1 wc -l | grep -v '^0$' | sort -n | head -15 diff --git a/test/pash_tests/shortest_scripts_env_test.sh b/test/pash_tests/shortest_scripts_env_test.sh new file mode 100644 index 0000000..35e627d --- /dev/null +++ b/test/pash_tests/shortest_scripts_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/all_cmds.txt diff --git a/test/pash_tests/sine.sh b/test/pash_tests/sine.sh new file mode 100755 index 0000000..4b928e1 --- /dev/null +++ b/test/pash_tests/sine.sh @@ -0,0 +1,4 @@ +#!/bin/bash +F="temp.txt" +[ -f $F ] && (rm $F && echo 1 >$F ) +tail -f temp.txt | while read n; do echo "1+s(3*$n)" | bc -l; sleep 1; done | tee -a temp.txt diff --git a/test/pash_tests/sort-opt.sh b/test/pash_tests/sort-opt.sh new file mode 100755 index 0000000..a5af02e --- /dev/null +++ b/test/pash_tests/sort-opt.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +sort --buffer-size=30% --parallel=$1 $IN diff --git a/test/pash_tests/sort-opt_env.sh b/test/pash_tests/sort-opt_env.sh new file mode 100755 index 0000000..fdaa642 --- /dev/null +++ b/test/pash_tests/sort-opt_env.sh @@ -0,0 +1 @@ +IN=../evaluation/scripts/input/10G.txt diff --git a/test/pash_tests/sort-sort.sh b/test/pash_tests/sort-sort.sh new file mode 100755 index 0000000..a03e889 --- /dev/null +++ b/test/pash_tests/sort-sort.sh @@ -0,0 +1,6 @@ 
+#!/bin/bash +# Calculate sort twice + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} + +cat $IN | tr A-Z a-z | sort | sort -r diff --git a/test/pash_tests/sort.sh b/test/pash_tests/sort.sh new file mode 100755 index 0000000..7e457bf --- /dev/null +++ b/test/pash_tests/sort.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# Sort input + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} + +cat $IN | sort + diff --git a/test/pash_tests/sort_env.sh b/test/pash_tests/sort_env.sh new file mode 100644 index 0000000..a65bd56 --- /dev/null +++ b/test/pash_tests/sort_env.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/scripts/input/10G.txt diff --git a/test/pash_tests/sort_env_small.sh b/test/pash_tests/sort_env_small.sh new file mode 100644 index 0000000..902f841 --- /dev/null +++ b/test/pash_tests/sort_env_small.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/scripts/input/1G.txt diff --git a/test/pash_tests/sort_env_test.sh b/test/pash_tests/sort_env_test.sh new file mode 100755 index 0000000..805e069 --- /dev/null +++ b/test/pash_tests/sort_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt diff --git a/test/pash_tests/spell-grep.sh b/test/pash_tests/spell-grep.sh new file mode 100755 index 0000000..cd87ca0 --- /dev/null +++ b/test/pash_tests/spell-grep.sh @@ -0,0 +1,17 @@ +set_diff() +{ + grep -vx -f $1 - +} + +dict=$PASH_TOP/evaluation/tests/input/sorted_words +IN=$PASH_TOP/evaluation/tests/input/1M.txt + +cat $IN | + # groff -t -e -mandoc -Tascii | # remove formatting commands + col -bx | # remove backspaces / linefeeds + tr -cs A-Za-z '\n' | + tr A-Z a-z | # map upper to lower case + tr -d '[:punct:]' | # remove punctuation + sort | # put words in alphabetical order + uniq | # remove duplicate words + set_diff $dict # report words not in dictionary diff --git a/test/pash_tests/spell.sh b/test/pash_tests/spell.sh new file mode 100644 index 0000000..9e38b4b --- /dev/null +++ b/test/pash_tests/spell.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# 
Calculate mispelled words in an input +# https://dl.acm.org/doi/10.1145/3532.315102 +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} +dict=${dict:-$PASH_TOP/evaluation/benchmarks/oneliners/input/dict.txt} + +cat $IN | + iconv -f utf-8 -t ascii//translit | # remove non utf8 characters + # groff -t -e -mandoc -Tascii | # remove formatting commands + col -bx | # remove backspaces / linefeeds + tr -cs A-Za-z '\n' | + tr A-Z a-z | # map upper to lower case + tr -d '[:punct:]' | # remove punctuation + sort | # put words in alphabetical order + uniq | # remove duplicate words + comm -23 - $dict # report words not in dictionary diff --git a/test/pash_tests/spell_env_test.sh b/test/pash_tests/spell_env_test.sh new file mode 100755 index 0000000..7152781 --- /dev/null +++ b/test/pash_tests/spell_env_test.sh @@ -0,0 +1,3 @@ +dict=$PASH_TOP/evaluation/tests/input/sorted_words +IN=$PASH_TOP/evaluation/tests/input/10M.txt + diff --git a/test/pash_tests/split_pcap.sh b/test/pash_tests/split_pcap.sh new file mode 100644 index 0000000..7c5e7b1 --- /dev/null +++ b/test/pash_tests/split_pcap.sh @@ -0,0 +1,40 @@ +#!/bin/sh +# To process large pcap file, usually it is better to split it into small chunks first, +# then process every chunk in parallel. 
+INPUT=${INPUT:-$PASH_TOP/evaluation/scripts/input/201011271400.dump} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/scripts/input/out.pcap} +split_size=1000 +output_index=1 +loop_count=10 +exit_flag=0 + +command() { + echo "$1" "$2" +} + +tcpdump -r ${INPUT} -w ${OUTPUT} -C ${split_size} + +command ${OUTPUT} + +while : +do + loop_index=0 + while test ${loop_index} -lt ${loop_count} + do + if test -e ${OUTPUT}${output_index} + then + command ${OUTPUT} ${output_index} + output_index=$((output_index + 1)) + loop_index=$((loop_index + 1)) + else + exit_flag=1 + break + fi + done + wait + + if test ${exit_flag} -eq 1 + then + exit 0 + fi +done diff --git a/test/pash_tests/star-escape.sh b/test/pash_tests/star-escape.sh new file mode 100644 index 0000000..d7222cc --- /dev/null +++ b/test/pash_tests/star-escape.sh @@ -0,0 +1 @@ +x=$(echo "*" '*' \*); echo "$x" diff --git a/test/pash_tests/suggest-ec2.sh b/test/pash_tests/suggest-ec2.sh new file mode 100755 index 0000000..01e5241 --- /dev/null +++ b/test/pash_tests/suggest-ec2.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Suggests envvars for use in ./make-ec2.sh + +main() { + local vpc_id="$(aws ec2 describe-vpcs --output text --query 'Vpcs[0].VpcId')"; + local key_name="$(aws ec2 describe-key-pairs --output text --query 'KeyPairs[0].KeyName')"; + local subnet="$(aws ec2 describe-subnets --output text --query 'Subnets[0].SubnetId' --filter Name=vpc-id,Values=$vpc_id)"; + local sg="$(aws ec2 describe-security-groups --output text --filter Name=vpc-id,Values=$vpc_id --query 'SecurityGroups[0].GroupId')"; + + echo "export PASH_AWS_EC2_AMI='ami-0d739ceed1874f156';"; + echo "export PASH_AWS_EC2_INSTANCE_TYPE='t2.micro';"; + echo "export PASH_AWS_EC2_VPC_ID='$vpc_id';"; + echo "export PASH_AWS_EC2_KEY_NAME='$key_name';"; + echo "export PASH_AWS_EC2_SUBNET='$subnet';"; + echo "export PASH_AWS_EC2_SECURITY_GROUP='$sg';"; + echo "export PASH_AWS_EC2_DISK_SIZE_GB='10';"; +} + +main diff --git a/test/pash_tests/symtab-sha.sh 
b/test/pash_tests/symtab-sha.sh new file mode 100755 index 0000000..f460648 --- /dev/null +++ b/test/pash_tests/symtab-sha.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# To build and sign an SGX enclave, this script extracts the executable's symbol +# table and calculates its SHA256 hashsum. + +# Require +# Data: /usr/lib/libz3.so + +IN=/usr/lib/libz3.so +OUT=./output/out.txt + +readelf -x .symtab $IN | + tail -n +3 | + head -n -1 | # next three implement `awk '{print $2$3$4$5}'` + sed 's/^[[:space:]]*//' | + cut -d ' ' -f2-5 | + tr -d ' ' | + tr -d "\n" | + xxd -r -p | + sha256sum > $OUT diff --git a/test/pash_tests/tail.sh b/test/pash_tests/tail.sh new file mode 100755 index 0000000..e4277fe --- /dev/null +++ b/test/pash_tests/tail.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +# FIXME missing tail parameters + +cat "${@: -1}" diff --git a/test/pash_tests/tailprogs.sh b/test/pash_tests/tailprogs.sh new file mode 100755 index 0000000..eae9e15 --- /dev/null +++ b/test/pash_tests/tailprogs.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +# A bash script for finding the 10 longest scripts +# (TODO: `group_by` script type?) + +# From "Wicked Cool Shell Scripts", 2nd Ed., pg. 7 + +# Data: +# Assumes a full list of commands +# +# # simple, from a single dir: +# echo "$( +# ls /usr/bin/* +# )" > all_cmds.txt +# +# # Or more complicated, from $PATH: +# +# echo "$( +# case "$PATH" in +# (*[!:]:) PATH="$PATH:" ;; +# esac +# +# set -f; IFS=: +# for dir in $PATH; do +# set +f +# [ -z "$dir" ] && dir="." +# for file in "$dir"/*; do +# if [ -x "$file" ] && ! 
[ -d "$file" ]; then +# printf '%s = %s\n' "${file##*/}" "$file" +# fi +# done +# done +# )" > ./input/allcmds.txt + +IN=./input/cmds10x.txt +OUT=./output/out.txt + +ls /usr/bin/* > $IN + +cat $IN | + xargs file | + grep "shell script" | + cut -d: -f1 | + xargs wc -l | + sort -rn | + head -n 25 > $OUT diff --git a/test/pash_tests/tee_web_index_bug.sh b/test/pash_tests/tee_web_index_bug.sh new file mode 100644 index 0000000..05e3587 --- /dev/null +++ b/test/pash_tests/tee_web_index_bug.sh @@ -0,0 +1,25 @@ +IN=$PASH_TOP/evaluation/tests/input/1M.txt + +mkfifo {1,2,3}grams + +cat "$IN" | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + tee 3grams 2grams 1grams > /dev/null & + +cat 1grams | + sort | + uniq -c | + sort -rn > 1-grams.txt & + +cat 2grams | + sort | + uniq -c | + sort -rn > 2-grams.txt & + +cat 3grams | + sort | + uniq -c | + sort -rn # >> 3-grams.txt + +rm {1,2,3}grams {1,2,3}-grams.txt diff --git a/test/pash_tests/temp-analytics.sh b/test/pash_tests/temp-analytics.sh new file mode 100755 index 0000000..319a8f0 --- /dev/null +++ b/test/pash_tests/temp-analytics.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +FROM=${FROM:-2015} +TO=${TO:-2015} +IN=${IN:-'http://ndr.md/data/noaa/'} +fetch=${fetch:-"curl -s"} + +data_file=temperatures.txt + +## Downloading and extracting +seq $FROM $TO | + sed "s;^;$IN;" | + sed 's;$;/;' | + xargs -r -n 1 $fetch | + grep gz | + tr -s ' \n' | + cut -d ' ' -f9 | + sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' | + sed "s;^;$IN;" | + xargs -n1 curl -s | + gunzip > "${data_file}" + +## Processing +cat "${data_file}" | + cut -c 89-92 | + grep -v 999 | + sort -rn | + head -n1 > max.txt + +cat "${data_file}" | + cut -c 89-92 | + grep -v 999 | + sort -n | + head -n1 > min.txt + +cat "${data_file}" | + cut -c 89-92 | + grep -v 999 | + awk "{ total += \$1; count++ } END { print total/count }" > average.txt diff --git a/test/pash_tests/temp_test.sh b/test/pash_tests/temp_test.sh new file mode 100755 index 0000000..88f78f1 --- /dev/null +++ 
b/test/pash_tests/temp_test.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +func() { read -r distro setup; echo $distro $setup; } + +export -f func + +cat ../evaluation/usecases/shellcheck/temp_input.txt | + func diff --git a/test/pash_tests/test-common.sh b/test/pash_tests/test-common.sh new file mode 100755 index 0000000..63924fa --- /dev/null +++ b/test/pash_tests/test-common.sh @@ -0,0 +1,19 @@ +CMD="$1" +FLG="$2" +AGG="$3" + +cat $IN1 $IN2 | $CMD $FLG > ./temp/reference +cat $IN1 | $CMD $FLG > ./temp/partial1 +cat $IN2 | $CMD $FLG > ./temp/partial2 + +$AGG ./temp/partial1 ./temp/partial2 $FLG > ./temp/aggregated + +diff ./temp/aggregated ./temp/reference > ./temp/log +if [ $? -ne 0 ]; then + cat ./temp/log | head + echo $CMD "$FLG ...FAIL" +else + echo $CMD "$FLG ...pass" +fi + +rm -f ./temp/partial1 ./temp/partial2 ./temp/aggregated ./temp/reference ./temp/log diff --git a/test/pash_tests/test.sh b/test/pash_tests/test.sh new file mode 100755 index 0000000..54240a9 --- /dev/null +++ b/test/pash_tests/test.sh @@ -0,0 +1,5 @@ +# this is a comment + +ls +cd .. 
+ diff --git a/test/pash_tests/test1.sh b/test/pash_tests/test1.sh new file mode 100755 index 0000000..3f091ac --- /dev/null +++ b/test/pash_tests/test1.sh @@ -0,0 +1,48 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out +file9=9.out +rm -f *.out +testFile=../../evaluation/scripts/input/10M.txt +batchSize=100000 +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +# mkfifo $file5 + +mkfifo $file7 +mkfifo $file8 +mkfifo $file9 + +# mkfifo $file6 +# cat $testFile > $file9 & +# ../auto-split.sh $file9 $file1 $file2 & +# grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file3 > $file4 & +# grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file2 > $file6 & +# ../eager.sh $file1 $file3 temp & +# ../eager.sh $file6 $file7 temp2 & +# cat $file4 $file7 > $file5 + +../r_split $testFile $batchSize $file1 $file2 $file7 & + +../r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file1 > $file3 & +../r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file2 > $file4 & +../r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file7 > $file8 & + +../r_merge $file3 $file4 $file8> $file5 + +# cat $testFile | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > $file6 +# if cmp -s "$file6" "$file5"; then +# printf 'The file "%s" is the same as "%s"\n' "$file6" "$file5" +# else +# printf 'The file "%s" is different from "%s"\n' "$file6" "$file5" +# fi + +# rm -rf *out \ No newline at end of file diff --git a/test/pash_tests/tilde.sh b/test/pash_tests/tilde.sh new file mode 100644 index 0000000..5265eba --- /dev/null +++ b/test/pash_tests/tilde.sh @@ -0,0 +1,4 @@ +HOME='abc xyz' +printf '%s\n' ~ +HOME='test.*' +printf '%s\n' ~ \ No newline at end of file diff --git a/test/pash_tests/to_mp3.sh b/test/pash_tests/to_mp3.sh new file mode 100755 index 0000000..79a6931 --- /dev/null +++ b/test/pash_tests/to_mp3.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# tag: wav-to-mp3 
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/wav} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/mp3} +LOGS=${OUT}/logs +mkdir -p ${LOGS} +run_tests(){ + FILE=$1 + ffmpeg -y -i $FILE -f mp3 -ab 192000 $OUT/$(basename $FILE).mp3 2>/dev/null +} + +export -f run_tests + +pkg_count=0 +for item in ${IN}/*; +do + pkg_count=$((pkg_count + 1)); + run_tests $item > ${LOGS}/${pkg_count}.log +done + +echo 'done'; diff --git a/test/pash_tests/top-n.sh b/test/pash_tests/top-n.sh new file mode 100755 index 0000000..d4373f7 --- /dev/null +++ b/test/pash_tests/top-n.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Top-N (1000) terms +# from https://dl.acm.org/doi/10.1145/5948.315654 + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} + +cat $IN | tr -cs A-Za-z '\n' | tr A-Z a-z | sort | uniq -c | sort -rn | sed 100q + diff --git a/test/pash_tests/topn.sh b/test/pash_tests/topn.sh new file mode 100755 index 0000000..6825938 --- /dev/null +++ b/test/pash_tests/topn.sh @@ -0,0 +1,2 @@ +# Top-N (1000) terms +cat $IN | tr -cs A-Za-z '\n' | tr A-Z a-z | sort | uniq -c | sort -rn | sed ${N}q diff --git a/test/pash_tests/topn_env_test.sh b/test/pash_tests/topn_env_test.sh new file mode 100644 index 0000000..0e8bdb7 --- /dev/null +++ b/test/pash_tests/topn_env_test.sh @@ -0,0 +1,3 @@ +N=1000 +IN=$PASH_TOP/evaluation/tests/input/10M.txt + diff --git a/test/pash_tests/tr-test.sh b/test/pash_tests/tr-test.sh new file mode 100644 index 0000000..becbb84 --- /dev/null +++ b/test/pash_tests/tr-test.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +## This test contains all occurences of tr (to test the annotation) + +FILE="$PASH_TOP/evaluation/tests/input/1M.txt" + +cat $FILE | tr -d ',' +cat $FILE | tr '[A-Z]' '[a-z]' +cat $FILE | tr -s ' ' '\n' +cat $FILE | tr '[a-z]' 'P' +cat $FILE | tr -c "[a-z][A-Z]" '\n' +cat $FILE | tr ' ' '\n' +cat $FILE | tr '[a-z]' '\n' +## This is a bit tricky but `tr -d '\n'` is pure because after it is done there is only 
one line. +cat $FILE | tr -d '\n' | grep "the" +cat $FILE | tr -c '[A-Z]' '\n' +cat $FILE | tr " " " " +cat $FILE | tr -cs A-Za-z '\n' +cat $FILE | tr A-Z a-z +cat $FILE | tr -d '[:punct:]' +cat $FILE | tr [:lower] [:upper] +cat $FILE | tr [:lower:] [:upper:] +cat $FILE | tr -s ' ' +cat $FILE | tr -s ' \n' +cat $FILE | tr -d '\012' | sort diff --git a/test/pash_tests/tr_cs_wc_test.sh b/test/pash_tests/tr_cs_wc_test.sh new file mode 100755 index 0000000..b8fc1fc --- /dev/null +++ b/test/pash_tests/tr_cs_wc_test.sh @@ -0,0 +1,17 @@ +## This script is used to experiment with how to get parallelism benefits from a bunch of Unix50 pipelines + +## You have to run the following before running this script. +## The output should be 439M long +## Warning: Takes a long time +## cat $PASH_TOP/evaluation/unix50/4.txt | $PASH_TOP/runtime/multiply.sh -m 1000000 | pv > $PASH_TOP/evaluation/unix50/big_4.txt + +FILE="${PASH_TOP}/evaluation/unix50/big_4.txt" + +# cat $FILE | tr -s ' ' '\n' | grep 'x' | grep '\.' | wc -l + +cat $FILE | tr ' ' '\n' | grep 'x' | grep '\.' | wc -l + +## Possible solutions: +## 1. Make an aggregator for tr -s (This is the best solutoin) +## 2. Remove the -s since it is not actually necessary +## 3. Make an aggregator for wc (?) \ No newline at end of file diff --git a/test/pash_tests/trap.sh b/test/pash_tests/trap.sh new file mode 100644 index 0000000..f959731 --- /dev/null +++ b/test/pash_tests/trap.sh @@ -0,0 +1,7 @@ +myfunction() +{ + echo myfunction invoked +} +trap myfunction EXIT +echo hello one +echo hello two diff --git a/test/pash_tests/trigrams.sh b/test/pash_tests/trigrams.sh new file mode 100755 index 0000000..5942c2e --- /dev/null +++ b/test/pash_tests/trigrams.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +# A somewhat suboptimal way of calculating 3-grams. 
+# Part of the intention is to highlight overheads of tagging each stream element + +IN=./input/1G.txt +OUT=./output/out.txt + +mkfifo s2 s3 + +cat $IN | +# head -n 2 | + sed 's/[^a-zA-Z0-9]/ /g' | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + tee s2 | + tail +2 | + paste s2 - | # At this point the stream has two elements + tee s3 | + cut -f 1 | + tail +3 | + paste s3 - | # Joining (1) the first two words , (2) + sort | + uniq > $OUT +rm s2 s3 + + + diff --git a/test/pash_tests/trim_primers.sh b/test/pash_tests/trim_primers.sh new file mode 100644 index 0000000..5254ae9 --- /dev/null +++ b/test/pash_tests/trim_primers.sh @@ -0,0 +1,6 @@ +# trim primers +INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/bio/output} +cd ${INPUT} +find . -maxdepth 1 -name "*.fastq" | xargs -I {} cutadapt -a TCCTCCGCTTATTGATAGC -o ${OUTPUT}/{}\_trimmed.fastq {}; + diff --git a/test/pash_tests/uniq-c.2.sh b/test/pash_tests/uniq-c.2.sh new file mode 100755 index 0000000..3c8a259 --- /dev/null +++ b/test/pash_tests/uniq-c.2.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +END_OF_1=$(tail -n 1 "$1") +END_NUM=$(echo "$END_OF_1" | grep -E -o '^[ ]*[0-9]*[ ]*' | tr -d "[:space:]") +END_WORD=$(echo "$END_OF_1" | sed 's/^[ ]*[0-9]*[ ]*//g') + +START_OF_2=$(head -n 1 "$2") +START_NUM=$(echo "$START_OF_2" | grep -E -o '^[ ]*[0-9]*[ ]*' | tr -d "[:space:]") +START_WORD=$(echo "$START_OF_2" | sed 's/^[ ]*[0-9]*[ ]*//g') + +if [[ $START_WORD == "$END_WORD" ]]; then + TOTAL_NUM=$((START_NUM + END_NUM)) + sed '$d' "$1" + printf "%7s %s\n" "$TOTAL_NUM" "$START_WORD" + sed '1d' "$2" +else + cat "$1" "$2" +fi diff --git a/test/pash_tests/uniq.sh b/test/pash_tests/uniq.sh new file mode 100755 index 0000000..dc75a38 --- /dev/null +++ b/test/pash_tests/uniq.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +# simply rerun uniq +cat "$@" | uniq diff --git a/test/pash_tests/unix50.sh b/test/pash_tests/unix50.sh new file mode 100755 index 0000000..7c5182b --- /dev/null +++ 
b/test/pash_tests/unix50.sh @@ -0,0 +1,151 @@ +#!/bin/bash + +# scripts from https://unixgame.io/ +# https://github.com/psinghbh/softsec.github.io +# input files https://github.com/psinghbh/softsec.github.io/tree/master/ctf/unixgame.io/challenges +# Which join is easier: http://www.theunixschool.com/2011/08/5-different-ways-to-join-all-lines-in.html +# 1 (default) + 3 + 1 + 1 + 6 + 1 + 1 + 3 + 5 + 9 + 3 + 2 + 1 = 37 (there are 3 missing) +# missing 8.5, 9.5, 12.1 + +if [[ -z "$IN_PRE" ]]; then + if [[ -z "$PASH_TOP" ]]; then + echo "Need to provide PASH_TOP, possibly $(git rev-parse --show-toplevel)" 1>&2 + exit 1 + else + export IN_PRE=$PASH_TOP/evaluation/benchmarks/unix50/input + fi +fi + +IN1=$IN_PRE/1.txt +IN2=$IN_PRE/2.txt +IN3=$IN_PRE/3.txt +IN4=$IN_PRE/4.txt +IN5=$IN_PRE/5.txt +IN6=$IN_PRE/6.txt +IN7=$IN_PRE/7.txt +IN8=$IN_PRE/8.txt +IN91=$IN_PRE/9.1.txt +IN92=$IN_PRE/9.2.txt +IN93=$IN_PRE/9.3.txt +IN94=$IN_PRE/9.4.txt +IN95=$IN_PRE/9.5.txt +IN96=$IN_PRE/9.6.txt +IN97=$IN_PRE/9.7.txt +IN98=$IN_PRE/9.8.txt +IN99=$IN_PRE/9.9.txt +IN10=$IN_PRE/10.txt +IN11=$IN_PRE/11.txt +IN12=$IN_PRE/12.txt + +# 1.0: extract the last name +cat $IN1 | cut -d ' ' -f 2 + +# 1.1: extract names and sort +cat $IN1 | cut -d ' ' -f 2 | sort + +# 1.2: extract names and sort +cat $IN1 | head -n 2 | cut -d ' ' -f 2 + +# 1.3: sort top first names +cat $IN1 | cut -d ' ' -f 1 | sort | uniq -c | sort -r + +# 2.1: get all Unix utilities +cat $IN2 | cut -d ' ' -f 4 | tr -d ',' + +# 3.1: get lowercase first letter of last names (awk) +cat $IN3 | cut -d ' ' -f 2 | cut -c 1-1 | tr -d '\n' | tr '[A-Z]' '[a-z]' + +# 4.1: find number of rounds +cat $IN4 | tr ' ' '\n' | grep '\.' | wc -l + +# 4.2: find pieces captured by Belle +cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | wc -l + +# 4.3: find pieces captured by Belle with a pawn +cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | cut -d '.' 
-f 2 | grep -v '[KQRBN]' | wc -l + +# 4.4: histogram of Belle's captures (-pawns) by each type of piece +cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | cut -d '.' -f 2 | grep '[KQRBN]' | cut -c 1-1 | sort | uniq -c | sort -nr + +# 4.5: 4.4 + pawns +cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | cut -d '.' -f 2 | cut -c 1-1 | tr '[a-z]' 'P' | sort | uniq -c | sort -nr + +# 4.6: piece used the most by Belle +cat $IN4 | tr ' ' '\n' | grep '\.' | cut -d '.' -f 2 | cut -c 1-1 | tr '[a-z]' 'P' | sort -r | uniq | head -n 3 | tail -n 1 + +# 5.1: extract hello world +cat $IN5 | grep 'print' | cut -d "\"" -f 2 | cut -c 1-12 + +# 6.1: order the bodies by how easy it would be to land on them in Thompson's Space Travel game when playing at the highest simulation scale +cat $IN6 | awk "{print \$2, \$0}" | sort -nr | cut -d ' ' -f 2 + +# 7.1: identify number of AT&T unix versions +cat $IN7 | cut -f 1 | grep 'AT&T' | wc -l + +# 7.2: find most frequently occurring machine +cat $IN7 | cut -f 2 | sort -n | uniq -c | sort -nr | head -n 1 | tr -s ' ' '\n' | tail -n 1 + +# 7.3: all the decades in which a unix version was released +cat $IN7 | cut -f 4 | sort -n | cut -c 3-3 | uniq | sed s/\$/'0s'/ + +# 8.1: count unix birth-year +cat $IN8 | tr ' ' '\n' | grep 1969 | wc -l + +# 8.2: find Bell Labs location where Dennis Ritchie had his office +cat $IN8 | grep 'Bell' | awk 'length <= 45' | cut -d ',' -f 2 | awk "{\$1=\$1};1" + +# 8.3: find names of the four people most involved with unix +cat $IN8 | grep '(' | cut -d '(' -f 2 | cut -d ')' -f 1 | head -n 1 + +# 8.4: find longest words without hyphens +cat $IN8 | tr -c "[a-z][A-Z]" '\n' | sort | awk "length >= 16" + +# # 8.5: Find second-most-freq 8-character word(s) without hyphens +# cat $IN8 > /dev/null + +# 9.1: extract the word PORT +cat $IN91 | tr ' ' '\n' | grep '[A-Z]' | tr '[a-z]' '\n' | grep '[A-Z]' | tr -d '\n' | cut -c 1-4 + +# 9.2: extract the word BELL +cat $IN92 | cut -c 1-1 | tr -d '\n' + +# 9.3: animal that used to decorate 
the Unix room +cat $IN93 | cut -c 1-2 | tr -d '\n' + +# 9.4: four corners with E centered, for an "X" configuration +cat $IN94 | tr ' ' '\n' | grep "\"" | sed 4d | cut -d "\"" -f 2 | tr -d '\n' + +# # 9.5: backwards running clock, in a backwards poem +# cat $IN95 > /dev/null + +# 9.6: Follow the directions for grep +cat $IN96 | tr ' ' '\n' | grep '[A-Z]' | sed 1d | sed 3d | sed 3d | tr '[a-z]' '\n' | grep '[A-Z]' | sed 3d | tr -c '[A-Z]' '\n' | tr -d '\n' + +# 9.7: Four corners +cat $IN97 | sed 2d | sed 2d | tr -c '[A-Z]' '\n' | tr -d '\n' + +# 9.8: TELE-communications +cat $IN98 | tr -c '[a-z][A-Z]' '\n' | grep '[A-Z]' | sed 1d | sed 2d | sed 3d | sed 4d | tr -c '[A-Z]' '\n' | tr -d '\n' + +# 9.9: +cat $IN99 | tr -c '[a-z][A-Z]' '\n' | grep '[A-Z]' | sed 1d | sed 1d | sed 2d | sed 3d | sed 5d | tr -c '[A-Z]' '\n' | tr -d '\n' + +# 10.1: count Turing award recipients while working at Bell Labs +cat $IN10 | sed 1d | grep 'Bell' | cut -f 2 | wc -l + +# 10.2: list Turing award recipients while working at Bell Labs +cat $IN10 | sed 1d | grep 'Bell' | cut -f 2 + +# 10.3: extract Ritchie's username +cat $IN10 | grep 'Bell' | cut -f 2 | head -n 1 | fmt -w1 | cut -c 1-1 | tr -d '\n' | tr '[A-Z]' '[a-z]' + +# 11.1: year Ritchie and Thompson receive the Hamming medal +cat $IN11 | grep 'UNIX' | cut -f 1 + +# 11.2: most repeated first name in the list? +cat $IN11 | cut -f 2 | cut -d ' ' -f 1 | sort | uniq -c | sort -nr | head -n 1 | fmt -w1 | sed 1d + + +# # 12.1: transform this list of instructions such that if the snake follows the +# # new instructions top to bottom, it ends on the location of the apple. 
+# cat $IN12 > /dev/null diff --git a/test/pash_tests/unparsing-special-chars.sh b/test/pash_tests/unparsing-special-chars.sh new file mode 100644 index 0000000..7f584ab --- /dev/null +++ b/test/pash_tests/unparsing-special-chars.sh @@ -0,0 +1,10 @@ + x=`printf '%s' \#`; printf '%s\n' "$x" + x=`printf '%s' "#"`; printf '%s\n' "$x" + x=`printf '%s' \<`; printf '%s\n' "$x" + x=`printf '%s' "<"`; printf '%s\n' "$x" + x=`printf '%s' \>`; printf '%s\n' "$x" + x=`printf '%s' ">"`; printf '%s\n' "$x" + x=`printf '%s' \~`; printf '%s\n' "$x" + x=`printf '%s' "~"`; printf '%s\n' "$x" + x=`printf '%s' \ `; printf '%s\n' "$x" + x=`printf '%s' " "`; printf '%s\n' "$x" \ No newline at end of file diff --git a/test/pash_tests/unsafe0.sh b/test/pash_tests/unsafe0.sh new file mode 100644 index 0000000..9d7ef33 --- /dev/null +++ b/test/pash_tests/unsafe0.sh @@ -0,0 +1 @@ +echo ${x=uhoh} diff --git a/test/pash_tests/unsafe1.sh b/test/pash_tests/unsafe1.sh new file mode 100644 index 0000000..a60c58c --- /dev/null +++ b/test/pash_tests/unsafe1.sh @@ -0,0 +1 @@ +echo $((x=2)) diff --git a/test/pash_tests/unsafe2.sh b/test/pash_tests/unsafe2.sh new file mode 100644 index 0000000..475e688 --- /dev/null +++ b/test/pash_tests/unsafe2.sh @@ -0,0 +1 @@ +echo ${nonesuch?uhoh} is unsafe diff --git a/test/pash_tests/update-img.sh b/test/pash_tests/update-img.sh new file mode 100755 index 0000000..ca6617c --- /dev/null +++ b/test/pash_tests/update-img.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +### +# Repackage updated pash docker image to latest commit +### + +cd $(dirname $0) + +# Assumes a pash image exists already +# curl img.pash.ndr.md | docker load; docker run --name pash-playground -it pash/18.04 + +docker start pash-playground +docker exec pash-playground bash -c 'cd /pash; git pull' +docker stop pash-playground + + +docker commit $(docker ps -a | grep pash-playground | cut -f1 -d' ') pash/18.04:latest +docker save pash/18.04:latest | gzip > pash-docker.tar.gz + +if [[ "$(hostname)" == "beta" 
]]; then + # This assumes you're on beta + mv pash-docker.tar.gz /var/www/pash-web/ +fi + +docker build -t pash-play ../ + +if [[ ./token.txt ]]; then + cat ~/token.txt | docker login https://docker.pkg.github.com -u nvasilakis --password-stdin +fi + +docker push docker.pkg.github.com/andromeda/pash/play:latest \ No newline at end of file diff --git a/test/pash_tests/var_assgn.sh b/test/pash_tests/var_assgn.sh new file mode 100644 index 0000000..0938da7 --- /dev/null +++ b/test/pash_tests/var_assgn.sh @@ -0,0 +1,3 @@ +echo "foobar: ${FOOBAR}" +echo "foobar: ${FOOBAR:=baz}" +echo "foobar: ${FOOBAR}" diff --git a/test/pash_tests/wait_for_output_and_sigpipe_rest.sh b/test/pash_tests/wait_for_output_and_sigpipe_rest.sh new file mode 100755 index 0000000..a56bfb5 --- /dev/null +++ b/test/pash_tests/wait_for_output_and_sigpipe_rest.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +## TODO: Give it the output pid as an argument +wait "$@" + +## TODO: This only works if there is only a single output node +## (and a single node given as an argument to `wait`). +export internal_exec_status=$? + +# It is assumed that $distro is set when this is called. + +# Note: We need the || true after the grep so that it doesn't exit with error if it finds nothing. + + +# This value may contains multiple pids as a whitespace-separated string, and +# we must split it as multiple pids by shell's field splitting. +# shellcheck disable=SC2086 +(> /dev/null 2>&1 kill -SIGPIPE $pids_to_kill || true) + +## +## Old way of waiting, very inefficient. 
+## + +# now do different things depending on distro + +## TODO: Delete this since it is very costly +# case "$distro" in +# freebsd*) +# # not sure at all about this one +# pids_to_kill="$(ps -efl $BASHPID |awk '{print $1}' | { grep -E '[0-9]' || true; } )" +# ;; +# *) +# pids_to_kill="$(ps --ppid $BASHPID |awk '{print $1}' | { grep -E '[0-9]' || true; } )" +# ;; +# esac +# pids_to_kill="" + +## TODO: Maybe send a signal to all pids at once +# for pid in $pids_to_kill +# do +# # wait $pid +# (> /dev/null 2>&1 kill -SIGPIPE $pid || true) +# done diff --git a/test/pash_tests/wc.sh b/test/pash_tests/wc.sh new file mode 100755 index 0000000..1020c17 --- /dev/null +++ b/test/pash_tests/wc.sh @@ -0,0 +1,3 @@ +# IN=/home/ubuntu/pash/evaluation/scripts/input/10M.txt + +cat $IN | wc \ No newline at end of file diff --git a/test/pash_tests/web-index-aux.sh b/test/pash_tests/web-index-aux.sh new file mode 100644 index 0000000..cb6fd40 --- /dev/null +++ b/test/pash_tests/web-index-aux.sh @@ -0,0 +1,141 @@ +mkfifo {1,2,3}grams + +bigrams_aux() +{ + ( mkfifo s2 > /dev/null ) ; + ( mkfifo s3 > /dev/null ) ; + + sed '$d' s2 > s3 & + tee s2 | + tail +2 | + paste s3 - + rm s2 + rm s3 +} + +bigram_aux_map() +{ + IN=$1 + OUT=$2 + AUX_HEAD=$3 + AUX_TAIL=$4 + + s2=$(mktemp -u) + aux1=$(mktemp -u) + aux2=$(mktemp -u) + aux3=$(mktemp -u) + temp=$(mktemp -u) + + mkfifo $s2 + mkfifo $aux1 + mkfifo $aux2 + mkfifo $aux3 + + ## New way of doing it using an intermediate file. 
This is slow + ## but doesn't deadlock + cat $IN > $temp + + sed '$d' $temp > $aux3 & + cat $temp | head -n 1 > $AUX_HEAD & + cat $temp | tail -n 1 > $AUX_TAIL & + cat $temp | tail +2 | paste $aux3 - > $OUT & + + wait + + rm $temp + rm $s2 + rm $aux1 + rm $aux2 + rm $aux3 +} + +bigram_aux_reduce() +{ + IN1=$1 + AUX_HEAD1=$2 + AUX_TAIL1=$3 + IN2=$4 + AUX_HEAD2=$5 + AUX_TAIL2=$6 + OUT=$7 + AUX_HEAD_OUT=$8 + AUX_TAIL_OUT=$9 + + temp=$(mktemp -u) + + mkfifo $temp + + cat $AUX_HEAD1 > $AUX_HEAD_OUT & + cat $AUX_TAIL2 > $AUX_TAIL_OUT & + paste $AUX_TAIL1 $AUX_HEAD2 > $temp & + cat $IN1 $temp $IN2 > $OUT & + + wait + + rm $temp +} + + +trigrams_aux() +{ + s2=$(mktemp -u) + s3=$(mktemp -u) + + mkfifo $s2 $s3 + + tee $s2 | + tail +2 | + paste $s2 - | + tee $s3 | + cut -f 1 | + tail +3 | + paste $s3 - | + sed "\$d" | + sed "\$d" + + rm $s2 $s3 +} + + +extract_text() +{ + while read -r line + do + cat $line | + iconv -c -t ascii//TRANSLIT | + pandoc +RTS -K64m -RTS --from html --to plain --quiet + done +} + + +cat $IN | + sed "s#^#$WIKI#" | + extract_text | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + grep -vwFf $WEB_INDEX_DIR/stopwords.txt | + $WEB_INDEX_DIR/stem-words.js | + tee 3grams 2grams 1grams > /dev/null & + +cat 1grams | + sort | + uniq -c | + sort -rn > 1-grams.txt & + +cat 2grams | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + bigrams_aux | + sort | + uniq -c | + sort -rn > 2-grams.txt & + +cat 3grams | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + trigrams_aux | + sort | + uniq -c | + sort -rn # > 3-grams.txt + +rm {1,2,3}grams diff --git a/test/pash_tests/web-index.sh b/test/pash_tests/web-index.sh new file mode 100755 index 0000000..bca753b --- /dev/null +++ b/test/pash_tests/web-index.sh @@ -0,0 +1,148 @@ +#!/bin/bash + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/web-index/input/5.txt} +WEB_INDEX_DIR=${WEB_INDEX_DIR:-$PASH_TOP/evaluation/benchmarks/web-index/input} +WIKI=${WIKI:-$PASH_TOP/evaluation/benchmarks/web-index/input/} + +mkfifo {1,2,3}grams + +bigrams_aux() +{ + ( 
mkfifo s2 > /dev/null ) ; + ( mkfifo s3 > /dev/null ) ; + + sed '$d' s2 > s3 & + tee s2 | + tail +2 | + paste s3 - + rm s2 + rm s3 +} + +bigram_aux_map() +{ + IN=$1 + OUT=$2 + AUX_HEAD=$3 + AUX_TAIL=$4 + + s2=$(mktemp -u) + aux1=$(mktemp -u) + aux2=$(mktemp -u) + aux3=$(mktemp -u) + temp=$(mktemp -u) + + mkfifo $s2 + mkfifo $aux1 + mkfifo $aux2 + mkfifo $aux3 + + ## New way of doing it using an intermediate file. This is slow + ## but doesn't deadlock + cat $IN > $temp + + sed '$d' $temp > $aux3 & + cat $temp | head -n 1 > $AUX_HEAD & + cat $temp | tail -n 1 > $AUX_TAIL & + cat $temp | tail +2 | paste $aux3 - > $OUT & + + wait + + rm $temp + rm $s2 + rm $aux1 + rm $aux2 + rm $aux3 +} + +bigram_aux_reduce() +{ + IN1=$1 + AUX_HEAD1=$2 + AUX_TAIL1=$3 + IN2=$4 + AUX_HEAD2=$5 + AUX_TAIL2=$6 + OUT=$7 + AUX_HEAD_OUT=$8 + AUX_TAIL_OUT=$9 + + temp=$(mktemp -u) + + mkfifo $temp + + cat $AUX_HEAD1 > $AUX_HEAD_OUT & + cat $AUX_TAIL2 > $AUX_TAIL_OUT & + paste $AUX_TAIL1 $AUX_HEAD2 > $temp & + cat $IN1 $temp $IN2 > $OUT & + + wait + + rm $temp +} + + +trigrams_aux() +{ + s2=$(mktemp -u) + s3=$(mktemp -u) + + mkfifo $s2 $s3 + + tee $s2 | + tail +2 | + paste $s2 - | + tee $s3 | + cut -f 1 | + tail +3 | + paste $s3 - | + sed "\$d" | + sed "\$d" + + rm $s2 $s3 +} + + +extract_text() +{ + while read -r line + do + cat $line | + iconv -c -t ascii//TRANSLIT | + pandoc +RTS -K64m -RTS --from html --to plain --quiet + done +} + +export -f extract_text + +cat $IN | + sed "s#^#$WIKI#" | + extract_text | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + grep -vwFf $WEB_INDEX_DIR/stopwords.txt | + $WEB_INDEX_DIR/stem-words.js | + tee 3grams 2grams 1grams > /dev/null & + +cat 1grams | + sort | + uniq -c | + sort -rn > 1-grams.txt & + +cat 2grams | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + bigrams_aux | + sort | + uniq -c | + sort -rn > 2-grams.txt & + +cat 3grams | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + trigrams_aux | + sort | + uniq -c | + sort -rn # > 3-grams.txt + +rm {1,2,3}grams diff --git 
a/test/pash_tests/wf.sh b/test/pash_tests/wf.sh new file mode 100755 index 0000000..262e7b7 --- /dev/null +++ b/test/pash_tests/wf.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Calculate the frequency of each word in the document, and sort by frequency + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} + +cat $IN | tr -cs A-Za-z '\n' | tr A-Z a-z | sort | uniq -c | sort -rn diff --git a/test/pash_tests/wf_env_test.sh b/test/pash_tests/wf_env_test.sh new file mode 100644 index 0000000..805e069 --- /dev/null +++ b/test/pash_tests/wf_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt diff --git a/test/pash_tests/worker.sh b/test/pash_tests/worker.sh new file mode 100644 index 0000000..a94285a --- /dev/null +++ b/test/pash_tests/worker.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# trap ctrl-c and call ctrl_c() +trap cleanup INT + +export PASH_TOP=${PASH_TOP:-${BASH_SOURCE%/*}} +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/" +# point to the local downloaded folders +export PYTHONPATH=${PASH_TOP}/python_pkgs/ +export PASH_TIMESTAMP="$(date +"%y-%m-%d-%T")" + +# add hdfs directory if hdfs command exist +if command -v "hdfs" &> /dev/null +then + datanode_dir=$(hdfs getconf -confKey dfs.datanode.data.dir) + export HDFS_DATANODE_DIR=${datanode_dir#"file://"} # removes file:// prefix +fi + +source "$PASH_TOP/compiler/pash_init_setup.sh" "$@" --distributed_exec + +export PASH_TMP_PREFIX="$(mktemp -d /tmp/pash_XXXXXXX)/" + +cleanup() { + kill "$FILEREADER_PID" "$DISCOVERY_PID" + wait "$FILEREADER_PID" "$DISCOVERY_PID" 2>/dev/null + rm -rf "$PASH_TMP_PREFIX" +} + +"$PASH_TOP/runtime/dspash/file_reader/filereader_server" & +FILEREADER_PID=$! +"$PASH_TOP/runtime/dspash/file_reader/discovery_server" & +DISCOVERY_PID=$! 
+python3 "$PASH_TOP/compiler/dspash/worker.py" "$@" diff --git a/test/pash_tests/wrap_cat.sh b/test/pash_tests/wrap_cat.sh new file mode 100755 index 0000000..f38b981 --- /dev/null +++ b/test/pash_tests/wrap_cat.sh @@ -0,0 +1,27 @@ +file1=1.out +file2=2.out +file3=3.out +file4=4.out +testFile=../../evaluation/scripts/input/10M.txt +batchSize=70000 + +mkfifo $file1 +mkfifo $file3 + +## 1. TODO: Deadlocks on merge + split (true) +## 2. Increasing batchsize deadlock +## 3. Improving wrap performance + +../r_split $testFile $batchSize $file1 $file3 & +# ../r_wrap cat < $file1 > $file3 & +../r_merge $file1 $file3 > $file4 + +# cat $testFile > $file4 + +# if cmp -s "$testFile" "$file4"; then +# printf 'The file "%s" is the same as "%s"\n' "$file1" "$file3" +# else +# printf 'The file "%s" is different from "%s"\n' "$file1" "$file3" +# fi + +rm -rf *.out diff --git a/test/round_trip.sh b/test/round_trip.sh new file mode 100755 index 0000000..df7c7af --- /dev/null +++ b/test/round_trip.sh @@ -0,0 +1,65 @@ +#!/bin/sh + +if [ $# -ne 2 ]; then + echo "Usage: ${0##*/} program target" + exit 2 +fi + +p=$1 +tgt=$2 + +two_roundtrips() { + [ "$(head -n1 "$tgt")" != '# TEST: single roundtrip' ] +} + +orig=$(mktemp) + +"$p" "$tgt" >"$orig" +if [ "$?" -ne 0 ] +then + echo "RT_ABORT_1: '$tgt' -> '$orig'" + exit 3 +fi + +rt=$(mktemp) + +"$p" "$orig" >"$rt" +if [ "$?" -ne 0 ] +then + echo "RT_ABORT_2: '$tgt' -> '$orig' -> '$rt'" + exit 4 +fi + +if diff -b "$orig" "$rt" >/dev/null +then + echo "PASS '$tgt'" + exit 0 +else + if two_roundtrips + then + # try one more time around the loop + rtrt=$(mktemp) + + "$p" "$rt" >"$rtrt" + if [ "$?" 
-ne 0 ] + then + echo "RT_ABORT_3: '$tgt' -> '$orig' -> '$rt' -> '$rtrt'" + exit 5 + fi + + if diff -b "$rt" "$rtrt" >/dev/null + then + echo "PASS '$tgt' (two runs to fixpoint)" + exit 0 + fi + fi + + echo "FAIL: '$tgt' first time" + diff -ub "$orig" "$rt" + if two_roundtrips + then + echo ">>> '$tgt' second time" + diff -ub "$rt" "$rtrt" + fi + exit 1 +fi diff --git a/test/test_ocaml_python.sh b/test/test_ocaml_python.sh new file mode 100755 index 0000000..48e2c68 --- /dev/null +++ b/test/test_ocaml_python.sh @@ -0,0 +1,63 @@ +#!/bin/sh + +: ${RT_OCAML=../ocaml/rt.sh} +: ${RT_PYTHON=../python/rt.py} + +if [ $# -ne 1 ] +then + echo "Usage: $0 testFile" + echo + exit 1 +fi + +testFile="$1" + +if [ ! -f "$testFile" ] +then + echo "Error: cannot read '$testFile'!" + echo + exit 1 +fi + +ocaml_rt=$(mktemp) +ocaml_err=$(mktemp) +python_rt=$(mktemp) +python_err=$(mktemp) + +"$RT_OCAML" "$testFile" >"$ocaml_rt" 2>"$ocaml_err" +ocaml_ec=$? +"$RT_PYTHON" < "$testFile" >"$python_rt" 2>"$python_err" +python_ec=$? + +if [ "$ocaml_ec" -ne 0 ] && [ "$python_ec" -ne 0 ] +then + echo "PASS '$testFile' | both abort" + exit 0 +elif [ "$ocaml_ec" -ne 0 ] +then + echo "OCAML_ABORT: '$testFile'" + cat "$ocaml_err" >&2 + exit 1 +elif [ "$python_ec" -ne 0 ] +then + echo "PYTHON_ABORT: '$testFile'" + cat "$python_err" >&2 + exit 1 +fi + +diff "$ocaml_rt" "$python_rt" >/dev/null +if [ $? -ne 0 ] +then + diff -w "$ocaml_rt" "$python_rt" >/dev/null + if [ $? 
-ne 0 ] + then + diff -w "$ocaml_rt" "$python_rt" >/dev/null + echo "FAIL: '$testFile' | $ocaml_rt $python_rt" + else + diff "$ocaml_rt" "$python_rt" >/dev/null + echo "FAIL_WHITESPACE: '$testFile' | $ocaml_rt $python_rt" + fi + exit 1 +fi + +echo "PASS '$testFile'" diff --git a/test/tests/aaaa b/test/tests/aaaa new file mode 100644 index 0000000..04d8190 --- /dev/null +++ b/test/tests/aaaa @@ -0,0 +1 @@ +eval "\"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" diff --git a/test/tests/aaaa_single b/test/tests/aaaa_single new file mode 100644 index 0000000..e75bf90 --- /dev/null +++ b/test/tests/aaaa_single @@ -0,0 +1 @@ +eval '"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA' diff --git a/test/tests/backslash b/test/tests/backslash new file mode 100644 index 0000000..2cb4253 --- /dev/null +++ b/test/tests/backslash @@ -0,0 +1 @@ +printf %s\\n foobar\|\&\;\<\>\(\)\$\`\\\"\'\ \?\*\[\ diff --git a/test/tests/braces_amp.sh b/test/tests/braces_amp.sh new file mode 100644 index 0000000..fc82664 --- /dev/null +++ b/test/tests/braces_amp.sh @@ -0,0 +1,5 @@ +for x in foo; do a & 
b & c & d & done +echo a & echo b +for y in foo; do a & b & done +while false; do a & b & done +until true; do forever & ever & ever & done diff --git a/test/tests/builtin.trap.exitcode.test b/test/tests/builtin.trap.exitcode.test new file mode 100644 index 0000000..4b832a6 --- /dev/null +++ b/test/tests/builtin.trap.exitcode.test @@ -0,0 +1,4 @@ +# https://www.spinics.net/lists/dash/msg01770.html + +trap 'set -o bad@option' INT +kill -s INT $$ diff --git a/test/tests/diverge.sh b/test/tests/diverge.sh new file mode 100755 index 0000000..1cf8ce5 --- /dev/null +++ b/test/tests/diverge.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +while true; do true; done diff --git a/test/tests/empty_case b/test/tests/empty_case new file mode 100644 index 0000000..7d18b23 --- /dev/null +++ b/test/tests/empty_case @@ -0,0 +1,6 @@ +case foo in + bar) + ;; + *) + echo hi;; +esac \ No newline at end of file diff --git a/test/tests/escaping b/test/tests/escaping new file mode 100644 index 0000000..9ea9a1d --- /dev/null +++ b/test/tests/escaping @@ -0,0 +1 @@ +${x=;|&!~*\}\{()\$\' "this is a \"quoted\" string"} \ No newline at end of file diff --git a/test/tests/for_spaces.sh b/test/tests/for_spaces.sh new file mode 100755 index 0000000..ed36e7a --- /dev/null +++ b/test/tests/for_spaces.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +oldifs=$IFS +IFS=$(echo -e "\t") +for f in `ls *`; do + echo $f +done +IFS=$oldifs diff --git a/test/tests/grab_submissions.sh b/test/tests/grab_submissions.sh new file mode 100755 index 0000000..3d2370c --- /dev/null +++ b/test/tests/grab_submissions.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +if [ "$#" != "1" ]; then + echo "Usage: $0 [hwXX]" + exit 1 +fi + +if [ -d "$1" ]; then + echo "Grading directory already exists" + exit 2 +fi + +mkdir $1 +mkdir $1/submissions +cp ../dropbox/$1/* $1/submissions diff --git a/test/tests/grade.sh b/test/tests/grade.sh new file mode 100755 index 0000000..a0f2ad2 --- /dev/null +++ b/test/tests/grade.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +score=0 +total=0 + 
+if [ -d output ]; then + echo "output directory already exists, aborting" + exit 1 +fi + +mkdir output + +echo "LEXER/PARSER AUTOGRADER RESULTS" +echo + +# check success cases +for i in right/*.lc; do + file=$(basename $i) + output=$(mktemp output/$file.XXXX) + + echo -n "$file: " + + ./Main $i >$output 2>&1 + if [ $? -eq 0 ] + then + let score+=1 + echo "1/1" + else + echo "0/1" + fi + + let total+=1 +done + +# check failure cases +for i in wrong/*.lc; do + file=$(basename $i) + output=$(mktemp output/$file.XXXX) + + echo -n "$file: " + + ./Main $i >$output 2>&1 + if [ $? -eq 1 ] + then + let score+=1 + echo "1/1" + else + echo "0/1" + fi + + let total+=1 +done + +echo +echo "TOTAL: $score / $total" +echo +echo "PROBLEM 1: XXX / 5" +echo +let total=total+5 +echo "FINAL GRADE: $score + XXX / $total" diff --git a/test/tests/nested_shell_in_subshell.sh b/test/tests/nested_shell_in_subshell.sh new file mode 100644 index 0000000..8bfb75b --- /dev/null +++ b/test/tests/nested_shell_in_subshell.sh @@ -0,0 +1,4 @@ +( (echo abc) ) +echo $( (echo abc) ) +echo `(echo abc)` +echo $() diff --git a/test/tests/redir_indirect b/test/tests/redir_indirect new file mode 100644 index 0000000..16e2052 --- /dev/null +++ b/test/tests/redir_indirect @@ -0,0 +1 @@ +x=1; echo msg 2>&$x diff --git a/test/tests/run_grader.sh b/test/tests/run_grader.sh new file mode 100755 index 0000000..10017b9 --- /dev/null +++ b/test/tests/run_grader.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +if [ "$#" != "1" ]; then + echo "Usage: $0 [hwXX]" + exit 1 +fi + +if [ ! -d "$1/grading" ]; then + echo "Couldn't find grading directory (looked in $1/grading)" + exit 2 +fi + +cd $1/grading + +errors="" +for s in `ls`; do + echo "GRADING $s" + (cd $s; make) + if [ "$?" 
!= "0" ]; then + errors+=" $s" + fi +done + +echo +echo "There were errors for the following students:${errors}" +echo ${errors} >"$1/grading/errors.log" diff --git a/test/tests/run_lda.sh b/test/tests/run_lda.sh new file mode 100755 index 0000000..a2e8698 --- /dev/null +++ b/test/tests/run_lda.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +export PYTHONIOENCODING=utf8 + +if test $# -ne 0; +then + KS="$*"; +else + KS="50 75 100 125 150 175 200" +fi + +DIR=`date "+%Y-%m-%d_%H:%M"` +START=`date "+%Y-%m-%d %H:%M"` + +# TODO error handling + +echo "SETTING UP" +mkdir ${DIR} + +echo "PARSING" +python parse.py + +for dat in abstracts.dat vocab.dat docs.dat; do + mv ${dat} ${DIR} +done + +# we don't want to lose this one! +cp stopwords.dat ${DIR} + +echo "RUNNING LDA" + +ABS=${DIR}/abstracts.dat + +for k in ${KS}; do + lda est 1/50 ${k} settings.txt ${ABS} seeded ${DIR}/lda${k} & + echo lda${k} >>${DIR}/.gitignore +done + +wait +echo "PROCESSING TOPICS" + +for k in ${KS}; do + python debug_topics.py ${DIR} ${k} > ${DIR}/lda${k}_topics.txt +done + +echo "GENERATING CSV" + +for i in ${DIR}/lda*; do + test -d ${i} && python post.py ${i}/final.gamma ${DIR}/docs.dat > ${i}.csv + test -d ${i} && python by_year.py ${i}/final.gamma ${DIR}/docs.dat > ${i}_by_year.csv +done + +echo "MOVING TO OUTPUT DIRECTORY" +mv ${DIR} ../out + +echo "DONE" +echo All done. Started at ${START}, done at `date "+%Y-%m-%d %H:%M"`. diff --git a/test/tests/send_emails.sh b/test/tests/send_emails.sh new file mode 100755 index 0000000..9e3515f --- /dev/null +++ b/test/tests/send_emails.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +if [ "$#" != "1" ]; then + echo "Usage: $0 [hwXX]" + exit 1 +fi + +if [ ! 
-d "$1/mail" ]; then + echo "Couldn't find mail directory (looked in $1/grading)" + exit 2 +fi + +cd $1/mail + +for s in `ls`; do + ../../mail.scpt "[cs131] $1 grade" $s +done diff --git a/test/tests/single_quoted_dollar_sign.sh b/test/tests/single_quoted_dollar_sign.sh new file mode 100644 index 0000000..53d1edc --- /dev/null +++ b/test/tests/single_quoted_dollar_sign.sh @@ -0,0 +1,4 @@ +# TEST: single roundtrip + +echo '$1' +echo $ a diff --git a/test/tests/syntax b/test/tests/syntax new file mode 100644 index 0000000..55fb8ae --- /dev/null +++ b/test/tests/syntax @@ -0,0 +1,35 @@ +ls * ${x:-$(foo)} ${#foo}seven $x "foo\"${x}"${y} $((x + ${x})) `ls 1` $(bq1)x$(bq2) >foo 2>&1 </dev/null +mv /tmp/foo /tmp/bar & +foo | bar | baz | quux +foo | bar | baz | quux & +if /bin/true; then always; else never; fi +if [ -x some_file ]; then maybe; elif [ -d some_dir ]; then otherwise; fi +if something; then we are looking for the one-armed bandit; fi +while [ ! -x some_file ]; do try to make some_file; done +until [ -x some_file ]; do seriously make that file; done +while { ! a && ! b ; } ; do certainly not c; done +for x in a b c d; do something to those letters; done +for x; do something to those arguments implicitly; done +case "$1" in start) echo starting ;; stop) oh noes ;; *) blargh ;; esac +case "$1" in start) echo starting ;; stop) oh noes ;; *) blargh;; esac +function foo { echo $*; export x=$((x + 1)) } +function foo { echo $*; export x=$((x + 1)) } +foo() { echo $*; export x=$((x + 1)) ; } +this < + +scriptName="${0##*/}" + +declare -i DEFAULT_TIMEOUT=9 +declare -i DEFAULT_INTERVAL=1 +declare -i DEFAULT_DELAY=1 + +# Timeout. +declare -i timeout=DEFAULT_TIMEOUT +# Interval between checks if the process is still alive. +declare -i interval=DEFAULT_INTERVAL +# Delay between posting the SIGTERM signal and destroying the process by SIGKILL. 
+declare -i delay=DEFAULT_DELAY + +printUsage() { + cat < /dev/null & + +exec "${@}" + diff --git a/test/tests/weird_tilde.sh b/test/tests/weird_tilde.sh new file mode 100644 index 0000000..82949bc --- /dev/null +++ b/test/tests/weird_tilde.sh @@ -0,0 +1,11 @@ +case $nm_file_list_spec~$to_tool_file_cmd in + *~func_convert_file_noop | *~func_convert_file_msys_to_w32 | ~*) + try_normal_branch=yes + eval cmd=\"$cmd1\" + func_len " $cmd" + len=$func_len_result + ;; + *) + try_normal_branch=no + ;; + esac diff --git a/version.sh b/version.sh new file mode 100755 index 0000000..4982456 --- /dev/null +++ b/version.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +PYTHON_VERSION=$(grep -e '^version =' pyproject.toml | cut -d'=' -f2 | tr -d ' "') + +PYTHON_VERSION2=$(grep -e 'version=' setup.py | cut -d'=' -f2 | tr -d "',") + +[ "$PYTHON_VERSION" = "$PYTHON_VERSION2" ] && echo "$PYTHON_VERSION" && exit 0 + +echo "Version numbers don't match!" +echo " Python is '$PYTHON_VERSION' in pyproject.toml" +echo " Python is '$PYTHON_VERSION2' in setup.py" +exit 1