From e7a5030483e039971aad59db3fc238c3051bd565 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 25 Feb 2019 12:49:20 +0800 Subject: [PATCH 001/401] options: Do not set commandname in procargs We set commandname in procargs when we don't have to. This results in a duplicated output of arg0 when an error occurs. Reported-by: Olivier Duclos Signed-off-by: Herbert Xu --- src/options.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/options.c b/src/options.c index 6f381e6..a46c23b 100644 --- a/src/options.c +++ b/src/options.c @@ -159,7 +159,6 @@ procargs(int argc, char **argv) setinputfile(*xargv, 0); setarg0: arg0 = *xargv++; - commandname = arg0; } shellparam.p = xargv; From 86a841bb444ed1d9a09afb38cb818a21c04e1beb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 25 Feb 2019 14:00:31 +0800 Subject: [PATCH 002/401] expand: Fix double-decrement in argstr Due to a double decrement in argstr we may miss field separators at the end of a word in certain situations. Reported-by: Martijn Dekker Fixes: 3cd538634f71 ("expand: Do not reprocess data when...") Signed-off-by: Herbert Xu --- src/expand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/expand.c b/src/expand.c index af9cac9..e57efa6 100644 --- a/src/expand.c +++ b/src/expand.c @@ -285,7 +285,7 @@ static char *argstr(char *p, int flag) q = stnputs(p, length, expdest); q[-1] &= end - 1; expdest = q - (flag & EXP_WORD ? end : 0); - newloc = expdest - (char *)stackblock() - end; + newloc = q - (char *)stackblock() - end; if (breakall && !inquotes && newloc > startloc) { recordregion(startloc, newloc, 0); } From 02a00569ba60e502f876c36d894ba0cc2d0682b3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 3 Mar 2019 21:57:50 +0800 Subject: [PATCH 003/401] eval: Reset handler when entering a subshell As it is a subshell can execute code that is only meant for the parent shell when it executes a longjmp that is caught by something like evalcommand. 
This patch fixes it by resetting the handler when entering a subshell. Reported-by: Martijn Dekker Signed-off-by: Herbert Xu --- src/eval.c | 4 ++++ src/main.c | 11 ++++++++--- src/main.h | 1 + 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/eval.c b/src/eval.c index 1aad31a..6ee2e1a 100644 --- a/src/eval.c +++ b/src/eval.c @@ -41,6 +41,7 @@ * Evaluate a command. */ +#include "main.h" #include "shell.h" #include "nodes.h" #include "syntax.h" @@ -492,6 +493,7 @@ evalsubshell(union node *n, int flags) if (backgnd) flags &=~ EV_TESTED; nofork: + reset_handler(); redirect(n->nredir.redirect, 0); evaltreenr(n->nredir.n, flags); /* never returns */ @@ -574,6 +576,7 @@ evalpipe(union node *n, int flags) } } if (forkshell(jp, lp->n, n->npipe.backgnd) == 0) { + reset_handler(); INTON; if (pip[1] >= 0) { close(pip[0]); @@ -630,6 +633,7 @@ evalbackcmd(union node *n, struct backcmd *result) sh_error("Pipe call failed"); jp = makejob(n, 1); if (forkshell(jp, n, FORK_NOJOB) == 0) { + reset_handler(); FORCEINTON; close(pip[0]); if (pip[1] != 1) { diff --git a/src/main.c b/src/main.c index 6b3a090..b2712cb 100644 --- a/src/main.c +++ b/src/main.c @@ -71,6 +71,7 @@ int *dash_errno; short profile_buf[16384]; extern int etext(); #endif +static struct jmploc main_handler; STATIC void read_profile(const char *); STATIC char *find_dot_file(char *); @@ -90,7 +91,6 @@ main(int argc, char **argv) { char *shinit; volatile int state; - struct jmploc jmploc; struct stackmark smark; int login; @@ -102,7 +102,7 @@ main(int argc, char **argv) monitor(4, etext, profile_buf, sizeof profile_buf, 50); #endif state = 0; - if (unlikely(setjmp(jmploc.loc))) { + if (unlikely(setjmp(main_handler.loc))) { int e; int s; @@ -137,7 +137,7 @@ main(int argc, char **argv) else goto state4; } - handler = &jmploc; + handler = &main_handler; #ifdef DEBUG opentrace(); trputs("Shell args: "); trargs(argv); @@ -353,3 +353,8 @@ exitcmd(int argc, char **argv) exraise(EXEXIT); /* NOTREACHED */ } + 
+void reset_handler(void) +{ + handler = &main_handler; +} diff --git a/src/main.h b/src/main.h index 19e4983..51f1604 100644 --- a/src/main.h +++ b/src/main.h @@ -52,3 +52,4 @@ extern int *dash_errno; void readcmdfile(char *); int dotcmd(int, char **); int exitcmd(int, char **); +void reset_handler(void); From b9f8565be338c172c3bd8a96d3fe3e3ab4775120 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 29 Mar 2019 13:49:59 +0800 Subject: [PATCH 004/401] parser: Fix old-style command substitution here-document crash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Wed, Jul 25, 2018 at 12:38:27PM +0000, project-repo wrote: > Hi, > I am working on a project in which I use the honggfuzz fuzzer to fuzz open > source software and I decided to fuzz dash. In doing so I discovered a > NULL pointer dereference in src/redir.ch on line 305. Following is a > backtrace as supplied by the address sanitizer: > > AddressSanitizer:DEADLYSIGNAL > ================================================================= > ==39623==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000010 (pc 0x0000005768ed bp 0x7ffc00273df0 sp 0x7ffc00273c60 T0) > ==39623==The signal is caused by a READ memory access. > ==39623==Hint: address points to the zero page. 
> #0 0x5768ec in openhere /home/jfe/dash/src/redir.c:305:29 > #1 0x574d92 in openredirect /home/jfe/dash/src/redir.c:230:7 > #2 0x5737fe in redirect /home/jfe/dash/src/redir.c:121:11 > #3 0x576017 in redirectsafe /home/jfe/dash/src/redir.c:424:3 > #4 0x522326 in evalcommand /home/jfe/dash/src/eval.c:828:11 > #5 0x520010 in evaltree /home/jfe/dash/src/eval.c:288:12 > #6 0x5270da in evaltreenr /home/jfe/dash/src/eval.c:332:2 > #7 0x526f04 in evalbackcmd /home/jfe/dash/src/eval.c:640:3 > #8 0x539020 in expbackq /home/jfe/dash/src/expand.c:522:2 > #9 0x5332d7 in argstr /home/jfe/dash/src/expand.c:343:4 > #10 0x5322f7 in expandarg /home/jfe/dash/src/expand.c:196:2 > #11 0x528118 in fill_arglist /home/jfe/dash/src/eval.c:659:3 > #12 0x5213b6 in evalcommand /home/jfe/dash/src/eval.c:769:13 > #13 0x520010 in evaltree /home/jfe/dash/src/eval.c:288:12 > #14 0x554423 in cmdloop /home/jfe/dash/src/main.c:234:8 > #15 0x553bcc in main /home/jfe/dash/src/main.c:176:3 > #16 0x7f201c2b2a86 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x21a86) > #17 0x41dfb9 in _start (/home/jfe/dash/src/dash+0x41dfb9) > > AddressSanitizer can not provide additional info. > SUMMARY: AddressSanitizer: SEGV /home/jfe/dash/src/redir.c:305:29 in openhere > ==39623==ABORTING > > This bug can be reproduced by running "dash < min" where min is the file > attached. I was able to reproduce this bug with the current git version > and the current debian version. 
> > cheers > project-repo > > < `< Fixes: 51e2d88d6e51 ("parser: Save/restore here-documents in...") Signed-off-by: Herbert Xu --- src/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index 1f9e8ec..4bda42e 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1451,9 +1451,9 @@ parsebackq: { if (readtoken() != TRP) synexpect(TRP); setinputstring(nullstr); - parseheredoc(); } + parseheredoc(); heredoclist = saveheredoclist; (*nlpp)->n = n; From c4f4ee8ecf85834811c252fc1df3892863572bbd Mon Sep 17 00:00:00 2001 From: Nikolai Merinov Date: Mon, 29 Apr 2019 19:13:37 +0500 Subject: [PATCH 005/401] expand: Fix trailing newlines processing in backquote expanding According to POSIX.1-2008 we should remove newlines only at the end of the substitution. Newline-only substitutions cause dash to remove newlines before the beginning of the substitution. The following code: cat <<END 1 $(echo "") 2 END prints "1<newline>2" instead of expected "1<newline><newline>2". This patch fixes trailing newlines processing in backquote expanding. 
Signed-off-by: Nikolai Merinov Signed-off-by: Herbert Xu --- src/expand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/expand.c b/src/expand.c index e57efa6..4a5d75a 100644 --- a/src/expand.c +++ b/src/expand.c @@ -525,7 +525,7 @@ expbackq(union node *cmd, int flag) /* Eat all trailing newlines */ dest = expdest; - for (; dest > (char *)stackblock() && dest[-1] == '\n';) + for (; dest > ((char *)stackblock() + startloc) && dest[-1] == '\n';) STUNPUTC(dest); expdest = dest; From 7710a926b321879b84bf349b865b1dd4c52083f8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 27 May 2019 13:39:37 +0800 Subject: [PATCH 006/401] parser: Only accept single-digit parameter expansion outside of braces On Thu, Apr 25, 2019 at 01:39:52AM +0000, Michael Orlitzky wrote: > The POSIX spec says, > > The parameter name or symbol can be enclosed in braces, which are > optional except for positional parameters with more than one digit or > when parameter is a name and is followed by a character that could be > interpreted as part of the name. > > However, dash seems to diverge from that behavior when we get to $10: > > $ cat test.sh > echo $10 > > $ dash ./test.sh one two three four five six seven eight nine ten > ten > > $ bash ./test.sh one two three four five six seven eight nine ten > one0 This patch should fix the problem. 
Signed-off-by: Herbert Xu --- src/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index 4bda42e..b318b08 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1268,7 +1268,7 @@ parsesub: { do { STPUTC(c, out); c = pgetc_eatbnl(); - } while (is_digit(c)); + } while (!subtype && is_digit(c)); } else if (c != '}') { int cc = c; From f30bd155ccbc3f084bbf03d56f9cc43f4b02af2a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 10 Oct 2019 16:30:42 +0000 Subject: [PATCH 007/401] shell: delete AC_PROG_YACC Signed-off-by: Herbert Xu --- configure.ac | 1 - 1 file changed, 1 deletion(-) diff --git a/configure.ac b/configure.ac index 036730d..5dab5aa 100644 --- a/configure.ac +++ b/configure.ac @@ -9,7 +9,6 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)]) dnl Checks for programs. AC_PROG_CC AC_USE_SYSTEM_EXTENSIONS -AC_PROG_YACC AC_MSG_CHECKING([for build system compiler]) if test "$cross_compiling" = yes; then From f613f9573f044220453069ee81ed8706c6e18225 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 19 Jan 2020 18:21:59 +0800 Subject: [PATCH 008/401] redir: Clear saved redirections in subshell When we enter a subshell we need to drop the saved redirections as otherwise a subsequent unwindredir could produce incorrect results. This patch does this by simply clearing redirlist. While we could actually free the memory underneath for subshells it isn't really worth the trouble for now. In order to ensure that this is done in every place where we enter a subshell, this patch adds a new mkinit hook called forkreset. The calls closescript, clear_traps and reset_handler are also added to the forkreset hook. This fixes a bug where the first two functions weren't called if we enter a subshell without forking. 
Reported-by: Harald van Dijk Signed-off-by: Herbert Xu --- src/eval.c | 12 ++++++------ src/init.h | 1 + src/input.c | 26 +++++++++----------------- src/input.h | 1 - src/jobs.c | 4 ++-- src/main.c | 9 ++++++--- src/main.h | 1 - src/memalloc.h | 1 + src/mkinit.c | 6 ++++++ src/redir.c | 4 ++++ src/redir.h | 1 - src/trap.c | 44 +++++++++++++++++++------------------------- src/trap.h | 1 - 13 files changed, 54 insertions(+), 57 deletions(-) diff --git a/src/eval.c b/src/eval.c index 6ee2e1a..1b5d61d 100644 --- a/src/eval.c +++ b/src/eval.c @@ -41,6 +41,7 @@ * Evaluate a command. */ +#include "init.h" #include "main.h" #include "shell.h" #include "nodes.h" @@ -483,17 +484,18 @@ evalsubshell(union node *n, int flags) lineno -= funcline - 1; expredir(n->nredir.redirect); - if (!backgnd && flags & EV_EXIT && !have_traps()) - goto nofork; INTOFF; + if (!backgnd && flags & EV_EXIT && !have_traps()) { + forkreset(); + goto nofork; + } jp = makejob(n, 1); if (forkshell(jp, n, backgnd) == 0) { - INTON; flags |= EV_EXIT; if (backgnd) flags &=~ EV_TESTED; nofork: - reset_handler(); + INTON; redirect(n->nredir.redirect, 0); evaltreenr(n->nredir.n, flags); /* never returns */ @@ -576,7 +578,6 @@ evalpipe(union node *n, int flags) } } if (forkshell(jp, lp->n, n->npipe.backgnd) == 0) { - reset_handler(); INTON; if (pip[1] >= 0) { close(pip[0]); @@ -633,7 +634,6 @@ evalbackcmd(union node *n, struct backcmd *result) sh_error("Pipe call failed"); jp = makejob(n, 1); if (forkshell(jp, n, FORK_NOJOB) == 0) { - reset_handler(); FORCEINTON; close(pip[0]); if (pip[1] != 1) { diff --git a/src/init.h b/src/init.h index 49791a0..d56fb28 100644 --- a/src/init.h +++ b/src/init.h @@ -36,4 +36,5 @@ void init(void); void exitreset(void); +void forkreset(void); void reset(void); diff --git a/src/input.c b/src/input.c index ae0c4c8..177fd0a 100644 --- a/src/input.c +++ b/src/input.c @@ -78,6 +78,7 @@ static int preadbuffer(void); #ifdef mkinit INCLUDE +INCLUDE INCLUDE "input.h" INCLUDE "error.h" @@ 
-91,6 +92,14 @@ RESET { basepf.lleft = basepf.nleft = 0; popallfiles(); } + +FORKRESET { + popallfiles(); + if (parsefile->fd > 0) { + close(parsefile->fd); + parsefile->fd = 0; + } +} #endif @@ -495,20 +504,3 @@ popallfiles(void) { unwindfiles(&basepf); } - - - -/* - * Close the file(s) that the shell is reading commands from. Called - * after a fork is done. - */ - -void -closescript(void) -{ - popallfiles(); - if (parsefile->fd > 0) { - close(parsefile->fd); - parsefile->fd = 0; - } -} diff --git a/src/input.h b/src/input.h index a9c0517..8acc6e9 100644 --- a/src/input.h +++ b/src/input.h @@ -99,4 +99,3 @@ void setinputstring(char *); void popfile(void); void unwindfiles(struct parsefile *); void popallfiles(void); -void closescript(void); diff --git a/src/jobs.c b/src/jobs.c index 26a6248..f377d8c 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -55,6 +55,7 @@ #endif #include "exec.h" #include "eval.h" +#include "init.h" #include "redir.h" #include "show.h" #include "main.h" @@ -857,8 +858,7 @@ static void forkchild(struct job *jp, union node *n, int mode) if (!lvforked) { shlvl++; - closescript(); - clear_traps(); + forkreset(); #if JOBS /* do job control only in root shell */ diff --git a/src/main.c b/src/main.c index b2712cb..36431fc 100644 --- a/src/main.c +++ b/src/main.c @@ -71,7 +71,7 @@ int *dash_errno; short profile_buf[16384]; extern int etext(); #endif -static struct jmploc main_handler; +MKINIT struct jmploc main_handler; STATIC void read_profile(const char *); STATIC char *find_dot_file(char *); @@ -354,7 +354,10 @@ exitcmd(int argc, char **argv) /* NOTREACHED */ } -void reset_handler(void) -{ +#ifdef mkinit +INCLUDE "error.h" + +FORKRESET { handler = &main_handler; } +#endif diff --git a/src/main.h b/src/main.h index 51f1604..19e4983 100644 --- a/src/main.h +++ b/src/main.h @@ -52,4 +52,3 @@ extern int *dash_errno; void readcmdfile(char *); int dotcmd(int, char **); int exitcmd(int, char **); -void reset_handler(void); diff --git a/src/memalloc.h 
b/src/memalloc.h index b9c63da..b9adf76 100644 --- a/src/memalloc.h +++ b/src/memalloc.h @@ -35,6 +35,7 @@ */ #include +#include struct stackmark { struct stack_block *stackp; diff --git a/src/mkinit.c b/src/mkinit.c index 5bca9ee..9025862 100644 --- a/src/mkinit.c +++ b/src/mkinit.c @@ -113,6 +113,11 @@ char exitreset[] = "\ * but prior to exitshell. \n\ */\n"; +char forkreset[] = "\ +/*\n\ + * This routine is called when we enter a subshell.\n\ + */\n"; + char reset[] = "\ /*\n\ * This routine is called when an error or an interrupt occurs in an\n\ @@ -123,6 +128,7 @@ char reset[] = "\ struct event event[] = { {"INIT", "init", init}, {"EXITRESET", "exitreset", exitreset}, + {"FORKRESET", "forkreset", forkreset}, {"RESET", "reset", reset}, {NULL, NULL} }; diff --git a/src/redir.c b/src/redir.c index 6c81dd0..895140c 100644 --- a/src/redir.c +++ b/src/redir.c @@ -401,6 +401,10 @@ EXITRESET { unwindredir(0); } +FORKRESET { + redirlist = NULL; +} + #endif diff --git a/src/redir.h b/src/redir.h index 8e56995..1cf2761 100644 --- a/src/redir.h +++ b/src/redir.h @@ -45,7 +45,6 @@ struct redirtab; union node; void redirect(union node *, int); void popredir(int); -void clearredir(void); int savefd(int, int); int redirectsafe(union node *, int); void unwindredir(struct redirtab *stop); diff --git a/src/trap.c b/src/trap.c index 58a7c60..82e7ece 100644 --- a/src/trap.c +++ b/src/trap.c @@ -66,7 +66,7 @@ /* trap handler commands */ -static char *trap[NSIG]; +MKINIT char *trap[NSIG]; /* number of non-null traps */ int trapcnt; /* current value of signal */ @@ -83,11 +83,29 @@ extern char *signal_names[]; static int decode_signum(const char *); #ifdef mkinit +INCLUDE "memalloc.h" INCLUDE "trap.h" + INIT { sigmode[SIGCHLD - 1] = S_DFL; setsignal(SIGCHLD); } + +FORKRESET { + char **tp; + + INTOFF; + for (tp = trap ; tp < &trap[NSIG] ; tp++) { + if (*tp && **tp) { /* trap not NULL or SIG_IGN */ + ckfree(*tp); + *tp = NULL; + if (tp != &trap[0]) + setsignal(tp - trap); + } + } + 
trapcnt = 0; + INTON; +} #endif /* @@ -150,30 +168,6 @@ trapcmd(int argc, char **argv) -/* - * Clear traps on a fork. - */ - -void -clear_traps(void) -{ - char **tp; - - INTOFF; - for (tp = trap ; tp < &trap[NSIG] ; tp++) { - if (*tp && **tp) { /* trap not NULL or SIG_IGN */ - ckfree(*tp); - *tp = NULL; - if (tp != &trap[0]) - setsignal(tp - trap); - } - } - trapcnt = 0; - INTON; -} - - - /* * Set the signal handler for the specified signal. The routine figures * out what it should be set to. diff --git a/src/trap.h b/src/trap.h index 5fd65af..4c455a8 100644 --- a/src/trap.h +++ b/src/trap.h @@ -42,7 +42,6 @@ extern volatile sig_atomic_t pending_sig; extern int gotsigchld; int trapcmd(int, char **); -void clear_traps(void); void setsignal(int); void ignoresig(int); void onsig(int); From fd5311ace04bcc2fea0a330c95a3423931ec45fa Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 20 Jan 2020 17:46:48 +0800 Subject: [PATCH 009/401] builtin: Fix seconds part of times(1) The seconds part of the times(1) built-in is wrong as it does not exclude the minutes part of the result. This patch fixes it. This problem was first noted by Michael Greenberg who also sent a similar patch. 
Reported-by: Michael Greenberg Signed-off-by: Herbert Xu --- src/bltin/times.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/bltin/times.c b/src/bltin/times.c index 8eabc1f..1166a68 100644 --- a/src/bltin/times.c +++ b/src/bltin/times.c @@ -15,16 +15,28 @@ int timescmd() { struct tms buf; long int clk_tck = sysconf(_SC_CLK_TCK); + int mutime, mstime, mcutime, mcstime; + double utime, stime, cutime, cstime; times(&buf); - printf("%dm%fs %dm%fs\n%dm%fs %dm%fs\n", - (int) (buf.tms_utime / clk_tck / 60), - ((double) buf.tms_utime) / clk_tck, - (int) (buf.tms_stime / clk_tck / 60), - ((double) buf.tms_stime) / clk_tck, - (int) (buf.tms_cutime / clk_tck / 60), - ((double) buf.tms_cutime) / clk_tck, - (int) (buf.tms_cstime / clk_tck / 60), - ((double) buf.tms_cstime) / clk_tck); + + utime = (double)buf.tms_utime / clk_tck; + mutime = utime / 60; + utime -= mutime * 60.0; + + stime = (double)buf.tms_stime / clk_tck; + mstime = stime / 60; + stime -= mstime * 60.0; + + cutime = (double)buf.tms_cutime / clk_tck; + mcutime = cutime / 60; + cutime -= mcutime * 60.0; + + cstime = (double)buf.tms_cstime / clk_tck; + mcstime = cstime / 60; + cstime -= mcstime * 60.0; + + printf("%dm%fs %dm%fs\n%dm%fs %dm%fs\n", mutime, utime, mstime, stime, + mcutime, cutime, mcstime, cstime); return 0; } From 42a0e93a91f88f25f7a7c6a69809e1efe14fb837 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 19 Feb 2020 11:30:08 +0100 Subject: [PATCH 010/401] jobs: Rename DOWAIT_NORMAL to DOWAIT_NONBLOCK To make it clearer what it is doing: nonblocking wait() Signed-off-by: Denys Vlasenko Signed-off-by: Herbert Xu --- src/jobs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/jobs.c b/src/jobs.c index f377d8c..ba77c00 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -78,7 +78,7 @@ #define CUR_STOPPED 0 /* mode flags for dowait */ -#define DOWAIT_NORMAL 0 +#define DOWAIT_NONBLOCK 0 #define DOWAIT_BLOCK 1 #define 
DOWAIT_WAITCMD 2 @@ -558,7 +558,7 @@ showjobs(struct output *out, int mode) TRACE(("showjobs(%x) called\n", mode)); /* If not even one one job changed, there is nothing to do */ - dowait(DOWAIT_NORMAL, NULL); + dowait(DOWAIT_NONBLOCK, NULL); for (jp = curjob; jp; jp = jp->prev_job) { if (!(mode & SHOW_CHANGED) || jp->changed) @@ -1013,7 +1013,7 @@ waitforjob(struct job *jp) int st; TRACE(("waitforjob(%%%d) called\n", jp ? jobno(jp) : 0)); - dowait(jp ? DOWAIT_BLOCK : DOWAIT_NORMAL, jp); + dowait(jp ? DOWAIT_BLOCK : DOWAIT_NONBLOCK, jp); if (!jp) return exitstatus; @@ -1123,7 +1123,7 @@ static int waitone(int block, struct job *job) static int dowait(int block, struct job *jp) { - int pid = block == DOWAIT_NORMAL ? gotsigchld : 1; + int pid = block == DOWAIT_NONBLOCK ? gotsigchld : 1; while (jp ? jp->state == JOBRUNNING : pid > 0) { if (!jp) From ecd82629d6115036b1f89410a0964d251527c4ca Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 19 Feb 2020 17:39:13 +0100 Subject: [PATCH 011/401] var: Remove poplocalvars() always-zero argument, make it static Signed-off-by: Denys Vlasenko Signed-off-by: Herbert Xu --- src/var.c | 24 ++++-------------------- src/var.h | 1 - 2 files changed, 4 insertions(+), 21 deletions(-) diff --git a/src/var.c b/src/var.c index 0d7e1db..b146018 100644 --- a/src/var.c +++ b/src/var.c @@ -504,8 +504,8 @@ void mklocal(char *name, int flags) * Interrupts must be off. */ -void -poplocalvars(int keep) +static void +poplocalvars(void) { struct localvar_list *ll; struct localvar *lvp, *next; @@ -522,23 +522,7 @@ poplocalvars(int keep) next = lvp->next; vp = lvp->vp; TRACE(("poplocalvar %s\n", vp ? 
vp->text : "-")); - if (keep) { - int bits = VSTRFIXED; - - if (lvp->flags != VUNSET) { - if (vp->text == lvp->text) - bits |= VTEXTFIXED; - else if (!(lvp->flags & (VTEXTFIXED|VSTACK))) - ckfree(lvp->text); - } - - vp->flags &= ~bits; - vp->flags |= (lvp->flags & bits); - - if ((vp->flags & - (VEXPORT|VREADONLY|VSTRFIXED|VUNSET)) == VUNSET) - unsetvar(vp->text); - } else if (vp == NULL) { /* $- saved */ + if (vp == NULL) { /* $- saved */ memcpy(optlist, lvp->text, sizeof(optlist)); ckfree(lvp->text); optschanged(); @@ -586,7 +570,7 @@ struct localvar_list *pushlocalvars(int push) void unwindlocalvars(struct localvar_list *stop) { while (localvar_stack != stop) - poplocalvars(0); + poplocalvars(); } diff --git a/src/var.h b/src/var.h index cd0477f..aa7575a 100644 --- a/src/var.h +++ b/src/var.h @@ -148,7 +148,6 @@ int exportcmd(int, char **); int localcmd(int, char **); void mklocal(char *name, int flags); struct localvar_list *pushlocalvars(int push); -void poplocalvars(int); void unwindlocalvars(struct localvar_list *stop); int unsetcmd(int, char **); void unsetvar(const char *); From 2a4f8f94a72413997ff98800fc6bf6bc028290a5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 10 Apr 2020 21:03:09 +1000 Subject: [PATCH 012/401] jobs: Fix infinite loop in waitproc After we changed the resetting of gotsigchld so that it is only done if jp is NULL, we can now get an infinite loop in waitproc if gotsigchld is set but there is no outstanding child because everything had been waited for previously without gotsigchld being zeroed. This patch fixes it by always zeroing gotsigchld as we did before. The bug that the previous patch was trying to fix is now resolved by switching the blocking mode to DOWAIT_NORMAL after the specified job has been completed so that we really do wait for all outstanding dead children. 
Reported-by: Harald van Dijk Fixes: 6c691b3e5099 ("jobs: Only clear gotsigchld when waiting...") Signed-off-by: Herbert Xu --- src/jobs.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/src/jobs.c b/src/jobs.c index ba77c00..a9e6524 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -1123,15 +1123,28 @@ static int waitone(int block, struct job *job) static int dowait(int block, struct job *jp) { - int pid = block == DOWAIT_NONBLOCK ? gotsigchld : 1; + int gotchld = *(volatile int *)&gotsigchld; + int rpid; + int pid; + + if (jp && jp->state != JOBRUNNING) + block = DOWAIT_NONBLOCK; + + if (block == DOWAIT_NONBLOCK && !gotchld) + return 1; - while (jp ? jp->state == JOBRUNNING : pid > 0) { - if (!jp) - gotsigchld = 0; + rpid = 1; + + do { + gotsigchld = 0; pid = waitone(block, jp); - } + rpid &= !!pid; - return pid; + if (!pid || (jp && jp->state != JOBRUNNING)) + block = DOWAIT_NONBLOCK; + } while (pid >= 0); + + return rpid; } /* @@ -1163,7 +1176,10 @@ waitproc(int block, int *status) #endif do { - err = wait3(status, flags, NULL); + do + err = wait3(status, flags, NULL); + while (err < 0 && errno == EINTR); + if (err || (err = -!block)) break; @@ -1173,8 +1189,6 @@ waitproc(int block, int *status) sigsuspend(&oldmask); sigclearmask(); - - err = 0; } while (gotsigchld); return err; From a256b373624d3c6c6f38bce03b58cc38fab7e1e2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 28 Apr 2020 01:15:26 +1000 Subject: [PATCH 013/401] parser: Fix handling of empty aliases Dash was incorrectly handling empty aliases. When attempting to use an empty alias with nothing else, I'm (incorrectly) prompted for more input: ``` $ alias empty='' $ empty > ``` Other shells (e.g., bash, yash) correctly handle the lone, empty alias as an empty command: ``` $ alias empty='' $ empty $ ``` The problem here is that we incorrectly enter the loop eating TNLs in readtoken(). This patch fixes it by setting checkkwd correctly. 
Reported-by: Michael Greenberg Signed-off-by: Herbert Xu --- src/parser.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/src/parser.c b/src/parser.c index b318b08..5c9e9a0 100644 --- a/src/parser.c +++ b/src/parser.c @@ -115,7 +115,6 @@ STATIC union node *simplecmd(void); STATIC union node *makename(void); STATIC void parsefname(void); STATIC void parseheredoc(void); -STATIC int peektoken(void); STATIC int readtoken(void); STATIC int xxreadtoken(void); STATIC int pgetc_eatbnl(); @@ -161,21 +160,23 @@ parsecmd(int interact) STATIC union node * list(int nlflag) { + int chknl = nlflag & 1 ? 0 : CHKNL; union node *n1, *n2, *n3; int tok; n1 = NULL; for (;;) { - switch (readtoken()) { + checkkwd = chknl | CHKKWD | CHKALIAS; + tok = readtoken(); + switch (tok) { case TNL: - if (!(nlflag & 1)) - break; parseheredoc(); return n1; case TEOF: - if (!n1 && (nlflag & 1)) + if (!n1 && !chknl) n1 = NEOF; +out_eof: parseheredoc(); tokpushback++; lasttoken = TEOF; @@ -183,8 +184,7 @@ list(int nlflag) } tokpushback++; - checkkwd = CHKNL | CHKKWD | CHKALIAS; - if (nlflag == 2 && tokendlist[peektoken()]) + if (nlflag == 2 && tokendlist[tok]) return n1; nlflag |= 2; @@ -214,15 +214,16 @@ list(int nlflag) n1 = n3; } switch (tok) { - case TNL: case TEOF: + goto out_eof; + case TNL: tokpushback++; /* fall through */ case TBACKGND: case TSEMI: break; default: - if ((nlflag & 1)) + if (!chknl) synexpect(-1); tokpushback++; return n1; @@ -685,16 +686,6 @@ parseheredoc(void) } } -STATIC int -peektoken(void) -{ - int t; - - t = readtoken(); - tokpushback++; - return (t); -} - STATIC int readtoken(void) { From 50fc8edbe2532b573f2edb727861e3649b9dafef Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 28 Apr 2020 16:17:58 +1000 Subject: [PATCH 014/401] parser: Catch errors in expandstr On Fri, Dec 13, 2019 at 02:51:34PM +0000, Simon Ser wrote: > Just noticed another dash bug: when setting invalid PS1 values dash > enters an infinite loop. 
> > For instance, setting PS1='$(' makes dash print many of these: > > dash: 1: Syntax error: end of file unexpected (expecting ")") > > It would be nice to fallback to the default PS1 value on error. This patch fixes it by using the literal value of PS1 should an error occur during expansion. On Wed, Feb 26, 2020 at 09:12:04PM +0000, Ron Yorston wrote: > > There's another case that should be handled. PS1='`xxx(`' causes the > shell to exit because the old-style backquote leaves an additional file > on the stack. Ron's change has been folded into this patch. Reported-by: Simon Ser Reported-by: Ron Yorston Signed-off-by: Herbert Xu --- src/parser.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/parser.c b/src/parser.c index 5c9e9a0..9c9a7dc 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1562,28 +1562,46 @@ setprompt(int which) const char * expandstr(const char *ps) { + struct parsefile *volatile file_stop; + struct jmploc *volatile savehandler; + const char *volatile result; + volatile int saveprompt; + struct jmploc jmploc; union node n; - int saveprompt; + int err; + + file_stop = parsefile; /* XXX Fix (char *) cast. 
*/ setinputstring((char *)ps); saveprompt = doprompt; doprompt = 0; + result = ps; + savehandler = handler; + if (unlikely(err = setjmp(jmploc.loc))) + goto out; + handler = &jmploc; readtoken1(pgetc_eatbnl(), DQSYNTAX, FAKEEOFMARK, 0); - doprompt = saveprompt; - - popfile(); - n.narg.type = NARG; n.narg.next = NULL; n.narg.text = wordtext; n.narg.backquote = backquotelist; expandarg(&n, NULL, EXP_QUOTED); - return stackblock(); + result = stackblock(); + +out: + handler = savehandler; + if (err && exception != EXERROR) + longjmp(handler->loc, 1); + + doprompt = saveprompt; + unwindfiles(file_stop); + + return result; } /* From 07951cfffb218291e7574b874660eab652cb54f1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Apr 2020 00:19:59 +1000 Subject: [PATCH 015/401] parser: Fix alias expansion after heredoc or newlines This script should print OK: alias a="case x in " b=x a b) echo BAD;; esac alias BEGIN={ END=} BEGIN cat <<- EOF > /dev/null $(:) EOF END : <<- EOF && $(:) EOF BEGIN echo OK END However, because the value of checkkwd is either zeroed when it shouldn't, or isn't zeroed when it should, dash currently gets it wrong in every case. This patch fixes it by saving checkkwd and zeroing it where needed. 
Suggested-by: Harald van Dijk Reported-by: Harald van Dijk Reported-by: Martijn Dekker Signed-off-by: Herbert Xu --- src/parser.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/parser.c b/src/parser.c index 9c9a7dc..3131045 100644 --- a/src/parser.c +++ b/src/parser.c @@ -704,10 +704,14 @@ readtoken(void) if (kwd & CHKNL) { while (t == TNL) { parseheredoc(); + checkkwd = 0; t = xxreadtoken(); } } + kwd |= checkkwd; + checkkwd = 0; + if (t != TWORD || quoteflag) { goto out; } @@ -725,7 +729,7 @@ readtoken(void) } } - if (checkkwd & CHKALIAS) { + if (kwd & CHKALIAS) { struct alias *ap; if ((ap = lookupalias(wordtext, 1)) != NULL) { if (*ap->val) { @@ -735,7 +739,6 @@ readtoken(void) } } out: - checkkwd = 0; #ifdef DEBUG if (!alreadyseen) TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : "")); From 24bc10e6496bd69df275838c2722ddce0cff0e46 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 29 Apr 2020 08:29:53 +0200 Subject: [PATCH 016/401] expand: Remove unused expandmeta() flag parameter Signed-off-by: Denys Vlasenko Signed-off-by: Herbert Xu --- src/expand.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/expand.c b/src/expand.c index 4a5d75a..985cd70 100644 --- a/src/expand.c +++ b/src/expand.c @@ -118,7 +118,7 @@ STATIC char *evalvar(char *, int); static size_t strtodest(const char *p, int flags); static size_t memtodest(const char *p, size_t len, int flags); STATIC ssize_t varvalue(char *, int, int, int); -STATIC void expandmeta(struct strlist *, int); +STATIC void expandmeta(struct strlist *); #ifdef HAVE_GLOB STATIC void addglob(const glob_t *); #else @@ -205,7 +205,7 @@ expandarg(union node *arg, struct arglist *arglist, int flag) ifsbreakup(p, -1, &exparg); *exparg.lastp = NULL; exparg.lastp = &exparg.list; - expandmeta(exparg.list, flag); + expandmeta(exparg.list); } else { sp = (struct strlist *)stalloc(sizeof (struct strlist)); sp->text = p; @@ -1155,9 +1155,7 @@ void 
ifsfree(void) #ifdef HAVE_GLOB STATIC void -expandmeta(str, flag) - struct strlist *str; - int flag; +expandmeta(struct strlist *str) { /* TODO - EXP_REDIR */ @@ -1221,7 +1219,7 @@ STATIC unsigned expdir_max; STATIC void -expandmeta(struct strlist *str, int flag) +expandmeta(struct strlist *str) { static const char metachars[] = { '*', '?', '[', 0 From 2b6a5a30eeae293ca50d849bc1dae15f27044f99 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 29 Apr 2020 20:04:21 -0700 Subject: [PATCH 017/401] shell: mktokens relative TMPDIR The mktokens script fails when /tmp isn't writable (e.g., when building in a sandbox with a different TMPDIR). Replace absolute references to /tmp to relative references to TMPDIR. If TMPDIR is unset or null, default to /tmp. The mkbuiltins script was already hardened to work relative to TMPDIR, also defaulting to /tmp. v2 ensures that TMPDIR is quoted. v3 adds an extra quotation that prevents extra pathname expansions. Signed-off-by: Michael Greenberg Signed-off-by: Herbert Xu --- src/mktokens | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/mktokens b/src/mktokens index cd52241..78055be 100644 --- a/src/mktokens +++ b/src/mktokens @@ -37,7 +37,9 @@ # token marks the end of a list. The third column is the name to print in # error messages. -cat > /tmp/ka$$ <<\! +: "${TMPDIR:=/tmp}" + +cat > "${TMPDIR}"/ka$$ <<\! TEOF 1 end of file TNL 0 newline TSEMI 0 ";" @@ -68,28 +70,28 @@ TWHILE 0 "while" TBEGIN 0 "{" TEND 1 "}" ! 
-nl=`wc -l /tmp/ka$$` +nl=`wc -l "${TMPDIR}"/ka$$` exec > token.h -awk '{print "#define " $1 " " NR-1}' /tmp/ka$$ +awk '{print "#define " $1 " " NR-1}' "${TMPDIR}"/ka$$ exec > token_vars.h echo ' /* Array indicating which tokens mark the end of a list */ static const char tokendlist[] = {' -awk '{print "\t" $2 ","}' /tmp/ka$$ +awk '{print "\t" $2 ","}' "${TMPDIR}"/ka$$ echo '}; static const char *const tokname[] = {' sed -e 's/"/\\"/g' \ -e 's/[^ ]*[ ][ ]*[^ ]*[ ][ ]*\(.*\)/ "\1",/' \ - /tmp/ka$$ + "${TMPDIR}"/ka$$ echo '}; ' -sed 's/"//g' /tmp/ka$$ | awk ' +sed 's/"//g' "${TMPDIR}"/ka$$ | awk ' /TNOT/{print "#define KWDOFFSET " NR-1; print ""; print "static const char *const parsekwd[] = {"} /TNOT/,/neverfound/{if (last) print " \"" last "\","; last = $3} END{print " \"" last "\"\n};"}' -rm /tmp/ka$$ +rm "${TMPDIR}"/ka$$ From 8e559f5f8c1a928f6dae8543640886758469683c Mon Sep 17 00:00:00 2001 From: Jeroen Roovers Date: Sat, 2 May 2020 14:37:06 -0400 Subject: [PATCH 018/401] input: Fix compiling against libedit with -fno-common With -fno-common, which will be enabled by default in GCC 10, we see this error: ld: input.o:(.bss+0x0): multiple definition of `el'; histedit.o:(.bss+0x8): first defined here To fix this, simply remove the definition as it is not needed. 
Signed-off-by: Jeroen Roovers Signed-off-by: Mike Gilbert Signed-off-by: Herbert Xu --- src/input.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/input.c b/src/input.c index 177fd0a..4a9b267 100644 --- a/src/input.c +++ b/src/input.c @@ -67,10 +67,6 @@ MKINIT char basebuf[IBUFSIZ]; /* buffer for top level input file */ struct parsefile *parsefile = &basepf; /* current input file */ int whichprompt; /* 1 == PS1, 2 == PS2 */ -#ifndef SMALL -EditLine *el; /* cookie for editline package */ -#endif - STATIC void pushfile(void); static int preadfd(void); static void setinputfd(int fd, int push); From 3e3e7af1a49273a5e49d50565b3b079a2ab19142 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 7 May 2020 23:42:12 +1000 Subject: [PATCH 019/401] shell: Always use explicit large file API There are some remaining stat/readdir calls in dash that may lead to spurious EOVERFLOW errors on 32-bit platforms. This patch changes them (as well as open(2)) to use the explicit large file API. Reported-by: Tatsuki Sugiura Signed-off-by: Herbert Xu --- configure.ac | 5 +++++ src/bltin/test.c | 26 +++++++++++++------------- src/cd.c | 4 ++-- src/expand.c | 4 ++-- src/input.c | 2 +- src/jobs.c | 4 ++-- src/main.c | 4 ++-- src/var.c | 4 ++-- 8 files changed, 29 insertions(+), 24 deletions(-) diff --git a/configure.ac b/configure.ac index 5dab5aa..dbd97d8 100644 --- a/configure.ac +++ b/configure.ac @@ -144,8 +144,13 @@ AC_CHECK_FUNC(stat64,, [ AC_DEFINE(fstat64, fstat, [64-bit operations are the same as 32-bit]) AC_DEFINE(lstat64, lstat, [64-bit operations are the same as 32-bit]) AC_DEFINE(stat64, stat, [64-bit operations are the same as 32-bit]) + AC_DEFINE(readdir64, readdir, + [64-bit operations are the same as 32-bit]) + AC_DEFINE(dirent64, dirent, + [64-bit operations are the same as 32-bit]) ]) +dnl OS X apparently has stat64 but not open64. 
AC_CHECK_FUNC(open64,, [ AC_DEFINE(open64, open, [64-bit operations are the same as 32-bit]) ]) diff --git a/src/bltin/test.c b/src/bltin/test.c index b7188df..c7fc479 100644 --- a/src/bltin/test.c +++ b/src/bltin/test.c @@ -473,17 +473,17 @@ static int isoperand(char **tp) static int newerf (const char *f1, const char *f2) { - struct stat b1, b2; + struct stat64 b1, b2; #ifdef HAVE_ST_MTIM - return (stat (f1, &b1) == 0 && - stat (f2, &b2) == 0 && + return (stat64(f1, &b1) == 0 && + stat64(f2, &b2) == 0 && ( b1.st_mtim.tv_sec > b2.st_mtim.tv_sec || (b1.st_mtim.tv_sec == b2.st_mtim.tv_sec && (b1.st_mtim.tv_nsec > b2.st_mtim.tv_nsec ))) ); #else - return (stat (f1, &b1) == 0 && - stat (f2, &b2) == 0 && + return (stat64(f1, &b1) == 0 && + stat64(f2, &b2) == 0 && b1.st_mtime > b2.st_mtime); #endif } @@ -491,17 +491,17 @@ newerf (const char *f1, const char *f2) static int olderf (const char *f1, const char *f2) { - struct stat b1, b2; + struct stat64 b1, b2; #ifdef HAVE_ST_MTIM - return (stat (f1, &b1) == 0 && - stat (f2, &b2) == 0 && + return (stat64(f1, &b1) == 0 && + stat64(f2, &b2) == 0 && (b1.st_mtim.tv_sec < b2.st_mtim.tv_sec || (b1.st_mtim.tv_sec == b2.st_mtim.tv_sec && (b1.st_mtim.tv_nsec < b2.st_mtim.tv_nsec ))) ); #else - return (stat (f1, &b1) == 0 && - stat (f2, &b2) == 0 && + return (stat64(f1, &b1) == 0 && + stat64(f2, &b2) == 0 && b1.st_mtime < b2.st_mtime); #endif } @@ -509,10 +509,10 @@ olderf (const char *f1, const char *f2) static int equalf (const char *f1, const char *f2) { - struct stat b1, b2; + struct stat64 b1, b2; - return (stat (f1, &b1) == 0 && - stat (f2, &b2) == 0 && + return (stat64(f1, &b1) == 0 && + stat64(f2, &b2) == 0 && b1.st_dev == b2.st_dev && b1.st_ino == b2.st_ino); } diff --git a/src/cd.c b/src/cd.c index b6742af..1ef1dc5 100644 --- a/src/cd.c +++ b/src/cd.c @@ -96,7 +96,7 @@ cdcmd(int argc, char **argv) const char *path; const char *p; char c; - struct stat statb; + struct stat64 statb; int flags; int len; @@ -132,7 +132,7 @@ 
cdcmd(int argc, char **argv) c = *p; p = stalloc(len); - if (stat(p, &statb) >= 0 && S_ISDIR(statb.st_mode)) { + if (stat64(p, &statb) >= 0 && S_ISDIR(statb.st_mode)) { if (c && c != ':') flags |= CD_PRINT; docd: diff --git a/src/expand.c b/src/expand.c index 985cd70..1730670 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1284,7 +1284,7 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len) int metaflag; struct stat64 statb; DIR *dirp; - struct dirent *dp; + struct dirent64 *dp; int atend; int matchdot; int esc; @@ -1361,7 +1361,7 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len) p++; if (*p == '.') matchdot++; - while (! int_pending() && (dp = readdir(dirp)) != NULL) { + while (! int_pending() && (dp = readdir64(dirp)) != NULL) { if (dp->d_name[0] == '.' && ! matchdot) continue; if (pmatch(start, dp->d_name)) { diff --git a/src/input.c b/src/input.c index 4a9b267..17544e7 100644 --- a/src/input.c +++ b/src/input.c @@ -393,7 +393,7 @@ setinputfile(const char *fname, int flags) int fd; INTOFF; - if ((fd = open(fname, O_RDONLY)) < 0) { + if ((fd = open64(fname, O_RDONLY)) < 0) { if (flags & INPUT_NOFILE_OK) goto out; exitstatus = 127; diff --git a/src/jobs.c b/src/jobs.c index a9e6524..f65435d 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -196,7 +196,7 @@ setjobctl(int on) return; if (on) { int ofd; - ofd = fd = open(_PATH_TTY, O_RDWR); + ofd = fd = open64(_PATH_TTY, O_RDWR); if (fd < 0) { fd += 3; while (!isatty(fd)) @@ -887,7 +887,7 @@ static void forkchild(struct job *jp, union node *n, int mode) ignoresig(SIGQUIT); if (jp->nprocs == 0) { close(0); - if (open(_PATH_DEVNULL, O_RDONLY) != 0) + if (open64(_PATH_DEVNULL, O_RDONLY) != 0) sh_error("Can't open %s", _PATH_DEVNULL); } } diff --git a/src/main.c b/src/main.c index 36431fc..7a28534 100644 --- a/src/main.c +++ b/src/main.c @@ -298,7 +298,7 @@ find_dot_file(char *basename) { char *fullname; const char *path = pathval(); - struct stat statb; + struct stat64 statb; int len; /* don't try this 
for absolute or relative paths */ @@ -308,7 +308,7 @@ find_dot_file(char *basename) while ((len = padvance(&path, basename)) >= 0) { fullname = stackblock(); if ((!pathopt || *pathopt == 'f') && - !stat(fullname, &statb) && S_ISREG(statb.st_mode)) { + !stat64(fullname, &statb) && S_ISREG(statb.st_mode)) { /* This will be freed by the caller. */ return stalloc(len); } diff --git a/src/var.c b/src/var.c index b146018..ef9c2bd 100644 --- a/src/var.c +++ b/src/var.c @@ -125,7 +125,7 @@ INIT { char **envp; static char ppid[32] = "PPID="; const char *p; - struct stat st1, st2; + struct stat64 st1, st2; initvar(); for (envp = environ ; *envp ; envp++) { @@ -143,7 +143,7 @@ INIT { p = lookupvar("PWD"); if (p) - if (*p != '/' || stat(p, &st1) || stat(".", &st2) || + if (*p != '/' || stat64(p, &st1) || stat64(".", &st2) || st1.st_dev != st2.st_dev || st1.st_ino != st2.st_ino) p = 0; setpwd(p, 0); From ec7fc7c7e33b06f31d21bbc03bf81290fd0c7742 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 17 May 2020 23:36:25 +1000 Subject: [PATCH 020/401] parser: Save and restore heredoclist in expandstr On Sun, May 17, 2020 at 01:19:28PM +0100, Harald van Dijk wrote: > > This still does not restore the state completely. It does not clean up any > pending heredocs. I see: > > $ PS1='$(< src/dash: 1: Syntax error: Unterminated quoted string > $(< > > > That is, after entering the ':' command, the shell is still trying to read > the heredoc from the prompt. This patch saves and restores the heredoclist in expandstr. It also removes a bunch of unnecessary volatiles as those variables are only referenced in case of a longjmp other than one started by a signal like SIGINT. 
Reported-by: Harald van Dijk Signed-off-by: Herbert Xu --- src/parser.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/parser.c b/src/parser.c index 3131045..54c2861 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1565,10 +1565,11 @@ setprompt(int which) const char * expandstr(const char *ps) { - struct parsefile *volatile file_stop; + struct parsefile *file_stop; struct jmploc *volatile savehandler; - const char *volatile result; - volatile int saveprompt; + struct heredoc *saveheredoclist; + const char *result; + int saveprompt; struct jmploc jmploc; union node n; int err; @@ -1578,6 +1579,8 @@ expandstr(const char *ps) /* XXX Fix (char *) cast. */ setinputstring((char *)ps); + saveheredoclist = heredoclist; + heredoclist = NULL; saveprompt = doprompt; doprompt = 0; result = ps; @@ -1603,6 +1606,7 @@ expandstr(const char *ps) doprompt = saveprompt; unwindfiles(file_stop); + heredoclist = saveheredoclist; return result; } From c09c60ae76a4262f645fc8f4c7569d3d97d1a37d Mon Sep 17 00:00:00 2001 From: Martin Michlmayr Date: Wed, 20 May 2020 16:08:36 +0800 Subject: [PATCH 021/401] shell: Fix typos Signed-off-by: Martin Michlmayr Signed-off-by: Herbert Xu --- src/TOUR | 4 ++-- src/jobs.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/TOUR b/src/TOUR index 056e79b..e30836e 100644 --- a/src/TOUR +++ b/src/TOUR @@ -100,7 +100,7 @@ string was going to be: p = stackptr; *p++ = c; /* repeated as many times as needed */ stackptr = p; -The folloing three macros (defined in memalloc.h) perform these +The following three macros (defined in memalloc.h) perform these operations, but grow the stack if you run off the end: STARTSTACKSTR(p); STPUTC(c, p); /* repeated as many times as needed */ @@ -198,7 +198,7 @@ EXECUTION: Command execution is handled by the following files: eval.c The top level routines. redir.c Code to handle redirection of input and output. jobs.c Code to handle forking, waiting, and job control. 
- exec.c Code to to path searches and the actual exec sys call. + exec.c Code to path searches and the actual exec sys call. expand.c Code to evaluate arguments. var.c Maintains the variable symbol table. Called from expand.c. diff --git a/src/jobs.c b/src/jobs.c index f65435d..0926360 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -557,7 +557,7 @@ showjobs(struct output *out, int mode) TRACE(("showjobs(%x) called\n", mode)); - /* If not even one one job changed, there is nothing to do */ + /* If not even one job changed, there is nothing to do */ dowait(DOWAIT_NONBLOCK, NULL); for (jp = curjob; jp; jp = jp->prev_job) { From e8e29001ad1cdeb68a5c617db149830e676c32c8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 26 May 2020 23:19:05 +1000 Subject: [PATCH 022/401] parser: Fix double-backslash nl in old-style command sub When handling backslashes within an old-style command substitution, we should not call pgetc_eatbnl because that would treat the next backslash character as another escape character if it was then followed by a new-line. This patch fixes it by calling pgetc. Reported-by: Matt Whitlock Fixes: 6bbc71d84bea ("parser: use pgetc_eatbnl() in more places") Signed-off-by: Herbert Xu --- src/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index 54c2861..a47022e 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1393,7 +1393,7 @@ parsebackq: { goto done; case '\\': - pc = pgetc_eatbnl(); + pc = pgetc(); if (pc != '\\' && pc != '`' && pc != '$' && (!synstack->dblquote || pc != '"')) STPUTC('\\', pout); From 39279293fa358fa8bd681f8ae27bdf11d3a66b53 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 1 Jun 2020 23:13:28 +1000 Subject: [PATCH 023/401] Release 0.5.11. 
--- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index dbd97d8..e783f30 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([dash],[0.5.10.2]) +AC_INIT([dash],[0.5.11]) AM_INIT_AUTOMAKE([foreign subdir-objects]) AC_CONFIG_SRCDIR([src/main.c]) From 8557066bcf8d76734331051cfbcdd9ba6cbb730f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 27 May 2020 12:19:13 +1000 Subject: [PATCH 024/401] parser: Get rid of PEOA PEOA is a special character used to mark an alias as being finished so that we don't enter an infinite loop with nested aliases. It complicates the parser because we have to ensure that it is skipped where necessary and not copied to the resulting token text. This patch removes it and instead delays the marking of aliases until the second pgetc. This has the same effect as the current PEOA code while keeping the complexities within the input code. Signed-off-by: Herbert Xu --- src/input.c | 81 ++++++++++++++++++++++++++++++-------------------- src/input.h | 7 ++++- src/mksyntax.c | 16 ++++------ src/parser.c | 28 +++++------------ 4 files changed, 67 insertions(+), 65 deletions(-) diff --git a/src/input.c b/src/input.c index 17544e7..cf4efdc 100644 --- a/src/input.c +++ b/src/input.c @@ -68,6 +68,7 @@ struct parsefile *parsefile = &basepf; /* current input file */ int whichprompt; /* 1 == PS1, 2 == PS2 */ STATIC void pushfile(void); +static void popstring(void); static int preadfd(void); static void setinputfd(int fd, int push); static int preadbuffer(void); @@ -99,13 +100,32 @@ FORKRESET { #endif -/* - * Read a character from the script, returning PEOF on end of file. - * Nul characters in the input are silently discarded. 
- */ +static void freestrings(struct strpush *sp) +{ + INTOFF; + do { + struct strpush *psp; -int -pgetc(void) + if (sp->ap) { + sp->ap->flag &= ~ALIASINUSE; + if (sp->ap->flag & ALIASDEAD) { + unalias(sp->ap->name); + } + } + + psp = sp; + sp = sp->spfree; + + if (psp != &(parsefile->basestrpush)) + ckfree(psp); + } while (sp); + + parsefile->spfree = NULL; + INTON; +} + + +static int __pgetc(void) { int c; @@ -125,17 +145,18 @@ pgetc(void) /* - * Same as pgetc(), but ignores PEOA. + * Read a character from the script, returning PEOF on end of file. + * Nul characters in the input are silently discarded. */ -int -pgetc2() +int pgetc(void) { - int c; - do { - c = pgetc(); - } while (c == PEOA); - return c; + struct strpush *sp = parsefile->spfree; + + if (unlikely(sp)) + freestrings(sp); + + return __pgetc(); } @@ -214,16 +235,8 @@ static int preadbuffer(void) char savec; if (unlikely(parsefile->strpush)) { - if ( - parsefile->nleft == -1 && - parsefile->strpush->ap && - parsefile->nextc[-1] != ' ' && - parsefile->nextc[-1] != '\t' - ) { - return PEOA; - } popstring(); - return pgetc(); + return __pgetc(); } if (unlikely(parsefile->nleft == EOF_NLEFT || parsefile->buf == NULL)) @@ -331,7 +344,8 @@ pushstring(char *s, void *ap) len = strlen(s); INTOFF; /*dprintf("*** calling pushstring: %s, %d\n", s, len);*/ - if (parsefile->strpush) { + if ((unsigned long)parsefile->strpush | + (unsigned long)parsefile->spfree) { sp = ckmalloc(sizeof (struct strpush)); sp->prev = parsefile->strpush; parsefile->strpush = sp; @@ -340,6 +354,7 @@ pushstring(char *s, void *ap) sp->prevstring = parsefile->nextc; sp->prevnleft = parsefile->nleft; sp->unget = parsefile->unget; + sp->spfree = parsefile->spfree; memcpy(sp->lastc, parsefile->lastc, sizeof(sp->lastc)); sp->ap = (struct alias *)ap; if (ap) { @@ -349,11 +364,11 @@ pushstring(char *s, void *ap) parsefile->nextc = s; parsefile->nleft = len; parsefile->unget = 0; + parsefile->spfree = NULL; INTON; } -void -popstring(void) +static 
void popstring(void) { struct strpush *sp = parsefile->strpush; @@ -366,10 +381,6 @@ popstring(void) if (sp->string != sp->ap->val) { ckfree(sp->string); } - sp->ap->flag &= ~ALIASINUSE; - if (sp->ap->flag & ALIASDEAD) { - unalias(sp->ap->name); - } } parsefile->nextc = sp->prevstring; parsefile->nleft = sp->prevnleft; @@ -377,8 +388,7 @@ popstring(void) memcpy(parsefile->lastc, sp->lastc, sizeof(sp->lastc)); /*dprintf("*** calling popstring: restoring to '%s'\n", parsenextc);*/ parsefile->strpush = sp->prev; - if (sp != &(parsefile->basestrpush)) - ckfree(sp); + parsefile->spfree = sp; INTON; } @@ -460,6 +470,7 @@ pushfile(void) pf->prev = parsefile; pf->fd = -1; pf->strpush = NULL; + pf->spfree = NULL; pf->basestrpush.prev = NULL; pf->unget = 0; parsefile = pf; @@ -476,8 +487,12 @@ popfile(void) close(pf->fd); if (pf->buf) ckfree(pf->buf); - while (pf->strpush) + if (parsefile->spfree) + freestrings(parsefile->spfree); + while (pf->strpush) { popstring(); + freestrings(parsefile->spfree); + } parsefile = pf->prev; ckfree(pf); INTON; diff --git a/src/input.h b/src/input.h index 8acc6e9..8c39f33 100644 --- a/src/input.h +++ b/src/input.h @@ -50,6 +50,9 @@ struct strpush { struct alias *ap; /* if push was associated with an alias */ char *string; /* remember the string since it may change */ + /* Delay freeing so we can stop nested aliases. */ + struct strpush *spfree; + /* Remember last two characters for pungetc. */ int lastc[2]; @@ -73,6 +76,9 @@ struct parsefile { struct strpush *strpush; /* for pushing strings at this level */ struct strpush basestrpush; /* so pushing one is fast */ + /* Delay freeing so we can stop nested aliases. */ + struct strpush *spfree; + /* Remember last two characters for pungetc. 
*/ int lastc[2]; @@ -93,7 +99,6 @@ int pgetc(void); int pgetc2(void); void pungetc(void); void pushstring(char *, void *); -void popstring(void); int setinputfile(const char *, int); void setinputstring(char *); void popfile(void); diff --git a/src/mksyntax.c b/src/mksyntax.c index a23c18c..da18f5d 100644 --- a/src/mksyntax.c +++ b/src/mksyntax.c @@ -64,7 +64,6 @@ struct synclass synclass[] = { { "CEOF", "end of file" }, { "CCTL", "like CWORD, except it must be escaped" }, { "CSPCL", "these terminate a word" }, - { "CIGN", "character should be ignored" }, { NULL, NULL } }; @@ -145,9 +144,8 @@ main(int argc, char **argv) fprintf(hfile, "/* %s */\n", is_entry[i].comment); } putc('\n', hfile); - fprintf(hfile, "#define SYNBASE %d\n", 130); - fprintf(hfile, "#define PEOF %d\n\n", -130); - fprintf(hfile, "#define PEOA %d\n\n", -129); + fprintf(hfile, "#define SYNBASE %d\n", 129); + fprintf(hfile, "#define PEOF %d\n\n", -129); putc('\n', hfile); fputs("#define BASESYNTAX (basesyntax + SYNBASE)\n", hfile); fputs("#define DQSYNTAX (dqsyntax + SYNBASE)\n", hfile); @@ -170,7 +168,6 @@ main(int argc, char **argv) add("$", "CVAR"); add("}", "CENDVAR"); add("<>();&| \t", "CSPCL"); - syntax[1] = "CSPCL"; print("basesyntax"); init(); fputs("\n/* syntax table used when in double quotes */\n", cfile); @@ -223,7 +220,7 @@ filltable(char *dftval) { int i; - for (i = 0 ; i < 258; i++) + for (i = 0 ; i < 257; i++) syntax[i] = dftval; } @@ -239,9 +236,8 @@ init(void) filltable("CWORD"); syntax[0] = "CEOF"; - syntax[1] = "CIGN"; for (ctl = CTL_FIRST; ctl <= CTL_LAST; ctl++ ) - syntax[130 + ctl] = "CCTL"; + syntax[129 + ctl] = "CCTL"; } @@ -253,7 +249,7 @@ static void add(char *p, char *type) { while (*p) - syntax[(signed char)*p++ + 130] = type; + syntax[(signed char)*p++ + 129] = type; } @@ -271,7 +267,7 @@ print(char *name) fprintf(hfile, "extern const char %s[];\n", name); fprintf(cfile, "const char %s[] = {\n", name); col = 0; - for (i = 0 ; i < 258; i++) { + for (i = 0 ; i < 257; 
i++) { if (i == 0) { fputs(" ", cfile); } else if ((i & 03) == 0) { diff --git a/src/parser.c b/src/parser.c index a47022e..3c80d17 100644 --- a/src/parser.c +++ b/src/parser.c @@ -796,7 +796,6 @@ xxreadtoken(void) c = pgetc_eatbnl(); switch (c) { case ' ': case '\t': - case PEOA: continue; case '#': while ((c = pgetc()) != '\n' && c != PEOF); @@ -838,7 +837,7 @@ static int pgetc_eatbnl(void) int c; while ((c = pgetc()) == '\\') { - if (pgetc2() != '\n') { + if (pgetc() != '\n') { pungetc(); break; } @@ -943,7 +942,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) break; /* backslash */ case CBACK: - c = pgetc2(); + c = pgetc(); if (c == PEOF) { USTPUTC(CTLESC, out); USTPUTC('\\', out); @@ -1048,14 +1047,10 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) break; case CEOF: goto endword; /* exit outer loop */ - case CIGN: - break; default: if (synstack->varnest == 0) goto endword; /* exit outer loop */ - if (c != PEOA) { - USTPUTC(c, out); - } + USTPUTC(c, out); } c = pgetc_top(synstack); } @@ -1103,13 +1098,9 @@ checkend: { int markloc; char *p; - if (c == PEOA) { - c = pgetc2(); - } if (striptabs) { - while (c == '\t') { - c = pgetc2(); - } + while (c == '\t') + c = pgetc(); } markloc = out - (char *)stackblock(); @@ -1117,7 +1108,7 @@ checkend: { if (c != *p) goto more_heredoc; - c = pgetc2(); + c = pgetc(); } if (c == '\n' || c == PEOF) { @@ -1229,7 +1220,6 @@ parsesub: { c = pgetc_eatbnl(); if ( (checkkwd & CHKEOFMARK) || - c <= PEOA || (c != '(' && c != '{' && !is_name(c) && !is_special(c)) ) { USTPUTC('$', out); @@ -1397,13 +1387,9 @@ parsebackq: { if (pc != '\\' && pc != '`' && pc != '$' && (!synstack->dblquote || pc != '"')) STPUTC('\\', pout); - if (pc > PEOA) { - break; - } - /* fall through */ + break; case PEOF: - case PEOA: synerror("EOF in backquote substitution"); case '\n': From d80189f2b99a4172a60589b6c23eedc2b4077a20 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 27 May 2020 13:19:10 
+1000 Subject: [PATCH 025/401] eval: Prevent recursive PS4 expansion Yaroslav Halchenko wrote: > > I like to (ab)use PS4 and set -x for tracing execution of scripts. > Reporting time and PID is very useful in this context. > > I am not 100% certain if bash's behavior (of actually running the command > embedded within PS4 string, probably eval'ing it) is actually POSIX > compliant, posh seems to not do that; but I think it is definitely not > desired for dash to just stall: > > - the script: > > #!/bin/sh > set -x > export PS4='+ $(date +%T.%N) [$$] ' > > echo "lets go" > sleep 1 > echo "done $var" > > - bash: > > /tmp > bash --posix test.sh > +export 'PS4=+ $(date +%T.%N) [$$] ' > +PS4='+ $(date +%T.%N) [$$] ' > + 09:15:48.982296333 [2764323] echo 'lets go' > lets go > + 09:15:48.987829613 [2764323] sleep 1 > + 09:15:49.994485037 [2764323] echo 'done ' > done > > > - posh: > exit:130 /tmp > posh test.sh > +export PS4=+ $(date +%T.%N) [$$] > + $(date +%T.%N) [$$] echo lets go > lets go > + $(date +%T.%N) [$$] sleep 1 > + $(date +%T.%N) [$$] echo done > done > > - dash: (stalls it set -x) > > /tmp > dash test.sh > +export PS4=+ $(date +%T.%N) [$$] > ^C^C This patch fixes the infinite loop caused by repeated expansions of PS4. Reported-by: Yaroslav Halchenko Signed-off-by: Herbert Xu --- src/eval.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/eval.c b/src/eval.c index 1b5d61d..d10be38 100644 --- a/src/eval.c +++ b/src/eval.c @@ -78,6 +78,9 @@ int exitstatus; /* exit status of last command */ int back_exitstatus; /* exit status of backquoted command */ int savestatus = -1; /* exit status of last command outside traps */ +/* Prevent PS4 nesting. */ +MKINIT int inps4; + #if !defined(__alpha__) || (defined(__GNUC__) && __GNUC__ >= 3) STATIC @@ -123,6 +126,7 @@ EXITRESET { } evalskip = 0; loopnest = 0; + inps4 = 0; } #endif @@ -855,12 +859,14 @@ evalcommand(union node *cmd, int flags) } /* Print the command if xflag is set. 
*/ - if (xflag) { + if (xflag && !inps4) { struct output *out; int sep; out = &preverrout; + inps4 = 1; outstr(expandstr(ps4val()), out); + inps4 = 0; sep = 0; sep = eprintlist(out, varlist.list, sep); eprintlist(out, osp, sep); From 282bdbd228a5e6f86ca7eec488d852d3dd3f2957 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 28 May 2020 21:31:45 +1000 Subject: [PATCH 026/401] redir: Retry open64 on EINTR It is possible for open64 to block on named pipes, and therefore it can be interrupted by signals and return EINTR. We should only let it fail with EINTR if real signals are pending (i.e., it should not fail on SIGCHLD if SIGCHLD has not been trapped). This patch adds a new helper sh_open to retry the open64 call if necessary. It also calls sh_error when appropriate. Fixes: 3800d4934391 ("[JOBS] Fix dowait signal race") Reported-by: Samuel Thibault Signed-off-by: Herbert Xu --- src/input.c | 9 ++---- src/jobs.c | 5 ++-- src/redir.c | 86 ++++++++++++++++++++++++++++++++++------------------- src/redir.h | 1 + 4 files changed, 62 insertions(+), 39 deletions(-) diff --git a/src/input.c b/src/input.c index cf4efdc..4987732 100644 --- a/src/input.c +++ b/src/input.c @@ -403,12 +403,9 @@ setinputfile(const char *fname, int flags) int fd; INTOFF; - if ((fd = open64(fname, O_RDONLY)) < 0) { - if (flags & INPUT_NOFILE_OK) - goto out; - exitstatus = 127; - exerror(EXERROR, "Can't open %s", fname); - } + fd = sh_open(fname, O_RDONLY, flags & INPUT_NOFILE_OK); + if (fd < 0) + goto out; if (fd < 10) fd = savefd(fd, fd); setinputfd(fd, flags & INPUT_PUSH_FILE); diff --git a/src/jobs.c b/src/jobs.c index 0926360..94bf47e 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -196,7 +196,7 @@ setjobctl(int on) return; if (on) { int ofd; - ofd = fd = open64(_PATH_TTY, O_RDWR); + ofd = fd = sh_open(_PATH_TTY, O_RDWR, 1); if (fd < 0) { fd += 3; while (!isatty(fd)) @@ -887,8 +887,7 @@ static void forkchild(struct job *jp, union node *n, int mode) ignoresig(SIGQUIT); if (jp->nprocs == 0) { 
close(0); - if (open64(_PATH_DEVNULL, O_RDONLY) != 0) - sh_error("Can't open %s", _PATH_DEVNULL); + sh_open(_PATH_DEVNULL, O_RDONLY, 0); } } if (!oldlvl && iflag) { diff --git a/src/redir.c b/src/redir.c index 895140c..93abba3 100644 --- a/src/redir.c +++ b/src/redir.c @@ -55,6 +55,7 @@ #include "output.h" #include "memalloc.h" #include "error.h" +#include "trap.h" #define EMPTY -2 /* marks an unused slot in redirtab */ @@ -180,56 +181,83 @@ redirect(union node *redir, int flags) } +static int sh_open_fail(const char *, int, int) __attribute__((__noreturn__)); +static int sh_open_fail(const char *pathname, int flags, int e) +{ + const char *word; + int action; + + word = "open"; + action = E_OPEN; + if (flags & O_CREAT) { + word = "create"; + action = E_CREAT; + } + + sh_error("cannot %s %s: %s", word, pathname, errmsg(e, action)); +} + + +int sh_open(const char *pathname, int flags, int mayfail) +{ + int fd; + int e; + + do { + fd = open64(pathname, flags, 0666); + e = errno; + } while (fd < 0 && e == EINTR && !pending_sig); + + if (mayfail || fd >= 0) + return fd; + + sh_open_fail(pathname, flags, e); +} + + STATIC int openredirect(union node *redir) { struct stat64 sb; char *fname; + int flags; int f; switch (redir->nfile.type) { case NFROM: - fname = redir->nfile.expfname; - if ((f = open64(fname, O_RDONLY)) < 0) - goto eopen; + flags = O_RDONLY; +do_open: + f = sh_open(redir->nfile.expfname, flags, 0); break; case NFROMTO: - fname = redir->nfile.expfname; - if ((f = open64(fname, O_RDWR|O_CREAT, 0666)) < 0) - goto ecreate; - break; + flags = O_RDWR|O_CREAT; + goto do_open; case NTO: /* Take care of noclobber mode. 
*/ if (Cflag) { fname = redir->nfile.expfname; if (stat64(fname, &sb) < 0) { - if ((f = open64(fname, O_WRONLY|O_CREAT|O_EXCL, 0666)) < 0) - goto ecreate; - } else if (!S_ISREG(sb.st_mode)) { - if ((f = open64(fname, O_WRONLY, 0666)) < 0) - goto ecreate; - if (!fstat64(f, &sb) && S_ISREG(sb.st_mode)) { - close(f); - errno = EEXIST; - goto ecreate; - } - } else { - errno = EEXIST; + flags = O_WRONLY|O_CREAT|O_EXCL; + goto do_open; + } + + if (S_ISREG(sb.st_mode)) + goto ecreate; + + f = sh_open(fname, O_WRONLY, 0); + if (!fstat64(f, &sb) && S_ISREG(sb.st_mode)) { + close(f); goto ecreate; } break; } /* FALLTHROUGH */ case NCLOBBER: - fname = redir->nfile.expfname; - if ((f = open64(fname, O_WRONLY|O_CREAT|O_TRUNC, 0666)) < 0) - goto ecreate; - break; + flags = O_WRONLY|O_CREAT|O_TRUNC; + goto do_open; case NAPPEND: - fname = redir->nfile.expfname; - if ((f = open64(fname, O_WRONLY|O_CREAT|O_APPEND, 0666)) < 0) - goto ecreate; - break; + flags = O_WRONLY|O_CREAT|O_APPEND; + goto do_open; case NTOFD: case NFROMFD: f = redir->ndup.dupfd; @@ -249,9 +277,7 @@ openredirect(union node *redir) return f; ecreate: - sh_error("cannot create %s: %s", fname, errmsg(errno, E_CREAT)); -eopen: - sh_error("cannot open %s: %s", fname, errmsg(errno, E_OPEN)); + sh_open_fail(fname, O_CREAT, EEXIST); } diff --git a/src/redir.h b/src/redir.h index 1cf2761..16f5c20 100644 --- a/src/redir.h +++ b/src/redir.h @@ -49,4 +49,5 @@ int savefd(int, int); int redirectsafe(union node *, int); void unwindredir(struct redirtab *stop); struct redirtab *pushredir(union node *redir); +int sh_open(const char *pathname, int flags, int mayfail); From 7638476c18f2a8451b05d49f4e767ee1f98cfc17 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 28 May 2020 22:57:52 +1000 Subject: [PATCH 027/401] shell: Enable fnmatch/glob by default As fnmatch(3) and glob(3) from glibc are now working consistently, this patch enables them by default. 
Signed-off-by: Herbert Xu --- configure.ac | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index e783f30..ce5feec 100644 --- a/configure.ac +++ b/configure.ac @@ -37,9 +37,10 @@ if test "$enable_static" = "yes"; then export LDFLAGS="-static -Wl,--fatal-warnings" fi -AC_ARG_ENABLE(fnmatch, AS_HELP_STRING(--enable-fnmatch, \ - [Use fnmatch(3) from libc])) -AC_ARG_ENABLE(glob, AS_HELP_STRING(--enable-glob, [Use glob(3) from libc])) +AC_ARG_ENABLE(fnmatch, AS_HELP_STRING(--disable-fnmatch, \ + [Do not use fnmatch(3) from libc])) +AC_ARG_ENABLE(glob, AS_HELP_STRING(--disable-glob, \ + [Do not use glob(3) from libc])) dnl Checks for libraries. @@ -122,12 +123,12 @@ if test "$enable_test_workaround" = "yes"; then [Define if your faccessat tells root all files are executable]) fi -if test "$enable_fnmatch" = yes; then +if test "$enable_fnmatch" != no; then use_fnmatch= AC_CHECK_FUNCS(fnmatch, use_fnmatch=yes) fi -if test "$use_fnmatch" = yes && test "$enable_glob" = yes; then +if test "$use_fnmatch" = yes && test "$enable_glob" != no; then AC_CHECK_FUNCS(glob) fi From a09fe58449971226a885cc72e5983121cbc53e97 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 2 Jun 2020 21:51:15 +1000 Subject: [PATCH 028/401] expand: Make glob(3) interruptible by SIGINT If glob(3) is used then it can't be interrupted by SIGINT. This is bad when an expansion causes a large number of entries to be generated. This patch improves things by adding an int_pending check to gl_opendir call. Note that this is still not perfect, e.g., the sort would still be uninterruptible. 
Signed-off-by: Herbert Xu --- configure.ac | 7 +++++++ src/expand.c | 37 +++++++++++++++++++++++++++++-------- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/configure.ac b/configure.ac index ce5feec..955e2bb 100644 --- a/configure.ac +++ b/configure.ac @@ -151,6 +151,13 @@ AC_CHECK_FUNC(stat64,, [ [64-bit operations are the same as 32-bit]) ]) +AC_CHECK_FUNC(glob64,, [ + AC_DEFINE(glob64_t, glob_t, [64-bit operations are the same as 32-bit]) + AC_DEFINE(glob64, glob, [64-bit operations are the same as 32-bit]) + AC_DEFINE(globfree64, globfree, + [64-bit operations are the same as 32-bit]) +]) + dnl OS X apparently has stat64 but not open64. AC_CHECK_FUNC(open64,, [ AC_DEFINE(open64, open, [64-bit operations are the same as 32-bit]) diff --git a/src/expand.c b/src/expand.c index 1730670..aea5cc4 100644 --- a/src/expand.c +++ b/src/expand.c @@ -120,7 +120,7 @@ static size_t memtodest(const char *p, size_t len, int flags); STATIC ssize_t varvalue(char *, int, int, int); STATIC void expandmeta(struct strlist *); #ifdef HAVE_GLOB -STATIC void addglob(const glob_t *); +static void addglob(const glob64_t *); #else STATIC void expmeta(char *, unsigned, unsigned); STATIC struct strlist *expsort(struct strlist *); @@ -1154,6 +1154,20 @@ void ifsfree(void) */ #ifdef HAVE_GLOB +#ifdef __GLIBC__ +void *opendir_interruptible(const char *pathname) +{ + if (int_pending()) { + suppressint = 0; + onint(); + } + + return opendir(pathname); +} +#else +#define GLOB_ALTDIRFUNC 0 +#endif + STATIC void expandmeta(struct strlist *str) { @@ -1161,14 +1175,23 @@ expandmeta(struct strlist *str) while (str) { const char *p; - glob_t pglob; + glob64_t pglob; int i; if (fflag) goto nometa; + +#ifdef __GLIBC__ + pglob.gl_closedir = (void *)closedir; + pglob.gl_readdir = (void *)readdir64; + pglob.gl_opendir = opendir_interruptible; + pglob.gl_lstat = lstat64; + pglob.gl_stat = stat64; +#endif + INTOFF; p = preglob(str->text, RMESCAPE_ALLOC | RMESCAPE_HEAP); - i = glob(p, 
GLOB_NOMAGIC, 0, &pglob); + i = glob64(p, GLOB_ALTDIRFUNC | GLOB_NOMAGIC, 0, &pglob); if (p != str->text) ckfree(p); switch (i) { @@ -1177,12 +1200,12 @@ expandmeta(struct strlist *str) (GLOB_NOMAGIC | GLOB_NOCHECK)) goto nometa2; addglob(&pglob); - globfree(&pglob); + globfree64(&pglob); INTON; break; case GLOB_NOMATCH: nometa2: - globfree(&pglob); + globfree64(&pglob); INTON; nometa: *exparg.lastp = str; @@ -1201,9 +1224,7 @@ expandmeta(struct strlist *str) * Add the result of glob(3) to the list. */ -STATIC void -addglob(pglob) - const glob_t *pglob; +static void addglob(const glob64_t *pglob) { char **p = pglob->gl_pathv; From dc44524ebfcb69e4a21a8b6ae8c39dee35250185 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 2 Jun 2020 21:52:55 +1000 Subject: [PATCH 029/401] error: Remove USE_NORETURN ifdef The USE_NORETURN was added because gcc was buggy almost 20 years ago. This is no longer needed and this patch removes it. Signed-off-by: Herbert Xu --- src/error.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/error.h b/src/error.h index 94e30a2..661a8a0 100644 --- a/src/error.h +++ b/src/error.h @@ -116,11 +116,7 @@ void __inton(void); #define int_pending() intpending void exraise(int) __attribute__((__noreturn__)); -#ifdef USE_NORETURN void onint(void) __attribute__((__noreturn__)); -#else -void onint(void); -#endif extern int errlinno; void sh_error(const char *, ...) __attribute__((__noreturn__)); void exerror(int, const char *, ...) __attribute__((__noreturn__)); From 0b923162429afa275df4aabc7aab2509ad0992a6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 2 Jun 2020 23:46:48 +1000 Subject: [PATCH 030/401] jobs: Fix waitcmd busy loop We need to clear gotsigchld in waitproc because it is used as a loop conditional for the waitcmd case. Without it waitcmd may busy loop after a SIGCHLD. This patch also changes gotsigchld into a volatile sig_atomic_t to prevent compilers from optimising its accesses away. 
Fixes: 6c691b3e5099 ("jobs: Only clear gotsigchld when waiting...") Signed-off-by: Herbert Xu --- src/jobs.c | 2 +- src/trap.c | 2 +- src/trap.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jobs.c b/src/jobs.c index 94bf47e..3417633 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -1135,7 +1135,6 @@ static int dowait(int block, struct job *jp) rpid = 1; do { - gotsigchld = 0; pid = waitone(block, jp); rpid &= !!pid; @@ -1175,6 +1174,7 @@ waitproc(int block, int *status) #endif do { + gotsigchld = 0; do err = wait3(status, flags, NULL); while (err < 0 && errno == EINTR); diff --git a/src/trap.c b/src/trap.c index 82e7ece..cd84814 100644 --- a/src/trap.c +++ b/src/trap.c @@ -76,7 +76,7 @@ static char gotsig[NSIG - 1]; /* last pending signal */ volatile sig_atomic_t pending_sig; /* received SIGCHLD */ -int gotsigchld; +volatile sig_atomic_t gotsigchld; extern char *signal_names[]; diff --git a/src/trap.h b/src/trap.h index 4c455a8..beaf660 100644 --- a/src/trap.h +++ b/src/trap.h @@ -39,7 +39,7 @@ extern int trapcnt; extern char sigmode[]; extern volatile sig_atomic_t pending_sig; -extern int gotsigchld; +extern volatile sig_atomic_t gotsigchld; int trapcmd(int, char **); void setsignal(int); From 29d6f2148f10213de4e904d515e792d2cf8c968e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 4 Jun 2020 21:53:55 +1000 Subject: [PATCH 031/401] eval: Check nflag in evaltree instead of cmdloop This patch moves the nflag check from cmdloop into evaltree. This is so that nflag will be in force even if we enter the shell via a path other than cmdloop, e.g., through sh -c. 
Reported-by: Joey Hess Signed-off-by: Herbert Xu --- src/eval.c | 3 +++ src/main.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/eval.c b/src/eval.c index d10be38..9476fbb 100644 --- a/src/eval.c +++ b/src/eval.c @@ -213,6 +213,9 @@ evaltree(union node *n, int flags) setstackmark(&smark); + if (nflag) + goto out; + if (n == NULL) { TRACE(("evaltree(NULL) called\n")); goto out; diff --git a/src/main.c b/src/main.c index 7a28534..5c49fdc 100644 --- a/src/main.c +++ b/src/main.c @@ -233,7 +233,7 @@ cmdloop(int top) out2str("\nUse \"exit\" to leave shell.\n"); } numeof++; - } else if (nflag == 0) { + } else { int i; job_warning = (job_warning == 2) ? 1 : 0; From dd9ef66a805087b871e7eda49f34bc03d1c385a1 Mon Sep 17 00:00:00 2001 From: Bjarni Ingi Gislason Date: Wed, 24 Jun 2020 02:54:25 +0000 Subject: [PATCH 032/401] man: fix formatting Fix formatting according to the output of "mandoc -Tlint". Overview: Start each sentence on a new line. Protect a punctuation mark in a macro call with '\&'. Trim trailing space. Add a missing comma in a row of words. Use an en-dash instead of '--' if there is space around it. An em-dash is used without space around it. Comment out ".Pp" macros that do nothing. Split long sentences after a punctuation mark. Remove a "-width ..." 
for a ".Bl -item" macro, as it has no influence Details: mandoc: ./src/bltin/echo.1:69:38: WARNING: new sentence, new line mandoc: ./src/bltin/echo.1:75:35: WARNING: new sentence, new line mandoc: ./src/bltin/printf.1:205:12: WARNING: skipping empty macro: No mandoc: ./src/bltin/printf.1:284:28: STYLE: whitespace at end of input line mandoc: ./src/bltin/printf.1:288:20: STYLE: whitespace at end of input line mandoc: ./src/bltin/printf.1:293:28: STYLE: whitespace at end of input line mandoc: ./src/bltin/printf.1:353:31: WARNING: new sentence, new line mandoc: ./src/bltin/printf.1:74:2: STYLE: useless macro: Tn mandoc: ./src/bltin/printf.1:111:2: STYLE: useless macro: Tn mandoc: ./src/bltin/printf.1:116:2: STYLE: useless macro: Tn mandoc: ./src/bltin/printf.1:279:2: STYLE: useless macro: Tn mandoc: ./src/bltin/printf.1:334:2: WARNING: unusual Xr punctuation: none before vis(3) mandoc: ./src/bltin/printf.1:334:2: WARNING: unusual Xr order: vis(3) after printf(9) mandoc: ./src/bltin/printf.1:348:2: STYLE: useless macro: Tn mandoc: ./src/bltin/printf.1:333:6: STYLE: referenced manual not found: Xr printf 9 mandoc: ./src/bltin/printf.1:334:6: STYLE: referenced manual not found: Xr vis 3 mandoc: ./src/bltin/test.1:46:16: WARNING: skipping empty macro: Cm mandoc: ./src/bltin/test.1:105:5: STYLE: useless macro: Tn mandoc: ./src/dash.1:1180:58: WARNING: new sentence, new line mandoc: ./src/dash.1:1186:13: STYLE: whitespace at end of input line mandoc: ./src/dash.1:1194:38: WARNING: new sentence, new line mandoc: ./src/dash.1:1200:35: WARNING: new sentence, new line mandoc: ./src/dash.1:1474:71: WARNING: new sentence, new line mandoc: ./src/dash.1:1783:62: WARNING: new sentence, new line mandoc: ./src/dash.1:2061:22: WARNING: new sentence, new line mandoc: ./src/dash.1:2311:54: WARNING: new sentence, new line mandoc: ./src/dash.1:2315:63: WARNING: new sentence, new line mandoc: ./src/dash.1:37:2: WARNING: prologue macros out of order: Dt after Os mandoc: ./src/dash.1:87:2: 
STYLE: useless macro: Tn mandoc: ./src/dash.1:94:2: STYLE: useless macro: Tn mandoc: ./src/dash.1:343:2: STYLE: useless macro: Tn mandoc: ./src/dash.1:442:17: STYLE: verbatim "--", maybe consider using \(em mandoc: ./src/dash.1:466:2: STYLE: useless macro: Tn mandoc: ./src/dash.1:581:34: STYLE: verbatim "--", maybe consider using \(em mandoc: ./src/dash.1:583:25: STYLE: verbatim "--", maybe consider using \(em mandoc: ./src/dash.1:585:43: STYLE: verbatim "--", maybe consider using \(em mandoc: ./src/dash.1:595:11: STYLE: verbatim "--", maybe consider using \(em mandoc: ./src/dash.1:618:29: STYLE: verbatim "--", maybe consider using \(em mandoc: ./src/dash.1:697:2: WARNING: skipping paragraph macro: Pp before Bd mandoc: ./src/dash.1:1344:2: STYLE: useless macro: Tn mandoc: ./src/dash.1:1420:2: WARNING: skipping paragraph macro: Pp before Bd mandoc: ./src/dash.1:1434:2: WARNING: skipping paragraph macro: Pp before Bd mandoc: ./src/dash.1:1556:2: STYLE: useless macro: Tn mandoc: ./src/dash.1:1587:2: STYLE: useless macro: Tn mandoc: ./src/dash.1:1746:2: STYLE: useless macro: Tn mandoc: ./src/dash.1:1875:5: STYLE: useless macro: Tn mandoc: ./src/dash.1:1525:2: WARNING: skipping paragraph macro: Pp before It mandoc: ./src/dash.1:2182:2: WARNING: skipping paragraph macro: Pp before It mandoc: ./src/dash.1:2247:2: WARNING: sections out of conventional order: Sh ENVIRONMENT mandoc: ./src/dash.1:2323:11: WARNING: skipping -width argument: Bl -item mandoc: ./src/dash.1:2347:31: STYLE: consider using OS macro: Nx mandoc: ./src/dash.1:92:6: STYLE: referenced manual not found: Xr ksh 1 (2 times) mandoc: ./src/dash.1:253:6: STYLE: referenced manual not found: Xr emacs 1 mandoc: ./src/dash.1:2253:9: STYLE: referenced manual not found: Xr passwd 4 mandoc: ./src/dash.1:2330:6: STYLE: referenced manual not found: Xr csh 1 Signed-off-by: Bjarni Ingi Gislason Signed-off-by: Herbert Xu --- src/bltin/echo.1 | 6 +++-- src/bltin/printf.1 | 13 ++++++----- src/bltin/test.1 | 2 +- src/dash.1 
| 57 +++++++++++++++++++++++++++------------------- 4 files changed, 45 insertions(+), 33 deletions(-) diff --git a/src/bltin/echo.1 b/src/bltin/echo.1 index fbc7fb4..4d1890f 100644 --- a/src/bltin/echo.1 +++ b/src/bltin/echo.1 @@ -66,13 +66,15 @@ and may be given. .Pp If any of the following sequences of characters is encountered during -output, the sequence is not output. Instead, the specified action is +output, the sequence is not output. +Instead, the specified action is performed: .Bl -tag -width indent .It Li \eb A backspace character is output. .It Li \ec -Subsequent output is suppressed. This is normally used at the end of the +Subsequent output is suppressed. +This is normally used at the end of the last argument to suppress the trailing newline that .Nm would otherwise output. diff --git a/src/bltin/printf.1 b/src/bltin/printf.1 index 3873173..409d434 100644 --- a/src/bltin/printf.1 +++ b/src/bltin/printf.1 @@ -202,7 +202,7 @@ and formats, or the maximum number of characters to be printed from a string .Sm off -.Pf ( Cm b No , +.Pf ( Cm b Ns \&, .Sm on .Cm B and @@ -281,16 +281,16 @@ value is the 1\-, 2\-, or 3\-digit octal number .Ar num . .It Cm \e^ Ns Ar c -Write the control character +Write the control character .Ar c . Generates characters `\e000' through `\e037`, and `\e177' (from `\e^?'). .It Cm \eM\- Ns Ar c -Write the character +Write the character .Ar c with the 8th bit set. Generates characters `\e241' through `\e376`. .It Cm \eM^ Ns Ar c -Write the control character +Write the control character .Ar c with the 8th bit set. Generates characters `\e000' through `\e037`, and `\e177' (from `\eM^?'). @@ -330,7 +330,7 @@ exits 0 on success, 1 on failure. .Sh SEE ALSO .Xr echo 1 , .Xr printf 3 , -.Xr printf 9 +.Xr printf 9 , .Xr vis 3 .Sh STANDARDS The @@ -350,5 +350,6 @@ to floating-point and then back again, floating-point precision may be lost. 
.Pp Hexadecimal character constants are restricted to, and should be specified -as, two character constants. This is contrary to the ISO C standard but +as, two character constants. +This is contrary to the ISO C standard but does guarantee detection of the end of the constant. diff --git a/src/bltin/test.1 b/src/bltin/test.1 index 42435fb..03abce8 100644 --- a/src/bltin/test.1 +++ b/src/bltin/test.1 @@ -43,7 +43,7 @@ .Nm test .Ar expression .Nm \&[ -.Ar expression Cm ] +.Ar expression Cm \&] .Sh DESCRIPTION The .Nm test diff --git a/src/dash.1 b/src/dash.1 index 32f6ac0..ff02237 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -33,8 +33,8 @@ .\" @(#)sh.1 8.6 (Berkeley) 5/4/95 .\" .Dd January 19, 2003 -.Os .Dt DASH 1 +.Os .Sh NAME .Nm dash .Nd command interpreter (shell) @@ -439,7 +439,7 @@ instead of then leading tabs in the here-doc-text are stripped. .Ss Search and Execution There are three types of commands: shell functions, builtin commands, and -normal programs -- and the command is searched for (by name) in that order. +normal programs \(en and the command is searched for (by name) in that order. They each are executed in a different way. .Pp When a shell function is executed, all of the shell positional parameters @@ -578,11 +578,11 @@ the preceding AND-OR-list. .Pp Note that unlike some other shells, each process in the pipeline is a child of the invoking shell (unless it is a shell builtin, in which case -it executes in the current shell -- but any effect it has on the +it executes in the current shell \(en but any effect it has on the environment is wiped). -.Ss Background Commands -- & +.Ss Background Commands \(en & If a command is terminated by the control operator ampersand (&), the -shell executes the command asynchronously -- that is, the shell does not +shell executes the command asynchronously \(en that is, the shell does not wait for the command to finish before executing the next command. 
.Pp The format for running a command in background is: @@ -592,7 +592,7 @@ The format for running a command in background is: If the shell is not interactive, the standard input of an asynchronous command is set to .Pa /dev/null . -.Ss Lists -- Generally Speaking +.Ss Lists \(en Generally Speaking A list is a sequence of zero or more commands separated by newlines, semicolons, or ampersands, and optionally terminated by one of these three characters. @@ -615,7 +615,7 @@ of the first command is nonzero. and .Dq || both have the same priority. -.Ss Flow-Control Constructs -- if, while, for, case +.Ss Flow-Control Constructs \(en if, while, for, case The syntax of the if command is .Bd -literal -offset indent if list @@ -694,7 +694,7 @@ Builtin commands grouped into a (list) will not affect the current shell. The second form does not fork another shell so is slightly more efficient. Grouping commands together this way allows you to redirect their output as though they were one program: -.Pp +.\".Pp .Bd -literal -offset indent { printf \*q hello \*q ; printf \*q world\\n" ; } \*[Gt] greeting .Ed @@ -1177,13 +1177,14 @@ mechanism was used or because the argument is a single dash. The .Fl P option causes the physical directory structure to be used, that is, all -symbolic links are resolved to their respective values. The +symbolic links are resolved to their respective values. +The .Fl L option turns off the effect of any preceding .Fl P options. .It Xo echo Op Fl n -.Ar args... +.Ar args... .Xc Print the arguments on the standard output, separated by spaces. Unless the @@ -1191,13 +1192,15 @@ Unless the option is present, a newline is output following the arguments. .Pp If any of the following sequences of characters is encountered during -output, the sequence is not output. Instead, the specified action is +output, the sequence is not output. +Instead, the specified action is performed: .Bl -tag -width indent .It Li \eb A backspace character is output. 
.It Li \ec -Subsequent output is suppressed. This is normally used at the end of the +Subsequent output is suppressed. +This is normally used at the end of the last argument to suppress the trailing newline that .Ic echo would otherwise output. @@ -1417,7 +1420,7 @@ and and the option .Op c , which requires an argument. -.Pp +.\".Pp .Bd -literal -offset indent while getopts abc: f do @@ -1431,7 +1434,7 @@ shift `expr $OPTIND - 1` .Ed .Pp This code will accept any of the following as equivalent: -.Pp +.\".Pp .Bd -literal -offset indent cmd \-acarg file file cmd \-a \-c arg file file @@ -1471,7 +1474,8 @@ will continue to print the old name for the directory. The .Fl P option causes the physical value of the current working directory to be shown, -that is, all symbolic links are resolved to their respective values. The +that is, all symbolic links are resolved to their respective values. +The .Fl L option turns off the effect of any preceding .Fl P @@ -1522,7 +1526,7 @@ variables. With the .Fl p option specified the output will be formatted suitably for non-interactive use. -.Pp +.\".Pp .It Xo printf Ar format .Op Ar arguments ... .Xc @@ -1780,9 +1784,11 @@ If options are given, it sets the specified option flags, or clears them as described in the section called .Sx Argument List Processing . As a special case, if the option is -o or +o and no argument is -supplied, the shell prints the settings of all its options. If the -option is -o, the settings are printed in a human-readable format; if -the option is +o, the settings are printed in a format suitable for +supplied, the shell prints the settings of all its options. +If the option is -o, +the settings are printed in a human-readable format; +if the option is +o, +the settings are printed in a format suitable for reinput to the shell to affect the same option settings. .Pp The third use of the set command is to set the values of the shell's @@ -2058,7 +2064,8 @@ operator has higher precedence than the operator. 
.It times Print the accumulated user and system times for the shell and for processes -run from the shell. The return status is 0. +run from the shell. +The return status is 0. .It Xo trap .Op Ar action Ar signal ... .Xc @@ -2179,7 +2186,7 @@ the current limit is displayed. Limits of an arbitrary process can be displayed or set using the .Xr sysctl 8 utility. -.Pp +.\".Pp .It umask Op Ar mask Set the value of umask (see .Xr umask 2 ) @@ -2308,11 +2315,13 @@ children of the shell, and is used in the history editing modes. .It Ev HISTSIZE The number of lines in the history buffer for the shell. .It Ev PWD -The logical value of the current working directory. This is set by the +The logical value of the current working directory. +This is set by the .Ic cd command. .It Ev OLDPWD -The previous logical value of the current working directory. This is set by +The previous logical value of the current working directory. +This is set by the .Ic cd command. @@ -2320,7 +2329,7 @@ command. The process ID of the parent process of the shell. .El .Sh FILES -.Bl -item -width HOMEprofilexxxx +.Bl -item .It .Pa $HOME/.profile .It From 6ba88b3ed28fb4b52f20b730194c4ad3d8aad037 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 22 Jul 2020 13:58:47 +1000 Subject: [PATCH 033/401] shell: Group readdir64/dirent64 with open64 The test for open64 is separate from stat64 for macOS. However, the newly introduced tests for readdir64/dirent64 should be grouped with open64 instead of stat64 as otherwise they cause similar build failures. 
Reported-by: Martijn Dekker Signed-off-by: Herbert Xu --- configure.ac | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index 955e2bb..ab3c02e 100644 --- a/configure.ac +++ b/configure.ac @@ -145,10 +145,6 @@ AC_CHECK_FUNC(stat64,, [ AC_DEFINE(fstat64, fstat, [64-bit operations are the same as 32-bit]) AC_DEFINE(lstat64, lstat, [64-bit operations are the same as 32-bit]) AC_DEFINE(stat64, stat, [64-bit operations are the same as 32-bit]) - AC_DEFINE(readdir64, readdir, - [64-bit operations are the same as 32-bit]) - AC_DEFINE(dirent64, dirent, - [64-bit operations are the same as 32-bit]) ]) AC_CHECK_FUNC(glob64,, [ @@ -161,6 +157,10 @@ AC_CHECK_FUNC(glob64,, [ dnl OS X apparently has stat64 but not open64. AC_CHECK_FUNC(open64,, [ AC_DEFINE(open64, open, [64-bit operations are the same as 32-bit]) + AC_DEFINE(readdir64, readdir, + [64-bit operations are the same as 32-bit]) + AC_DEFINE(dirent64, dirent, + [64-bit operations are the same as 32-bit]) ]) dnl Check if struct stat has st_mtim. From 1f6fc2dc2f6e64248c43f94d4c52bb762c6fefce Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 16 Nov 2020 13:52:22 +1100 Subject: [PATCH 034/401] shell: Disable glob again as it strips trailing slashes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Mon, Nov 16, 2020 at 01:47:48PM +1100, Herbert Xu wrote: > René Scharfe wrote: > > > > on Debian testing dash eats trailing slashes of parameters that happen > > to be regular files when expanding "$@". Example: > > > > $ rm -f foo bar > > $ touch foo > > $ dash -c 'echo "$0" "$@"' baz foo/ bar/ ./ > > baz foo bar/ ./ > > In fact you just have to do > > dash -c 'echo bar\/' > > This is a bug in glob(3). It's stripping the slash. > > I guess we'll just have to disable glob again. This patch disables glob(3) by default.
Reported-by: René Scharfe Signed-off-by: Herbert Xu --- configure.ac | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index ab3c02e..e9ae792 100644 --- a/configure.ac +++ b/configure.ac @@ -39,8 +39,7 @@ fi AC_ARG_ENABLE(fnmatch, AS_HELP_STRING(--disable-fnmatch, \ [Do not use fnmatch(3) from libc])) -AC_ARG_ENABLE(glob, AS_HELP_STRING(--disable-glob, \ - [Do not use glob(3) from libc])) +AC_ARG_ENABLE(glob, AS_HELP_STRING(--enable-glob, [Use glob(3) from libc])) dnl Checks for libraries. @@ -128,7 +127,7 @@ if test "$enable_fnmatch" != no; then AC_CHECK_FUNCS(fnmatch, use_fnmatch=yes) fi -if test "$use_fnmatch" = yes && test "$enable_glob" != no; then +if test "$use_fnmatch" = yes && test "$enable_glob" = yes; then AC_CHECK_FUNCS(glob) fi From 6359d7aa739b9f02f622805f4dbddeaf0ae61981 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 2 Dec 2020 16:31:45 +1100 Subject: [PATCH 035/401] jobs: Only block in waitcmd on first run This patch ensures that waitcmd never blocks unless there are outstanding jobs. This could otherwise trigger a hang if children were created prior to the shell coming into existence, or if there are backgrounded children of other kinds (e.g., a here-document).
Fixes: 6c691b3e5099 ("jobs: Only clear gotsigchld when waiting...") Reported-by: Michael Biebl Signed-off-by: Herbert Xu --- src/jobs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/jobs.c b/src/jobs.c index 3417633..516786f 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -81,6 +81,7 @@ #define DOWAIT_NONBLOCK 0 #define DOWAIT_BLOCK 1 #define DOWAIT_WAITCMD 2 +#define DOWAIT_WAITCMD_ALL 4 /* array of jobs */ static struct job *jobtab; @@ -615,7 +616,7 @@ waitcmd(int argc, char **argv) jp->waited = 1; jp = jp->prev_job; } - if (!dowait(DOWAIT_WAITCMD, 0)) + if (!dowait(DOWAIT_WAITCMD_ALL, 0)) goto sigout; } } @@ -1138,6 +1139,7 @@ static int dowait(int block, struct job *jp) pid = waitone(block, jp); rpid &= !!pid; + block &= ~DOWAIT_WAITCMD_ALL; if (!pid || (jp && jp->state != JOBRUNNING)) block = DOWAIT_NONBLOCK; } while (pid >= 0); From c1166e98c119f187a2a0377a063fe66640980aee Mon Sep 17 00:00:00 2001 From: "C. McEnroe" Date: Tue, 22 Dec 2020 19:46:50 -0500 Subject: [PATCH 036/401] shell: Fail if building --with-libedit and can't find libedit Previously, configure --with-libedit would only fail in the case where libedit is available but its header file histedit.h is not. 
Fixes: 13537aaa484b ("[BUILD] Added --with-libedit option to...") Signed-off-by: Herbert Xu --- configure.ac | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index e9ae792..44f2f95 100644 --- a/configure.ac +++ b/configure.ac @@ -182,7 +182,8 @@ if test "$with_libedit" = "yes"; then AC_CHECK_LIB(edit, history_init, [ AC_CHECK_HEADER([histedit.h], [use_libedit="yes"], AC_MSG_ERROR( - [Can't find required header files.]))]) + [Can't find required header files.]))], [ + AC_MSG_ERROR([Can't find libedit.])]) fi if test "$use_libedit" != "yes"; then AC_DEFINE([SMALL], 1, [Define if you build with -DSMALL]) From 41d875fa0941b4c827c6b598df2aa9ffb868183f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 23 Dec 2020 19:12:37 +1100 Subject: [PATCH 037/401] input: Clear unget on RESET On Sat, Dec 19, 2020 at 02:23:44PM +0100, Denys Vlasenko wrote: > Current git: > > $ ;l > dash: 1: Syntax error: ";" unexpected > $ s > COPYING ChangeLog.O Makefile.am aclocal.m4 autom4te.cache > config.h config.log configure dash > dollar_altvalue1.tests missing stamp-h1 > ChangeLog Makefile Makefile.in autogen.sh compile > config.h.in config.status configure.ac depcomp install-sh > src trace This patch fixes it by clearing ungetc on RESET. 
Fixes: 17db43b58415 ("input: Allow two consecutive calls to pungetc") Reported-by: Denys Vlasenko Signed-off-by: Herbert Xu --- src/input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/input.c b/src/input.c index 4987732..d7c101b 100644 --- a/src/input.c +++ b/src/input.c @@ -87,6 +87,7 @@ INIT { RESET { /* clear input buffer */ basepf.lleft = basepf.nleft = 0; + basepf.unget = 0; popallfiles(); } From 9abaa470e60d6ac90650b0337db5d867c7f08864 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jan 2021 15:45:12 +1100 Subject: [PATCH 038/401] jobs: Block signals during tcsetpgrp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Harald van Dijk wrote: > On 19/12/2020 22:21, Steffen Nurpmeso wrote: >> Steffen Nurpmeso wrote in >> <20201219172838.1B-WB%steffen@sdaoden.eu>: >> |Long story short, after falsely accusing BSD make of not working >> >> After dinner i shortened it a bit more, and attach it again, ok? >> It is terrible, but now less redundant than before. >> Sorry for being so terse, that problem crosses my head for about >> a week, and i was totally mislead and if you bang your head >> against the wall so many hours bugs or misbehaviours in a handful >> of other programs is not the expected outcome. > > I think a minimal test case is simply > > all: > $(SHELL) -c 'trap "echo TTOU" TTOU; set -m; echo all good' > > unless I accidentally oversimplified. > > The SIGTTOU is caused by setjobctl's xtcsetpgrp(fd, pgrp) call to make > its newly started process group the foreground process group when job > control is enabled, where xtcsetpgrp is a wrapper for tcsetpgrp. (That's > in dash, the other variants may have some small differences.) tcsetpgrp > has this little bit in its specification: > > Attempts to use tcsetpgrp() from a process which is a member of > a background process group on a fildes associated with its con‐ > trolling terminal shall cause the process group to be sent a > SIGTTOU signal. 
If the calling thread is blocking SIGTTOU sig‐ > nals or the process is ignoring SIGTTOU signals, the process > shall be allowed to perform the operation, and no signal is > sent. > > Ordinarily, when job control is enabled, SIGTTOU is ignored. However, > when a trap action is specified for SIGTTOU, the signal is not ignored, > and there is no blocking in place either, so the tcsetpgrp() call is not > allowed. > > The lowest impact change to make here, the one that otherwise preserves > the existing shell behaviour, is to block signals before calling > tcsetpgrp and unblocking them afterwards. This ensures SIGTTOU does not > get raised here, but also ensures that if SIGTTOU is sent to the shell > for another reason, there is no window where it gets silently ignored. > > Another way to fix this is by not trying to make the shell start a new > process group, or at least not make it the foreground process group. > Most other shells appear to not try to do this. This patch implements the blocking of SIGTTOU (and everything else) while we call tcsetpgrp. Reported-by: Steffen Nurpmeso Signed-off-by: Herbert Xu --- src/jobs.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/jobs.c b/src/jobs.c index 516786f..809f37c 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -1512,7 +1512,13 @@ showpipe(struct job *jp, struct output *out) STATIC void xtcsetpgrp(int fd, pid_t pgrp) { - if (tcsetpgrp(fd, pgrp)) + int err; + + sigblockall(NULL); + err = tcsetpgrp(fd, pgrp); + sigclearmask(); + + if (err) sh_error("Cannot set tty process group (%s)", strerror(errno)); } #endif From 802ebd48c8febb1c1b4ce4e438518bf64677c078 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jan 2021 17:11:19 +1100 Subject: [PATCH 039/401] jobs: Always reset SIGINT/SIGQUIT handlers On Fri, Jan 08, 2021 at 08:55:41PM +0000, Harald van Dijk wrote: > On 18/05/2018 19:39, Herbert Xu wrote: > > This patch adds basic vfork support for the case of a simple command. > > ... 
@@ -879,17 +892,30 @@ forkchild(struct job *jp, union node *n, int > > mode) > > } > > } > > if (!oldlvl && iflag) { > > - setsignal(SIGINT); > > - setsignal(SIGQUIT); > > + if (mode != FORK_BG) { > > + setsignal(SIGINT); > > + setsignal(SIGQUIT); > > + } > > setsignal(SIGTERM); > > } > > + > > + if (lvforked) > > + return; > > + > > for (jp = curjob; jp; jp = jp->prev_job) > > freejob(jp); > > } > > This leaves SIGQUIT ignored in background jobs in interactive shells. > > ENV= dash -ic 'dash -c "kill -QUIT \$\$; echo huh" & wait' > > As of dash 0.5.11, this prints "huh". Before, the subprocess process killed > itself before it could print anything. Other shells do not leave SIGQUIT > ignored. > > (In a few other shells, this also prints "huh", but in those other shells, > that is because the inner shell chooses to ignore SIGQUIT, not because the > outer shell leaves it ignored.) Thanks for catching this. I have no idea how that got in there and it makes no sense whatsoever. This patch removes the if conditional. 
Fixes: e94a964e7dd0 ("eval: Add vfork support") Reported-by: Harald van Dijk Signed-off-by: Herbert Xu --- src/jobs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/jobs.c b/src/jobs.c index 809f37c..f3b9ffc 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -892,10 +892,8 @@ static void forkchild(struct job *jp, union node *n, int mode) } } if (!oldlvl && iflag) { - if (mode != FORK_BG) { - setsignal(SIGINT); - setsignal(SIGQUIT); - } + setsignal(SIGINT); + setsignal(SIGQUIT); setsignal(SIGTERM); } From de368ab886309d326a5984e565c7102cdf8b7858 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 17 May 2021 15:19:23 +0800 Subject: [PATCH 040/401] eval: Do not cache value of eflag in evaltree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patrick Brünn wrote: > > Since we are migrating to Debian bullseye, we discovered a new behavior > with our scripts, which look like this: >>#!/bin/sh >>cleanup() { >> set +e^M >> rmdir "" >>} >>set -eu >>trap 'cleanup' EXIT INT TERM >>echo 'Hello world!' > > With old dash v0.5.10.2 this script would return 0 as we expected it. > But since commit 62cf6955f8abe875752d7163f6f3adbc7e49ebae it returns > the last exit code of our cleanup function. > Reverting that commit gives a merge conflict, but it seems to fix _our_ > problem. As that topic appears too complex to us I want to ask the > experts here: > > Is this change in behavior intended, by dash? > > Our workaround at the moment would be: >>trap 'cleanup || true' EXIT INT TERM Thanks for the report. This is actually a fairly old bug with set -e that's just been exposed by the exit status change. What's really happening is that cleanup itself is triggering a set -e exit incorrectly because evaltree cached the value of eflag prior to the function call. This patch should fix the problem. 
Reported-by: Patrick Brünn Signed-off-by: Herbert Xu Tested-by: Patrick Brünn Signed-off-by: Herbert Xu --- src/eval.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/eval.c b/src/eval.c index 9476fbb..3337f71 100644 --- a/src/eval.c +++ b/src/eval.c @@ -252,18 +252,10 @@ evaltree(union node *n, int flags) popredir(0); goto setstatus; case NCMD: -#ifdef notyet - if (eflag && !(flags & EV_TESTED)) - checkexit = ~0; - status = evalcommand(n, flags, (struct backcmd *)NULL); - goto setstatus; -#else evalfn = evalcommand; checkexit: - if (eflag && !(flags & EV_TESTED)) - checkexit = ~0; + checkexit = ~flags & EV_TESTED; goto calleval; -#endif case NFOR: evalfn = evalfor; goto calleval; @@ -323,7 +315,7 @@ evaltree(union node *n, int flags) out: dotrap(); - if (checkexit & status) + if (eflag && checkexit && status) goto exexit; if (flags & EV_EXIT) { From 6f6d1f2da03468c0e131fdcbdcfa9771ffca2614 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 26 May 2021 13:49:39 +0800 Subject: [PATCH 041/401] shell: Call CHECK_DECL on stat64 On macOS it is possible to find stat64 at link-time but not at compile-time. To make the build process more robust we should check for the header file as well as the library. Reported-by: Saagar Jha Signed-off-by: Herbert Xu --- configure.ac | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 44f2f95..466df4c 100644 --- a/configure.ac +++ b/configure.ac @@ -140,11 +140,12 @@ if test "$ac_cv_func_signal" != yes; then fi dnl Check for stat64 (dietlibc/klibc). 
-AC_CHECK_FUNC(stat64,, [ +AC_CHECK_DECL(stat64, AC_CHECK_FUNC(stat64)) +if test "$ac_cv_func_stat64" != yes; then AC_DEFINE(fstat64, fstat, [64-bit operations are the same as 32-bit]) AC_DEFINE(lstat64, lstat, [64-bit operations are the same as 32-bit]) AC_DEFINE(stat64, stat, [64-bit operations are the same as 32-bit]) -]) +fi AC_CHECK_FUNC(glob64,, [ AC_DEFINE(glob64_t, glob_t, [64-bit operations are the same as 32-bit]) From 057cd650a4edd5856213d431a974ff35c6594489 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 21 Jun 2021 17:57:19 +0800 Subject: [PATCH 042/401] parser: Fix VSLENGTH parsing with trailing garbage On Sat, Jun 19, 2021 at 02:44:46PM +0200, Denys Vlasenko wrote: > > CTLVAR and CTLBACKQ are not properly handled if encountered > inside {$#...}. Testcase: > > dash -c "`printf 'echo ${#1\x82}'`" 00 111 222 > > It should execute "echo ${#1 }" and thus print "3" > (the length of $1, which is "111"). > > Instead, it segfaults. > > (Ideally, it should fail since "1 " is not a valid > variable name, but currently dash accepts e.g. "${#1abc}" > as if it is "${#1}bc". A separate, less serious bug...). In fact these two bugs are one and the same. This patch fixes both by detecting the invalid substitution and not emitting it into the node tree. Incidentally this reveals a bug in how we parse ${#10} that got introduced recently, which is also fixed here. 
Reported-by: Denys Vlasenko Fixes: 7710a926b321 ("parser: Only accept single-digit parameter...") Signed-off-by: Herbert Xu --- src/parser.c | 5 ++++- src/parser.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index 3c80d17..13c2df5 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1252,7 +1252,8 @@ parsesub: { do { STPUTC(c, out); c = pgetc_eatbnl(); - } while (!subtype && is_digit(c)); + } while ((subtype <= 0 || subtype >= VSLENGTH) && + is_digit(c)); } else if (c != '}') { int cc = c; @@ -1312,6 +1313,8 @@ parsesub: { break; } } else { + if (subtype == VSLENGTH && c != '}') + subtype = 0; badsub: pungetc(); } diff --git a/src/parser.h b/src/parser.h index 524ac1c..7d2749b 100644 --- a/src/parser.h +++ b/src/parser.h @@ -62,6 +62,7 @@ #define VSTRIMLEFT 0x8 /* ${var#pattern} */ #define VSTRIMLEFTMAX 0x9 /* ${var##pattern} */ #define VSLENGTH 0xa /* ${#var} */ +/* VSLENGTH must come last. */ /* values of checkkwd variable */ #define CHKALIAS 0x1 From dcf4ee38025a06e7392063f02f37afc0c08e0e2f Mon Sep 17 00:00:00 2001 From: Ron Yorston Date: Sun, 5 Sep 2021 09:36:11 +0100 Subject: [PATCH 043/401] input: Remove special case for unget EOF Commit 17db43b5841504b694203952fb0e82246c06a97f (input: Allow two consecutive calls to pungetc) ensures that EOF is handled like any other character with respect to unget. As a result it's possible to remove the special case for unget of EOF in preadbuffer. 
Signed-off-by: Ron Yorston Signed-off-by: Herbert Xu --- src/input.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/input.c b/src/input.c index d7c101b..ec075f5 100644 --- a/src/input.c +++ b/src/input.c @@ -58,7 +58,6 @@ #include "myhistedit.h" #endif -#define EOF_NLEFT -99 /* value of parsenleft when EOF pushed back */ #define IBUFSIZ (BUFSIZ + 1) @@ -220,9 +219,8 @@ preadfd(void) * Refill the input buffer and return the next input character: * * 1) If a string was pushed back on the input, pop it; - * 2) If an EOF was pushed back (parsenleft == EOF_NLEFT) or we are reading - * from a string so we can't refill the buffer, return EOF. - * 3) If the is more stuff in this buffer, use it else call read to fill it. + * 2) If we are reading from a string we can't refill the buffer, return EOF. + * 3) If there is more stuff in this buffer, use it else call read to fill it. * 4) Process input up to the next newline, deleting nul characters. */ @@ -239,8 +237,7 @@ static int preadbuffer(void) popstring(); return __pgetc(); } - if (unlikely(parsefile->nleft == EOF_NLEFT || - parsefile->buf == NULL)) + if (parsefile->buf == NULL) return PEOF; flushall(); @@ -248,7 +245,7 @@ static int preadbuffer(void) if (more <= 0) { again: if ((more = preadfd()) <= 0) { - parsefile->lleft = parsefile->nleft = EOF_NLEFT; + parsefile->lleft = parsefile->nleft = 0; return PEOF; } } From 8f9cca055bc661c4c690a5f5e1ca71370d129bc3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 19 Jan 2022 16:37:54 +1100 Subject: [PATCH 044/401] expand: Always quote caret when using fnmatch This patch forces ^ to be a literal when we use fnmatch. In order to allow for the extra space to quote the caret, the function _rmescapes will allocate up to twice the memory if the flag RMESCAPE_GLOB is set. 
Fixes: 7638476c18f2 ("shell: Enable fnmatch/glob by default") Reported-by: Christoph Anton Mitterer Suggested-by: Harald van Dijk Signed-off-by: Herbert Xu --- src/expand.c | 66 ++++++++++++++++++++++++++++++-------------------- src/mystring.c | 7 +++++- src/mystring.h | 9 ++++++- 3 files changed, 54 insertions(+), 28 deletions(-) diff --git a/src/expand.c b/src/expand.c index aea5cc4..9906d8a 100644 --- a/src/expand.c +++ b/src/expand.c @@ -135,8 +135,6 @@ STATIC int pmatch(const char *, const char *); #endif static size_t cvtnum(intmax_t num, int flags); STATIC size_t esclen(const char *, const char *); -STATIC char *scanleft(char *, char *, char *, char *, int, int); -STATIC char *scanright(char *, char *, char *, char *, int, int); STATIC void varunset(const char *, const char *, const char *, int) __attribute__((__noreturn__)); @@ -541,10 +539,8 @@ expbackq(union node *cmd, int flag) } -STATIC char * -scanleft( - char *startp, char *rmesc, char *rmescend, char *str, int quotes, - int zero +static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend, + char *str, int quotes, int zero ) { char *loc; char *loc2; @@ -573,16 +569,14 @@ scanleft( } -STATIC char * -scanright( - char *startp, char *rmesc, char *rmescend, char *str, int quotes, - int zero +static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend, + char *str, int quotes, int zero ) { int esc = 0; char *loc; char *loc2; - for (loc = str - 1, loc2 = rmescend; loc >= startp; loc2--) { + for (loc = endp, loc2 = rmescend; loc >= startp; loc2--) { int match; char c = *loc2; const char *s = loc2; @@ -618,7 +612,9 @@ static char *subevalvar(char *start, char *str, int strloc, int startloc, long amount; char *rmesc, *rmescend; int zero; - char *(*scan)(char *, char *, char *, char *, int , int); + char *(*scan)(char *, char *, char *, char *, char *, int , int); + int nstrloc = strloc; + char *endp; char *p; p = argstr(start, (flag & EXP_DISCARD) | EXP_TILDE | @@ -646,33 
+642,40 @@ static char *subevalvar(char *start, char *str, int strloc, int startloc, abort(); #endif - rmesc = startp; rmescend = stackblock() + strloc; + str = preglob(rmescend, FNMATCH_IS_ENABLED ? + RMESCAPE_ALLOC | RMESCAPE_GROW : 0); + if (FNMATCH_IS_ENABLED) { + startp = stackblock() + startloc; + rmescend = stackblock() + strloc; + nstrloc = str - (char *)stackblock(); + } + + rmesc = startp; if (quotes) { rmesc = _rmescapes(startp, RMESCAPE_ALLOC | RMESCAPE_GROW); - if (rmesc != startp) { + if (rmesc != startp) rmescend = expdest; - startp = stackblock() + startloc; - } + startp = stackblock() + startloc; + str = stackblock() + nstrloc; } rmescend--; - str = stackblock() + strloc; - preglob(str, 0); /* zero = subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX */ zero = subtype >> 1; /* VSTRIMLEFT/VSTRIMRIGHTMAX -> scanleft */ scan = (subtype & 1) ^ zero ? scanleft : scanright; - loc = scan(startp, rmesc, rmescend, str, quotes, zero); + endp = stackblock() + strloc - 1; + loc = scan(startp, endp, rmesc, rmescend, str, quotes, zero); if (loc) { if (zero) { - memmove(startp, loc, str - loc); - loc = startp + (str - loc) - 1; + memmove(startp, loc, endp - loc); + loc = startp + (endp - loc); } *loc = '\0'; } else - loc = str - 1; + loc = endp; out: amount = loc - expdest; @@ -1501,7 +1504,9 @@ msort(struct strlist *list, int len) STATIC inline int patmatch(char *pattern, const char *string) { - return pmatch(preglob(pattern, 0), string); + return pmatch(preglob(pattern, FNMATCH_IS_ENABLED ? 
+ RMESCAPE_ALLOC | RMESCAPE_GROW : 0), + string); } @@ -1654,15 +1659,22 @@ _rmescapes(char *str, int flag) int notescaped; int globbing; - p = strpbrk(str, qchars); + p = strpbrk(str, cqchars); if (!p) { return str; } q = p; r = str; + globbing = flag & RMESCAPE_GLOB; + if (flag & RMESCAPE_ALLOC) { size_t len = p - str; - size_t fulllen = len + strlen(p) + 1; + size_t fulllen = strlen(p); + + if (FNMATCH_IS_ENABLED && globbing) + fulllen *= 2; + + fulllen += len + 1; if (flag & RMESCAPE_GROW) { int strloc = str - (char *)stackblock(); @@ -1680,7 +1692,6 @@ _rmescapes(char *str, int flag) q = mempcpy(q, str, len); } } - globbing = flag & RMESCAPE_GLOB; notescaped = globbing; while (*p) { if (*p == (char)CTLQUOTEMARK) { @@ -1693,8 +1704,11 @@ _rmescapes(char *str, int flag) notescaped = 0; goto copy; } + if (FNMATCH_IS_ENABLED && *p == '^') + goto add_escape; if (*p == (char)CTLESC) { p++; +add_escape: if (notescaped) *q++ = '\\'; } diff --git a/src/mystring.c b/src/mystring.c index de624b8..ed3c8f6 100644 --- a/src/mystring.c +++ b/src/mystring.c @@ -62,7 +62,12 @@ const char spcstr[] = " "; const char snlfmt[] = "%s\n"; const char dolatstr[] = { CTLQUOTEMARK, CTLVAR, VSNORMAL, '@', '=', CTLQUOTEMARK, '\0' }; -const char qchars[] = { CTLESC, CTLQUOTEMARK, 0 }; +const char cqchars[] = { +#ifdef HAVE_FNMATCH + '^', +#endif + CTLESC, CTLQUOTEMARK, 0 +}; const char illnum[] = "Illegal number: %s"; const char homestr[] = "HOME"; diff --git a/src/mystring.h b/src/mystring.h index 083ea98..564b911 100644 --- a/src/mystring.h +++ b/src/mystring.h @@ -37,11 +37,18 @@ #include #include +#ifdef HAVE_FNMATCH +#define FNMATCH_IS_ENABLED 1 +#else +#define FNMATCH_IS_ENABLED 0 +#endif + extern const char snlfmt[]; extern const char spcstr[]; extern const char dolatstr[]; #define DOLATSTRLEN 6 -extern const char qchars[]; +extern const char cqchars[]; +#define qchars (cqchars + FNMATCH_IS_ENABLED) extern const char illnum[]; extern const char homestr[]; From 
9036ee8d7140f9f34988f00468dda94a69ace8f6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 5 Dec 2022 23:02:01 +0800 Subject: [PATCH 045/401] expand: Add ifsfree to expand to fix a logic error that causes a buffer over-read On Mon, Jun 20, 2022 at 02:27:10PM -0400, Alex Gorinson wrote: > Due to a logic error in the ifsbreakup function in expand.c if a > heredoc and normal command is run one after the other by means of a > semi-colon, when the second command drops into ifsbreakup the command > will be evaluated with the ifslastp/ifsfirst struct that was set when > the here doc was evaluated. This results in a buffer over-read that > can leak the program's heap, stack, and arena addresses which can be > used to beat ASLR. > > Steps to Reproduce: > First bug: > cmd args: ~/exampleDir/example> dash > $ M='AAAAAAAAAAAAAAAAA' > $ q00(){ > $ <<000;echo > $ ${D?$M$M$M$M$M$M} > $ 000 > $ } > $ q00 should be echo'd out; this works with ash, busybox ash, and dash and > with all option args.> > > Patch: > Adding the following to expand.c will fix both bugs in one go. > (Thank you to Harald van Dijk and Michael Greenberg for doing the > heavy lifting for this patch!) > ========================== > --- a/src/expand.c > +++ b/src/expand.c > @@ -859,6 +859,7 @@ > if (discard) > return -1; > > +ifsfree(); > sh_error("Bad substitution"); > } > > @@ -1739,6 +1740,7 @@ > } else > msg = umsg; > } > +ifsfree(); > sh_error("%.*s: %s%s", end - var - 1, var, msg, tail); > } > ========================== Thanks for the report! I think it's better to add the ifsfree() call to the exception handling path as other sh_error calls may trigger this too. 
Reported-by: Alex Gorinson Signed-off-by: Herbert Xu --- src/expand.c | 10 ++++++++++ src/expand.h | 3 +++ src/parser.c | 4 +--- src/redir.c | 4 +--- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/expand.c b/src/expand.c index 9906d8a..fe6215a 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1777,6 +1777,16 @@ varunset(const char *end, const char *var, const char *umsg, int varflags) sh_error("%.*s: %s%s", end - var - 1, var, msg, tail); } +void restore_handler_expandarg(struct jmploc *savehandler, int err) +{ + handler = savehandler; + if (err) { + if (exception != EXERROR) + longjmp(handler->loc, 1); + ifsfree(); + } +} + #ifdef mkinit INCLUDE "expand.h" diff --git a/src/expand.h b/src/expand.h index c44b848..49a18f9 100644 --- a/src/expand.h +++ b/src/expand.h @@ -62,7 +62,9 @@ struct arglist { #define EXP_DISCARD 0x400 /* discard result of expansion */ +struct jmploc; union node; + void expandarg(union node *, struct arglist *, int); #define rmescapes(p) _rmescapes((p), 0) char *_rmescapes(char *, int); @@ -71,6 +73,7 @@ void recordregion(int, int, int); void removerecordregions(int); void ifsbreakup(char *, int, struct arglist *); void ifsfree(void); +void restore_handler_expandarg(struct jmploc *savehandler, int err); /* From arith.y */ intmax_t arith(const char *); diff --git a/src/parser.c b/src/parser.c index 13c2df5..bf94697 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1589,9 +1589,7 @@ expandstr(const char *ps) result = stackblock(); out: - handler = savehandler; - if (err && exception != EXERROR) - longjmp(handler->loc, 1); + restore_handler_expandarg(savehandler, err); doprompt = saveprompt; unwindfiles(file_stop); diff --git a/src/redir.c b/src/redir.c index 93abba3..5a5835c 100644 --- a/src/redir.c +++ b/src/redir.c @@ -473,9 +473,7 @@ redirectsafe(union node *redir, int flags) handler = &jmploc; redirect(redir, flags); } - handler = savehandler; - if (err && exception != EXERROR) - longjmp(handler->loc, 1); + 
restore_handler_expandarg(savehandler, err); RESTOREINT(saveint); return err; } From 7148b08ba58e29e8e57355230f467d269aa61b9c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 6 Dec 2022 16:49:14 +0800 Subject: [PATCH 046/401] eval: Always set exitstatus in evaltree There is no harm in setting exitstatus unconditionally in evaltree. Signed-off-by: Herbert Xu --- src/eval.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/eval.c b/src/eval.c index 3337f71..eda251e 100644 --- a/src/eval.c +++ b/src/eval.c @@ -239,7 +239,7 @@ evaltree(union node *n, int flags) #endif case NNOT: status = !evaltree(n->nnot.com, EV_TESTED); - goto setstatus; + break; case NREDIR: errlinno = lineno = n->nredir.linno; if (funcline) @@ -250,7 +250,7 @@ evaltree(union node *n, int flags) evaltree(n->nredir.n, flags & EV_TESTED); if (n->nredir.redirect) popredir(0); - goto setstatus; + break; case NCMD: evalfn = evalcommand; checkexit: @@ -292,7 +292,7 @@ evaltree(union node *n, int flags) evalfn = evaltree; calleval: status = evalfn(n, flags); - goto setstatus; + break; case NIF: status = evaltree(n->nif.test, EV_TESTED); if (evalskip) @@ -305,13 +305,14 @@ evaltree(union node *n, int flags) goto evaln; } status = 0; - goto setstatus; + break; case NDEFUN: defun(n); -setstatus: - exitstatus = status; break; } + + exitstatus = status; + out: dotrap(); From f42ee97f9e6fa15b7b6d85bb2faace4cadc1613e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 7 Dec 2022 11:59:20 +0800 Subject: [PATCH 047/401] eval: Check eflag after redirection error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit > > This is a POSIX violation, and quite a grave one at that: > set -e is oft[1] used to guard against precisely this type of error! > > The same happens if set -e is executed. 
> > All quotes POSIX.1, Issue 7, TC2: > sh, OPTIONS: > > The -a, -b, -C, -e, -f, -m, -n, -o option, -u, -v, and -x options > > are described as part of the set utility in Special Built-In > > Utilities. > > set, DESCRIPTION, -e: > > When this option is on, when any command fails (for any of the > > reasons listed in Consequences of Shell Errors or by returning an > > exit status greater than zero), the shell immediately shall exit, as > > if by executing the exit special built-in utility with no arguments, > > with the following exceptions: > > > > 1. The failure of any individual command in a multi-command pipeline > > shall not cause the shell to exit. Only the failure of the > > pipeline itself shall be considered. > > 2. The -e setting shall be ignored when executing the compound list > > following the while, until, if, or elif reserved word, a pipeline > > beginning with the ! reserved word, or any command of an AND-OR > > list other than the last. > > 3. If the exit status of a compound command other than a subshell > > command was the result of a failure while -e was being ignored, > > then -e shall not apply to this command. > > XCU, 2.9.4: Shell Command Language, Shell Commands, Compound Commands: > The while Loop: > > The format of the while loop is as follows: > > > > while compound-list-1 > > do > > compound-list-2 > > done > (until is equivalent). > The if Conditional Construct: > > The format for the if construct is as follows: > > > > if compound-list > > then > > compound-list > > [elif compound-list > > then > > compound-list] ... > > [else > > compound-list] > > fi > > It follows, therefore, that > * Exception 1. does not apply as there is no pipeline > * Exception 2. does not apply, as the redirection does /not/ follow > "while" or "if" directly and is /not/ part of the conditional > compound-list > * in the "for" case, there is no such provision, so this is likely not > a confusion w.r.t. the conditional compound-lists > * Exception 3. 
does not apply as -e was not being ignored while the > compound commands were being executed (indeed, the compound commands > do not run at all, as evidenced by the program terminating) > > [1]: https://salsa.debian.org/glibc-team/glibc/-/merge_requests/6#note_329899 > ----- End forwarded message ----- Yes we should check the exit status after redirections. Reported-by: наб Signed-off-by: Herbert Xu --- src/eval.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/eval.c b/src/eval.c index eda251e..7aa5cc2 100644 --- a/src/eval.c +++ b/src/eval.c @@ -246,15 +246,18 @@ evaltree(union node *n, int flags) lineno -= funcline - 1; expredir(n->nredir.redirect); pushredir(n->nredir.redirect); - status = redirectsafe(n->nredir.redirect, REDIR_PUSH) ?: - evaltree(n->nredir.n, flags & EV_TESTED); + status = redirectsafe(n->nredir.redirect, REDIR_PUSH); + if (status) + checkexit = EV_TESTED; + else + status = evaltree(n->nredir.n, flags & EV_TESTED); if (n->nredir.redirect) popredir(0); break; case NCMD: evalfn = evalcommand; checkexit: - checkexit = ~flags & EV_TESTED; + checkexit = EV_TESTED; goto calleval; case NFOR: evalfn = evalfor; @@ -316,7 +319,7 @@ evaltree(union node *n, int flags) out: dotrap(); - if (eflag && checkexit && status) + if (eflag && (~flags & checkexit) && status) goto exexit; if (flags & EV_EXIT) { From f0d57fded5b1a4b0aa6f0571a316cb9482ef3af8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 7 Dec 2022 16:48:26 +0800 Subject: [PATCH 048/401] parser: Add VSBIT to ensure subtype is never zero Harald van Dijk wrote: > On 21/11/2022 13:08, Harald van Dijk wrote: >> On 21/11/2022 02:38, Christoph Anton Mitterer wrote: >>> reject_filtered_cmd() >>> { >>> reject_and_die "disallowed command${restrict_path_list:+ >>> (restrict-path: \"${restrict_path_list//|/\", \"}\")}" >>> } >>> >>> reject_filtered_cmd >>[...] >> This should either result in the ${...//...} being skipped, or the "Bad >> substitution" error. 
Currently, what happens instead is it attempts, but >> fails, to skip the ${...//...}. > > The reason it fails is because the word is cut off. > > Variable substitutions are encoded as a CTLVAR special character, > followed by a byte indicating the type of substitution, followed by the > rest of the substitution data. The type of substitution is the VSNORMAL, > VSMINUS, etc. seen in parser.h. An invalid substitution is encoded as a > value of 0. > > When we define a function, we clone the function body in order to > preserve it. Cloning the function body is done by cloning each node. > Cloning a "word" node (NARG) involves copying the characters that make > up the word up to and including the terminating null byte. > > These two interact badly. The invalid substitution is seen as > terminating the word, the rest of the word is not copied, but the > expansion code does not have any way of seeing that anything got cut off > and happily continues attempting to process the rest of the word. > > If dash decides to issue an error in this case, this is not a problem: > the null byte is guaranteed to be copied, and if processing is > guaranteed to stop if a null byte is encountered, everything works out. > > If dash decides to not issue an error in this case, the encoding of bad > substitutions needs to change to a non-null byte. It appears that if we > set the byte to VSNUL, the expansion logic is already able to handle it, > but I have not tested this extensively. Thanks for the analysis Harald! This patch does basically what you've described except it uses a new bit to avoid any confusion with a genuine VSNUL. 
Fixes: 3df3edd13389 ("[PARSER] Report substition errors at...") Reported-by: Christoph Anton Mitterer Signed-off-by: Herbert Xu Cheers, Signed-off-by: Herbert Xu --- src/expand.c | 2 +- src/mystring.c | 2 +- src/parser.c | 2 +- src/parser.h | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/expand.c b/src/expand.c index fe6215a..2ed02d6 100644 --- a/src/expand.c +++ b/src/expand.c @@ -704,7 +704,7 @@ evalvar(char *p, int flag) int discard; int quoted; - varflags = *p++; + varflags = *p++ & ~VSBIT; subtype = varflags & VSTYPE; quoted = flag & EXP_QUOTED; diff --git a/src/mystring.c b/src/mystring.c index ed3c8f6..f651521 100644 --- a/src/mystring.c +++ b/src/mystring.c @@ -60,7 +60,7 @@ char nullstr[1]; /* zero length string */ const char spcstr[] = " "; const char snlfmt[] = "%s\n"; -const char dolatstr[] = { CTLQUOTEMARK, CTLVAR, VSNORMAL, '@', '=', +const char dolatstr[] = { CTLQUOTEMARK, CTLVAR, VSNORMAL | VSBIT, '@', '=', CTLQUOTEMARK, '\0' }; const char cqchars[] = { #ifdef HAVE_FNMATCH diff --git a/src/parser.c b/src/parser.c index bf94697..a552c47 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1333,7 +1333,7 @@ parsesub: { synstack->dblquote = newsyn != BASESYNTAX; } - *((char *)stackblock() + typeloc) = subtype; + *((char *)stackblock() + typeloc) = subtype | VSBIT; if (subtype != VSNORMAL) { synstack->varnest++; if (synstack->dblquote) diff --git a/src/parser.h b/src/parser.h index 7d2749b..729c15c 100644 --- a/src/parser.h +++ b/src/parser.h @@ -50,6 +50,7 @@ /* variable substitution byte (follows CTLVAR) */ #define VSTYPE 0x0f /* type of variable substitution */ #define VSNUL 0x10 /* colon--treat the empty string as unset */ +#define VSBIT 0x20 /* Ensure subtype is not zero */ /* values of VSTYPE field */ #define VSNORMAL 0x1 /* normal variable: $var or ${var} */ From d5d616126ccc309d441fa82bb6a5bbf3377bd0e9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 7 Dec 2022 16:55:07 +0800 Subject: [PATCH 049/401] eval: Test evalskip 
before flipping status for NNOT On Tue, Dec 06, 2022 at 10:15:03AM +0000, Harald van Dijk wrote: > > There is a long-standing bug that may or may not be harder to fix if this > patch goes in, depending on how you want to fix it. Here's a script that > already fails on current dash. > > f() { > if ! return 0 > then : > fi > } > f > > This should return 0, and does return 0 in bash and ksh (and almost all > shells), but returns 1 in dash. > > There are a few possible ways of fixing it. Some of them rely on continuing > to conditionally set exitstatus. This can be fixed simply by testing evalskip prior to flipping the status. Reported-by: Harald van Dijk Signed-off-by: Herbert Xu --- src/eval.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/eval.c b/src/eval.c index 7aa5cc2..fa43b68 100644 --- a/src/eval.c +++ b/src/eval.c @@ -238,7 +238,9 @@ evaltree(union node *n, int flags) break; #endif case NNOT: - status = !evaltree(n->nnot.com, EV_TESTED); + status = evaltree(n->nnot.com, EV_TESTED); + if (!evalskip) + status = !status; break; case NREDIR: errlinno = lineno = n->nredir.linno; From 4bbf8721a3ac6401ced6a0454956801f6ba37256 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 11 Dec 2022 14:33:43 +0800 Subject: [PATCH 050/401] Release 0.5.12. 
--- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 466df4c..52aa429 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([dash],[0.5.11]) +AC_INIT([dash],[0.5.12]) AM_INIT_AUTOMAKE([foreign subdir-objects]) AC_CONFIG_SRCDIR([src/main.c]) From 07cad228ae4d1567aeb6d13d2af83d0a28d13b67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Wed, 14 Dec 2022 02:06:05 +0100 Subject: [PATCH 051/401] parser: Invalid redirections are run-time, not syntax errors This fixes a long-standing bug where echo 'echo >&a' | sh errors out with sh: 2: Syntax error: Bad fd number despite the error being on line 1 This patch makes the error sh: 1: Bad fd number: a as expected Adapted-from: https://github.com/hvdijk/gwsh/commit/d279523041c1c380d64b6dec7760feba20bbf6b5 Signed-off-by: Herbert Xu --- src/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index a552c47..8a06b9e 100644 --- a/src/parser.c +++ b/src/parser.c @@ -615,7 +615,7 @@ void fixredir(union node *n, const char *text, int err) else { if (err) - synerror("Bad fd number"); + sh_error("Bad fd number: %s", text); else n->ndup.vname = makename(); } From 8dbbff647de1e3a1510866fa1ec983c3b88f432c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Wed, 14 Dec 2022 03:51:13 +0100 Subject: [PATCH 052/401] builtin: Don't early-exit when first hash -r is found This fixes incorrectly-accepted "hash -rv" being equivalent to hash -r (well, hash -r[literally anything] being equivalent to hash -r) Also remove -v from the manual, it doesn't appear to have ever existed Link: https://bugs.debian.org/819829 Signed-off-by: Herbert Xu --- src/dash.1 | 6 ++---- src/exec.c | 8 +++++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/dash.1 b/src/dash.1 index ff02237..3e09090 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -1441,7 +1441,8 @@ cmd \-a \-c arg file file cmd 
\-carg -a file file cmd \-a \-carg \-\- file file .Ed -.It hash Fl rv Ar command ... +.It hash Op Ar command ... +.It hash Fl r The shell maintains a hash table which remembers the locations of commands. With no arguments whatsoever, @@ -1457,9 +1458,6 @@ With arguments, the .Ic hash command removes the specified commands from the hash table (unless they are functions) and then locates them. -With the -.Fl v -option, hash prints the locations of the commands as it finds them. The .Fl r option causes the hash command to delete all the entries in the hash table diff --git a/src/exec.c b/src/exec.c index 87354d4..d7a1f53 100644 --- a/src/exec.c +++ b/src/exec.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #ifdef HAVE_PATHS_H #include @@ -271,11 +272,16 @@ hashcmd(int argc, char **argv) int c; struct cmdentry entry; char *name; + bool clear; - while ((c = nextopt("r")) != '\0') { + clear = false; + while ((c = nextopt("r")) != '\0') + clear = true; + if(clear) { clearcmdentry(); return 0; } + if (*argptr == NULL) { for (pp = cmdtable ; pp < &cmdtable[CMDTABLESIZE] ; pp++) { for (cmdp = *pp ; cmdp ; cmdp = cmdp->next) { From 5c55b53f2b028228837f712b3c3e13363c3310e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Wed, 14 Dec 2022 17:52:04 +0100 Subject: [PATCH 053/401] man: Fix getopts documentation The explicit arguments were missing, also exchange expr subst for arithmetic and fix the spacing around Bell Labs Signed-off-by: Herbert Xu --- src/dash.1 | 13 +++++++++---- src/options.c | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/dash.1 b/src/dash.1 index 3e09090..1683d43 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -1342,13 +1342,12 @@ The number of previous commands that are accessible. .El .It fg Op Ar job Move the specified job or the current job to the foreground. -.It getopts Ar optstring var +.It getopts Ar optstring var Op Ar arg ... 
The .Tn POSIX .Ic getopts command, not to be confused with the -.Em Bell Labs --derived +.Em Bell Labs Ns -derived .Xr getopt 1 . .Pp The first argument should be a series of letters, each of which may be @@ -1386,6 +1385,12 @@ then .Ev OPTARG will be unset. .Pp +By default, the variables +.Va $1 , ... , $n +are inspected; if +.Ar arg Ns s +are specified, they'll be parsed instead. +.Pp .Va optstring is a string of recognized option letters (see .Xr getopt 3 ) . @@ -1430,7 +1435,7 @@ do \\?) echo $USAGE; exit 1;; esac done -shift `expr $OPTIND - 1` +shift $((OPTIND - 1)) .Ed .Pp This code will accept any of the following as equivalent: diff --git a/src/options.c b/src/options.c index a46c23b..3158498 100644 --- a/src/options.c +++ b/src/options.c @@ -410,7 +410,7 @@ getoptscmd(int argc, char **argv) char **optbase; if (argc < 3) - sh_error("Usage: getopts optstring var [arg]"); + sh_error("Usage: getopts optstring var [arg...]"); else if (argc == 3) { optbase = shellparam.p; if ((unsigned)shellparam.optind > shellparam.nparam + 1) { From d483fa794db46b4e09708b8c98b93d53bee6ed64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 15 Dec 2022 01:22:18 +0100 Subject: [PATCH 054/401] man: Document jobs builtin Link: https://bugs.debian.org/558607 Signed-off-by: Herbert Xu --- src/dash.1 | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/dash.1 b/src/dash.1 index 1683d43..6a9f673 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -1467,6 +1467,20 @@ The .Fl r option causes the hash command to delete all the entries in the hash table except for functions. +.It jobs Oo Fl lp Oc Op Ar job ... +Display the status of all, or just the specified, +.Ar job Ns s : +.Bl -tag -compact -offset 5n -width "By default" +.It By default +display the job number, currency +.Pq Sy +- +status, if any, the job state, and its shell command. +.It Fl l +also output the PID of the group leader, and just the PID and shell commands +of other members of the job. 
+.It Fl p +Display only leader PIDs, one per line. +.El .It pwd Op Fl LP builtin command remembers what the current directory is rather than recomputing it each time. From 76d4cc6797251be200dfa25a2bcdfbaed5f6a47a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 15 Dec 2022 01:22:54 +0100 Subject: [PATCH 055/401] man: Note chdir being the same builtin as cd Signed-off-by: Herbert Xu --- src/dash.1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dash.1 b/src/dash.1 index 6a9f673..89a5a12 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -1143,8 +1143,8 @@ Do not execute the command but search for the command and print the absolute pathname of utilities, the name for builtins or the expansion of aliases. .El -.It cd Ar - -.It Xo cd Op Fl LP +.It cd|chdir Ar - +.It Xo cd|chdir Op Fl LP .Op Ar directory .Xc Switch to the specified directory (default From 447bcdca000f218d56c6a6c5ef610e5d61a917cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 15 Dec 2022 01:23:01 +0100 Subject: [PATCH 056/401] man: Document kill builtin The manual now contains all built-ins Signed-off-by: Herbert Xu --- src/dash.1 | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/dash.1 b/src/dash.1 index 89a5a12..ce75ce3 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -1481,6 +1481,33 @@ of other members of the job. .It Fl p Display only leader PIDs, one per line. .El +.It kill Oo Fl s Ar sigspec | Fl Ns Ar signum | Fl Ns Ar sigspec Oc Op Ar pid | job ... +Equivalent to +.Xr kill 1 , +but a +.Ar job +spec may also be specified. +Signals can be either case-insensitive names without +.Dv SIG +prefixes or decimal numbers; the default is +.Dv TERM . +.It kill Fl l Op Ar signum | exitstatus +List available signal names without the +.Dv SIG +prefix +.Pq Ar sigspec Ns s . +If +.Ar signum +specified, display just the +.Ar sigspec +for that signal. 
+If +.Ar exitstatus +specified +.Pq > Sy 128 , +display just the +.Ar sigspec +that caused it. .It pwd Op Fl LP builtin command remembers what the current directory is rather than recomputing it each time. From f1a72d429d3c0cf8449a5e8389f7991776d32c0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 15 Dec 2022 01:25:54 +0100 Subject: [PATCH 057/401] man: Reword to avoid confusion v/v printf Ar argument[s]/arguments The current wording says that given printf a b c d a is the format, c and d are processed as noted, but b is unspecified Signed-off-by: Herbert Xu --- src/dash.1 | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/dash.1 b/src/dash.1 index ce75ce3..98bb701 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -1572,30 +1572,26 @@ With the option specified the output will be formatted suitably for non-interactive use. .\".Pp .It Xo printf Ar format -.Op Ar arguments ... +.Oo Ar value Oc Ns ... .Xc .Ic printf -formats and prints its arguments, after the first, under control -of the -.Ar format . -The -.Ar format -is a character string which contains three types of objects: plain characters, +formats and prints its arguments according to +.Ar format , +a character string which contains three types of objects: plain characters, which are simply copied to standard output, character escape sequences which are converted and copied to the standard output, and format specifications, each of which causes printing of the next successive -.Ar argument . +.Ar value . 
.Pp -The -.Ar arguments -after the first are treated as strings if the corresponding format is +Each +.Ar value +is treated as a string if the corresponding format specification is either .Cm b , -.Cm c +.Cm c , or .Cm s ; -otherwise it is evaluated as a C constant, with the following extensions: -.Pp +otherwise it is evaluated as a C constant, with the following additions: .Bl -bullet -offset indent -compact .It A leading plus or minus sign is allowed. @@ -1605,8 +1601,9 @@ If the leading character is a single or double quote, the value is the code of the next character. .El .Pp -The format string is reused as often as necessary to satisfy the -.Ar arguments . +The format string is reused as often as necessary until all +.Ar value Ns s +are consumed. Any extra format specifications are evaluated with zero or the null string. .Pp From fc61c2f1b0172c14ce7f6df65be755b54cee1875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 15 Dec 2022 01:25:59 +0100 Subject: [PATCH 058/401] man: printf 'X, X is a byte under dash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multiple issues: * the encoding is not always ASCII * what ASCII code is assigned to я * dash isn't internationalised (this is nonconformant but out of scope), and uses the next /byte/; in a UTF-8 locale: $ printf %d\\n \'ą 196 $ printf %d\\n \'я 196 this is in contrast to POSIX (and bash), which says: > If the leading character is a single-quote or double-quote, > the value shall be the numeric value in the underlying codeset > of the character following the single-quote or double-quote. (i.e. 
mbrtowc(&val, argv[n], ...)) Signed-off-by: Herbert Xu --- src/dash.1 | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/dash.1 b/src/dash.1 index 98bb701..d077e1d 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -1596,9 +1596,7 @@ otherwise it is evaluated as a C constant, with the following additions: .It A leading plus or minus sign is allowed. .It -If the leading character is a single or double quote, the value is the -.Tn ASCII -code of the next character. +If the leading character is a single or double quote, the value of the next byte. .El .Pp The format string is reused as often as necessary until all From 2beac674c4862a7129eca3a8cb7d27cee7ff667b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 15 Dec 2022 21:51:19 +0100 Subject: [PATCH 059/401] man: Document false built-in Only true was documented, add false just below it (out of order, but so is true, and the grouping makes much more sense). Signed-off-by: Herbert Xu --- src/dash.1 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/dash.1 b/src/dash.1 index d077e1d..f143a4a 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -1095,6 +1095,8 @@ etc). .It : .It true A null command that returns a 0 (true) exit value. +.It false +A null command that returns a 1 (false) exit value. .It \&. file The commands in the specified file are read and executed by the shell. .It alias Op Ar name Ns Op Ar "=string ..." From 4bdefd16c6ea4b5b7c2b4dc2fccf5226401e13b7 Mon Sep 17 00:00:00 2001 From: Vincent Lefevre Date: Fri, 16 Dec 2022 18:20:19 +0100 Subject: [PATCH 060/401] builtin: Actually accept ulimit -r The original commit that added it supposes this works, but it only adds it to the ulimit -a listing and the manual, but doesn't allow it as an option. 
Fixes: 46abc8c6d8a5 ("[BUILTIN] Add support for ulimit -r") Link: https://bugs.debian.org/975326 Signed-off-by: Herbert Xu --- src/miscbltin.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/miscbltin.c b/src/miscbltin.c index 5ccbbcb..e553f9e 100644 --- a/src/miscbltin.c +++ b/src/miscbltin.c @@ -440,6 +440,9 @@ ulimitcmd(int argc, char **argv) #endif #ifdef RLIMIT_LOCKS "w" +#endif +#ifdef RLIMIT_RTPRIO + "r" #endif )) != '\0') switch (optc) { From c3b97c70d8ffec83122caf2bfd0489380610217c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Sat, 17 Dec 2022 16:17:17 +0100 Subject: [PATCH 061/401] man: Document ulimit -w And fix the synopsis. Fixes: 05c1076ba2d1 ("Initial import.") Link: https://bugs.debian.org/850202 Signed-off-by: Herbert Xu --- src/dash.1 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/dash.1 b/src/dash.1 index f143a4a..d3893bc 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -2162,7 +2162,7 @@ printed; for commands and tracked aliases the complete pathname of the command is printed. .It ulimit Xo .Op Fl H \*(Ba Fl S -.Op Fl a \*(Ba Fl tfdscmlpnv Op Ar value +.Op Fl a \*(Ba Fl tfdscmlpnvwr Op Ar value .Xc Inquire about or set the hard or soft limits on processes or set new limits.
@@ -2215,6 +2215,8 @@ show or set the limit on the number files a process can have open at once .It Fl v show or set the limit on the total virtual memory that can be in use by a process (in kilobytes) +.It Fl w +show or set the limit on the total number of locks held by a process .It Fl r show or set the limit on the real-time scheduling priority of a process .El From 91a375576d37bb4db1eca48e6bf5bac0db6cc3fa Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 3 Jan 2023 13:32:41 +0800 Subject: [PATCH 062/401] input: Eat rest of line upon reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Interactively, sh_error() doesn't terminate, so echo "|$(printf %10000s)echo bug" | sh -i would read the first 8KiB, see that it's invalid, then jump back to the parser, which would then read and execute the rest of the line as-if it were the next line. The fix for this is to explicitly consume the rest of the invalid line, so that the next line observed is /actually/ the next line. This is difficult to trigger accidentally right now, since we consume the entire icanon line buffer at once (provided it's <8k, which it ~always is interactively), so we always observe one line at a time, but the next patch would make even "| echo bug" blow up. 
Reported-by: наб Signed-off-by: Herbert Xu --- src/input.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/input.c b/src/input.c index ec075f5..cff15b5 100644 --- a/src/input.c +++ b/src/input.c @@ -77,6 +77,7 @@ INCLUDE INCLUDE INCLUDE "input.h" INCLUDE "error.h" +INCLUDE "syntax.h" INIT { basepf.nextc = basepf.buf = basebuf; @@ -85,9 +86,11 @@ INIT { RESET { /* clear input buffer */ - basepf.lleft = basepf.nleft = 0; - basepf.unget = 0; popallfiles(); + basepf.unget = 0; + while (basepf.lastc[0] != '\n' && + basepf.lastc[0] != PEOF) + pgetc(); } FORKRESET { From 5f094d08c5bcee876191404a4f3dd2d075571215 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 3 Jan 2023 14:15:58 +0800 Subject: [PATCH 063/401] input: Read standard input byte-wise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit POSIX Issue 7, XCU, sh, STDIN says: When the shell is using standard input and it invokes a command that also uses standard input, the shell shall ensure that the standard input file pointer points directly after the command it has read when the command begins execution. It shall not read ahead in such a manner that any characters intended to be read by the invoked command are consumed by the shell (whether interpreted by the shell or not) or that characters that are not read by the invoked command are not seen by the shell. I.e. 
sh < Signed-off-by: Herbert Xu --- src/input.c | 47 +++++++++++++++++++++++++++++++++++++---------- src/input.h | 6 ++++++ 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/src/input.c b/src/input.c index cff15b5..8691617 100644 --- a/src/input.c +++ b/src/input.c @@ -162,6 +162,17 @@ int pgetc(void) return __pgetc(); } +static int stdin_clear_nonblock(void) +{ + int flags = fcntl(0, F_GETFL, 0); + + if (flags >= 0) { + flags &=~ O_NONBLOCK; + flags = fcntl(0, F_SETFL, flags); + } + + return flags; +} static int preadfd(void) @@ -198,22 +209,38 @@ preadfd(void) } else #endif + if (parsefile->fd) nr = read(parsefile->fd, buf, IBUFSIZ - 1); + else { + unsigned len = IBUFSIZ - 1; + + nr = 0; + + do { + int err; + err = read(0, buf, 1); + if (err <= 0) { + if (nr) + break; + + nr = err; + if (errno != EWOULDBLOCK) + break; + if (stdin_clear_nonblock() < 0) + break; + + out2str("sh: turning off NDELAY mode\n"); + goto retry; + } + + nr++; + } while (!IS_DEFINED_SMALL && *buf++ != '\n' && --len); + } if (nr < 0) { if (errno == EINTR) goto retry; - if (parsefile->fd == 0 && errno == EWOULDBLOCK) { - int flags = fcntl(0, F_GETFL, 0); - if (flags >= 0 && flags & O_NONBLOCK) { - flags &=~ O_NONBLOCK; - if (fcntl(0, F_SETFL, flags) >= 0) { - out2str("sh: turning off NDELAY mode\n"); - goto retry; - } - } - } } return nr; } diff --git a/src/input.h b/src/input.h index 8c39f33..8830b66 100644 --- a/src/input.h +++ b/src/input.h @@ -34,6 +34,12 @@ * @(#)input.h 8.2 (Berkeley) 5/4/95 */ +#ifdef SMALL +#define IS_DEFINED_SMALL 1 +#else +#define IS_DEFINED_SMALL 0 +#endif + /* PEOF (the end of file marker) is defined in syntax.h */ enum { From 44ae22beedf8a3d68bbfa1d065ad677182372de2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 3 Jan 2023 17:08:37 +0800 Subject: [PATCH 064/401] input: Disable lleft in SMALL mode Counting lleft is only necessary if history support is enabled. Therefore it can be safely disabled if SMALL is defined. 
Signed-off-by: Herbert Xu --- src/input.c | 63 +++++++++++++++++++++++------------------------- src/input.h | 18 ++++++++++++++ src/myhistedit.h | 18 ++++++++++++++ 3 files changed, 66 insertions(+), 33 deletions(-) diff --git a/src/input.c b/src/input.c index 8691617..7b37ae2 100644 --- a/src/input.c +++ b/src/input.c @@ -54,9 +54,7 @@ #include "alias.h" #include "parser.h" #include "main.h" -#ifndef SMALL #include "myhistedit.h" -#endif #define IBUFSIZ (BUFSIZ + 1) @@ -256,12 +254,10 @@ preadfd(void) static int preadbuffer(void) { - char *q; - int more; -#ifndef SMALL int something; -#endif char savec; + int more; + char *q; if (unlikely(parsefile->strpush)) { popstring(); @@ -271,11 +267,11 @@ static int preadbuffer(void) return PEOF; flushall(); - more = parsefile->lleft; + more = input_get_lleft(parsefile); if (more <= 0) { again: if ((more = preadfd()) <= 0) { - parsefile->lleft = parsefile->nleft = 0; + input_set_lleft(parsefile, parsefile->nleft = 0); return PEOF; } } @@ -283,37 +279,38 @@ static int preadbuffer(void) q = parsefile->nextc; /* delete nul characters */ -#ifndef SMALL something = 0; -#endif for (;;) { int c; more--; c = *q; - if (!c) + if (!c) { memmove(q, q + 1, more); - else { - q++; + goto check; + } - if (c == '\n') { - parsefile->nleft = q - parsefile->nextc - 1; - break; - } + q++; -#ifndef SMALL - switch (c) { - default: - something = 1; - /* fall through */ - case '\t': - case ' ': - break; - } -#endif + if (IS_DEFINED_SMALL) + goto check; + + switch (c) { + case '\n': + parsefile->nleft = q - parsefile->nextc - 1; + goto check; + + default: + something = 1; + /* fall through */ + + case '\t': + case ' ': + break; } +check: if (more <= 0) { parsefile->nleft = q - parsefile->nextc - 1; if (parsefile->nleft < 0) @@ -321,12 +318,12 @@ static int preadbuffer(void) break; } } - parsefile->lleft = more; + input_set_lleft(parsefile, more); - savec = *q; + if (!IS_DEFINED_SMALL) + savec = *q; *q = '\0'; -#ifndef SMALL if (parsefile->fd == 0 && 
hist && something) { HistEvent he; INTOFF; @@ -334,7 +331,6 @@ static int preadbuffer(void) parsefile->nextc); INTON; } -#endif if (vflag) { out2str(parsefile->nextc); @@ -343,7 +339,8 @@ static int preadbuffer(void) #endif } - *q = savec; + if (!IS_DEFINED_SMALL) + *q = savec; return (signed char)*parsefile->nextc++; } @@ -458,7 +455,7 @@ setinputfd(int fd, int push) parsefile->fd = fd; if (parsefile->buf == NULL) parsefile->buf = ckmalloc(IBUFSIZ); - parsefile->lleft = parsefile->nleft = 0; + input_set_lleft(parsefile, parsefile->nleft = 0); plinno = 1; } diff --git a/src/input.h b/src/input.h index 8830b66..1ff5773 100644 --- a/src/input.h +++ b/src/input.h @@ -76,7 +76,9 @@ struct parsefile { int linno; /* current line */ int fd; /* file descriptor (or -1 if string) */ int nleft; /* number of chars left in this line */ +#ifndef SMALL int lleft; /* number of chars left in this buffer */ +#endif char *nextc; /* next char in buffer */ char *buf; /* input buffer */ struct strpush *strpush; /* for pushing strings at this level */ @@ -110,3 +112,19 @@ void setinputstring(char *); void popfile(void); void unwindfiles(struct parsefile *); void popallfiles(void); + +static inline int input_get_lleft(struct parsefile *pf) +{ +#ifdef SMALL + return 0; +#else + return pf->lleft; +#endif +} + +static inline void input_set_lleft(struct parsefile *pf, int len) +{ +#ifndef SMALL + pf->lleft = len; +#endif +} diff --git a/src/myhistedit.h b/src/myhistedit.h index 22e5c43..1736f62 100644 --- a/src/myhistedit.h +++ b/src/myhistedit.h @@ -31,9 +31,27 @@ * @(#)myhistedit.h 8.2 (Berkeley) 5/4/95 */ +#ifdef SMALL +typedef void History; +typedef void EditLine; +typedef int HistEvent; + +enum { + H_APPEND, + H_ENTER, +}; + +#define hist NULL + +static inline void history(History *h, HistEvent *he, int action, char *p) +{ +} +#else #include extern History *hist; +#endif + extern EditLine *el; extern int displayhist; From b4ecd84eb4048522648bc16920d3615cb243a6bf Mon Sep 17 00:00:00 2001 
From: Herbert Xu Date: Tue, 3 Jan 2023 17:51:18 +0800 Subject: [PATCH 065/401] var: Do not add 1 to return value of strchrnul MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a variable like OPTIND is unset dash may call the action function with a bogus pointer because it tries to add one to the return value of strchrnul unconditionally. Use strchr and nullstr instead. Link: https://bugs.debian.org/985478 Reported-by: наб Signed-off-by: Herbert Xu --- src/var.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/var.c b/src/var.c index ef9c2bd..b70d72c 100644 --- a/src/var.c +++ b/src/var.c @@ -154,6 +154,10 @@ RESET { } #endif +static char *varnull(const char *s) +{ + return (strchr(s, '=') ?: nullstr - 1) + 1; +} /* * This routine initializes the builtin variables. It is called when the @@ -266,7 +270,7 @@ struct var *setvareq(char *s, int flags) goto out; if (vp->func && (flags & VNOFUNC) == 0) - (*vp->func)(strchrnul(s, '=') + 1); + (*vp->func)(varnull(s)); if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0) ckfree(vp->text); @@ -531,7 +535,7 @@ poplocalvars(void) unsetvar(vp->text); } else { if (vp->func) - (*vp->func)(strchrnul(lvp->text, '=') + 1); + (*vp->func)(varnull(lvp->text)); if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0) ckfree(vp->text); vp->flags = lvp->flags; From 54485578e01017534dae30731f7682abadb38a09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Wed, 4 Jan 2023 12:33:45 +0100 Subject: [PATCH 066/401] builtin: Ignore first -- in getopts per POSIX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue 7, XCU, getopts, OPTIONS reads "None.", and getopts isn't a special built-in listed in section 2.14 ‒ this means that XCU, 1. Introduction, 1.4 Utility Description Defaults, OPTIONS, Default Behavior applies: Default Behavior: When this section is listed as "None.", it means that the implementation need not support any options.
Standard utilities that do not accept options, but that do accept operands, shall recognize "--" as a first argument to be discarded. Test with: getopts -- d: a Correct output is no output, exit 1 Wrong output errors out with d: being an invalid argument name Signed-off-by: Herbert Xu --- src/options.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/options.c b/src/options.c index 3158498..2d4bd3b 100644 --- a/src/options.c +++ b/src/options.c @@ -409,6 +409,9 @@ getoptscmd(int argc, char **argv) { char **optbase; + nextopt(nullstr); + argc -= argptr - argv - 1; + argv = argptr - 1; if (argc < 3) sh_error("Usage: getopts optstring var [arg...]"); else if (argc == 3) { From ba57b84b305dd16f9d3e0d798835a7e9e15454ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Wed, 4 Jan 2023 12:35:13 +0100 Subject: [PATCH 067/401] builtin: Ignore first -- in type for consistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This appears to be the only remaining built-in that doesn't use nextopt() to parse its arguments (and isn't forbidden from doing so) ‒ users expect to be able to do this, and it's nice to be consistent here. 
Test with: type -- ls -- Correct output lists ls=/bin/ls, then --=ENOENT Wrong output lists --=ENOENT, ls=/bin/ls, --=ENOENT Fixes: https://bugs.debian.org/870317 Signed-off-by: Herbert Xu --- src/exec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/exec.c b/src/exec.c index d7a1f53..83cba94 100644 --- a/src/exec.c +++ b/src/exec.c @@ -766,11 +766,11 @@ unsetfunc(const char *name) int typecmd(int argc, char **argv) { - int i; int err = 0; - for (i = 1; i < argc; i++) { - err |= describe_command(out1, argv[i], NULL, 1); + nextopt(nullstr); + while (*argptr) { + err |= describe_command(out1, *argptr++, NULL, 1); } return err; } From de23304e72857733e65355275cddf1415a22c558 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 5 Jan 2023 15:12:52 +0800 Subject: [PATCH 068/401] input: Check for int_pending while clearing input If we receive SIGINT while clearing a partially read line from stdin we should bail out instead of continuing. Signed-off-by: Herbert Xu --- src/input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/input.c b/src/input.c index 7b37ae2..4c86a75 100644 --- a/src/input.c +++ b/src/input.c @@ -87,7 +87,8 @@ RESET { popallfiles(); basepf.unget = 0; while (basepf.lastc[0] != '\n' && - basepf.lastc[0] != PEOF) + basepf.lastc[0] != PEOF && + !int_pending()) pgetc(); } From 1a0cc2a4f52aefb6ad00eb5b242f39d530460063 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 5 Jan 2023 15:26:10 +0800 Subject: [PATCH 069/401] input: Only skip blank lines on PS1 Blank lines should not be skipped if they're found on PS2.
Reported-by: Harald van Dijk Signed-off-by: Herbert Xu --- src/input.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/input.c b/src/input.c index 4c86a75..38969a7 100644 --- a/src/input.c +++ b/src/input.c @@ -255,6 +255,7 @@ preadfd(void) static int preadbuffer(void) { + int first = whichprompt == 1; int something; char savec; int more; @@ -280,7 +281,7 @@ static int preadbuffer(void) q = parsefile->nextc; /* delete nul characters */ - something = 0; + something = !first; for (;;) { int c; @@ -328,7 +329,7 @@ static int preadbuffer(void) if (parsefile->fd == 0 && hist && something) { HistEvent he; INTOFF; - history(hist, &he, whichprompt == 1? H_ENTER : H_APPEND, + history(hist, &he, first ? H_ENTER : H_APPEND, parsefile->nextc); INTON; } From dd73362d08d5aa1596cb4ca5b271a08bb4e123c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 5 Jan 2023 13:43:21 +0100 Subject: [PATCH 070/401] redir: Use F_DUPFD_CLOEXEC instead of F_DUPFD+F_SETFD if available This saves a syscall on every source file open, &c.; F_DUPFD_CLOEXEC is a mandatory part of POSIX since Issue 7 (Austin Group Interpretation 1003.1-2001 #171). 
Signed-off-by: Herbert Xu --- configure.ac | 11 +++++++++++ src/redir.c | 7 ++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 52aa429..5524650 100644 --- a/configure.ac +++ b/configure.ac @@ -177,6 +177,17 @@ if test "$have_st_mtim" = "yes"; then [Define if your `struct stat' has `st_mtim']) fi +dnl F_DUPFD_CLOEXEC is a mandatory part of POSIX since Issue 7 +AC_MSG_CHECKING(for F_DUPFD_CLOEXEC) +AC_COMPILE_IFELSE( +[AC_LANG_PROGRAM([#include +#include ], +[return fcntl(0, F_DUPFD_CLOEXEC, 0)])], +have_dupfd_cloexec=1, have_dupfd_cloexec=0) +AC_MSG_RESULT($(expr yes \& $have_dupfd_cloexec \| no)) +AC_DEFINE_UNQUOTED([HAVE_F_DUPFD_CLOEXEC], [$have_dupfd_cloexec], + [Define to 1 your system supports F_DUPFD_CLOEXEC]) + AC_ARG_WITH(libedit, AS_HELP_STRING(--with-libedit, [Compile with libedit support])) use_libedit= if test "$with_libedit" = "yes"; then diff --git a/src/redir.c b/src/redir.c index 5a5835c..631ddc9 100644 --- a/src/redir.c +++ b/src/redir.c @@ -446,13 +446,18 @@ savefd(int from, int ofd) int newfd; int err; +#if HAVE_F_DUPFD_CLOEXEC + newfd = fcntl(from, F_DUPFD_CLOEXEC, 10); +#else newfd = fcntl(from, F_DUPFD, 10); +#endif + err = newfd < 0 ? errno : 0; if (err != EBADF) { close(ofd); if (err) sh_error("%d: %s", from, strerror(err)); - else + else if(!HAVE_F_DUPFD_CLOEXEC) fcntl(newfd, F_SETFD, FD_CLOEXEC); } From 4ec545e8dc98a3f461cf56bed03adafa81c64aec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 5 Jan 2023 13:49:47 +0100 Subject: [PATCH 071/401] alias: Quote name in printalias This ensures even something like alias 'a|b|c=d' is output by alias as 'a|b|c'='d' instead of a|b|c='d' which is both "suitable for reinput to the shell" per POSIX and doesn't execute the aliases as code. 
Signed-off-by: Herbert Xu --- src/alias.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alias.c b/src/alias.c index daeacbb..1375cdd 100644 --- a/src/alias.c +++ b/src/alias.c @@ -197,7 +197,7 @@ freealias(struct alias *ap) { void printalias(const struct alias *ap) { - out1fmt("%s=%s\n", ap->name, single_quote(ap->val)); + out1fmt("%s=%s\n", single_quote(ap->name), single_quote(ap->val)); } STATIC struct alias ** From d89761b0e1652e212e9354fd3c96f977de873a06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Thu, 5 Jan 2023 14:42:04 +0100 Subject: [PATCH 072/401] parser: Don't keep alloca()ing in a loop for substitutions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When encountering printf %010000d | tr 0 \` | sh -n printf %09999d | tr 0 \` | sh -n you want no output and "Syntax error: EOF in backquote substitution", respectively; instead, current dash segfaults. This is because the alloca for the save buffer is run, naturally, in the same function, so first it allocates one byte, then two, then ..., then appx. 4000 (for me, depends on the binary), then it segfaults on the memcpy (it's even worse, since due to alignment, it usually allocates much more for the early stuff). Nevertheless, the stack frame grows unboundedly, until we completely destroy the stack. Instead of squirreling the out block away, then letting subsequent allocations override the original, mark it used, and just re-copy it to the top of the dash stack. 
This increases peak memory usage somewhat (in the most pathological case ‒ the above but with three nines ‒ from 23.26 to 173.7KiB according to massif, in parsing a regular program (ratrun from ratrun 0c) from 28.68 to 29.19; a simpler program (ibid., rat) stays at 5.422; parsing libtoolize, debootstrap, and dkms (the biggest shell programs in my /[s]bin by size + by `/$( count) likewise stay the same at 12.02, 41.48, and 6.438) but it's barely measurable outside of truly pathological conditions that were a step away from a segfault previously. Link: https://bugs.debian.org/966156 Signed-off-by: Herbert Xu --- src/parser.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/parser.c b/src/parser.c index 8a06b9e..f5f76d5 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1360,12 +1360,9 @@ parsebackq: { struct heredoc *saveheredoclist; int uninitialized_var(saveprompt); - str = NULL; + str = stackblock(); savelen = out - (char *)stackblock(); - if (savelen > 0) { - str = alloca(savelen); - memcpy(str, stackblock(), savelen); - } + grabstackblock(savelen); if (oldstyle) { /* We must read until the closing backquote, giving special treatment to some slashes, and then push the string and @@ -1445,12 +1442,8 @@ parsebackq: { /* Ignore any pushed back tokens left from the backquote parsing. */ if (oldstyle) tokpushback = 0; - out = growstackto(savelen + 1); - if (str) { - memcpy(out, str, savelen); - STADJUST(savelen, out); - } - USTPUTC(CTLBACKQ, out); + out = stnputs(str, savelen, stackblock()); + STPUTC(CTLBACKQ, out); if (oldstyle) goto parsebackq_oldreturn; else From f96ec8765cf37eb0c222a563de2f767ebfbf56db Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 6 Jan 2023 11:15:55 +0800 Subject: [PATCH 073/401] parser: Print CTLBACKQ early in parsesub As we are allowed to perform 4 USTPUTC's we can save a growstackstr call by adding the CTLBACKQ before we save the string. 
Signed-off-by: Herbert Xu --- src/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index f5f76d5..299c260 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1360,6 +1360,7 @@ parsebackq: { struct heredoc *saveheredoclist; int uninitialized_var(saveprompt); + USTPUTC(CTLBACKQ, out); str = stackblock(); savelen = out - (char *)stackblock(); grabstackblock(savelen); @@ -1443,7 +1444,6 @@ parsebackq: { if (oldstyle) tokpushback = 0; out = stnputs(str, savelen, stackblock()); - STPUTC(CTLBACKQ, out); if (oldstyle) goto parsebackq_oldreturn; else From b00288fd28c1b39d0f1531b6e6d86de59de4be8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Mon, 9 Jan 2023 01:15:43 +0100 Subject: [PATCH 074/401] alias: fix name quoting in printalias single_quote() over-writes the stack string, so just output the name separately first. Reported-by: Harald van Dijk Fixes: 4ec545e8dc98 ("alias: Quote name in printalias") Signed-off-by: Herbert Xu --- src/alias.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/alias.c b/src/alias.c index 1375cdd..fcad43b 100644 --- a/src/alias.c +++ b/src/alias.c @@ -197,7 +197,8 @@ freealias(struct alias *ap) { void printalias(const struct alias *ap) { - out1fmt("%s=%s\n", single_quote(ap->name), single_quote(ap->val)); + out1str(single_quote(ap->name)); + out1fmt("=%s\n", single_quote(ap->val)); } STATIC struct alias ** From 088f265909f5b85e49302d8c4d624b977ce2bd3c Mon Sep 17 00:00:00 2001 From: "C. McEnroe" Date: Sat, 2 Oct 2021 16:37:32 -0400 Subject: [PATCH 075/401] mail: Fix chkmail loop break condition padvance_magic() returns -1 when there are no more paths left, not zero. 
Fixes: 4f7527f8e492 ("exec: Do not allocate stack string in padvance") Signed-off-by: Herbert Xu --- src/mail.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mail.c b/src/mail.c index 8eacb2d..49cd5fa 100644 --- a/src/mail.c +++ b/src/mail.c @@ -80,7 +80,7 @@ chkmail(void) int len; len = padvance_magic(&mpath, nullstr, 2); - if (!len) + if (len < 0) break; p = stackblock(); if (*p == '\0') From 6347b9fc52d742f36a0276cdea06cd9ad1f02c77 Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Fri, 22 Apr 2022 22:10:13 +0100 Subject: [PATCH 076/401] jobs: Implement pipefail option With the pipefail option set, a pipeline's exit status is the exit status of the rightmost command that failed, or zero if all commands succeeded. This is planned for inclusion in the next revision of POSIX [1], although the details are yet to be finalised. The semantics of this implementation are the same as those proposed in [2], which have also been adopted by the BSD shells. [1] https://www.austingroupbugs.net/view.php?id=789 [2] https://www.austingroupbugs.net/view.php?id=789#c4115 Signed-off-by: Herbert Xu --- src/dash.1 | 31 ++++++++++++++++++++++++------- src/jobs.c | 9 ++++++++- src/options.c | 2 ++ src/options.h | 5 +++-- 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/dash.1 b/src/dash.1 index d3893bc..31a9d31 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -553,13 +553,17 @@ by redirection operators that are part of the command. If the pipeline is not in the background (discussed later), the shell waits for all commands to complete. .Pp -If the reserved word ! does not precede the pipeline, the exit status is -the exit status of the last command specified in the pipeline. -Otherwise, the exit status is the logical NOT of the exit status of the -last command. -That is, if the last command returns zero, the exit status -is 1; if the last command returns greater than zero, the exit status is -zero. 
+If the +.Em pipefail +option was enabled when the shell began execution of the pipeline, the +pipeline's exit status is the exit status of the last command specified in +the pipeline that exited with non-zero status, or zero if all commands in +the pipeline exited with a status of zero. If the +.Em pipefail +option was not enabled, the pipeline's exit status is the exit status of +the last command specified in the pipeline; the exit statuses of any other +commands are not used. If the reserved word ! precedes the pipeline, its +exit status is the logical NOT of the exit status described above. .Pp Because pipeline assignment of standard input or standard output or both takes place before redirection, it can be modified by redirection. @@ -1832,6 +1836,19 @@ if the option is +o, the settings are printed in a format suitable for reinput to the shell to affect the same option settings. .Pp +In addition to the option names listed in the +.Sx Argument List Processing +section, the following options may be specified as arguments +to -o or +o: +.Bl -tag -width pipefail +.It Em pipefail +Derive the exit status of a pipeline from the exit statuses of all +of the commands in the pipeline, not just the last command, as +described in the +.Sx Pipelines +section. +.El +.Pp The third use of the set command is to set the values of the shell's positional parameters to the specified args. 
To change the positional diff --git a/src/jobs.c b/src/jobs.c index f3b9ffc..78c7bc6 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -1526,8 +1526,15 @@ STATIC int getstatus(struct job *job) { int status; int retval; + struct procstat *ps; + + ps = job->ps + job->nprocs - 1; + status = ps->status; + if (pipefail) { + while (status == 0 && --ps >= job->ps) + status = ps->status; + } - status = job->ps[job->nprocs - 1].status; retval = WEXITSTATUS(status); if (!WIFEXITED(status)) { #if JOBS diff --git a/src/options.c b/src/options.c index 2d4bd3b..e192191 100644 --- a/src/options.c +++ b/src/options.c @@ -80,6 +80,7 @@ static const char *const optnames[NOPTS] = { "notify", "nounset", "nolog", + "pipefail", "debug", }; @@ -101,6 +102,7 @@ const char optletters[NOPTS] = { 'u', 0, 0, + 0, }; char optlist[NOPTS]; diff --git a/src/options.h b/src/options.h index 975fe33..f421316 100644 --- a/src/options.h +++ b/src/options.h @@ -60,9 +60,10 @@ struct shparam { #define bflag optlist[13] #define uflag optlist[14] #define nolog optlist[15] -#define debug optlist[16] +#define pipefail optlist[16] +#define debug optlist[17] -#define NOPTS 17 +#define NOPTS 18 extern const char optletters[NOPTS]; extern char optlist[NOPTS]; From e85e972e718b08c72579d24eb39e16563889cc29 Mon Sep 17 00:00:00 2001 From: Ron Yorston Date: Tue, 10 May 2022 09:16:55 +0100 Subject: [PATCH 077/401] var: move hashvar() calls into findvar() The first argument to findvar() is always obtained by a call to hashvar(), the return value of which is otherwise unused. 
Signed-off-by: Ron Yorston Signed-off-by: Herbert Xu --- src/var.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/var.c b/src/var.c index b70d72c..21e0abf 100644 --- a/src/var.c +++ b/src/var.c @@ -107,7 +107,7 @@ STATIC struct var *vartab[VTABSIZE]; STATIC struct var **hashvar(const char *); STATIC int vpcmp(const void *, const void *); -STATIC struct var **findvar(struct var **, const char *); +STATIC struct var **findvar(const char *); /* * Initialize the varable symbol tables and import the environment @@ -251,9 +251,8 @@ struct var *setvareq(char *s, int flags) { struct var *vp, **vpp; - vpp = hashvar(s); flags |= (VEXPORT & (((unsigned) (1 - aflag)) - 1)); - vpp = findvar(vpp, s); + vpp = findvar(s); vp = *vpp; if (vp) { if (vp->flags & VREADONLY) { @@ -315,7 +314,7 @@ lookupvar(const char *name) { struct var *v; - if ((v = *findvar(hashvar(name), name)) && !(v->flags & VUNSET)) { + if ((v = *findvar(name)) && !(v->flags & VUNSET)) { #ifdef WITH_LINENO if (v == &vlineno && v->text == linenovar) { fmtstr(linenovar+7, sizeof(linenovar)-7, "%d", lineno); @@ -422,7 +421,7 @@ exportcmd(int argc, char **argv) if ((p = strchr(name, '=')) != NULL) { p++; } else { - if ((vp = *findvar(hashvar(name), name))) { + if ((vp = *findvar(name))) { vp->flags |= flag; continue; } @@ -466,7 +465,6 @@ localcmd(int argc, char **argv) void mklocal(char *name, int flags) { struct localvar *lvp; - struct var **vpp; struct var *vp; INTOFF; @@ -479,8 +477,7 @@ void mklocal(char *name, int flags) } else { char *eq; - vpp = hashvar(name); - vp = *findvar(vpp, name); + vp = *findvar(name); eq = strchr(name, '='); if (vp == NULL) { if (eq) @@ -667,9 +664,11 @@ vpcmp(const void *a, const void *b) } STATIC struct var ** -findvar(struct var **vpp, const char *name) +findvar(const char *name) { - for (; *vpp; vpp = &(*vpp)->next) { + struct var **vpp; + + for (vpp = hashvar(name); *vpp; vpp = &(*vpp)->next) { if (varequal((*vpp)->text, name)) { 
break; } From 933e016f29ffd4863b9b2857d240716f7b2728b5 Mon Sep 17 00:00:00 2001 From: Forest Date: Mon, 16 May 2022 16:02:24 -0700 Subject: [PATCH 078/401] man: Fix swapped stdin/stdout for redirection operators The Redirections section incorrectly claimed that <& replaces stdout and >& replaces stdin. Swapped them to make it read correctly. Ref: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_07_05 Both errors were followed by extra text that looked like remains of a mostly-deleted sentence. Removed those. Fixes: 6adc14a0d4e4 ("man: Clarify two redirection mechanisms") Signed-off-by: Herbert Xu --- src/dash.1 | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/dash.1 b/src/dash.1 index 31a9d31..1f155ba 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -402,13 +402,11 @@ Append standard output (or n) to file. .It [n] Ns \*[Lt] file Redirect standard input (or n) from file. .It [n1] Ns \*[Lt]& Ns n2 -Copy file descriptor n2 as stdout (or fd n1). -fd n2. +Copy file descriptor n2 as stdin (or fd n1). .It [n] Ns \*[Lt]&- Close standard input (or n). .It [n1] Ns \*[Gt]& Ns n2 -Copy file descriptor n2 as stdin (or fd n1). -fd n2. +Copy file descriptor n2 as stdout (or fd n1). .It [n] Ns \*[Gt]&- Close standard output (or n). .It [n] Ns \*[Lt]\*[Gt] file From 96b972aa3d15317cc1e853543a918b2b727e4566 Mon Sep 17 00:00:00 2001 From: Subhaditya Nath Date: Mon, 6 Feb 2023 20:38:47 +0530 Subject: [PATCH 079/401] options: Fix getopts handling of colon in optstr Putting a colon at the beginning of optstring to silence errors doesn't mean that the colon is a valid option. Before this patch, dash treated -: as a valid option if the optstring started with a colon. This patch fixes that problem. 
Test: getopts :a opt -: echo $opt$OPTARG Correct output - ?: Invalid output - : Signed-off-by: Herbert Xu --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index e192191..8101cf5 100644 --- a/src/options.c +++ b/src/options.c @@ -467,7 +467,7 @@ getopts(char *optstr, char *optvar, char **optfirst) } c = *p++; - for (q = optstr; *q != c; ) { + for (q = optstr[0] == ':' ? optstr + 1 : optstr; *q != c; ) { if (*q == '\0') { if (optstr[0] == ':') { s[0] = c; From b9c069b0cc372821942aecd04829030f5710baac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Tue, 7 Feb 2023 20:33:25 +0100 Subject: [PATCH 080/401] histedit: Disallow fc -s first last The POSIX SYNOPSIS (and our manual which steals it verbatim) says: fc -s [old=new] [first] and, indeed, we only use the first non-= argument instead of enforcing the usage, which is confusing. bash: 2025 ls 2026 id $ fc -s ls=who 2025 2026 who nabijaczleweli pts/2 2023-02-07 17:36 (192.168.1.109) nabijaczleweli pts/3 2023-02-07 17:38 (192.168.1.109) nabijaczleweli pts/4 2023-02-07 16:58 (192.168.1.109) nabijaczleweli pts/5 2023-02-07 17:45 (192.168.1.109) ksh93: 240 id 241 ls $ fc -s ls=who 241 240 ksh: hist: -e - requires single argument yash: 2 ls 3 id $ fc -s ls=who 2 3 fc: too many operands are specified zsh: 2 id 3 ls tarta% fc -s ls=who 3 2 fc: bad option: -s dash (before): 1 ls 2 id $ fc -s ls=who 1 2 who nabijaczleweli pts/2 2023-02-07 17:36 (192.168.1.109) nabijaczleweli pts/3 2023-02-07 17:38 (192.168.1.109) nabijaczleweli pts/4 2023-02-07 16:58 (192.168.1.109) nabijaczleweli pts/5 2023-02-07 17:45 (192.168.1.109) dash (after): 1 ls 2 id $ fc -s ls=who 1 2 src/dash: 3: fc: -s takes one history argument Adapted-from: NetBSD src bin/sh/histedit.c rev 1.38 by aymeric@ Signed-off-by: Herbert Xu --- src/histedit.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/histedit.c b/src/histedit.c index f5c90ab..fc87283 100644 --- 
a/src/histedit.c +++ b/src/histedit.c @@ -296,6 +296,13 @@ histcmd(int argc, char **argv) *repl++ = '\0'; argc--, argv++; } + + /* + * If -s is specified, accept only one operand + */ + if (sflg && argc >= 2) + sh_error("too many args"); + /* * determine [first] and [last] */ From f0d4a2eeaf3cd151a37453ff1cd94fcc7a7b10af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Tue, 7 Feb 2023 20:33:30 +0100 Subject: [PATCH 081/401] histedit: Fix fs -s infinite loop $ id 1 uid=1(daemon) gid=1(daemon) groups=1(daemon) $ fc -s 2 fc -s 2 fc -s 2 fc -s 2 fc -s 2 src/dash: 1: fc: called recursively too many times and I'm happy to call this "behaving exactly as I expected when I was typing it in", so removing the XXX. Adapted-from: NetBSD src bin/sh/histedit.c rev 1.38 by aymeric@ Signed-off-by: Herbert Xu --- src/histedit.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/histedit.c b/src/histedit.c index fc87283..28956ec 100644 --- a/src/histedit.c +++ b/src/histedit.c @@ -382,12 +382,10 @@ histcmd(int argc, char **argv) evalstring(s, 0); if (displayhist && hist) { - /* - * XXX what about recursive and - * relative histnums. - */ history(hist, &he, H_ENTER, s); } + + break; } else fputs(s, efp); } From 56f6355249bae99a09102b1f812f2f365923dc24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Tue, 7 Feb 2023 20:33:34 +0100 Subject: [PATCH 082/401] histedit: Only parse old=new for fc -s Before (erroneously replaced): $ a=b set ... 
$ fc a=b 8 , b=b set After (used as search string): $ fc a=b 8 , a=b set Reported-by: Harald van Dijk Reported-in: https://marc.info/?l=dash&m=154707728009743&w=2 Signed-off-by: Herbert Xu --- src/histedit.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/histedit.c b/src/histedit.c index 28956ec..24631ca 100644 --- a/src/histedit.c +++ b/src/histedit.c @@ -288,21 +288,18 @@ histcmd(int argc, char **argv) } /* - * If executing, parse [old=new] now + * If -s is specified, accept [old=new] first only */ - if (lflg == 0 && argc > 0 && - ((repl = strchr(argv[0], '=')) != NULL)) { - pat = argv[0]; - *repl++ = '\0'; - argc--, argv++; + if (sflg) { + if (argc > 0 && ((repl = strchr(argv[0], '=')) != NULL)) { + pat = argv[0]; + *repl++ = '\0'; + argc--, argv++; + } + if (argc >= 2) + sh_error("too many args"); } - /* - * If -s is specified, accept only one operand - */ - if (sflg && argc >= 2) - sh_error("too many args"); - /* * determine [first] and [last] */ From 7884dcced4c04c036b5227207e63a0d06f19e9d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Tue, 7 Feb 2023 20:33:42 +0100 Subject: [PATCH 083/401] histedit: Fix "fc -3" breakage on glibc Before: $ echo a a $ echo b b $ fc -2 -1 src/dash: 3: fc: unknown option: -2 $ fc -- -2 -1 16 ,p echo b fc -2 -1 After: $ echo a a $ echo b b $ fc -2 -1 6 ,p echo a echo b Reported-by: Harald van Dijk Reported-in: https://marc.info/?l=dash&m=154707728009743&w=2 Signed-off-by: Herbert Xu --- src/histedit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/histedit.c b/src/histedit.c index 24631ca..fcd8af0 100644 --- a/src/histedit.c +++ b/src/histedit.c @@ -220,7 +220,7 @@ histcmd(int argc, char **argv) #else optreset = 1; optind = 1; /* initialize getopt */ #endif - while (not_fcnumber(argv[optind]) && + while (not_fcnumber(argv[optind ?: 1]) && (ch = getopt(argc, argv, ":e:lnrs")) != -1) switch ((char)ch) { case 'e': @@ -246,6 +246,7 @@ 
histcmd(int argc, char **argv) sh_error("unknown option: -%c", optopt); /* NOTREACHED */ } + optind = optind ?: 1; argc -= optind, argv += optind; /* From 459e51a2ae9d720b12e44fa725a6014a4d784d7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Tue, 7 Feb 2023 20:33:47 +0100 Subject: [PATCH 084/401] histedit: Don't include the current fc in out-of-range last POSIX states: When a range of commands is used, it shall not be an error to specify first or last values that are not in the history list; fc shall substitute the value representing the oldest or newest command in the list, as appropriate. For example, if there are only ten commands in the history list, numbered 1 to 10: fc -l fc 1 99 shall list and edit, respectively, all ten commands. Which would seem to imply that the current fc shouldn't be included (well, in the POSIX model, no non--l fc enters the history, so that reinforces that). zsh, bash, mksh, yash all agree with this; oddly, ksh includes it. Before: $ 1 src/dash: 1: 1: not found $ 2 src/dash: 2: 2: not found $ 3 src/dash: 3: 3: not found $ 4 src/dash: 4: 4: not found $ 5 src/dash: 5: 5: not found $ 6 src/dash: 6: 6: not found $ fc 1 999 21 ,p 1 2 3 4 5 6 fc 1 999 After: $ fc 1 9999 12 ,p 1 2 3 4 5 6 Reported-by: Harald van Dijk Reported-in: https://marc.info/?l=dash&m=154707728009743&w=2 Signed-off-by: Herbert Xu --- src/histedit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/histedit.c b/src/histedit.c index fcd8af0..07dd1b4 100644 --- a/src/histedit.c +++ b/src/histedit.c @@ -479,6 +479,8 @@ str_to_event(const char *str, int last) */ retval = history(hist, &he, last ? 
H_FIRST : H_LAST); + if (retval != -1 && last) + retval = history(hist, &he, H_NEXT); } } if (retval == -1) From 9b9f39396a3b468237d6f361b36e9394ce6b3b84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Tue, 7 Feb 2023 20:33:38 +0100 Subject: [PATCH 085/401] histedit: Don't require argument for fc This is already handled correctly (per POSIX) below: When the synopsis form with -s is used: If first is omitted, the previous command shall be used. For the synopsis forms without -s: If first and last are both omitted, the previous 16 commands shall be listed or the previous single command shall be edited (based on the -l option). Test log: $ ls autogen.sh ChangeLog ... $ id uid=1000(nabijaczleweli) gid=100(users) groups=100(users) $ who nabijaczleweli pts/2 2023-02-07 18:36 (192.168.1.109) $ fc 4 , who q nabijaczleweli pts/2 2023-02-07 18:36 (192.168.1.109) $ fc -l 1 ls 2 id 3 who 4 fc $ fc -s fc -l 1 ls 2 id 3 who 4 fc 5 fc -l Reported-by: Harald van Dijk Reported-in: https://marc.info/?l=dash&m=154707728009743&w=2 Signed-off-by: Herbert Xu --- src/histedit.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/histedit.c b/src/histedit.c index 07dd1b4..7692776 100644 --- a/src/histedit.c +++ b/src/histedit.c @@ -212,9 +212,6 @@ histcmd(int argc, char **argv) if (hist == NULL) sh_error("history not active"); - if (argc == 1) - sh_error("missing history argument"); - #ifdef __GLIBC__ optind = 0; #else From b41b0d41228fe82991a63f475e0bef701f539db9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Fri, 10 Feb 2023 12:24:49 +0100 Subject: [PATCH 086/401] shell: Prototype all function definitions for C23 compat With this patch, you're just left with the histedit.c warning. 
Signed-off-by: Herbert Xu --- src/exec.c | 13 +++---------- src/jobs.c | 5 +---- src/mksignames.c | 4 +--- src/nodes.c.pat | 20 +++++--------------- src/options.c | 4 +--- src/redir.c | 12 +++--------- 6 files changed, 14 insertions(+), 44 deletions(-) diff --git a/src/exec.c b/src/exec.c index 83cba94..4b777d2 100644 --- a/src/exec.c +++ b/src/exec.c @@ -775,12 +775,8 @@ typecmd(int argc, char **argv) return err; } -STATIC int -describe_command(out, command, path, verbose) - struct output *out; - char *command; - const char *path; - int verbose; +static int describe_command(struct output *out, char *command, + const char *path, int verbose) { struct cmdentry entry; struct tblentry *cmdp; @@ -881,10 +877,7 @@ describe_command(out, command, path, verbose) return 0; } -int -commandcmd(argc, argv) - int argc; - char **argv; +int commandcmd(int argc, char **argv) { char *cmd; int c; diff --git a/src/jobs.c b/src/jobs.c index 78c7bc6..e16aeca 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -243,10 +243,7 @@ setjobctl(int on) #endif -int -killcmd(argc, argv) - int argc; - char **argv; +int killcmd(int argc, char **argv) { extern char *signal_names[]; int signo = -1; diff --git a/src/mksignames.c b/src/mksignames.c index a832eab..192728b 100644 --- a/src/mksignames.c +++ b/src/mksignames.c @@ -360,9 +360,7 @@ initialize_signames () } } -void -write_signames (stream) - FILE *stream; +void write_signames(FILE *stream) { register int i; diff --git a/src/nodes.c.pat b/src/nodes.c.pat index 9125bc7..636456c 100644 --- a/src/nodes.c.pat +++ b/src/nodes.c.pat @@ -87,18 +87,14 @@ copyfunc(union node *n) -STATIC void -calcsize(n) - union node *n; +static void calcsize(union node *n) { %CALCSIZE } -STATIC void -sizenodelist(lp) - struct nodelist *lp; +static void sizenodelist(struct nodelist *lp) { while (lp) { funcblocksize += SHELL_ALIGN(sizeof(struct nodelist)); @@ -109,9 +105,7 @@ sizenodelist(lp) -STATIC union node * -copynode(n) - union node *n; +static union node *copynode(union 
node *n) { union node *new; @@ -120,9 +114,7 @@ copynode(n) } -STATIC struct nodelist * -copynodelist(lp) - struct nodelist *lp; +static struct nodelist *copynodelist(struct nodelist *lp) { struct nodelist *start; struct nodelist **lpp; @@ -142,9 +134,7 @@ copynodelist(lp) -STATIC char * -nodesavestr(s) - char *s; +static char *nodesavestr(char *s) { char *rtn = funcstring; diff --git a/src/options.c b/src/options.c index 8101cf5..f157321 100644 --- a/src/options.c +++ b/src/options.c @@ -391,9 +391,7 @@ setcmd(int argc, char **argv) } -void -getoptsreset(value) - const char *value; +void getoptsreset(const char *value) { shellparam.optind = number(value) ?: 1; shellparam.optoff = -1; diff --git a/src/redir.c b/src/redir.c index 631ddc9..d74602c 100644 --- a/src/redir.c +++ b/src/redir.c @@ -281,18 +281,12 @@ openredirect(union node *redir) } -STATIC void #ifdef notyet -dupredirect(redir, f, memory) +static void dupredirect(union node *redir, int f, char memory[10]) #else -dupredirect(redir, f) +static void dupredirect(union node *redir, int f) #endif - union node *redir; - int f; -#ifdef notyet - char memory[10]; -#endif - { +{ int fd = redir->nfile.fd; int err = 0; From b23ae6de6c987e1448b65c3ecaa7659a28e8374c Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 31 Mar 2023 14:51:57 +0200 Subject: [PATCH 087/401] jobs: drop unused node parameter in makejob() CC: dash@vger.kernel.org CC: Herbert Xu Signed-off-by: Denys Vlasenko Signed-off-by: Herbert Xu --- src/eval.c | 6 +++--- src/jobs.c | 7 +++---- src/jobs.h | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/eval.c b/src/eval.c index fa43b68..978a174 100644 --- a/src/eval.c +++ b/src/eval.c @@ -494,7 +494,7 @@ evalsubshell(union node *n, int flags) forkreset(); goto nofork; } - jp = makejob(n, 1); + jp = makejob(1); if (forkshell(jp, n, backgnd) == 0) { flags |= EV_EXIT; if (backgnd) @@ -571,7 +571,7 @@ evalpipe(union node *n, int flags) pipelen++; flags |= EV_EXIT; INTOFF; - jp = 
makejob(n, pipelen); + jp = makejob(pipelen); prevfd = -1; for (lp = n->npipe.cmdlist ; lp ; lp = lp->next) { prehash(lp->n); @@ -637,7 +637,7 @@ evalbackcmd(union node *n, struct backcmd *result) if (pipe(pip) < 0) sh_error("Pipe call failed"); - jp = makejob(n, 1); + jp = makejob(1); if (forkshell(jp, n, FORK_NOJOB) == 0) { FORCEINTON; close(pip[0]); diff --git a/src/jobs.c b/src/jobs.c index e16aeca..a0f4d47 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -745,8 +745,7 @@ getjob(const char *name, int getctl) * Called with interrupts off. */ -struct job * -makejob(union node *node, int nprocs) +struct job *makejob(int nprocs) { int i; struct job *jp; @@ -777,7 +776,7 @@ makejob(union node *node, int nprocs) if (nprocs > 1) { jp->ps = ckmalloc(nprocs * sizeof (struct procstat)); } - TRACE(("makejob(0x%lx, %d) returns %%%d\n", (long)node, nprocs, + TRACE(("makejob(%d) returns %%%d\n", nprocs, jobno(jp))); return jp; } @@ -960,7 +959,7 @@ struct job *vforkexec(union node *n, char **argv, const char *path, int idx) struct job *jp; int pid; - jp = makejob(n, 1); + jp = makejob(1); sigblockall(NULL); vforked++; diff --git a/src/jobs.h b/src/jobs.h index 6ac6c56..2832d64 100644 --- a/src/jobs.h +++ b/src/jobs.h @@ -102,7 +102,7 @@ int jobscmd(int, char **); struct output; void showjobs(struct output *, int); int waitcmd(int, char **); -struct job *makejob(union node *, int); +struct job *makejob(int); int forkshell(struct job *, union node *, int); struct job *vforkexec(union node *n, char **argv, const char *path, int idx); int waitforjob(struct job *); From 14ac8fbcaa8faf2f4faf1e4d98c0168eaddf6a6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Sat, 22 Apr 2023 17:16:21 +0200 Subject: [PATCH 088/401] man: document what happens when IFS= (and when it's not) A question I just got from a user was "how do I make while read -r l; do ...; done < f not strip the initial tabs?". 
Turns out, the manual is silent on this, and POSIX just about implies this behaviour. (Indeed, our read is almost verbatim POSIX, and both defer to Field Splitting, but our Field Splitting isn't nearly as detailed, and thank god.) Even POSIX spends just one line describing this pivotal behaviour (Issue 8 Draft 2.1 line 75044-75045: "2. If the value of IFS is null, field splitting shall have no effect, except that if the input is empty the result shall be zero fields.)," and when I first encountered this it was also quite surprising to me. Spell it out explicitly: IFS= means that input is preserved, and the default value means whitespace is stripped from the front. Drive it home with an example because it's esoteric (and I know from that user that they first tried searching for read in the manual, but it was not very helpful). Reported-by: rozbrajaczpoziomow Signed-off-by: Herbert Xu --- src/dash.1 | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/dash.1 b/src/dash.1 index 1f155ba..1b14662 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -1023,6 +1023,19 @@ The shell treats each character of the .Ev IFS as a delimiter and uses the delimiters to split the results of parameter expansion and command substitution into fields. +.Pp +If +.Ev IFS +is empty, field splitting yields no fields if the input string was empty, +and one string with the unchanged value of the input otherwise. +For example, with the default +.Ev IFS , +.Dq Ic read Fl r Ev l +will remove any initial whitespace, +but +.Dq Ev IFS Ns = Ic read Fl r Ev l +will leave the entire line in +.Ev l . 
.Ss Pathname Expansion (File Name Generation) Unless the .Fl f From 419f334520c251fb9cdeac380312d38521dfca5c Mon Sep 17 00:00:00 2001 From: Fabrice Fontaine Date: Fri, 16 Feb 2024 17:33:19 +0100 Subject: [PATCH 089/401] configure.ac: drop -Wl,--fatal-warnings Drop -Wl,--fatal-warnings with --enable-static to avoid the following static build failure: configure:4778: checking for strtod configure:4778: /home/autobuild/autobuild/instance-8/output-1/host/bin/powerpc-buildroot-linux-uclibcspe-gcc -o conftest -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -mabi=spe -mfloat-gprs=single -Wa,-me500 -Os -g0 -static -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -static -Wl,--fatal-warnings conftest.c >&5 /home/autobuild/autobuild/instance-8/output-1/host/lib/gcc/powerpc-buildroot-linux-uclibcspe/8.4.0/../../../../powerpc-buildroot-linux-uclibcspe/bin/ld: warning: conftest has a LOAD segment with RWX permissions collect2: error: ld returned 1 exit status [...] In file included from arith_yylex.c:44: system.h:74:22: error: static declaration of 'strtod' follows non-static declaration static inline double strtod(const char *nptr, char **endptr) ^~~~~~ Fixes: - http://autobuild.buildroot.org/results/a54fdc7d1b94beb47203373ae35b08d9cea8d42c Signed-off-by: Fabrice Fontaine Signed-off-by: Herbert Xu --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 5524650..6993364 100644 --- a/configure.ac +++ b/configure.ac @@ -34,7 +34,7 @@ fi AC_ARG_ENABLE(static, AS_HELP_STRING(--enable-static, \ [Build statical linked program])) if test "$enable_static" = "yes"; then - export LDFLAGS="-static -Wl,--fatal-warnings" + export LDFLAGS="-static" fi AC_ARG_ENABLE(fnmatch, AS_HELP_STRING(--disable-fnmatch, \ From d489f2e2e98268894a38a1c84da559e74020c47b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 5 Apr 2024 17:55:46 +0800 Subject: [PATCH 090/401] exec: Check executable bit when searching 
path Andrej Shadura wrote: > > Here's an old bug from 2017, but it was brought to my attention in some > recent discussion about which "which" is which. There's also a patch in > one of the follow-ups, but I'm afraid I don't know enough about that > part of code to judge the consequences of it being applied: > > https://bugs.debian.org/874264 > > -------- Forwarded Message -------- > Subject: dash: 'command -v' mistakenly returns a shell script whose > executable is not set > Date: Mon, 04 Sep 2017 10:45:48 -0400 > From: Norman Ramsey > To: Debian Bug Tracking System > > Package: dash > Version: 0.5.8-2.4 > Severity: normal > > Dear Maintainer, > > > I tracked a build bug in s-nail to a problem with dash. Symptom: > building s-nail tries to run /home/nr/bin/clang, a script whose > executable bit is not set. We tracked the problem to the result of > running `command -v clang` with /bin/sh: > > nr@homedog ~/n/s-nail> /bin/sh -c 'command -v clang' > /home/nr/bin/clang > nr@homedog ~/n/s-nail> ls -l /home/nr/bin/clang > -rw-rw-r-- 1 nr nr 1009 Aug 29 2011 /home/nr/bin/clang > nr@homedog ~/n/s-nail> ls -l /bin/sh > lrwxrwxrwx 1 root root 4 Jan 24 2017 /bin/sh -> dash > nr@homedog ~/n/s-nail> ksh -c 'command -v clang' > /usr/bin/clang > nr@homedog ~/n/s-nail> bash -c 'command -v clang' > /usr/bin/clang > nr@homedog ~/n/s-nail> sh -c 'command -v clang' > /home/nr/bin/clang > nr@homedog ~/n/s-nail> dash -c 'command -v clang' > /home/nr/bin/clang > nr@homedog ~/n/s-nail> fish -c 'command -v clang' > /usr/bin/clang > > When I run `command -v clang` I expect it to answer /usr/bin/clang. 
> > -- System Information: > Debian Release: 9.1 > APT prefers stable > APT policy: (990, 'stable'), (500, 'stable'), (1, 'experimental') > Architecture: i386 (x86_64) > Foreign Architectures: amd64 > > Kernel: Linux 4.9.0-3-amd64 (SMP w/4 CPU cores) > Locale: LANG=C, LC_CTYPE=C (charmap=UTF-8) (ignored: LC_ALL set to > en_US.utf8), LANGUAGE=C (charmap=UTF-8) (ignored: LC_ALL set to en_US.utf8) > Shell: /bin/sh linked to /bin/dash > Init: systemd (via /run/systemd/system) > > Versions of packages dash depends on: > ii debianutils 4.8.1.1 > ii dpkg 1.18.24 > ii libc6 2.24-11+deb9u1 > > dash recommends no packages. > > dash suggests no packages. > > -- debconf information: > * dash/sh: true This is inherited from NetBSD. There is even a commented-out block of code that tried to fix this. Anyway, we now have faccessat so we can simply use it. Reported-by: Norman Ramsey Reported-by: Nicola Lamacchia Signed-off-by: Herbert Xu --- src/bltin/test.c | 10 +++------- src/exec.c | 38 +++++++++++++++++++++----------------- src/exec.h | 5 +++++ 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/src/bltin/test.c b/src/bltin/test.c index c7fc479..fd8a43b 100644 --- a/src/bltin/test.c +++ b/src/bltin/test.c @@ -18,6 +18,7 @@ #include #include #include "bltin.h" +#include "../exec.h" /* test(1) accepts the following grammar: oexpr ::= aexpr | aexpr "-o" oexpr ; @@ -148,11 +149,6 @@ static int isoperand(char **); static int newerf(const char *, const char *); static int olderf(const char *, const char *); static int equalf(const char *, const char *); -#ifdef HAVE_FACCESSAT -static int test_file_access(const char *, int); -#else -static int test_access(const struct stat64 *, int); -#endif #ifdef HAVE_FACCESSAT # ifdef HAVE_TRADITIONAL_FACCESSAT @@ -527,7 +523,7 @@ static int has_exec_bit_set(const char *path) return st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH); } -static int test_file_access(const char *path, int mode) +int test_file_access(const char *path, int mode) { 
if (faccessat_confused_about_superuser() && mode == X_OK && geteuid() == 0 && !has_exec_bit_set(path)) @@ -657,7 +653,7 @@ static int test_file_access(const char *path, int mode) * (euid==uid&&egid==gid), but uses st_mode for '-x' iff running as root. * i.e. it does strictly conform to 1003.1-2001 (and presumably 1003.2b). */ -static int test_access(const struct stat64 *sp, int stmode) +int test_access(const struct stat64 *sp, int stmode) { gid_t *groups; register int n; diff --git a/src/exec.c b/src/exec.c index 4b777d2..6fe0fed 100644 --- a/src/exec.c +++ b/src/exec.c @@ -325,7 +325,22 @@ printentry(struct tblentry *cmdp) out1fmt(snlfmt, cmdp->rehash ? "*" : nullstr); } +static int test_exec(const char *fullname, struct stat64 *statb) +{ + if (!S_ISREG(statb->st_mode)) + return 0; + + if ((statb->st_mode & 0111) != 0111 && +#ifdef HAVE_FACCESSAT + !test_file_access(fullname, X_OK) +#else + !test_access(statb, X_OK) +#endif + ) + return 0; + return 1; +} /* * Resolve a command name. If you change this routine, you may have to @@ -354,9 +369,12 @@ find_command(char *name, struct cmdentry *entry, int act, const char *path) if (errno == EINTR) continue; #endif +absfail: entry->cmdtype = CMDUNKNOWN; return; } + if (!test_exec(name, &statb)) + goto absfail; } entry->cmdtype = CMDNORMAL; return; @@ -451,9 +469,6 @@ find_command(char *name, struct cmdentry *entry, int act, const char *path) e = errno; goto loop; } - e = EACCES; /* if we fail, this will be the error */ - if (!S_ISREG(statb.st_mode)) - continue; if (lpathopt) { /* this is a %func directory */ stalloc(len); readcmdfile(fullname); @@ -464,20 +479,9 @@ find_command(char *name, struct cmdentry *entry, int act, const char *path) stunalloc(fullname); goto success; } -#ifdef notdef - /* XXX this code stops root executing stuff, and is buggy - if you need a group from the group list. 
*/ - if (statb.st_uid == geteuid()) { - if ((statb.st_mode & 0100) == 0) - goto loop; - } else if (statb.st_gid == getegid()) { - if ((statb.st_mode & 010) == 0) - goto loop; - } else { - if ((statb.st_mode & 01) == 0) - goto loop; - } -#endif + e = EACCES; /* if we fail, this will be the error */ + if (!test_exec(fullname, &statb)) + continue; TRACE(("searchexec \"%s\" returns \"%s\"\n", name, fullname)); if (!updatetbl) { entry->cmdtype = CMDNORMAL; diff --git a/src/exec.h b/src/exec.h index 423b07e..8707d36 100644 --- a/src/exec.h +++ b/src/exec.h @@ -62,6 +62,8 @@ union node; extern const char *pathopt; /* set by padvance */ +struct stat64; + void shellexec(char **, const char *, int) __attribute__((__noreturn__)); int padvance_magic(const char **path, const char *name, int magic); @@ -78,6 +80,9 @@ void unsetfunc(const char *); int typecmd(int, char **); int commandcmd(int, char **); +int test_file_access(const char *path, int mode); +int test_access(const struct stat64 *sp, int stmode); + static inline int padvance(const char **path, const char *name) { return padvance_magic(path, name, 1); From 74085cc28deb9d95867ad7c350efd11ea722a552 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 7 Apr 2024 17:04:37 +0800 Subject: [PATCH 091/401] jobs: Allow monitor mode without a tty in non-interactive mode When a tty is unavailable, or the shell is in the background, job control could still be used for the purpose of setting process groups. This is based on work by Jilles Tjoelker from FreeBSD and Steffen Nurpmeso. 
Reported-by: Steffen Nurpmeso Reported-by: Ganael Laplanche Signed-off-by: Herbert Xu --- src/jobs.c | 53 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/src/jobs.c b/src/jobs.c index a0f4d47..2a2fe22 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -187,11 +187,21 @@ set_curjob(struct job *jp, unsigned mode) int jobctl; +static void xxtcsetpgrp(pid_t pgrp) +{ + int fd = ttyfd; + + if (fd < 0) + return; + + xtcsetpgrp(fd, pgrp); +} + void setjobctl(int on) { + int pgrp = -1; int fd; - int pgrp; if (on == jobctl || rootshell == 0) return; @@ -207,36 +217,43 @@ setjobctl(int on) fd = savefd(fd, ofd); do { /* while we are in the background */ if ((pgrp = tcgetpgrp(fd)) < 0) { +close: + close(fd); + fd = -1; out: + if (!iflag) + break; sh_warnx("can't access tty; job control turned off"); mflag = on = 0; - goto close; + return; } if (pgrp == getpgrp()) break; + if (!iflag) + goto close; killpg(0, SIGTTIN); } while (1); initialpgrp = pgrp; - - setsignal(SIGTSTP); - setsignal(SIGTTOU); - setsignal(SIGTTIN); pgrp = rootpid; - setpgid(0, pgrp); - xtcsetpgrp(fd, pgrp); } else { /* turning job control off */ fd = ttyfd; pgrp = initialpgrp; - xtcsetpgrp(fd, pgrp); + } + + setsignal(SIGTSTP); + setsignal(SIGTTOU); + setsignal(SIGTTIN); + if (fd >= 0) { setpgid(0, pgrp); - setsignal(SIGTSTP); - setsignal(SIGTTOU); - setsignal(SIGTTIN); -close: - close(fd); - fd = -1; + xtcsetpgrp(fd, pgrp); + + if (!on) { + close(fd); + fd = -1; + } } + ttyfd = fd; jobctl = on; } @@ -391,7 +408,7 @@ restartjob(struct job *jp, int mode) jp->state = JOBRUNNING; pgid = jp->ps->pid; if (mode == FORK_FG) - xtcsetpgrp(ttyfd, pgid); + xxtcsetpgrp(pgid); killpg(pgid, SIGCONT); ps = jp->ps; i = jp->nprocs; @@ -874,7 +891,7 @@ static void forkchild(struct job *jp, union node *n, int mode) /* This can fail because we are doing it in the parent also */ (void)setpgid(0, pgrp); if (mode == FORK_FG) - xtcsetpgrp(ttyfd, pgrp); + xxtcsetpgrp(pgrp); 
setsignal(SIGTSTP); setsignal(SIGTTOU); } else @@ -1014,7 +1031,7 @@ waitforjob(struct job *jp) st = getstatus(jp); #if JOBS if (jp->jobctl) { - xtcsetpgrp(ttyfd, rootpid); + xxtcsetpgrp(rootpid); /* * This is truly gross. * If we're doing job control, then we did a TIOCSPGRP which From 865f44f3fdbc97e21dd279cba46376984cb1e059 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 8 Apr 2024 12:55:03 +0800 Subject: [PATCH 092/401] alias: Fix out-of-bound access Check for empty string before searching for equal sign starting at n+1 in aliascmd. Reported-by: Harald van Dijk Signed-off-by: Herbert Xu --- src/alias.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/alias.c b/src/alias.c index fcad43b..cee07e9 100644 --- a/src/alias.c +++ b/src/alias.c @@ -143,7 +143,8 @@ aliascmd(int argc, char **argv) return (0); } while ((n = *++argv) != NULL) { - if ((v = strchr(n+1, '=')) == NULL) { /* n+1: funny ksh stuff */ + /* n + 1: funny ksh stuff (from 44lite) */ + if (!*n || !(v = strchr(n + 1, '='))) { if ((ap = *__lookupalias(n)) == NULL) { outfmt(out2, "%s: %s not found\n", "alias", n); ret = 1; From 1c8cf3e96d3ff221dbcf3f8447fd197cdca18939 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 11 Apr 2024 15:43:52 +0800 Subject: [PATCH 093/401] var: Fix unexporting of local variables using unset Local variables and other variables with the flag VSTRFIXED set could not be unexported using the unset command. Fix this by adding a special case in setvareq for them. 
Reported-by: Christoph Anton Mitterer Fixes: e3c9a7dd7097 ("[VAR] Move unsetvar functionality into setvareq") Signed-off-by: Herbert Xu --- src/var.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/var.c b/src/var.c index 21e0abf..6f85be3 100644 --- a/src/var.c +++ b/src/var.c @@ -255,6 +255,8 @@ struct var *setvareq(char *s, int flags) vpp = findvar(s); vp = *vpp; if (vp) { + unsigned bits; + if (vp->flags & VREADONLY) { const char *n; @@ -274,8 +276,11 @@ struct var *setvareq(char *s, int flags) if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0) ckfree(vp->text); - if (((flags & (VEXPORT|VREADONLY|VSTRFIXED|VUNSET)) | - (vp->flags & VSTRFIXED)) == VUNSET) { + if ((flags & (VEXPORT|VREADONLY|VSTRFIXED|VUNSET)) != VUNSET) + bits = ~(VTEXTFIXED|VSTACK|VNOSAVE|VUNSET); + else if ((vp->flags & VSTRFIXED)) + bits = VSTRFIXED; + else { *vpp = vp->next; ckfree(vp); out_free: @@ -284,7 +289,7 @@ struct var *setvareq(char *s, int flags) goto out; } - flags |= vp->flags & ~(VTEXTFIXED|VSTACK|VNOSAVE|VUNSET); + flags |= vp->flags & bits; } else { if (flags & VNOSET) goto out; From 177072c2e718d2fa9758be9925b8558aedbc0227 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 11 Apr 2024 15:46:28 +0800 Subject: [PATCH 094/401] var: Remove unused VNOSET The bit VNOSET is no longer used. Remove it. 
Signed-off-by: Herbert Xu --- src/var.c | 5 ----- src/var.h | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/var.c b/src/var.c index 6f85be3..895eabc 100644 --- a/src/var.c +++ b/src/var.c @@ -267,9 +267,6 @@ struct var *setvareq(char *s, int flags) n); } - if (flags & VNOSET) - goto out; - if (vp->func && (flags & VNOFUNC) == 0) (*vp->func)(varnull(s)); @@ -291,8 +288,6 @@ struct var *setvareq(char *s, int flags) flags |= vp->flags & bits; } else { - if (flags & VNOSET) - goto out; if ((flags & (VEXPORT|VREADONLY|VSTRFIXED|VUNSET)) == VUNSET) goto out_free; /* not found */ diff --git a/src/var.h b/src/var.h index aa7575a..953a694 100644 --- a/src/var.h +++ b/src/var.h @@ -48,7 +48,7 @@ #define VSTACK 0x10 /* text is allocated on the stack */ #define VUNSET 0x20 /* the variable is not set */ #define VNOFUNC 0x40 /* don't call the callback function */ -#define VNOSET 0x80 /* do not set variable - just readonly test */ +/* #define VNOSET 0x80 do not set variable - just readonly test */ #define VNOSAVE 0x100 /* when text is on the heap before setvareq */ From c8db655b3c7056f20362d400a1b3fd2910900c76 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 12 Apr 2024 17:51:25 +0800 Subject: [PATCH 095/401] alias: Disallow non-CWORD characters Alias names containing control characters may match words from the parser that shouldn't be matched. Disallow such characters from appearing in an alias name. Reported-by: Harald van Dijk Signed-off-by: Herbert Xu --- src/alias.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/alias.c b/src/alias.c index cee07e9..3cd3413 100644 --- a/src/alias.c +++ b/src/alias.c @@ -41,6 +41,7 @@ #include "mystring.h" #include "alias.h" #include "options.h" /* XXX for argptr (should remove?) 
*/ +#include "syntax.h" #define ATABSIZE 39 @@ -55,6 +56,11 @@ void setalias(const char *name, const char *val) { struct alias *ap, **app; + const char *p; + + for (p = name; *p; p++) + if (BASESYNTAX[(signed char)*p] != CWORD) + sh_error("Invalid alias name: %s", name); app = __lookupalias(name); ap = *app; From 21847204559bf9a720f3863e19e8f046fdce0bf1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 14 Apr 2024 11:08:02 +0800 Subject: [PATCH 096/401] expand: Fix here-document file descriptor leak Swap the order of here-document expansion and pipe creation as otherwise the pipe file descriptors will become accessible in the expanded text. Fixes: f4ee8c859c3d ("[EXPAND] Expand here-documents in the...") Signed-off-by: Herbert Xu --- src/redir.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/redir.c b/src/redir.c index d74602c..bcc81b4 100644 --- a/src/redir.c +++ b/src/redir.c @@ -335,15 +335,15 @@ openhere(union node *redir) int pip[2]; size_t len = 0; - if (pipe(pip) < 0) - sh_error("Pipe call failed"); - p = redir->nhere.doc->narg.text; if (redir->type == NXHERE) { expandarg(redir->nhere.doc, NULL, EXP_QUOTED); p = stackblock(); } + if (pipe(pip) < 0) + sh_error("Pipe call failed"); + len = strlen(p); if (len <= PIPESIZE) { xwrite(pip[1], p, len); From 7a11b3e330a36a2c33607ac77ceca656038b3798 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Apr 2024 16:47:33 +0800 Subject: [PATCH 097/401] parser: Extend coverage of CHKEOFMARK Extend the coverage of CHKEOFMARK to cover parameter expansion, arithmetic expansion, and command substitution. For command substitution, use the reconstruction from commandtext as the here-document marker. 
Reported-by: Harald van Dijk Signed-off-by: Herbert Xu --- src/jobs.c | 10 +++- src/jobs.h | 1 + src/parser.c | 140 ++++++++++++++++++++++++++++++++------------------- 3 files changed, 97 insertions(+), 54 deletions(-) diff --git a/src/jobs.c b/src/jobs.c index 2a2fe22..ac22ae5 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -1242,13 +1242,19 @@ commandtext(union node *n) { char *name; - STARTSTACKSTR(cmdnextc); - cmdtxt(n); + STARTSTACKSTR(name); + commandtextcont(n, name); name = stackblock(); TRACE(("commandtext: name %p, end %p\n", name, cmdnextc)); return savestr(name); } +char *commandtextcont(union node *n, char *next) +{ + cmdnextc = next; + cmdtxt(n); + return cmdnextc; +} STATIC void cmdtxt(union node *n) diff --git a/src/jobs.h b/src/jobs.h index 2832d64..a58d2a2 100644 --- a/src/jobs.h +++ b/src/jobs.h @@ -107,6 +107,7 @@ int forkshell(struct job *, union node *, int); struct job *vforkexec(union node *n, char **argv, const char *path, int idx); int waitforjob(struct job *); int stoppedjobs(void); +char *commandtextcont(union node *n, char *next); #if ! 
JOBS #define setjobctl(on) ((void)(on)) /* do nothing */ diff --git a/src/parser.c b/src/parser.c index 299c260..e3168de 100644 --- a/src/parser.c +++ b/src/parser.c @@ -46,6 +46,7 @@ #include "syntax.h" #include "options.h" #include "input.h" +#include "jobs.h" #include "output.h" #include "var.h" #include "error.h" @@ -628,9 +629,10 @@ parsefname(void) union node *n = redirnode; if (n->type == NHERE) - checkkwd = CHKEOFMARK; + checkkwd |= CHKEOFMARK; if (readtoken() != TWORD) synexpect(-1); + checkkwd &= ~CHKEOFMARK; if (n->type == NHERE) { struct heredoc *here = heredoc; struct heredoc *p; @@ -901,6 +903,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) /* syntax stack */ struct synstack synbase = { .syntax = syntax }; struct synstack *synstack = &synbase; + int chkeofmark = checkkwd & CHKEOFMARK; if (syntax == DQSYNTAX) synstack->dblquote = 1; @@ -1010,39 +1013,35 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) synstack_pop(&synstack); else if (synstack->dqvarnest > 0) synstack->dqvarnest--; - USTPUTC(CTLENDVAR, out); - } else { - USTPUTC(c, out); + if (!chkeofmark) + c = CTLENDVAR; } + USTPUTC(c, out); break; case CLP: /* '(' in arithmetic */ synstack->parenlevel++; USTPUTC(c, out); break; case CRP: /* ')' in arithmetic */ - if (synstack->parenlevel > 0) { - USTPUTC(c, out); + if (synstack->parenlevel > 0) --synstack->parenlevel; + else if (pgetc_eatbnl() == ')') { + synstack_pop(&synstack); + if (chkeofmark) + USTPUTC(c, out); + else + c = CTLENDARI; } else { - if (pgetc_eatbnl() == ')') { - USTPUTC(CTLENDARI, out); - synstack_pop(&synstack); - } else { - /* - * unbalanced parens - * (don't 2nd guess - no error) - */ - pungetc(); - USTPUTC(')', out); - } + /* + * unbalanced parens + * (don't 2nd guess - no error) + */ + pungetc(); } + USTPUTC(c, out); break; case CBQUOTE: /* '`' */ - if (checkkwd & CHKEOFMARK) { - USTPUTC('`', out); - break; - } - + USTPUTC('`', out); PARSEBACKQOLD(); break; case CEOF: 
@@ -1218,13 +1217,16 @@ parsesub: { static const char types[] = "}-+?="; c = pgetc_eatbnl(); - if ( - (checkkwd & CHKEOFMARK) || - (c != '(' && c != '{' && !is_name(c) && !is_special(c)) - ) { + if (c != '(' && c != '{' && !is_name(c) && !is_special(c)) { USTPUTC('$', out); pungetc(); - } else if (c == '(') { /* $(command) or $((arith)) */ + goto parsesub_return; + } + + USTPUTC('$', out); + + if (c == '(') { /* $(command) or $((arith)) */ + USTPUTC(c, out); if (pgetc_eatbnl() == '(') { PARSEARITH(); } else { @@ -1234,11 +1236,15 @@ parsesub: { } else { const char *newsyn = synstack->syntax; - USTPUTC(CTLVAR, out); typeloc = out - (char *)stackblock(); - STADJUST(1, out); + if (!chkeofmark) { + out[-1] = CTLVAR; + STADJUST(1, out); + } subtype = VSNORMAL; if (likely(c == '{')) { + if (chkeofmark) + USTPUTC('{', out); c = pgetc_eatbnl(); subtype = 0; } @@ -1262,8 +1268,11 @@ parsesub: { if (!subtype && cc == '#') { subtype = VSLENGTH; - if (c == '_' || isalnum(c)) + if (c == '_' || isalnum(c)) { + if (chkeofmark) + USTPUTC('#', out); goto varname; + } cc = c; c = pgetc_eatbnl(); @@ -1272,7 +1281,8 @@ parsesub: { subtype = 0; c = cc; cc = '#'; - } + } else if (chkeofmark) + USTPUTC('#', out); } if (!is_special(cc)) { @@ -1288,10 +1298,15 @@ parsesub: { if (subtype == 0) { int cc = c; + if (chkeofmark) + STPUTC(c, out); + switch (c) { case ':': subtype = VSNUL; c = pgetc_eatbnl(); + if (chkeofmark) + STPUTC(c, out); /*FALLTHROUGH*/ default: p = strchr(types, c); @@ -1304,9 +1319,11 @@ parsesub: { subtype = c == '#' ? 
VSTRIMLEFT : VSTRIMRIGHT; c = pgetc_eatbnl(); - if (c == cc) + if (c == cc) { + if (chkeofmark) + STPUTC(c, out); subtype++; - else + } else pungetc(); newsyn = BASESYNTAX; @@ -1333,13 +1350,15 @@ parsesub: { synstack->dblquote = newsyn != BASESYNTAX; } - *((char *)stackblock() + typeloc) = subtype | VSBIT; if (subtype != VSNORMAL) { synstack->varnest++; if (synstack->dblquote) synstack->dqvarnest++; } - STPUTC('=', out); + if (!chkeofmark) { + *((char *)stackblock() + typeloc) = subtype | VSBIT; + STPUTC('=', out); + } } goto parsesub_return; } @@ -1353,14 +1372,19 @@ parsesub: { */ parsebackq: { + int uninitialized_var(saveprompt); + struct heredoc *saveheredoclist; struct nodelist **nlpp; + size_t psavelen; + size_t savelen; union node *n; + char *pstr; char *str; - size_t savelen; - struct heredoc *saveheredoclist; - int uninitialized_var(saveprompt); - USTPUTC(CTLBACKQ, out); + if (!chkeofmark) { + STADJUST(oldstyle - 1, out); + out[-1] = CTLBACKQ; + } str = stackblock(); savelen = out - (char *)stackblock(); grabstackblock(savelen); @@ -1370,9 +1394,6 @@ parsebackq: { reread it as input, interpreting it normally. */ char *pout; int pc; - size_t psavelen; - char *pstr; - STARTSTACKSTR(pout); for (;;) { @@ -1405,10 +1426,8 @@ parsebackq: { done: STPUTC('\0', pout); psavelen = pout - (char *)stackblock(); - if (psavelen > 0) { - pstr = grabstackstr(pout); - setinputstring(pstr); - } + pstr = grabstackstr(pout); + setinputstring(pstr); } nlpp = &bqlist; while (*nlpp) @@ -1440,14 +1459,26 @@ parsebackq: { (*nlpp)->n = n; /* Start reading from old file again. */ popfile(); - /* Ignore any pushed back tokens left from the backquote parsing. */ - if (oldstyle) - tokpushback = 0; + out = stnputs(str, savelen, stackblock()); - if (oldstyle) + + if (oldstyle) { + /* Ignore any pushed back tokens left from the backquote + * parsing. 
+ */ + tokpushback = 0; + if (chkeofmark) { + pstr[psavelen - 1] = '`'; + out = stnputs(pstr, psavelen, out); + } goto parsebackq_oldreturn; - else + } else { + if (chkeofmark) { + out = commandtextcont(n, out); + USTPUTC(')', out); + } goto parsebackq_newreturn; + } } /* @@ -1459,7 +1490,12 @@ parsearith: { synstack->prev ?: alloca(sizeof(*synstack)), ARISYNTAX); synstack->dblquote = 1; - USTPUTC(CTLARI, out); + if (chkeofmark) + USTPUTC(c, out); + else { + STADJUST(-1, out); + out[-1] = CTLARI; + } goto parsearith_return; } From 94b1e82588289a477933f7075db7f098d4755fad Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 20 Apr 2024 08:36:04 +0800 Subject: [PATCH 098/401] trap: Preserve parent traps for trap-only command substitution Traps are reset when a subshell is started. When a subshell is started for command substitution with a simple command whose first word is "trap", preserve the parent trap text so that they can be printed. Signed-off-by: Herbert Xu --- src/eval.c | 4 ++-- src/init.h | 4 +++- src/jobs.c | 2 +- src/mkinit.c | 5 +++-- src/trap.c | 57 +++++++++++++++++++++++++++++++++++++++------------- 5 files changed, 52 insertions(+), 20 deletions(-) diff --git a/src/eval.c b/src/eval.c index 978a174..f65f55f 100644 --- a/src/eval.c +++ b/src/eval.c @@ -491,11 +491,11 @@ evalsubshell(union node *n, int flags) expredir(n->nredir.redirect); INTOFF; if (!backgnd && flags & EV_EXIT && !have_traps()) { - forkreset(); + forkreset(NULL); goto nofork; } jp = makejob(1); - if (forkshell(jp, n, backgnd) == 0) { + if (forkshell(jp, n->nredir.n, backgnd) == 0) { flags |= EV_EXIT; if (backgnd) flags &=~ EV_TESTED; diff --git a/src/init.h b/src/init.h index d56fb28..4f98b5d 100644 --- a/src/init.h +++ b/src/init.h @@ -34,7 +34,9 @@ * @(#)init.h 8.2 (Berkeley) 5/4/95 */ +union node; + void init(void); void exitreset(void); -void forkreset(void); +void forkreset(union node *); void reset(void); diff --git a/src/jobs.c b/src/jobs.c index ac22ae5..24dcae7 100644 --- 
a/src/jobs.c +++ b/src/jobs.c @@ -872,7 +872,7 @@ static void forkchild(struct job *jp, union node *n, int mode) if (!lvforked) { shlvl++; - forkreset(); + forkreset(mode == FORK_NOJOB ? n : NULL); #if JOBS /* do job control only in root shell */ diff --git a/src/mkinit.c b/src/mkinit.c index 9025862..870b64d 100644 --- a/src/mkinit.c +++ b/src/mkinit.c @@ -91,6 +91,7 @@ struct event { char *name; /* name of event (e.g. INIT) */ char *routine; /* name of routine called on event */ char *comment; /* comment describing routine */ + char *args; /* arguments to routine */ struct text code; /* code for handling event */ }; @@ -128,7 +129,7 @@ char reset[] = "\ struct event event[] = { {"INIT", "init", init}, {"EXITRESET", "exitreset", exitreset}, - {"FORKRESET", "forkreset", forkreset}, + {"FORKRESET", "forkreset", forkreset, "union node *n"}, {"RESET", "reset", reset}, {NULL, NULL} }; @@ -388,7 +389,7 @@ output(void) for (ep = event ; ep->name ; ep++) { fputs("\n\n\n", fp); fputs(ep->comment, fp); - fprintf(fp, "\nvoid\n%s() {\n", ep->routine); + fprintf(fp, "\nvoid\n%s(%s) {\n", ep->routine, ep->args ?: ""); writetext(&ep->code, fp); fprintf(fp, "}\n"); } diff --git a/src/trap.c b/src/trap.c index cd84814..75501d7 100644 --- a/src/trap.c +++ b/src/trap.c @@ -66,7 +66,9 @@ /* trap handler commands */ -MKINIT char *trap[NSIG]; +static char *trap[NSIG]; +/* traps have not been fully cleared */ +static int ptrap; /* number of non-null traps */ int trapcnt; /* current value of signal */ @@ -81,6 +83,7 @@ volatile sig_atomic_t gotsigchld; extern char *signal_names[]; static int decode_signum(const char *); +MKINIT void clear_traps(union node *); #ifdef mkinit INCLUDE "memalloc.h" @@ -92,19 +95,7 @@ INIT { } FORKRESET { - char **tp; - - INTOFF; - for (tp = trap ; tp < &trap[NSIG] ; tp++) { - if (*tp && **tp) { /* trap not NULL or SIG_IGN */ - ckfree(*tp); - *tp = NULL; - if (tp != &trap[0]) - setsignal(tp - trap); - } - } - trapcnt = 0; - INTON; + clear_traps(n); } #endif @@ 
-133,6 +124,8 @@ trapcmd(int argc, char **argv) } return 0; } + if (ptrap) + clear_traps(NULL); if (!ap[1] || decode_signum(*ap) >= 0) action = NULL; else @@ -168,6 +161,40 @@ trapcmd(int argc, char **argv) +/* + * Clear traps on a fork. + */ + +void clear_traps(union node *n) +{ + int simplecmd; + char **tp; + + simplecmd = n && n->type == NCMD && n->ncmd.args && + equal(n->ncmd.args->narg.text, "trap"); + + INTOFF; + for (tp = trap ; tp < &trap[NSIG] ; tp++) { + if (*tp && **tp) { /* trap not NULL or SIG_IGN */ + char *otp = *tp; + + *tp = NULL; + if (tp != &trap[0]) + setsignal(tp - trap); + + if (simplecmd) + *tp = otp; + else + ckfree(*tp); + } + } + trapcnt = 0; + ptrap = simplecmd; + INTON; +} + + + /* * Set the signal handler for the specified signal. The routine figures * out what it should be set to. @@ -390,6 +417,8 @@ exitshell(void) handler = &loc; if ((p = trap[0])) { trap[0] = NULL; + if (ptrap) + goto out; evalskip = 0; evalstring(p, 0); evalskip = SKIPFUNCDEF; From 9881d00e939e75e5348aebe6046ff80d3b7edb17 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Apr 2024 20:01:33 +0800 Subject: [PATCH 099/401] jobs: Preserve parent jobs for simple commands Do not free parent shell jobs if a simple command with the first word being "jobs" is executed as a command substitution. 
Signed-off-by: Herbert Xu --- src/jobs.c | 6 ++++++ src/parser.c | 6 ++++++ src/parser.h | 3 +++ src/trap.c | 5 +++-- 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/jobs.c b/src/jobs.c index 24dcae7..840e37c 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -53,6 +53,7 @@ #include #undef CEOF /* syntax.h redefines this */ #endif +#include "builtins.h" #include "exec.h" #include "eval.h" #include "init.h" @@ -913,6 +914,11 @@ static void forkchild(struct job *jp, union node *n, int mode) if (lvforked) return; + freejob(jp); + + if (issimplecmd(n, JOBSCMD->name)) + return; + for (jp = curjob; jp; jp = jp->prev_job) freejob(jp); } diff --git a/src/parser.c b/src/parser.c index e3168de..27611f0 100644 --- a/src/parser.c +++ b/src/parser.c @@ -133,6 +133,12 @@ int isassignment(const char *p) return *q == '='; } +int issimplecmd(union node *n, const char *name) +{ + return n && n->type == NCMD && n->ncmd.args && + equal(n->ncmd.args->narg.text, name); +} + static inline int realeofmark(const char *eofmark) { return eofmark && eofmark != FAKEEOFMARK; diff --git a/src/parser.h b/src/parser.h index 729c15c..433573d 100644 --- a/src/parser.h +++ b/src/parser.h @@ -36,6 +36,8 @@ #include "token.h" +union node; + /* control characters in argument strings */ #define CTL_FIRST -127 /* first 'special' character */ #define CTLESC -127 /* escape next character */ @@ -85,6 +87,7 @@ extern int checkkwd; int isassignment(const char *p); +int issimplecmd(union node *n, const char *name); union node *parsecmd(int); void fixredir(union node *, const char *, int); const char *getprompt(void *); diff --git a/src/trap.c b/src/trap.c index 75501d7..f871656 100644 --- a/src/trap.c +++ b/src/trap.c @@ -37,6 +37,7 @@ #include #include +#include "builtins.h" #include "shell.h" #include "main.h" #include "nodes.h" /* for other headers */ @@ -47,6 +48,7 @@ #include "options.h" #include "syntax.h" #include "output.h" +#include "parser.h" #include "memalloc.h" #include "error.h" 
#include "trap.h" @@ -170,8 +172,7 @@ void clear_traps(union node *n) int simplecmd; char **tp; - simplecmd = n && n->type == NCMD && n->ncmd.args && - equal(n->ncmd.args->narg.text, "trap"); + simplecmd = issimplecmd(n, TRAPCMD->name); INTOFF; for (tp = trap ; tp < &trap[NSIG] ; tp++) { From 509f5b0dcd710804fb6c66750c7c7fd2d30a3ec9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 21 Apr 2024 08:33:53 +0800 Subject: [PATCH 100/401] redir: Use memfd_create instead of pipe Use memfd_create(2) instead of pipe(2). With pipe(2), a fork is required if the amount of data to be written exceeds the pipe size. This is not the case with memfd_create. Signed-off-by: Herbert Xu --- configure.ac | 2 +- src/eval.c | 3 +-- src/redir.c | 59 +++++++++++++++++++++++++++++++++++----------------- src/redir.h | 1 + src/system.h | 7 +++++++ 5 files changed, 50 insertions(+), 22 deletions(-) diff --git a/configure.ac b/configure.ac index 6993364..cb55c3f 100644 --- a/configure.ac +++ b/configure.ac @@ -87,7 +87,7 @@ AC_CHECK_DECL([PRIdMAX],, dnl Checks for library functions. AC_CHECK_FUNCS(bsearch faccessat getpwnam getrlimit isalpha killpg \ - mempcpy \ + memfd_create mempcpy \ sigsetmask stpcpy strchrnul strsignal strtod strtoimax \ strtoumax sysconf) diff --git a/src/eval.c b/src/eval.c index f65f55f..d169eb8 100644 --- a/src/eval.c +++ b/src/eval.c @@ -635,8 +635,7 @@ evalbackcmd(union node *n, struct backcmd *result) goto out; } - if (pipe(pip) < 0) - sh_error("Pipe call failed"); + sh_pipe(pip, 0); jp = makejob(1); if (forkshell(jp, n, FORK_NOJOB) == 0) { FORCEINTON; diff --git a/src/redir.c b/src/redir.c index bcc81b4..bf5207d 100644 --- a/src/redir.c +++ b/src/redir.c @@ -32,6 +32,7 @@ * SUCH DAMAGE. 
*/ +#include #include #include #include /* PIPE_BUF */ @@ -280,6 +281,21 @@ openredirect(union node *redir) sh_open_fail(fname, O_CREAT, EEXIST); } +static int sh_dup2(int ofd, int nfd, int cfd) +{ + if (nfd < 0) { + nfd = dup(ofd); + if (nfd >= 0) + cfd = -1; + } else + nfd = dup2(ofd, nfd); + if (likely(cfd >= 0)) + close(cfd); + if (nfd < 0) + sh_error("%d: %s", ofd, strerror(errno)); + + return nfd; +} #ifdef notyet static void dupredirect(union node *redir, int f, char memory[10]) @@ -288,7 +304,6 @@ static void dupredirect(union node *redir, int f) #endif { int fd = redir->nfile.fd; - int err = 0; #ifdef notyet memory[fd] = 0; @@ -301,26 +316,31 @@ static void dupredirect(union node *redir, int f) memory[fd] = 1; else #endif - if (dup2(f, fd) < 0) { - err = errno; - goto err; - } + sh_dup2(f, fd, -1); return; } f = fd; - } else if (dup2(f, fd) < 0) - err = errno; + } else + sh_dup2(f, fd, f); close(f); - if (err < 0) - goto err; +} - return; +int sh_pipe(int pip[2], int memfd) +{ + if (memfd) { + pip[0] = memfd_create("dash", 0); + if (pip[0] >= 0) { + pip[1] = sh_dup2(pip[0], -1, pip[0]); + return 1; + } + } -err: - sh_error("%d: %s", f, strerror(err)); -} + if (pipe(pip) < 0) + sh_error("Pipe call failed"); + return 0; +} /* * Handle here documents. 
Normally we fork off a process to write the @@ -331,9 +351,10 @@ static void dupredirect(union node *redir, int f) STATIC int openhere(union node *redir) { - char *p; - int pip[2]; size_t len = 0; + int pip[2]; + int memfd; + char *p; p = redir->nhere.doc->narg.text; if (redir->type == NXHERE) { @@ -341,12 +362,12 @@ openhere(union node *redir) p = stackblock(); } - if (pipe(pip) < 0) - sh_error("Pipe call failed"); - len = strlen(p); - if (len <= PIPESIZE) { + memfd = sh_pipe(pip, len > PIPESIZE); + + if (memfd || len <= PIPESIZE) { xwrite(pip[1], p, len); + lseek(pip[1], 0, SEEK_SET); goto out; } diff --git a/src/redir.h b/src/redir.h index 16f5c20..0be5f1a 100644 --- a/src/redir.h +++ b/src/redir.h @@ -50,4 +50,5 @@ int redirectsafe(union node *, int); void unwindredir(struct redirtab *stop); struct redirtab *pushredir(union node *redir); int sh_open(const char *pathname, int flags, int mayfail); +int sh_pipe(int pip[2], int memfd); diff --git a/src/system.h b/src/system.h index 007952c..371c64b 100644 --- a/src/system.h +++ b/src/system.h @@ -54,6 +54,13 @@ static inline void sigclearmask(void) #endif } +#ifndef HAVE_MEMFD_CREATE +static inline int memfd_create(const char *name, unsigned int flags) +{ + return -1; +} +#endif + #ifndef HAVE_MEMPCPY void *mempcpy(void *, const void *, size_t); #endif From f9af463600b1025e7e14914c9fcde972a2be991e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 Apr 2024 17:11:02 +0800 Subject: [PATCH 101/401] input: Fix history line reading regression When a newline is encountered with history support, terminate the loop immediately. 
Fixes: 44ae22beedf8 ("input: Disable lleft in SMALL mode") Signed-off-by: Herbert Xu --- src/input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/input.c b/src/input.c index 38969a7..fb9858f 100644 --- a/src/input.c +++ b/src/input.c @@ -301,7 +301,7 @@ static int preadbuffer(void) switch (c) { case '\n': parsefile->nleft = q - parsefile->nextc - 1; - goto check; + goto done; default: something = 1; @@ -320,6 +320,7 @@ static int preadbuffer(void) break; } } +done: input_set_lleft(parsefile, more); if (!IS_DEFINED_SMALL) From 13fc32156f7ea9b683ff8d3c983db233129d04e8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 27 Apr 2024 17:11:31 +0800 Subject: [PATCH 102/401] main: Fix profiling on longjmp exit paths Ensure that longjmp exit paths also write the profiling file. Signed-off-by: Herbert Xu --- src/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main.c b/src/main.c index 5c49fdc..7beb280 100644 --- a/src/main.c +++ b/src/main.c @@ -112,7 +112,7 @@ main(int argc, char **argv) s = state; if (e == EXEND || e == EXEXIT || s == 0 || iflag == 0 || shlvl) - exitshell(); + goto exit; reset(); @@ -175,6 +175,7 @@ main(int argc, char **argv) state4: /* XXX ??? - why isn't this before the "if" statement */ cmdloop(1); } +exit: #if PROFILE monitor(0); #endif From 43ffd41e7c8233af79cfb74ac416de290711459b Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Sun, 28 Apr 2024 01:38:34 +0100 Subject: [PATCH 103/401] alias: Simplify alias storage Rather than storing the alias name and value separately, we can simplify code and reduce code size by storing them in name=value form. This allows us to re-use some code from var.c to handle hashing and comparisons, so long as we update that to account for aliases' special handling of a leading = character. This is okay to do for variables as well, as for variables the leading character is guaranteed to not be =.
Signed-off-by: Herbert Xu --- src/alias.c | 40 ++++++++++++++-------------------------- src/input.c | 4 ++-- src/var.c | 27 +++++++++++---------------- src/var.h | 15 +++++++++++++++ 4 files changed, 42 insertions(+), 44 deletions(-) diff --git a/src/alias.c b/src/alias.c index 3cd3413..bacf10f 100644 --- a/src/alias.c +++ b/src/alias.c @@ -42,6 +42,7 @@ #include "alias.h" #include "options.h" /* XXX for argptr (should remove?) */ #include "syntax.h" +#include "var.h" #define ATABSIZE 39 @@ -56,30 +57,31 @@ void setalias(const char *name, const char *val) { struct alias *ap, **app; - const char *p; + const char *p = name; + size_t namelen; - for (p = name; *p; p++) + do { if (BASESYNTAX[(signed char)*p] != CWORD) sh_error("Invalid alias name: %s", name); + } while (*++p != '='); app = __lookupalias(name); ap = *app; INTOFF; if (ap) { - if (!(ap->flag & ALIASINUSE)) { - ckfree(ap->val); - } - ap->val = savestr(val); + if (!(ap->flag & ALIASINUSE)) + ckfree(ap->name); ap->flag &= ~ALIASDEAD; } else { /* not found */ ap = ckmalloc(sizeof (struct alias)); - ap->name = savestr(name); - ap->val = savestr(val); ap->flag = 0; ap->next = 0; *app = ap; } + namelen = val - name; + ap->name = savestr(name); + ap->val = ap->name + namelen; INTON; } @@ -157,8 +159,7 @@ aliascmd(int argc, char **argv) } else printalias(ap); } else { - *v++ = '\0'; - setalias(n, v); + setalias(n, v + 1); } } @@ -197,36 +198,23 @@ freealias(struct alias *ap) { next = ap->next; ckfree(ap->name); - ckfree(ap->val); ckfree(ap); return next; } void printalias(const struct alias *ap) { - out1str(single_quote(ap->name)); - out1fmt("=%s\n", single_quote(ap->val)); + out1fmt("%s\n", single_quote(ap->name)); } STATIC struct alias ** __lookupalias(const char *name) { - unsigned int hashval; struct alias **app; - const char *p; - unsigned int ch; - - p = name; - ch = (unsigned char)*p; - hashval = ch << 4; - while (ch) { - hashval += ch; - ch = (unsigned char)*++p; - } - app = &atab[hashval % ATABSIZE]; + app 
= &atab[hashval(name) % ATABSIZE]; for (; *app; app = &(*app)->next) { - if (equal(name, (*app)->name)) { + if (varequal(name, (*app)->name)) { break; } } diff --git a/src/input.c b/src/input.c index fb9858f..c0c7410 100644 --- a/src/input.c +++ b/src/input.c @@ -387,7 +387,7 @@ pushstring(char *s, void *ap) sp->ap = (struct alias *)ap; if (ap) { ((struct alias *)ap)->flag |= ALIASINUSE; - sp->string = s; + sp->string = ((struct alias *)ap)->name; } parsefile->nextc = s; parsefile->nleft = len; @@ -406,7 +406,7 @@ static void popstring(void) parsefile->nextc[-1] == '\t') { checkkwd |= CHKALIAS; } - if (sp->string != sp->ap->val) { + if (sp->string != sp->ap->name) { ckfree(sp->string); } } diff --git a/src/var.c b/src/var.c index 895eabc..35ea7c6 100644 --- a/src/var.c +++ b/src/var.c @@ -622,12 +622,7 @@ void unsetvar(const char *s) STATIC struct var ** hashvar(const char *p) { - unsigned int hashval; - - hashval = ((unsigned char) *p) << 4; - while (*p && *p != '=') - hashval += (unsigned char) *p++; - return &vartab[hashval % VTABSIZE]; + return &vartab[hashval(p) % VTABSIZE]; } @@ -641,19 +636,19 @@ hashvar(const char *p) int varcmp(const char *p, const char *q) { - int c, d; - - while ((c = *p) == (d = *q)) { - if (!c || c == '=') - goto out; + int c = *p, d = *q; + while (c == d) { + if (!c) + break; p++; q++; + c = *p; + d = *q; + if (c == '=') + c = '\0'; + if (d == '=') + d = '\0'; } - if (c == '=') - c = 0; - if (d == '=') - d = 0; -out: return c - d; } diff --git a/src/var.h b/src/var.h index 953a694..f6fb320 100644 --- a/src/var.h +++ b/src/var.h @@ -153,6 +153,21 @@ int unsetcmd(int, char **); void unsetvar(const char *); int varcmp(const char *, const char *); +static inline unsigned int hashval(const char *p) +{ + unsigned int hashval; + + hashval = ((unsigned char) *p) << 4; + while (*p) { + hashval += (unsigned char) *p++; + if (*p == '=') + break; + } + + return hashval; +} + + static inline int varequal(const char *a, const char *b) { return 
!varcmp(a, b); } From 1f1e555aba99808a82cb5090b5ef980714dea09c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 1 May 2024 17:12:27 +0800 Subject: [PATCH 104/401] expand: Fix naked backslash leakage Naked backslashes in patterns may incorrectly unquote subsequent wild characters that are themselves quoted. Fix this by adding an extra backslash when necessary. Test case: a="\\*bc"; b="\\"; c="*"; echo "<${a##$b"$c"}>" Old result: <> New result: <bc> Signed-off-by: Herbert Xu --- src/expand.c | 10 ++++++++-- src/mystring.c | 1 + src/mystring.h | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/expand.c b/src/expand.c index 2ed02d6..0db2b29 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1658,6 +1658,7 @@ _rmescapes(char *str, int flag) char *p, *q, *r; int notescaped; int globbing; + int inquotes; p = strpbrk(str, cqchars); if (!p) { @@ -1692,16 +1693,17 @@ _rmescapes(char *str, int flag) q = mempcpy(q, str, len); } } + inquotes = 0; notescaped = globbing; while (*p) { if (*p == (char)CTLQUOTEMARK) { p++; - notescaped = globbing; + inquotes ^= globbing; continue; } if (*p == '\\') { /* naked back slash */ - notescaped = 0; + notescaped ^= globbing; goto copy; } if (FNMATCH_IS_ENABLED && *p == '^') @@ -1711,6 +1713,10 @@ _rmescapes(char *str, int flag) add_escape: if (notescaped) *q++ = '\\'; + else if (inquotes) { + *q++ = '\\'; + *q++ = '\\'; + } } notescaped = globbing; copy: diff --git a/src/mystring.c b/src/mystring.c index f651521..5eace6c 100644 --- a/src/mystring.c +++ b/src/mystring.c @@ -63,6 +63,7 @@ const char snlfmt[] = "%s\n"; const char dolatstr[] = { CTLQUOTEMARK, CTLVAR, VSNORMAL | VSBIT, '@', '=', CTLQUOTEMARK, '\0' }; const char cqchars[] = { + '\\', #ifdef HAVE_FNMATCH '^', #endif diff --git a/src/mystring.h b/src/mystring.h index 564b911..d0ec9dd 100644 --- a/src/mystring.h +++ b/src/mystring.h @@ -48,7 +48,7 @@ extern const char spcstr[]; extern const char dolatstr[]; #define DOLATSTRLEN 6 extern const char cqchars[]; 
-#define qchars (cqchars + FNMATCH_IS_ENABLED) +#define qchars (cqchars + FNMATCH_IS_ENABLED + 1) extern const char illnum[]; extern const char homestr[]; From 32fdc38848381389450aa736a82f7285ed61fc68 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 2 May 2024 18:22:20 +0800 Subject: [PATCH 105/401] input: Fix potential out-of-bounds read in popstring For an empty alias, the check on the last character of the alias in popstring may read a bogus byte. Fix this by checking whether the alias is empty or not before reading the last byte. Signed-off-by: Herbert Xu --- src/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/input.c b/src/input.c index c0c7410..1c598b2 100644 --- a/src/input.c +++ b/src/input.c @@ -401,7 +401,7 @@ static void popstring(void) struct strpush *sp = parsefile->strpush; INTOFF; - if (sp->ap) { + if (sp->ap && parsefile->nextc > sp->string) { if (parsefile->nextc[-1] == ' ' || parsefile->nextc[-1] == '\t') { checkkwd |= CHKALIAS; From 245a47b9a406b21f46d1aeb4a52329bbace6ef34 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 4 May 2024 13:18:02 +0800 Subject: [PATCH 106/401] mystring: Add a few more uses of snlfmt Use snlfmt in a few more places. Signed-off-by: Herbert Xu --- src/alias.c | 2 +- src/jobs.c | 2 +- src/miscbltin.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/alias.c b/src/alias.c index bacf10f..a443e05 100644 --- a/src/alias.c +++ b/src/alias.c @@ -204,7 +204,7 @@ freealias(struct alias *ap) { void printalias(const struct alias *ap) { - out1fmt("%s\n", single_quote(ap->name)); + out1fmt(snlfmt, single_quote(ap->name)); } STATIC struct alias ** diff --git a/src/jobs.c b/src/jobs.c index 840e37c..4cf6b8c 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -345,7 +345,7 @@ int killcmd(int argc, char **argv) pid = **argv == '-' ? 
-number(*argv + 1) : number(*argv); if (kill(pid, signo) != 0) { - sh_warnx("%s\n", strerror(errno)); + sh_warnx(snlfmt, strerror(errno)); i = 1; } } while (*++argv); diff --git a/src/miscbltin.c b/src/miscbltin.c index e553f9e..8a0ddf4 100644 --- a/src/miscbltin.c +++ b/src/miscbltin.c @@ -237,7 +237,7 @@ umaskcmd(int argc, char **argv) *ap++ = ','; } ap[-1] = '\0'; - out1fmt("%s\n", buf); + out1fmt(snlfmt, buf); } else { out1fmt("%.4o\n", mask); } From 257f6b36b8505dca9d073a2c5d4903c9c86faae9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 4 May 2024 13:30:33 +0800 Subject: [PATCH 107/401] alias: Mark printalias as noinline The function printalias is not on any critical path and inlining it makes no sense. Signed-off-by: Herbert Xu --- src/alias.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/alias.c b/src/alias.c index a443e05..1b9b979 100644 --- a/src/alias.c +++ b/src/alias.c @@ -202,8 +202,7 @@ freealias(struct alias *ap) { return next; } -void -printalias(const struct alias *ap) { +void __attribute__((noinline)) printalias(const struct alias *ap) { out1fmt(snlfmt, single_quote(ap->name)); } From ea5e24281aaaead307b08bd3f646a138c19ea6bc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 4 May 2024 15:21:22 +0800 Subject: [PATCH 108/401] redir: Fix double close in dupredirect For a redirection like "> /dev/null" dupredirect will close the newly opened file descriptor twice in a row because sh_dup2 also closes the new file descriptor. Remove the extra close in dupredirect. 
Fixes: 509f5b0dcd71 ("redir: Use memfd_create instead of pipe") Signed-off-by: Herbert Xu --- src/redir.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/redir.c b/src/redir.c index bf5207d..2505d49 100644 --- a/src/redir.c +++ b/src/redir.c @@ -319,11 +319,9 @@ static void dupredirect(union node *redir, int f) sh_dup2(f, fd, -1); return; } - f = fd; + close(fd); } else sh_dup2(f, fd, f); - - close(f); } int sh_pipe(int pip[2], int memfd) From f47009f9a76ef953e1fd298f7cd5cb082233cf1a Mon Sep 17 00:00:00 2001 From: Martijn Dekker Date: Sun, 12 May 2024 22:02:51 +0100 Subject: [PATCH 109/401] redir: Fix non-Linux build As of 509f5b0d, the build fails on Darwin/macOS, which does not have memfd_create(2). The build fails on redir.c because the fallback for memfd_create, which is defined in system.h, is not included. Presumably it is the same on other systems without memfd_create. Fixes: 509f5b0dcd71 ("redir: Use memfd_create instead of pipe") Signed-off-by: Herbert Xu --- src/redir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/redir.c b/src/redir.c index 2505d49..c57d745 100644 --- a/src/redir.c +++ b/src/redir.c @@ -56,6 +56,7 @@ #include "output.h" #include "memalloc.h" #include "error.h" +#include "system.h" #include "trap.h" From 5d5f9d7f638d8353de38e713083710f9c62cd881 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 18 May 2024 08:44:21 +0800 Subject: [PATCH 110/401] memalloc: Use void * instead of pointer Use void * instead of the obsolete pointer typedef. Signed-off-by: Herbert Xu --- src/memalloc.c | 16 ++++++---------- src/memalloc.h | 10 +++++----- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/memalloc.c b/src/memalloc.c index 60637da..7aa8c58 100644 --- a/src/memalloc.c +++ b/src/memalloc.c @@ -47,10 +47,9 @@ * Like malloc, but returns an error when out of space. 
*/ -pointer -ckmalloc(size_t nbytes) +void *ckmalloc(size_t nbytes) { - pointer p; + void *p; p = malloc(nbytes); if (p == NULL) @@ -63,8 +62,7 @@ ckmalloc(size_t nbytes) * Same for realloc. */ -pointer -ckrealloc(pointer p, size_t nbytes) +void *ckrealloc(void *p, size_t nbytes) { p = realloc(p, nbytes); if (p == NULL) @@ -110,8 +108,7 @@ char *stacknxt = stackbase.space; size_t stacknleft = MINSIZE; char *sstrend = stackbase.space + MINSIZE; -pointer -stalloc(size_t nbytes) +void *stalloc(size_t nbytes) { char *p; size_t aligned; @@ -144,8 +141,7 @@ stalloc(size_t nbytes) } -void -stunalloc(pointer p) +void stunalloc(void *p) { #ifdef DEBUG if (!p || (stacknxt < (char *)p) || ((char *)p < stackp->space)) { @@ -221,7 +217,7 @@ static void growstackblock(size_t min) sp = stackp; prevstackp = sp->prev; grosslen = newlen + sizeof(struct stack_block) - MINSIZE; - sp = ckrealloc((pointer)sp, grosslen); + sp = ckrealloc(sp, grosslen); sp->prev = prevstackp; stackp = sp; stacknxt = sp->space; diff --git a/src/memalloc.h b/src/memalloc.h index b9adf76..a7f7996 100644 --- a/src/memalloc.h +++ b/src/memalloc.h @@ -48,11 +48,11 @@ extern char *stacknxt; extern size_t stacknleft; extern char *sstrend; -pointer ckmalloc(size_t); -pointer ckrealloc(pointer, size_t); +void *ckmalloc(size_t); +void *ckrealloc(void *, size_t); char *savestr(const char *); -pointer stalloc(size_t); -void stunalloc(pointer); +void *stalloc(size_t); +void stunalloc(void *); void pushstackmark(struct stackmark *mark, size_t len); void setstackmark(struct stackmark *); void popstackmark(struct stackmark *); @@ -98,4 +98,4 @@ static inline char *_STPUTC(int c, char *p) { #define ungrabstackstr(s, p) stunalloc((s)) #define stackstrend() ((void *)sstrend) -#define ckfree(p) free((pointer)(p)) +#define ckfree(p) free((void *)(p)) From 9549169af923964ddf3d137432712c12abd3d3f2 Mon Sep 17 00:00:00 2001 From: Johannes Altmanninger Date: Sat, 18 May 2024 10:38:30 +0200 Subject: [PATCH 111/401] trap: Allow trap 
to un-ignore SIGINT/SIGQUIT in async subshells Unlike in Bash or Zsh, this asynchronous job ignores SIGINT, despite builtin trap explicitly resetting the SIGINT handler. dash -c '( trap - INT; sleep inf ) & read _' POSIX Section 2.11 on [Signals] and Error Handling says about background execution: > If job control is disabled (see the description of set -m) when > the shell executes an asynchronous list, the commands in the list > shall inherit from the shell a signal action of ignored (SIG_IGN) > for the SIGINT and SIGQUIT signals. Builtin [trap] has this requirement: > Signals that were ignored on entry to a non-interactive shell > cannot be trapped or reset, although no error need be reported when > attempting to do so. Apparently this only applies to signals that were inherited as ignored, not to the special case of SIGINT/SIGQUIT being ignored in asynchronous subshells. Make it so. This means that either of trap - INT; trap - QUIT set -i in a backgrounded subshell will now un-ignore SIGINT and SIGQUIT. [Signals]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html [trap]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#trap {{{ Test cases: shell=src/dash set -e SubshellWith() { parent_pid=$(setsid "$shell" -c "( $1; sleep 99 ) </dev/null >/dev/null 2>&1 & echo \$\$") sleep 1 subshell_pid=$(ps -o pid= -$parent_pid | tail -n 1) } trap 'kill -TERM -$parent_pid 2>/dev//null ||:' EXIT # Tear down after a failure. echo Scenario 0: '"set -i"' makes a subshell un-ignore SIGINT. SubshellWith 'set -i' kill -INT $subshell_pid ! ps -p $subshell_pid | grep sleep || exit 1 kill -TERM -$parent_pid 2>/dev//null ||: # Tear down. echo Scenario 1: resetting SIGINT handler. SubshellWith 'trap - INT' kill -INT -$parent_pid # kill the whole process group since that's my use case ! ps -p $subshell_pid | grep sleep || exit 1 kill -TERM -$parent_pid 2>/dev//null ||: # Tear down. echo Scenario 2: ignoring SIGINT. 
SubshellWith 'trap "" INT' kill -INT $subshell_pid ps -p $subshell_pid | grep sleep || exit 1 kill -TERM -$parent_pid 2>/dev//null ||: # Tear down. }}} {{{ Backstory/motivation: The Kakoune[1] editor likes to run noninteractive shell commands that boil down to mkfifo /tmp/fifo ( trap - INT make ) >/tmp/fifo 2>&1 & On Control-C, the editor sends SIGINT to its process group, which should terminate the subshell running make[2]. We experimented with sending SIGTERM instead but found issues, specifically if the editor is invoked (without exec) from a wrapper script, sending SIGTERM to the whole process group would kill the wrapper script, which in turn makes it send SIGTERM to the editor, which then terminates. [1]: https://kakoune.org/ [2]: https://lists.sr.ht/~mawww/kakoune/%3C20240307135831.1967826-3-aclopte@gmail.com%3E }}} Signed-off-by: Herbert Xu --- src/trap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/trap.c b/src/trap.c index f871656..4eb9849 100644 --- a/src/trap.c +++ b/src/trap.c @@ -296,11 +296,11 @@ setsignal(int signo) void ignoresig(int signo) { - if (sigmode[signo - 1] != S_IGN && sigmode[signo - 1] != S_HARD_IGN) { - signal(signo, SIG_IGN); - } + if (sigmode[signo - 1] == S_IGN || sigmode[signo - 1] == S_HARD_IGN) + return; + signal(signo, SIG_IGN); if (!vforked) - sigmode[signo - 1] = S_HARD_IGN; + sigmode[signo - 1] = S_IGN; } From 6e526a3924d952614fb92642ef910a9f7d1cb49a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 19 May 2024 12:26:55 +0800 Subject: [PATCH 112/401] expand: Always compile test fnmatch and glob Always compile test code paths with and without fnmatch and glob by avoiding the use of ifdefs. 
Signed-off-by: Herbert Xu --- src/expand.c | 28 +++++++----------------- src/mystring.h | 6 ++++++ src/system.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 20 deletions(-) diff --git a/src/expand.c b/src/expand.c index 0db2b29..399a9e6 100644 --- a/src/expand.c +++ b/src/expand.c @@ -119,20 +119,13 @@ static size_t strtodest(const char *p, int flags); static size_t memtodest(const char *p, size_t len, int flags); STATIC ssize_t varvalue(char *, int, int, int); STATIC void expandmeta(struct strlist *); -#ifdef HAVE_GLOB static void addglob(const glob64_t *); -#else STATIC void expmeta(char *, unsigned, unsigned); STATIC struct strlist *expsort(struct strlist *); STATIC struct strlist *msort(struct strlist *, int); -#endif STATIC void addfname(char *); STATIC int patmatch(char *, const char *); -#ifndef HAVE_FNMATCH STATIC int pmatch(const char *, const char *); -#else -#define pmatch(a, b) !fnmatch((a), (b), 0) -#endif static size_t cvtnum(intmax_t num, int flags); STATIC size_t esclen(const char *, const char *); STATIC void varunset(const char *, const char *, const char *, int) @@ -1156,9 +1149,8 @@ void ifsfree(void) * should be escapes. The results are stored in the list exparg. 
*/ -#ifdef HAVE_GLOB #ifdef __GLIBC__ -void *opendir_interruptible(const char *pathname) +static void *opendir_interruptible(const char *pathname) { if (int_pending()) { suppressint = 0; @@ -1171,11 +1163,8 @@ void *opendir_interruptible(const char *pathname) #define GLOB_ALTDIRFUNC 0 #endif -STATIC void -expandmeta(struct strlist *str) +static void expandmeta_glob(struct strlist *str) { - /* TODO - EXP_REDIR */ - while (str) { const char *p; glob64_t pglob; @@ -1236,8 +1225,6 @@ static void addglob(const glob64_t *pglob) } while (*++p); } - -#else /* HAVE_GLOB */ STATIC char *expdir; STATIC unsigned expdir_max; @@ -1250,6 +1237,9 @@ expandmeta(struct strlist *str) }; /* TODO - EXP_REDIR */ + if (GLOB_IS_ENABLED) + return expandmeta_glob(str); + while (str) { struct strlist **savelastp; struct strlist *sp; @@ -1416,7 +1406,6 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len) if (! atend) endname[-esc - 1] = esc ? '\\' : '/'; } -#endif /* HAVE_GLOB */ /* @@ -1435,7 +1424,6 @@ addfname(char *name) } -#ifndef HAVE_GLOB /* * Sort the results of file name expansion. 
It calculates the number of * strings to sort and then calls msort (short for merge sort) to do the @@ -1494,7 +1482,6 @@ msort(struct strlist *list, int len) } return list; } -#endif /* @@ -1510,7 +1497,6 @@ patmatch(char *pattern, const char *string) } -#ifndef HAVE_FNMATCH STATIC int ccmatch(const char *p, int chr, const char **r) { static const struct class { @@ -1553,6 +1539,9 @@ pmatch(const char *pattern, const char *string) const char *p, *q; char c; + if (FNMATCH_IS_ENABLED) + return !fnmatch(pattern, string, 0); + p = pattern; q = string; for (;;) { @@ -1644,7 +1633,6 @@ dft: default: return 0; return 1; } -#endif diff --git a/src/mystring.h b/src/mystring.h index d0ec9dd..07d0c73 100644 --- a/src/mystring.h +++ b/src/mystring.h @@ -39,8 +39,14 @@ #ifdef HAVE_FNMATCH #define FNMATCH_IS_ENABLED 1 +#ifdef HAVE_GLOB +#define GLOB_IS_ENABLED 1 +#else +#define GLOB_IS_ENABLED 0 +#endif #else #define FNMATCH_IS_ENABLED 0 +#define GLOB_IS_ENABLED 0 #endif extern const char snlfmt[]; diff --git a/src/system.h b/src/system.h index 371c64b..6b31d52 100644 --- a/src/system.h +++ b/src/system.h @@ -118,6 +118,64 @@ long sysconf(int) __attribute__((__noreturn__)); int isblank(int c); #endif +#ifndef HAVE_FNMATCH +static inline int fnmatch(const char *pattern, const char *string, int flags) +{ + return -1; +} +#endif + +#ifndef HAVE_GLOB +#define GLOB_ERR (1 << 0)/* Return on read errors. */ +#define GLOB_MARK (1 << 1)/* Append a slash to each name. */ +#define GLOB_NOSORT (1 << 2)/* Don't sort the names. */ +#define GLOB_DOOFFS (1 << 3)/* Insert PGLOB->gl_offs NULLs. */ +#define GLOB_NOCHECK (1 << 4)/* If nothing matches, return the pattern. */ +#define GLOB_APPEND (1 << 5)/* Append to results of a previous call. */ +#define GLOB_NOESCAPE (1 << 6)/* Backslashes don't quote metacharacters. */ +#define GLOB_PERIOD (1 << 7)/* Leading `.' can be matched by metachars. */ +#define GLOB_MAGCHAR (1 << 8)/* Set in gl_flags if any metachars seen. 
*/ +#define GLOB_ALTDIRFUNC (1 << 9)/* Use gl_opendir et al functions. */ +#define GLOB_BRACE (1 << 10)/* Expand "{a,b}" to "a" "b". */ +#define GLOB_NOMAGIC (1 << 11)/* If no magic chars, return the pattern. */ +#define GLOB_TILDE (1 << 12)/* Expand ~user and ~ to home directories. */ +#define GLOB_ONLYDIR (1 << 13)/* Match only directories. */ +#define GLOB_TILDE_CHECK (1 << 14)/* Like GLOB_TILDE but return an error + if the user name is not available. */ + +#define GLOB_NOSPACE 1 /* Ran out of memory. */ +#define GLOB_ABORTED 2 /* Read error. */ +#define GLOB_NOMATCH 3 /* No matches found. */ +#define GLOB_NOSYS 4 /* Not implemented. */ + +struct dirent64; +struct stat64; + +typedef struct { + size_t gl_pathc; + char **gl_pathv; + size_t gl_offs; + int gl_flags; + + void (*gl_closedir)(void *); + struct dirent64 *(*gl_readdir)(void *); + void *(*gl_opendir)(const char *); + int (*gl_lstat)(const char *, struct stat64 *); + int (*gl_stat)(const char *, struct stat64 *); +} glob64_t; + +static inline int glob64(const char *pattern, int flags, + int (*errfunc)(const char *epath, int eerrno), + glob64_t *restrict pglob) +{ + return -1; +} + +static inline void globfree64(glob64_t *pglob) +{ +} +#endif + /* * A trick to suppress uninitialized variable warning without generating any * code From 20250295979b75279ae10690e49685fb529beffc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 19 May 2024 12:35:22 +0800 Subject: [PATCH 113/401] expand: Add support for caret Rather than making dash bigger by actively excluding carets when fnmatch is enabled, add caret support when fnmatch is disabled. 
Signed-off-by: Herbert Xu --- src/expand.c | 5 +---- src/mksyntax.c | 4 ++-- src/mystring.c | 3 --- src/mystring.h | 2 +- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/src/expand.c b/src/expand.c index 399a9e6..d8b354c 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1580,7 +1580,7 @@ pmatch(const char *pattern, const char *string) startp = p; invert = 0; - if (*p == '!') { + if (*p == '!' || *p == '^') { invert++; p++; } @@ -1694,11 +1694,8 @@ _rmescapes(char *str, int flag) notescaped ^= globbing; goto copy; } - if (FNMATCH_IS_ENABLED && *p == '^') - goto add_escape; if (*p == (char)CTLESC) { p++; -add_escape: if (notescaped) *q++ = '\\'; else if (inquotes) { diff --git a/src/mksyntax.c b/src/mksyntax.c index da18f5d..4d7280b 100644 --- a/src/mksyntax.c +++ b/src/mksyntax.c @@ -178,14 +178,14 @@ main(int argc, char **argv) add("$", "CVAR"); add("}", "CENDVAR"); /* ':/' for tilde expansion, '-' for [a\-x] pattern ranges */ - add("!*?[=~:/-]", "CCTL"); + add("^!*?[=~:/-]", "CCTL"); print("dqsyntax"); init(); fputs("\n/* syntax table used when in single quotes */\n", cfile); add("\n", "CNL"); add("'", "CENDQUOTE"); /* ':/' for tilde expansion, '-' for [a\-x] pattern ranges */ - add("!*?[=~:/-]\\", "CCTL"); + add("^!*?[=~:/-]\\", "CCTL"); print("sqsyntax"); init(); fputs("\n/* syntax table used when in arithmetic */\n", cfile); diff --git a/src/mystring.c b/src/mystring.c index 5eace6c..978bbb5 100644 --- a/src/mystring.c +++ b/src/mystring.c @@ -64,9 +64,6 @@ const char dolatstr[] = { CTLQUOTEMARK, CTLVAR, VSNORMAL | VSBIT, '@', '=', CTLQUOTEMARK, '\0' }; const char cqchars[] = { '\\', -#ifdef HAVE_FNMATCH - '^', -#endif CTLESC, CTLQUOTEMARK, 0 }; const char illnum[] = "Illegal number: %s"; diff --git a/src/mystring.h b/src/mystring.h index 07d0c73..d178990 100644 --- a/src/mystring.h +++ b/src/mystring.h @@ -54,7 +54,7 @@ extern const char spcstr[]; extern const char dolatstr[]; #define DOLATSTRLEN 6 extern const char cqchars[]; -#define qchars 
(cqchars + FNMATCH_IS_ENABLED + 1) +#define qchars (cqchars + 1) extern const char illnum[]; extern const char homestr[]; From 041ccc75852fd4d041f12543c80d5a0cc5a93bdf Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 19 May 2024 17:16:55 +0800 Subject: [PATCH 114/401] man: Remove reference to PS expansion The variables PS1/PS2/PS4 have been expanded since 2004. Signed-off-by: Herbert Xu --- src/dash.1 | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/dash.1 b/src/dash.1 index 1b14662..2f4a9b3 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -2432,6 +2432,3 @@ in 2002. .Sh BUGS Setuid shell scripts should be avoided at all costs, as they are a significant security risk. -.Pp -PS1, PS2, and PS4 should be subject to parameter expansion before -being displayed. From 8f158f6375cbca0061d1d08f3571fef9fd17678d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 19 May 2024 17:24:43 +0800 Subject: [PATCH 115/401] man: Document the value unlimited for ulimit Add a reference to the unlimited value for ulimit in the manual. Reported-by: Martin Guy Signed-off-by: Herbert Xu --- src/dash.1 | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/dash.1 b/src/dash.1 index 2f4a9b3..6c4ee2d 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -2251,8 +2251,13 @@ show or set the limit on the real-time scheduling priority of a process .Pp If none of these is specified, it is the limit on file size that is shown or set. -If value is specified, the limit is set to that number; otherwise -the current limit is displayed. +If +.Ar value +is specified, the limit is set to that number; otherwise +the current limit is displayed. The special +.Ar value +.Cm unlimited +represents the lack of any limit. 
.Pp Limits of an arbitrary process can be displayed or set using the .Xr sysctl 8 From c4a9b31b9b4b0adc6e0ad9352126fc4bea0bc2a7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 19 May 2024 22:21:42 +0800 Subject: [PATCH 116/401] options: Always reset OPTIND in getoptsreset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Always reset OPTIND if it is modified by the user, regardless of its value. Do not call getoptsreset when returning from a function because of "local OPTIND" as this simply trashes the caller's getopts state. Reported-by: наб Reported-by: Harald van Dijk Signed-off-by: Herbert Xu --- src/options.c | 2 +- src/var.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/options.c b/src/options.c index f157321..34e500a 100644 --- a/src/options.c +++ b/src/options.c @@ -393,7 +393,7 @@ setcmd(int argc, char **argv) void getoptsreset(const char *value) { - shellparam.optind = number(value) ?: 1; + shellparam.optind = 1; shellparam.optoff = -1; } diff --git a/src/var.c b/src/var.c index 35ea7c6..bb5420a 100644 --- a/src/var.c +++ b/src/var.c @@ -93,7 +93,7 @@ struct var varinit[] = { { 0, VSTRFIXED|VTEXTFIXED, "PS1=$ ", 0 }, { 0, VSTRFIXED|VTEXTFIXED, "PS2=> ", 0 }, { 0, VSTRFIXED|VTEXTFIXED, "PS4=+ ", 0 }, - { 0, VSTRFIXED|VTEXTFIXED, defoptindvar, getoptsreset }, + { 0, VSTRFIXED|VTEXTFIXED|VNOFUNC, defoptindvar, getoptsreset }, #ifdef WITH_LINENO { 0, VSTRFIXED|VTEXTFIXED, linenovar, 0 }, #endif @@ -531,7 +531,7 @@ poplocalvars(void) vp->flags &= ~(VSTRFIXED|VREADONLY); unsetvar(vp->text); } else { - if (vp->func) + if (vp->func && !(vp->flags & VNOFUNC)) (*vp->func)(varnull(lvp->text)); if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0) ckfree(vp->text); From 176e792f7dc915bb096d14d7abb70f11ded9abb7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 19 May 2024 22:36:19 +0800 Subject: [PATCH 117/401] shell: Disable fnmatch by default As fnmatch(3) is terribly slow with multi-byte strings, 
disable it by default. Signed-off-by: Herbert Xu --- configure.ac | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index cb55c3f..9b4db72 100644 --- a/configure.ac +++ b/configure.ac @@ -37,8 +37,8 @@ if test "$enable_static" = "yes"; then export LDFLAGS="-static" fi -AC_ARG_ENABLE(fnmatch, AS_HELP_STRING(--disable-fnmatch, \ - [Do not use fnmatch(3) from libc])) +AC_ARG_ENABLE(fnmatch, AS_HELP_STRING(--enable-fnmatch, \ + [Use fnmatch(3) from libc])) AC_ARG_ENABLE(glob, AS_HELP_STRING(--enable-glob, [Use glob(3) from libc])) dnl Checks for libraries. @@ -122,7 +122,7 @@ if test "$enable_test_workaround" = "yes"; then [Define if your faccessat tells root all files are executable]) fi -if test "$enable_fnmatch" != no; then +if test "$enable_fnmatch" = yes; then use_fnmatch= AC_CHECK_FUNCS(fnmatch, use_fnmatch=yes) fi From 0c55f437598a911fe449871641ed6466e9eeaba8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 24 May 2024 22:41:00 +0800 Subject: [PATCH 118/401] parser: Fix here-doc EOF marker bug with negative chars The here-document EOF marker parsing code incorrectly treats all negative bytes as EOF instead of just PEOF. Fix this by testing against PEOF. Fixes: 81daadfce8d5 ("[PARSER] Removed noexpand/length check on eofmark") Signed-off-by: Herbert Xu --- src/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index 27611f0..09b1cb8 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1127,7 +1127,7 @@ checkend: { len = out - p; if (len) { - len -= c < 0; + len -= c <= PEOF; c = p[-1]; if (len) { From 186c9eb1f23032e119a46f1bc1ac0c1c97f8ff4a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 26 May 2024 18:41:06 +0800 Subject: [PATCH 119/401] mystring: Add dotdir Merge all occurrences of "." into dotdir. 
Signed-off-by: Herbert Xu --- src/cd.c | 2 +- src/expand.c | 2 +- src/mystring.c | 1 + src/mystring.h | 1 + src/var.c | 2 +- 5 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/cd.c b/src/cd.c index 1ef1dc5..bcd1484 100644 --- a/src/cd.c +++ b/src/cd.c @@ -126,7 +126,7 @@ cdcmd(int argc, char **argv) } } if (!*dest) - dest = "."; + dest = dotdir; path = bltinlookup("CDPATH"); while (p = path, (len = padvance_magic(&path, dest, 0)) >= 0) { c = *p; diff --git a/src/expand.c b/src/expand.c index d8b354c..e7e8ce0 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1358,7 +1358,7 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len) cp = expdir; expdir_len = enddir - cp; if (!expdir_len) - cp = "."; + cp = dotdir; if ((dirp = opendir(cp)) == NULL) return; if (*endname == 0) { diff --git a/src/mystring.c b/src/mystring.c index 978bbb5..7bf61e3 100644 --- a/src/mystring.c +++ b/src/mystring.c @@ -68,6 +68,7 @@ const char cqchars[] = { }; const char illnum[] = "Illegal number: %s"; const char homestr[] = "HOME"; +const char dotdir[] = "."; /* * equal - #defined in mystring.h diff --git a/src/mystring.h b/src/mystring.h index d178990..0857c32 100644 --- a/src/mystring.h +++ b/src/mystring.h @@ -57,6 +57,7 @@ extern const char cqchars[]; #define qchars (cqchars + 1) extern const char illnum[]; extern const char homestr[]; +extern const char dotdir[]; #if 0 void scopyn(const char *, char *, int); diff --git a/src/var.c b/src/var.c index bb5420a..e2102c3 100644 --- a/src/var.c +++ b/src/var.c @@ -143,7 +143,7 @@ INIT { p = lookupvar("PWD"); if (p) - if (*p != '/' || stat64(p, &st1) || stat64(".", &st2) || + if (*p != '/' || stat64(p, &st1) || stat64(dotdir, &st2) || st1.st_dev != st2.st_dev || st1.st_ino != st2.st_ino) p = 0; setpwd(p, 0); From c1d868ac6474d8a1959e632521571e7f9cdcd3c8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 26 May 2024 18:42:08 +0800 Subject: [PATCH 120/401] memalloc: Force functions to be out-of-line Force gcc to build library 
functions out-of-line, even if they happen to be in the same file. Signed-off-by: Herbert Xu --- src/memalloc.c | 59 ++++++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/src/memalloc.c b/src/memalloc.c index 7aa8c58..3275e51 100644 --- a/src/memalloc.c +++ b/src/memalloc.c @@ -43,18 +43,28 @@ #include "mystring.h" #include "system.h" +static __attribute__((__always_inline__)) inline void outofspace(void) +{ + sh_error("Out of space"); +} + +static void *checknull(void *p) +{ + if (!p) + outofspace(); + return p; +} + /* * Like malloc, but returns an error when out of space. */ -void *ckmalloc(size_t nbytes) +__attribute__((__noinline__)) void *ckmalloc(size_t nbytes) { void *p; p = malloc(nbytes); - if (p == NULL) - sh_error("Out of space"); - return p; + return checknull(p); } @@ -62,12 +72,10 @@ void *ckmalloc(size_t nbytes) * Same for realloc. */ -void *ckrealloc(void *p, size_t nbytes) +__attribute__((__noinline__)) void *ckrealloc(void *p, size_t nbytes) { p = realloc(p, nbytes); - if (p == NULL) - sh_error("Out of space"); - return p; + return checknull(p); } @@ -78,10 +86,7 @@ void *ckrealloc(void *p, size_t nbytes) char * savestr(const char *s) { - char *p = strdup(s); - if (!p) - sh_error("Out of space"); - return p; + return checknull(strdup(s)); } @@ -124,7 +129,7 @@ void *stalloc(size_t nbytes) blocksize = MINSIZE; len = sizeof(struct stack_block) - MINSIZE + blocksize; if (len < blocksize) - sh_error("Out of space"); + outofspace(); INTOFF; sp = ckmalloc(len); sp->prev = stackp; @@ -155,7 +160,8 @@ void stunalloc(void *p) -void pushstackmark(struct stackmark *mark, size_t len) +__attribute__((__noinline__)) void pushstackmark(struct stackmark *mark, + size_t len) { mark->stackp = stackp; mark->stacknxt = stacknxt; @@ -197,13 +203,14 @@ popstackmark(struct stackmark *mark) * part of the block that has been used. 
*/ -static void growstackblock(size_t min) +static char *growstackblock(size_t min) { size_t newlen; + char *p; newlen = stacknleft * 2; if (newlen < stacknleft) - sh_error("Out of space"); + outofspace(); min = SHELL_ALIGN(min | 128); if (newlen < min) newlen += min; @@ -220,19 +227,22 @@ static void growstackblock(size_t min) sp = ckrealloc(sp, grosslen); sp->prev = prevstackp; stackp = sp; - stacknxt = sp->space; + p = stacknxt = sp->space; stacknleft = newlen; sstrend = sp->space + newlen; INTON; } else { char *oldspace = stacknxt; int oldlen = stacknleft; - char *p = stalloc(newlen); + + p = stalloc(newlen); /* free the space we just allocated */ stacknxt = memcpy(p, oldspace, oldlen); stacknleft += newlen; } + + return p; } /* @@ -258,14 +268,13 @@ growstackstr(void) { size_t len = stackblocksize(); - growstackblock(0); - return stackblock() + len; + return growstackblock(0) + len; } -char *growstackto(size_t len) +__attribute__((__noinline__)) char *growstackto(size_t len) { if (stackblocksize() < len) - growstackblock(len); + return growstackblock(len); return stackblock(); } @@ -273,16 +282,14 @@ char *growstackto(size_t len) * Called from CHECKSTRSPACE. */ -char * -makestrspace(size_t newlen, char *p) +__attribute__((__noinline__)) char *makestrspace(size_t newlen, char *p) { size_t len = p - stacknxt; return growstackto(len + newlen) + len; } -char * -stnputs(const char *s, size_t n, char *p) +__attribute__((__noinline__)) char *stnputs(const char *s, size_t n, char *p) { p = makestrspace(n, p); p = mempcpy(p, s, n); From fb0eae45072d0dd2c6c4c781fca15f8b60592a57 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 26 May 2024 18:43:08 +0800 Subject: [PATCH 121/401] eval: Fix pipe fd leakage in evalbackcmd If makejob or forkshell fails in evalbackcmd, the two pipe file descriptors will be leaked. Fix this by adding an EXITRESET handler that frees them. 
Signed-off-by: Herbert Xu --- src/eval.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/eval.c b/src/eval.c index d169eb8..1839623 100644 --- a/src/eval.c +++ b/src/eval.c @@ -81,6 +81,7 @@ int savestatus = -1; /* exit status of last command outside traps */ /* Prevent PS4 nesting. */ MKINIT int inps4; +MKINIT int tpip[2] = { -1 }; #if !defined(__alpha__) || (defined(__GNUC__) && __GNUC__ >= 3) STATIC @@ -127,6 +128,11 @@ EXITRESET { evalskip = 0; loopnest = 0; inps4 = 0; + + if (tpip[0] >= 0) { + close(tpip[0]); + close(tpip[1]); + } } #endif @@ -624,8 +630,9 @@ evalpipe(union node *n, int flags) void evalbackcmd(union node *n, struct backcmd *result) { - int pip[2]; struct job *jp; + int pip[2]; + int pid; result->fd = -1; result->buf = NULL; @@ -636,8 +643,12 @@ evalbackcmd(union node *n, struct backcmd *result) } sh_pipe(pip, 0); + tpip[0] = pip[0]; + tpip[1] = pip[1]; jp = makejob(1); - if (forkshell(jp, n, FORK_NOJOB) == 0) { + pid = forkshell(jp, n, FORK_NOJOB); + tpip[0] = -1; + if (pid == 0) { FORCEINTON; close(pip[0]); if (pip[1] != 1) { From f822a30a3b76216c10bfd96595f6526f56f9dd8b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 26 May 2024 18:43:55 +0800 Subject: [PATCH 122/401] jobs: Fix resource leak in makejob If the ps ckmalloc call fails in makejob, the job will still be marked as used and thus leaked. Fix this by moving the ckmalloc call earlier. 
Signed-off-by: Herbert Xu --- src/jobs.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/jobs.c b/src/jobs.c index 4cf6b8c..02ec6f4 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -765,8 +765,9 @@ getjob(const char *name, int getctl) struct job *makejob(int nprocs) { - int i; + struct procstat *ps; struct job *jp; + int i; for (i = njobs, jp = jobtab ; ; jp++) { if (--i < 0) { @@ -783,6 +784,9 @@ struct job *makejob(int nprocs) break; } memset(jp, 0, sizeof(*jp)); + ps = &jp->ps0; + if (nprocs > 1) + ps = ckmalloc(nprocs * sizeof(*ps)); #if JOBS if (jobctl) jp->jobctl = 1; @@ -790,10 +794,7 @@ struct job *makejob(int nprocs) jp->prev_job = curjob; curjob = jp; jp->used = 1; - jp->ps = &jp->ps0; - if (nprocs > 1) { - jp->ps = ckmalloc(nprocs * sizeof (struct procstat)); - } + jp->ps = ps; TRACE(("makejob(%d) returns %%%d\n", nprocs, jobno(jp))); return jp; From 509c4fba0d3ff5bb836d75ae05b9cfc29c0bacd2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 1 Jun 2024 12:49:53 +0800 Subject: [PATCH 123/401] expand: Fix expmeta resource leakage If memory allocation fails in expmeta, the DIRP will be leaked along with expdir. Fix this by adding setjmp/longjmp, and converting expdir to use stack memory. Signed-off-by: Herbert Xu --- src/expand.c | 167 ++++++++++++++++++++++++++++----------------------- 1 file changed, 92 insertions(+), 75 deletions(-) diff --git a/src/expand.c b/src/expand.c index e7e8ce0..ce245e4 100644 --- a/src/expand.c +++ b/src/expand.c @@ -32,49 +32,50 @@ * SUCH DAMAGE. */ -#include -#include -#include +#include #include -#include -#ifdef HAVE_GETPWNAM -#include -#endif -#include -#include -#include -#include -#include #ifdef HAVE_FNMATCH #include #endif #ifdef HAVE_GLOB #include #endif -#include +#include +#include +#ifdef HAVE_GETPWNAM +#include +#endif +#include #include +#include +#include +#include +#include +#include +#include +#include /* * Routines to expand arguments to commands. 
We have to deal with * backquotes, shell variables, and file metacharacters. */ -#include "shell.h" -#include "main.h" -#include "nodes.h" +#include "error.h" #include "eval.h" #include "expand.h" -#include "syntax.h" -#include "parser.h" #include "jobs.h" -#include "options.h" -#include "var.h" -#include "output.h" +#include "main.h" #include "memalloc.h" -#include "error.h" #include "mystring.h" +#include "nodes.h" +#include "options.h" +#include "output.h" +#include "parser.h" +#include "shell.h" #include "show.h" +#include "syntax.h" #include "system.h" +#include "var.h" /* * _rmescape() flags @@ -120,7 +121,7 @@ static size_t memtodest(const char *p, size_t len, int flags); STATIC ssize_t varvalue(char *, int, int, int); STATIC void expandmeta(struct strlist *); static void addglob(const glob64_t *); -STATIC void expmeta(char *, unsigned, unsigned); +static char *expmeta(char *, unsigned, size_t); STATIC struct strlist *expsort(struct strlist *); STATIC struct strlist *msort(struct strlist *, int); STATIC void addfname(char *); @@ -1225,10 +1226,6 @@ static void addglob(const glob64_t *pglob) } while (*++p); } -STATIC char *expdir; -STATIC unsigned expdir_max; - - STATIC void expandmeta(struct strlist *str) { @@ -1255,11 +1252,8 @@ expandmeta(struct strlist *str) INTOFF; p = preglob(str->text, RMESCAPE_ALLOC | RMESCAPE_HEAP); len = strlen(p); - expdir_max = len + PATH_MAX; - expdir = ckmalloc(expdir_max); expmeta(p, len, 0); - ckfree(expdir); if (p != str->text) ckfree(p); INTON; @@ -1282,27 +1276,59 @@ expandmeta(struct strlist *str) } } +static void addfname_common(char *name) +{ + struct strlist *sp; + + sp = (struct strlist *)stalloc(sizeof *sp); + sp->text = name; + *exparg.lastp = sp; + exparg.lastp = &sp->next; +} + +static char *addfnamealt(char *enddir, size_t expdir_len) +{ + char *name; + + name = grabstackstr(enddir); + addfname_common(name); + + STARTSTACKSTR(enddir); + return stnputs(name, expdir_len, enddir) - expdir_len; +} /* * Do 
metacharacter (i.e. *, ?, [...]) expansion. */ -STATIC void -expmeta(char *name, unsigned name_len, unsigned expdir_len) +static char *expmeta(char *name, unsigned name_len, size_t expdir_len) { - char *enddir = expdir + expdir_len; - char *p; - const char *cp; - char *start; + struct jmploc *volatile savehandler; + struct jmploc jmploc; + struct stat64 statb; + struct dirent64 *dp; + volatile int err; char *endname; + char *enddir; int metaflag; - struct stat64 statb; + int matchdot; + char *start; + size_t len; DIR *dirp; - struct dirent64 *dp; int atend; - int matchdot; + char *cp; + char *p; int esc; + *(DIR *volatile *)&dirp = NULL; + savehandler = handler; + if (unlikely(err = setjmp(jmploc.loc))) + goto out; + + len = expdir_len + name_len + 1; + cp = growstackto(len); + enddir = cp + expdir_len; + metaflag = 0; start = name; for (p = name; esc = 0, *p; p += esc + 1) { @@ -1334,16 +1360,16 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len) } if (metaflag == 0) { /* we've reached the end of the file name */ if (!expdir_len) - return; + goto out_opendir; p = name; do { if (*p == '\\' && p[1]) p++; *enddir++ = *p; } while (*p++); - if (lstat64(expdir, &statb) >= 0) - addfname(expdir); - return; + if (lstat64(cp, &statb) >= 0) + cp = addfnamealt(enddir, expdir_len); + goto out_opendir; } endname = p; if (name < start) { @@ -1355,12 +1381,11 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len) } while (p < start); } *enddir = 0; - cp = expdir; expdir_len = enddir - cp; - if (!expdir_len) - cp = dotdir; - if ((dirp = opendir(cp)) == NULL) - return; + + *(DIR *volatile *)&dirp = opendir(expdir_len ? cp : dotdir); + if (!dirp) + goto out_opendir; if (*endname == 0) { atend = 1; } else { @@ -1379,32 +1404,29 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len) if (dp->d_name[0] == '.' && ! 
matchdot) continue; if (pmatch(start, dp->d_name)) { - if (atend) { - scopy(dp->d_name, enddir); - addfname(expdir); - } else { - unsigned offset; - unsigned len; - - p = stpcpy(enddir, dp->d_name); - *p = '/'; - - offset = p - expdir + 1; - len = offset + name_len + NAME_MAX; - if (len > expdir_max) { - len += PATH_MAX; - expdir = ckrealloc(expdir, len); - expdir_max = len; - } - - expmeta(endname, name_len, offset); - enddir = expdir + expdir_len; + len = strlen(dp->d_name) + 1; + + enddir = cp + expdir_len; + enddir = stnputs(dp->d_name, len, enddir); + if (atend) + cp = addfnamealt(enddir, expdir_len); + else { + enddir[-1] = '/'; + len += expdir_len; + cp = expmeta(endname, name_len, len); } } } - closedir(dirp); if (! atend) endname[-esc - 1] = esc ? '\\' : '/'; + +out: + closedir(*(DIR *volatile *)&dirp); +out_opendir: + handler = savehandler; + if (err) + longjmp(handler->loc, 1); + return cp; } @@ -1415,12 +1437,7 @@ expmeta(char *name, unsigned name_len, unsigned expdir_len) STATIC void addfname(char *name) { - struct strlist *sp; - - sp = (struct strlist *)stalloc(sizeof *sp); - sp->text = sstrdup(name); - *exparg.lastp = sp; - exparg.lastp = &sp->next; + addfname_common(sstrdup(name)); } From cb669294463ef454b59516016d91868a22d0eaef Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Jun 2024 09:28:45 +0800 Subject: [PATCH 124/401] shell: Call setlocale Call setlocale to initialise locale settings for libc. Signed-off-by: Herbert Xu --- src/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main.c b/src/main.c index 7beb280..1e192f8 100644 --- a/src/main.c +++ b/src/main.c @@ -32,6 +32,7 @@ * SUCH DAMAGE. 
*/ +#include #include #include #include @@ -101,6 +102,9 @@ main(int argc, char **argv) #if PROFILE monitor(4, etext, profile_buf, sizeof profile_buf, 50); #endif + + setlocale(LC_ALL, ""); + state = 0; if (unlikely(setjmp(main_handler.loc))) { int e; From 597850ae1fb9d70baf8dae7da0b5f5872654afba Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Jun 2024 09:28:48 +0800 Subject: [PATCH 125/401] shell: Use strcoll instead of strcmp where applicable Use strcoll instead of strcmp so that the locale is taken into account when sorting strings during pathname expansion, and for the built-in test(1) string comparison operators. Signed-off-by: Herbert Xu --- src/bltin/test.c | 8 ++++---- src/expand.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/bltin/test.c b/src/bltin/test.c index fd8a43b..2db4d0f 100644 --- a/src/bltin/test.c +++ b/src/bltin/test.c @@ -353,13 +353,13 @@ binop(void) /* NOTREACHED */ #endif case STREQ: - return strcmp(opnd1, opnd2) == 0; + return strcoll(opnd1, opnd2) == 0; case STRNE: - return strcmp(opnd1, opnd2) != 0; + return strcoll(opnd1, opnd2) != 0; case STRLT: - return strcmp(opnd1, opnd2) < 0; + return strcoll(opnd1, opnd2) < 0; case STRGT: - return strcmp(opnd1, opnd2) > 0; + return strcoll(opnd1, opnd2) > 0; case INTEQ: return getn(opnd1) == getn(opnd2); case INTNE: diff --git a/src/expand.c b/src/expand.c index ce245e4..db46cf4 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1481,7 +1481,7 @@ msort(struct strlist *list, int len) p = msort(p, len - half); /* sort second half */ lpp = &list; for (;;) { - if (strcmp(p->text, q->text) < 0) { + if (strcoll(p->text, q->text) < 0) { *lpp = p; lpp = &p->next; if ((p = *lpp) == NULL) { From c0c860df08c96aaa68a0744cc7d60778845f43f1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Jun 2024 09:28:50 +0800 Subject: [PATCH 126/401] expand: Count multi-byte characters for VSLENGTH Count multi-byte characters in variables rather than bytes and return that as the
length expansion. Signed-off-by: Herbert Xu --- src/expand.c | 107 ++++++++++++++++++++++++++++++++++++++++--------- src/memalloc.h | 10 ++--- 2 files changed, 94 insertions(+), 23 deletions(-) diff --git a/src/expand.c b/src/expand.c index db46cf4..0a868d5 100644 --- a/src/expand.c +++ b/src/expand.c @@ -54,6 +54,7 @@ #include #include #include +#include /* * Routines to expand arguments to commands. We have to deal with @@ -790,6 +791,41 @@ evalvar(char *p, int flag) return p; } +static char *chtodest(int c, const char *syntax, char *out) +{ + if (syntax[c] == CCTL) + USTPUTC(CTLESC, out); + USTPUTC(c, out); + + return out; +} + +struct mbpair { + unsigned ml; + unsigned ql; +}; + +static struct mbpair mbtodest(const char *p, char *q, const char *syntax, + size_t len) +{ + mbstate_t mbs = {}; + struct mbpair mbp; + char *q0 = q; + size_t ml; + + ml = mbrlen(--p, len, &mbs); + if (ml == -2 || ml == -1 || ml < 2) + ml = 1; + + len = ml; + do { + q = chtodest((signed char)*p++, syntax, q); + } while (--len); + + mbp.ml = ml - 1; + mbp.ql = q - q0; + return mbp; +} /* * Put a string on the stack. @@ -797,38 +833,72 @@ evalvar(char *p, int flag) static size_t memtodest(const char *p, size_t len, int flags) { - const char *syntax = flags & EXP_QUOTED ? DQSYNTAX : BASESYNTAX; + const char *syntax; + size_t count = 0; + int expq; char *q; - char *s; if (unlikely(!len)) return 0; q = makestrspace(len * 2, expdest); - s = q; - do { +#if QUOTES_ESC != 0x11 || EXP_QUOTED != 0x100 +#error QUOTES_ESC != 0x11 || EXP_QUOTED != 0x100 +#endif + expq = flags & EXP_QUOTED; + if (likely(!(flags & (expq >> 4 | expq >> 8) & QUOTES_ESC))) { + while (len >= 8) { + uint64_t x = *(uint64_t *)(p + count); + + if ((x | (x - 0x0101010101010101)) & + 0x8080808080808080) + break; + + *(uint64_t *)(q + count) = x; + + count += 8; + len -= 8; + } + + q += count; + p += count; + + syntax = flags & QUOTES_ESC ? 
BASESYNTAX : is_type; + } else + syntax = SQSYNTAX; + + for (; len; len--) { int c = (signed char)*p++; - if (c) { - if ((flags & QUOTES_ESC) && - ((syntax[c] == CCTL) || - (flags & EXP_QUOTED && syntax[c] == CBACK))) - USTPUTC(CTLESC, q); - } else if (!(flags & EXP_KEEPNUL)) + + if (unlikely(!c && !(flags & EXP_KEEPNUL))) continue; - USTPUTC(c, q); - } while (--len); + + count++; + + if (unlikely(c < 0)) { + struct mbpair mbp = mbtodest(p, q, syntax, len); + unsigned mlm; + + q += mbp.ql; + mlm = mbp.ml; + p += mlm; + len -= mlm; + continue; + } + + q = chtodest(c, syntax, q); + } expdest = q; - return q - s; + return count; } static size_t strtodest(const char *p, int flags) { size_t len = strlen(p); - memtodest(p, len, flags); - return len; + return memtodest(p, len, flags); } @@ -850,6 +920,7 @@ varvalue(char *name, int varflags, int flags, int quoted) int discard = (subtype == VSPLUS || subtype == VSLENGTH) | (flags & EXP_DISCARD); ssize_t len = 0; + size_t start; char c; if (!subtype) { @@ -859,9 +930,9 @@ varvalue(char *name, int varflags, int flags, int quoted) sh_error("Bad substitution"); } - flags |= EXP_KEEPNUL; flags &= discard ? 
~QUOTES_ESC : ~0; sep = (flags & EXP_FULL) << CHAR_BIT; + start = expdest - (char *)stackblock(); switch (*name) { case '$': @@ -921,7 +992,7 @@ varvalue(char *name, int varflags, int flags, int quoted) if (*ap && sep) { len++; - memtodest(&sepc, 1, flags); + memtodest(&sepc, 1, flags | EXP_KEEPNUL); } } break; @@ -951,7 +1022,7 @@ varvalue(char *name, int varflags, int flags, int quoted) } if (discard) - STADJUST(-len, expdest); + expdest = (char *)stackblock() + start; return len; } diff --git a/src/memalloc.h b/src/memalloc.h index a7f7996..1895c1e 100644 --- a/src/memalloc.h +++ b/src/memalloc.h @@ -81,11 +81,11 @@ static inline char *_STPUTC(int c, char *p) { #define STPUTC(c, p) ((p) = _STPUTC((c), (p))) #define CHECKSTRSPACE(n, p) \ ({ \ - char *q = (p); \ - size_t l = (n); \ - size_t m = sstrend - q; \ - if (l > m) \ - (p) = makestrspace(l, q); \ + char *_q = (p); \ + size_t _l = (n); \ + size_t _m = sstrend - _q; \ + if (_l > _m) \ + (p) = makestrspace(_l, _q); \ 0; \ }) #define USTPUTC(c, p) (*p++ = (c)) From 990bbd15346d3178895ed2d3cbd4cca72610cb33 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Jun 2024 09:28:52 +0800 Subject: [PATCH 127/401] expand: Process multi-byte characters in subevalvar When trimming variables in subevalvar, process multi-byte characters as one unit instead of their constituent bytes. 
Signed-off-by: Herbert Xu --- src/expand.c | 170 +++++++++++++++++++++++++++++++++++-------------- src/expand.h | 1 + src/mystring.c | 2 +- src/parser.h | 1 + 4 files changed, 125 insertions(+), 49 deletions(-) diff --git a/src/expand.c b/src/expand.c index 0a868d5..5d73f8e 100644 --- a/src/expand.c +++ b/src/expand.c @@ -544,8 +544,10 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend, loc = startp; loc2 = rmesc; do { - int match; const char *s = loc2; + unsigned ml; + int match; + c = *loc2; if (zero) { *loc2 = '\0'; @@ -554,12 +556,26 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend, match = pmatch(str, s); *loc2 = c; if (match) - return loc; - if (quotes && *loc == (char)CTLESC) + return quotes ? loc : loc2; + + if (!c) + break; + + if (*loc != (char)CTLMBCHAR) { + if (*loc == (char)CTLESC) + loc++; + loc++; + loc2++; + continue; + } + + if (*++loc == (char)CTLESC) loc++; - loc++; - loc2++; - } while (c); + + ml = (unsigned char)*loc; + loc += ml + 3; + loc2 += ml; + } while (1); return 0; } @@ -567,14 +583,16 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend, static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend, char *str, int quotes, int zero ) { - int esc = 0; + size_t esc = 0; char *loc; char *loc2; for (loc = endp, loc2 = rmescend; loc >= startp; loc2--) { - int match; - char c = *loc2; const char *s = loc2; + char c = *loc2; + unsigned ml; + int match; + if (zero) { *loc2 = '\0'; s = rmesc; @@ -582,17 +600,23 @@ static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend, match = pmatch(str, s); *loc2 = c; if (match) - return loc; + return quotes ? 
loc : loc2; loc--; - if (quotes) { - if (--esc < 0) { - esc = esclen(startp, loc); - } - if (esc % 2) { - esc--; - loc--; - } + if (!esc--) + esc = esclen(startp, loc); + if (esc % 2) { + esc--; + loc--; + continue; } + if (*loc != (char)CTLMBCHAR) + continue; + + ml = (unsigned char)*--loc; + loc -= ml + 2; + if (*loc == (char)CTLESC) + loc--; + loc2 -= ml - 1; } return 0; } @@ -646,14 +670,11 @@ static char *subevalvar(char *start, char *str, int strloc, int startloc, nstrloc = str - (char *)stackblock(); } - rmesc = startp; - if (quotes) { - rmesc = _rmescapes(startp, RMESCAPE_ALLOC | RMESCAPE_GROW); - if (rmesc != startp) - rmescend = expdest; - startp = stackblock() + startloc; - str = stackblock() + nstrloc; - } + rmesc = _rmescapes(startp, RMESCAPE_ALLOC | RMESCAPE_GROW); + if (rmesc != startp) + rmescend = expdest; + startp = stackblock() + startloc; + str = stackblock() + nstrloc; rmescend--; /* zero = subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX */ @@ -663,16 +684,29 @@ static char *subevalvar(char *start, char *str, int strloc, int startloc, endp = stackblock() + strloc - 1; loc = scan(startp, endp, rmesc, rmescend, str, quotes, zero); - if (loc) { - if (zero) { - memmove(startp, loc, endp - loc); - loc = startp + (endp - loc); + if (!loc) { + if (quotes) { + rmesc = startp; + rmescend = endp; } - *loc = '\0'; - } else - loc = endp; + } else if (!quotes) { + if (zero) + rmesc = loc; + else + rmescend = loc; + } else if (zero) { + rmesc = loc; + rmescend = endp; + } else { + rmesc = startp; + rmescend = loc; + } + + memmove(startp, rmesc, rmescend - rmesc); + loc = startp + (rmescend - rmesc); out: + *loc = '\0'; amount = loc - expdest; STADJUST(amount, expdest); @@ -698,6 +732,7 @@ evalvar(char *p, int flag) ssize_t varlen; int discard; int quoted; + int mbchar; varflags = *p++ & ~VSBIT; subtype = varflags & VSTYPE; @@ -707,8 +742,18 @@ evalvar(char *p, int flag) startloc = expdest - (char *)stackblock(); p = strchr(p, '=') + 1; + mbchar = 0; + 
switch (subtype) { + case VSTRIMLEFT: + case VSTRIMLEFTMAX: + case VSTRIMRIGHT: + case VSTRIMRIGHTMAX: + mbchar = EXP_MBCHAR; + break; + } + again: - varlen = varvalue(var, varflags, flag, quoted); + varlen = varvalue(var, varflags, flag | mbchar, quoted); if (varflags & VSNUL) varlen--; @@ -814,14 +859,31 @@ static struct mbpair mbtodest(const char *p, char *q, const char *syntax, size_t ml; ml = mbrlen(--p, len, &mbs); - if (ml == -2 || ml == -1 || ml < 2) + if (ml == -2 || ml == -1 || ml < 2) { + q = chtodest((signed char)*p, syntax, q); ml = 1; + goto out; + } len = ml; do { q = chtodest((signed char)*p++, syntax, q); } while (--len); + goto out; + + if (syntax[CTLMBCHAR] == CCTL) { + USTPUTC(CTLMBCHAR, q); + USTPUTC(ml, q); + } + + q = mempcpy(q, p, ml); + if (syntax[CTLMBCHAR] == CCTL) { + USTPUTC(ml, q); + USTPUTC(CTLMBCHAR, q); + } + +out: mbp.ml = ml - 1; mbp.ql = q - q0; return mbp; @@ -841,13 +903,15 @@ static size_t memtodest(const char *p, size_t len, int flags) if (unlikely(!len)) return 0; - q = makestrspace(len * 2, expdest); + /* CTLMBCHAR, 2, c, c, 2, CTLMBCHAR */ + q = makestrspace(len * 3, expdest); -#if QUOTES_ESC != 0x11 || EXP_QUOTED != 0x100 -#error QUOTES_ESC != 0x11 || EXP_QUOTED != 0x100 +#if QUOTES_ESC != 0x11 || EXP_MBCHAR != 0x20 || EXP_QUOTED != 0x100 +#error QUOTES_ESC != 0x11 || EXP_MBCHAR != 0x20 || EXP_QUOTED != 0x100 #endif expq = flags & EXP_QUOTED; - if (likely(!(flags & (expq >> 4 | expq >> 8) & QUOTES_ESC))) { + if (likely(!(flags & (expq >> 3 | expq >> 4 | expq >> 8) & + (QUOTES_ESC | EXP_MBCHAR)))) { while (len >= 8) { uint64_t x = *(uint64_t *)(p + count); @@ -864,7 +928,8 @@ static size_t memtodest(const char *p, size_t len, int flags) q += count; p += count; - syntax = flags & QUOTES_ESC ? BASESYNTAX : is_type; + syntax = flags & (QUOTES_ESC | EXP_MBCHAR) ? 
+ BASESYNTAX : is_type; } else syntax = SQSYNTAX; @@ -1772,17 +1837,25 @@ _rmescapes(char *str, int flag) inquotes = 0; notescaped = globbing; while (*p) { + unsigned ml; + int newnesc = globbing; + if (*p == (char)CTLQUOTEMARK) { p++; inquotes ^= globbing; continue; - } - if (*p == '\\') { + } else if (*p == '\\') { /* naked back slash */ - notescaped ^= globbing; - goto copy; - } - if (*p == (char)CTLESC) { + newnesc ^= notescaped; + } else if (*p == (char)CTLMBCHAR) { + if (*++p == (char)CTLESC) + p++; + + ml = (unsigned char)*p++; + q = mempcpy(q, p, ml); + p += ml + 2; + goto setnesc; + } else if (*p == (char)CTLESC) { p++; if (notescaped) *q++ = '\\'; @@ -1791,9 +1864,10 @@ _rmescapes(char *str, int flag) *q++ = '\\'; } } - notescaped = globbing; -copy: + *q++ = *p++; +setnesc: + notescaped = newnesc; } *q = '\0'; if (flag & RMESCAPE_GROW) { diff --git a/src/expand.h b/src/expand.h index 49a18f9..a78564f 100644 --- a/src/expand.h +++ b/src/expand.h @@ -55,6 +55,7 @@ struct arglist { #define EXP_VARTILDE 0x4 /* expand tildes in an assignment */ #define EXP_REDIR 0x8 /* file glob for a redirection (1 match only) */ #define EXP_CASE 0x10 /* keeps quotes around for CASE pattern */ +#define EXP_MBCHAR 0x20 /* mark multi-byte characters */ #define EXP_VARTILDE2 0x40 /* expand tildes after colons only */ #define EXP_WORD 0x80 /* expand word in parameter expansion */ #define EXP_QUOTED 0x100 /* expand word in double quotes */ diff --git a/src/mystring.c b/src/mystring.c index 7bf61e3..ca0cd39 100644 --- a/src/mystring.c +++ b/src/mystring.c @@ -64,7 +64,7 @@ const char dolatstr[] = { CTLQUOTEMARK, CTLVAR, VSNORMAL | VSBIT, '@', '=', CTLQUOTEMARK, '\0' }; const char cqchars[] = { '\\', - CTLESC, CTLQUOTEMARK, 0 + CTLESC, CTLMBCHAR, CTLQUOTEMARK, 0 }; const char illnum[] = "Illegal number: %s"; const char homestr[] = "HOME"; diff --git a/src/parser.h b/src/parser.h index 433573d..14bfc4f 100644 --- a/src/parser.h +++ b/src/parser.h @@ -44,6 +44,7 @@ union node; #define 
CTLVAR -126 /* variable defn */ #define CTLENDVAR -125 #define CTLBACKQ -124 +#define CTLMBCHAR -123 #define CTLARI -122 /* arithmetic expression */ #define CTLENDARI -121 #define CTLQUOTEMARK -120 From a9012f4078be32012f240c6208c3df53db3b6086 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Jun 2024 09:28:54 +0800 Subject: [PATCH 128/401] expand: Process multi-byte characters in expmeta When glob(3) is not in use, make sure that expmeta processes multi-byte characters correctly. Signed-off-by: Herbert Xu --- src/expand.c | 105 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 33 deletions(-) diff --git a/src/expand.c b/src/expand.c index 5d73f8e..03eafc2 100644 --- a/src/expand.c +++ b/src/expand.c @@ -85,6 +85,7 @@ #define RMESCAPE_GLOB 0x2 /* Add backslashes for glob */ #define RMESCAPE_GROW 0x8 /* Grow strings instead of stalloc */ #define RMESCAPE_HEAP 0x10 /* Malloc strings instead of stalloc */ +#define RMESCAPE_EMETA 0x20 /* Remove backslashes too */ /* Add CTLESC when necessary. 
*/ #define QUOTES_ESC (EXP_FULL | EXP_CASE) @@ -1386,12 +1387,10 @@ expandmeta(struct strlist *str) savelastp = exparg.lastp; INTOFF; - p = preglob(str->text, RMESCAPE_ALLOC | RMESCAPE_HEAP); + p = str->text; len = strlen(p); expmeta(p, len, 0); - if (p != str->text) - ckfree(p); INTON; if (exparg.lastp == savelastp) { /* @@ -1433,6 +1432,41 @@ static char *addfnamealt(char *enddir, size_t expdir_len) return stnputs(name, expdir_len, enddir) - expdir_len; } +static void expmeta_rmescapes(char *enddir, char *name) +{ + preglob(strcpy(enddir, name), RMESCAPE_EMETA); +} + +static unsigned mbcharlen(char *p) +{ + int esc = 0; + + if (*++p == (char)CTLESC) + esc++; + + return esc + 3 + (unsigned char)p[esc]; +} + +static size_t skipesc(char *p) +{ + size_t esc = 0; + + if (p[esc] == (char)CTLMBCHAR) + esc += mbcharlen(p); + else if (p[esc] == (char)CTLESC) + esc++; + else if (p[esc] == '\\' && p[esc + 1]) { + while (p[++esc] == (char)CTLQUOTEMARK) + ; + if (p[esc] == (char)CTLMBCHAR) + esc += mbcharlen(p + esc); + else if (p[esc] == (char)CTLESC) + esc++; + } + + return esc; +} + /* * Do metacharacter (i.e. *, ?, [...]) expansion. 
*/ @@ -1451,12 +1485,14 @@ static char *expmeta(char *name, unsigned name_len, size_t expdir_len) char *start; size_t len; DIR *dirp; - int atend; + char *pat; char *cp; char *p; int esc; + int c; *(DIR *volatile *)&dirp = NULL; + *(char *volatile *)&pat = NULL; savehandler = handler; if (unlikely(err = setjmp(jmploc.loc))) goto out; @@ -1472,11 +1508,8 @@ static char *expmeta(char *name, unsigned name_len, size_t expdir_len) metaflag = 1; else if (*p == '[') { char *q = p + 1; - if (*q == '!') - q++; for (;;) { - if (*q == '\\') - q++; + q += skipesc(q); if (*q == '/' || *q == '\0') break; if (*++q == ']') { @@ -1485,8 +1518,7 @@ static char *expmeta(char *name, unsigned name_len, size_t expdir_len) } } } else { - if (*p == '\\' && p[1]) - esc++; + esc = skipesc(p); if (p[esc] == '/') { if (metaflag) break; @@ -1497,24 +1529,18 @@ static char *expmeta(char *name, unsigned name_len, size_t expdir_len) if (metaflag == 0) { /* we've reached the end of the file name */ if (!expdir_len) goto out_opendir; - p = name; - do { - if (*p == '\\' && p[1]) - p++; - *enddir++ = *p; - } while (*p++); + expmeta_rmescapes(enddir, name); if (lstat64(cp, &statb) >= 0) - cp = addfnamealt(enddir, expdir_len); + cp = addfnamealt(strchrnul(enddir, 0), expdir_len); goto out_opendir; } endname = p; if (name < start) { - p = name; - do { - if (*p == '\\' && p[1]) - p++; - *enddir++ = *p++; - } while (p < start); + c = *start; + *start = 0; + expmeta_rmescapes(enddir, name); + *start = c; + enddir += strlen(enddir); } *enddir = 0; expdir_len = enddir - cp; @@ -1522,16 +1548,16 @@ static char *expmeta(char *name, unsigned name_len, size_t expdir_len) *(DIR *volatile *)&dirp = opendir(expdir_len ? 
cp : dotdir); if (!dirp) goto out_opendir; - if (*endname == 0) { - atend = 1; - } else { - atend = 0; + c = *endname; + if (c) { *endname = '\0'; endname += esc + 1; } name_len -= endname - name; matchdot = 0; - p = start; + *(char *volatile *)&pat = + preglob(start, RMESCAPE_ALLOC | RMESCAPE_HEAP); + p = pat; if (*p == '\\') p++; if (*p == '.') @@ -1539,12 +1565,12 @@ static char *expmeta(char *name, unsigned name_len, size_t expdir_len) while (! int_pending() && (dp = readdir64(dirp)) != NULL) { if (dp->d_name[0] == '.' && ! matchdot) continue; - if (pmatch(start, dp->d_name)) { + if (pmatch(pat, dp->d_name)) { len = strlen(dp->d_name) + 1; enddir = cp + expdir_len; enddir = stnputs(dp->d_name, len, enddir); - if (atend) + if (!c) cp = addfnamealt(enddir, expdir_len); else { enddir[-1] = '/'; @@ -1553,10 +1579,13 @@ static char *expmeta(char *name, unsigned name_len, size_t expdir_len) } } } - if (! atend) - endname[-esc - 1] = esc ? '\\' : '/'; + if (c) + endname[-esc - 1] = c; out: + pat = *(char *volatile *)&pat; + if (pat != start) + ckfree(pat); closedir(*(DIR *volatile *)&dirp); out_opendir: handler = savehandler; @@ -1800,6 +1829,7 @@ _rmescapes(char *str, int flag) int notescaped; int globbing; int inquotes; + int expmeta; p = strpbrk(str, cqchars); if (!p) { @@ -1808,6 +1838,7 @@ _rmescapes(char *str, int flag) q = p; r = str; globbing = flag & RMESCAPE_GLOB; + expmeta = (flag & RMESCAPE_EMETA) ? 
RMESCAPE_GLOB : 0; if (flag & RMESCAPE_ALLOC) { size_t len = p - str; @@ -1847,6 +1878,12 @@ _rmescapes(char *str, int flag) } else if (*p == '\\') { /* naked back slash */ newnesc ^= notescaped; + /* naked backslashes can only occur outside quotes */ + inquotes = 0; + if (expmeta & ~newnesc) { + p++; + goto setnesc; + } } else if (*p == (char)CTLMBCHAR) { if (*++p == (char)CTLESC) p++; @@ -1857,7 +1894,9 @@ _rmescapes(char *str, int flag) goto setnesc; } else if (*p == (char)CTLESC) { p++; - if (notescaped) + if (expmeta) + ; + else if (notescaped) *q++ = '\\'; else if (inquotes) { *q++ = '\\'; From c0674f487c7aec2a3bdf6795cea7e60c9530c360 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Jun 2024 09:28:57 +0800 Subject: [PATCH 129/401] expand: Support multi-byte characters during field splitting When multi-byte characters are used in IFS, they will be used for field splitting. Signed-off-by: Herbert Xu --- src/expand.c | 461 ++++++++++++++++++++++++++++++++++----------------- src/expand.h | 1 + src/var.c | 12 +- 3 files changed, 312 insertions(+), 162 deletions(-) diff --git a/src/expand.c b/src/expand.c index 03eafc2..2223045 100644 --- a/src/expand.c +++ b/src/expand.c @@ -55,6 +55,7 @@ #include #include #include +#include /* * Routines to expand arguments to commands. 
We have to deal with @@ -102,6 +103,14 @@ struct ifsregion { int nulonly; /* search for nul bytes only */ }; +struct ifs_state { + const char *ifs; + char *start; + char *r; + int maxargs; + int ifsspc; +}; + /* output of current string */ static char *expdest; /* list of back quote expressions */ @@ -113,6 +122,11 @@ static struct ifsregion *ifslastp; /* holds expanded arg list */ static struct arglist exparg; +static char ifsmap[128]; +static const char *ncifs; +static size_t ifsmb0len; +static wchar_t *wcifs; + static char *argstr(char *p, int flag); static char *exptilde(char *startp, int flag); static char *expari(char *start, int flag); @@ -120,7 +134,7 @@ STATIC void expbackq(union node *, int); STATIC char *evalvar(char *, int); static size_t strtodest(const char *p, int flags); static size_t memtodest(const char *p, size_t len, int flags); -STATIC ssize_t varvalue(char *, int, int, int); +STATIC ssize_t varvalue(char *, int, unsigned); STATIC void expandmeta(struct strlist *); static void addglob(const glob64_t *); static char *expmeta(char *, unsigned, size_t); @@ -158,6 +172,30 @@ esclen(const char *start, const char *p) { return esc; } +static __attribute__((noinline)) unsigned mbnext(const char *p) +{ + unsigned start = 0; + unsigned end = 0; + unsigned ml; + int c; + + c = (signed char)p[end++]; + + switch (__builtin_expect(c, 0)) { + case CTLMBCHAR: + if ((signed char)p[end] == CTLESC) + end++; + ml = (unsigned char)p[end++]; + start = end; + end = ml + 2; + break; + case CTLESC: + start++; + break; + } + + return start | end << 8; +} static inline const char *getpwhome(const char *name) { @@ -546,6 +584,7 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend, loc2 = rmesc; do { const char *s = loc2; + unsigned mb; unsigned ml; int match; @@ -562,19 +601,9 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend, if (!c) break; - if (*loc != (char)CTLMBCHAR) { - if (*loc == (char)CTLESC) - loc++; - 
loc++; - loc2++; - continue; - } - - if (*++loc == (char)CTLESC) - loc++; - - ml = (unsigned char)*loc; - loc += ml + 3; + mb = mbnext(loc); + loc += (mb & 0xff) + (mb >> 8); + ml = (mb >> 8) > 3 ? (mb >> 8) - 2 : 1; loc2 += ml; } while (1); return 0; @@ -754,7 +783,7 @@ evalvar(char *p, int flag) } again: - varlen = varvalue(var, varflags, flag | mbchar, quoted); + varlen = varvalue(var, varflags, flag | mbchar); if (varflags & VSNUL) varlen--; @@ -973,23 +1002,23 @@ static size_t strtodest(const char *p, int flags) * Add the value of a specialized variable to the stack string. */ -STATIC ssize_t -varvalue(char *name, int varflags, int flags, int quoted) +static ssize_t varvalue(char *name, int varflags, unsigned flags) { - int num; - char *p; - int i; - int sep; - char sepc; - char **ap; int subtype = varflags & VSTYPE; - int discard = (subtype == VSPLUS || subtype == VSLENGTH) | - (flags & EXP_DISCARD); + unsigned long seplen; + const char *seps; ssize_t len = 0; size_t start; - char c; + int discard; + char **ap; + int num; + char *p; + int i; + + discard = (subtype == VSPLUS || subtype == VSLENGTH) | + (flags & EXP_DISCARD); - if (!subtype) { + if (unlikely(!subtype)) { if (discard) return -1; @@ -997,7 +1026,8 @@ varvalue(char *name, int varflags, int flags, int quoted) } flags &= discard ? ~QUOTES_ESC : ~0; - sep = (flags & EXP_FULL) << CHAR_BIT; + seps = nullstr; + seplen = flags & EXP_FULL; start = expdest - (char *)stackblock(); switch (*name) { @@ -1028,13 +1058,14 @@ varvalue(char *name, int varflags, int flags, int quoted) expdest = p; break; case '@': - if (quoted && sep) + if ((flags & (EXP_QUOTED | EXP_FULL)) == + (EXP_QUOTED | EXP_FULL)) goto param; /* fall through */ case '*': - /* We will set c to 0 or ~0 depending on whether + /* We will set seplen to 0 or !0 depending on whether * we're doing field splitting. We won't do field - * splitting if either we're quoted or sep is zero. + * splitting if either we're quoted or seplen is zero. 
* * Instead of testing (quoted || !sep) the following * trick optimises away any branches by using the @@ -1046,20 +1077,22 @@ varvalue(char *name, int varflags, int flags, int quoted) #if EXP_QUOTED >> CHAR_BIT != EXP_FULL #error The following two lines expect EXP_QUOTED == EXP_FULL << CHAR_BIT #endif - c = !((quoted | ~sep) & EXP_QUOTED) - 1; - sep &= ~quoted; - sep |= ifsset() ? (unsigned char)(c & ifsval()[0]) : ' '; + seplen &= ~(flags >> CHAR_BIT); + if (!seplen) + seps = ncifs; + seplen = ((seplen - 1) & (ifsmb0len - 1)) + 1; param: - sepc = sep; if (!(ap = shellparam.p)) return -1; - while ((p = *ap++)) { + if (!(p = *ap)) + break; + for (;;) { len += strtodest(p, flags); - if (*ap && sep) { - len++; - memtodest(&sepc, 1, flags | EXP_KEEPNUL); - } + if (!(p = *++ap)) + break; + + len += memtodest(seps, seplen, flags | EXP_KEEPNUL); } break; case '0': @@ -1120,7 +1153,126 @@ recordregion(int start, int end, int nulonly) ifslastp->nulonly = nulonly; } +static unsigned ifsisifs(const char *p, unsigned ml, const char *ifs) +{ + bool isdefifs = false; + bool isifs = false; + wchar_t wc = *p; + wchar_t ifs0; + + if (likely(ifs[0]) && unlikely(wcifs)) { + if (wc & 0x80) { + mbstate_t mbst = {}; + wchar_t wc2; + + if (mbrtowc(&wc2, p, ml, &mbst) != ml) + goto out; + wc = wc2; + } + + isifs = wcschr(wcifs, wc); + ifs0 = wcifs[0]; + } else if (likely(!ml)) { + isifs = strchr(ifs, wc); + ifs0 = ifs[0]; + } + + if (isifs) + isdefifs = iswspace(wc ?: ifs0); + +out: + return isifs << 1 | isdefifs; +} + +static char *ifsbreakup_slow(struct ifs_state *ifst, struct arglist *arglist, + int nulonly, char *p) +{ + struct strlist *sp; + unsigned ifschar; + unsigned sisifs; + bool isdefifs; + unsigned ml; + bool isifs; + char *q; + + q = p; + + ifschar = mbnext(p); + p += ifschar & 0xff; + ml = (ifschar >> 8) > 3 ? 
+ (ifschar >> 8) - 2 : 0; + + sisifs = ifsisifs(p, ml, ifst->ifs); + p += ifschar >> 8; + + isifs = sisifs >> 1; + isdefifs = sisifs & 1; + + /* If only reading one more argument: + * If we have exactly one field, + * read that field without its terminator. + * If we have more than one field, + * read all fields including their terminators, + * except for trailing IFS whitespace. + * + * This means that if we have only IFS + * characters left, and at most one + * of them is non-whitespace, we stop + * reading here. + * Otherwise, we read all the remaining + * characters except for trailing + * IFS whitespace. + * + * In any case, r indicates the start + * of the characters to remove, or NULL + * if no characters should be removed. + */ + if (!ifst->maxargs) { + if (isdefifs) { + if (!ifst->r) + ifst->r = q; + return p; + } + + if (!(isifs && ifst->ifsspc)) + ifst->r = NULL; + } else if (ifst->ifsspc) { + if (isifs) + q = p; + + ifst->start = q; + + if (isdefifs) + return p; + } else if (isifs) { + int ifsspc = ifst->ifsspc; + + if (!nulonly) { + ifsspc = isdefifs; + ifst->ifsspc = ifsspc; + } + + /* Ignore IFS whitespace at start */ + if (q == ifst->start && ifsspc) { + ifst->start = p; + return p; + } + if (ifst->maxargs > 0 && !--ifst->maxargs) { + ifst->r = q; + return p; + } + *q = '\0'; + sp = (struct strlist *)stalloc(sizeof *sp); + sp->text = ifst->start; + *arglist->lastp = sp; + arglist->lastp = &sp->next; + ifst->start = p; + return p; + } + ifst->ifsspc = 0; + return p; +} /* * Break the argument string into pieces based upon IFS and add the @@ -1133,21 +1285,19 @@ void ifsbreakup(char *string, int maxargs, struct arglist *arglist) { struct ifsregion *ifsp; + struct ifs_state ifst; + const char *realifs; struct strlist *sp; - char *start; - char *p; - char *q; - char *r = NULL; - const char *ifs, *realifs; - int ifsspc; int nulonly; + char *p; - - start = string; + ifst.r = NULL; + ifst.start = string; + ifst.maxargs = maxargs; if (ifslastp != NULL) { - 
ifsspc = 0; + ifst.ifsspc = 0; nulonly = 0; - realifs = ifsset() ? ifsval() : defifs; + realifs = ncifs; ifsp = &ifsfirst; do { int afternul; @@ -1155,106 +1305,60 @@ ifsbreakup(char *string, int maxargs, struct arglist *arglist) p = string + ifsp->begoff; afternul = nulonly; nulonly = ifsp->nulonly; - ifs = nulonly ? nullstr : realifs; - ifsspc = 0; - while (p < string + ifsp->endoff) { - int c; - bool isifs; - bool isdefifs; - - q = p; - c = *p++; - if (c == (char)CTLESC) - c = *p++; - - isifs = strchr(ifs, c); - isdefifs = false; - if (isifs) - isdefifs = strchr(defifs, c); - - /* If only reading one more argument: - * If we have exactly one field, - * read that field without its terminator. - * If we have more than one field, - * read all fields including their terminators, - * except for trailing IFS whitespace. - * - * This means that if we have only IFS - * characters left, and at most one - * of them is non-whitespace, we stop - * reading here. - * Otherwise, we read all the remaining - * characters except for trailing - * IFS whitespace. - * - * In any case, r indicates the start - * of the characters to remove, or NULL - * if no characters should be removed. - */ - if (!maxargs) { - if (isdefifs) { - if (!r) - r = q; - continue; - } - - if (!(isifs && ifsspc)) - r = NULL; - - ifsspc = 0; - continue; + ifst.ifs = nulonly ? 
nullstr : realifs; + ifst.ifsspc = 0; + for (;;) { + char *p0 = p; + + while (string + ifsp->endoff - p >= 8) { + union { + uint64_t qw; + unsigned char b[8]; + } x; + + x.qw = *(uint64_t *)p; + + if ((x.qw & 0x8080808080808080)) + break; + if (ifsmap[x.b[0]] | + ifsmap[x.b[1]] | + ifsmap[x.b[2]] | + ifsmap[x.b[3]] | + ifsmap[x.b[4]] | + ifsmap[x.b[5]] | + ifsmap[x.b[6]] | + ifsmap[x.b[7]]) + break; + p += 8; } - if (ifsspc) { - if (isifs) - q = p; - - start = q; - - if (isdefifs) - continue; - - isifs = false; + if (p != p0) { + if (!ifst.maxargs) + ifst.r = NULL; + else if (ifst.ifsspc) + ifst.start = p0; + ifst.ifsspc = 0; } - if (isifs) { - if (!(afternul || nulonly)) - ifsspc = isdefifs; - /* Ignore IFS whitespace at start */ - if (q == start && ifsspc) { - start = p; - ifsspc = 0; - continue; - } - if (maxargs > 0 && !--maxargs) { - r = q; - continue; - } - *q = '\0'; - sp = (struct strlist *)stalloc(sizeof *sp); - sp->text = start; - *arglist->lastp = sp; - arglist->lastp = &sp->next; - start = p; - continue; - } + if (p >= string + ifsp->endoff) + break; - ifsspc = 0; + p = ifsbreakup_slow(&ifst, arglist, + afternul | nulonly, p); } } while ((ifsp = ifsp->next) != NULL); if (nulonly) goto add; + if (ifst.r) + *ifst.r = '\0'; } - if (r) - *r = '\0'; - - if (!*start) + if (!*ifst.start) return; add: sp = (struct strlist *)stalloc(sizeof *sp); - sp->text = start; + sp->text = ifst.start; *arglist->lastp = sp; arglist->lastp = &sp->next; } @@ -1280,7 +1384,56 @@ void ifsfree(void) ifslastp = NULL; } +void changeifs(const char *ifs) +{ + mbstate_t mbs = {}; + wchar_t *nwcifs; + unsigned mb = 0; + size_t len = 0; + const char *p; + size_t ml; + + if (!ifsset()) + ifs = defifs; + ncifs = ifs; + + memset(ifsmap, 0, sizeof(ifsmap)); + + for (p = ifs;; p++) { + unsigned c = (unsigned char)*p; + + mb |= c >> 7; + if (!(c >> 7)) + ifsmap[c] = 1; + if (c == 0) + break; + + len++; + } + + nwcifs = NULL; + + ifsmb0len = !!len; + + if (!mb) + goto out; + + ml = mbrlen(ifs, 
len, &mbs); + if (ml == -2 || ml == -1) + ml = 1; + ifsmb0len = ml; + + nwcifs = ckmalloc((len + 1) * sizeof(*wcifs)); + memset(nwcifs, 0, (len + 1) * sizeof(*wcifs)); + + p = ifs; + mbsrtowcs(nwcifs, &p, len + 1, &mbs); + +out: + ckfree(wcifs); + wcifs = nwcifs; +} /* * Expand shell metacharacters. At this point, the only control characters @@ -1437,31 +1590,25 @@ static void expmeta_rmescapes(char *enddir, char *name) preglob(strcpy(enddir, name), RMESCAPE_EMETA); } -static unsigned mbcharlen(char *p) +static int skipesc(char *p) { + unsigned short mb; int esc = 0; - if (*++p == (char)CTLESC) - esc++; + mb = mbnext(p); + if ((mb >> 8) > 3) + return (mb & 0xff) + (mb >> 8) - 1; - return esc + 3 + (unsigned char)p[esc]; -} - -static size_t skipesc(char *p) -{ - size_t esc = 0; + esc = mb & 0xff; - if (p[esc] == (char)CTLMBCHAR) - esc += mbcharlen(p); - else if (p[esc] == (char)CTLESC) - esc++; - else if (p[esc] == '\\' && p[esc + 1]) { + if (!esc && p[esc] == '\\' && p[esc + 1]) { while (p[++esc] == (char)CTLQUOTEMARK) ; - if (p[esc] == (char)CTLMBCHAR) - esc += mbcharlen(p + esc); - else if (p[esc] == (char)CTLESC) - esc++; + mb = mbnext(p + esc); + esc += mb & 0xff; + + if ((mb >> 8) > 3) + esc += (mb >> 8) - 1; } return esc; @@ -1868,6 +2015,7 @@ _rmescapes(char *str, int flag) inquotes = 0; notescaped = globbing; while (*p) { + unsigned mb; unsigned ml; int newnesc = globbing; @@ -1885,10 +2033,11 @@ _rmescapes(char *str, int flag) goto setnesc; } } else if (*p == (char)CTLMBCHAR) { - if (*++p == (char)CTLESC) - p++; + mb = mbnext(p); + ml = mb >> 8; - ml = (unsigned char)*p++; + ml -= 2; + p += mb & 0xff; q = mempcpy(q, p, ml); p += ml + 2; goto setnesc; diff --git a/src/expand.h b/src/expand.h index a78564f..7bcff75 100644 --- a/src/expand.h +++ b/src/expand.h @@ -75,6 +75,7 @@ void removerecordregions(int); void ifsbreakup(char *, int, struct arglist *); void ifsfree(void); void restore_handler_expandarg(struct jmploc *savehandler, int err); +void 
changeifs(const char *); /* From arith.y */ intmax_t arith(const char *); diff --git a/src/var.c b/src/var.c index e2102c3..eb4075f 100644 --- a/src/var.c +++ b/src/var.c @@ -86,7 +86,7 @@ struct var varinit[] = { #if ATTY { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "ATTY\0", 0 }, #endif - { 0, VSTRFIXED|VTEXTFIXED, defifsvar, 0 }, + { 0, VSTRFIXED|VTEXTFIXED, defifsvar, changeifs }, { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "MAIL\0", changemail }, { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "MAILPATH\0", changemail }, { 0, VSTRFIXED|VTEXTFIXED, defpathvar, changepath }, @@ -267,9 +267,6 @@ struct var *setvareq(char *s, int flags) n); } - if (vp->func && (flags & VNOFUNC) == 0) - (*vp->func)(varnull(s)); - if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0) ckfree(vp->text); @@ -301,6 +298,9 @@ struct var *setvareq(char *s, int flags) vp->text = s; vp->flags = flags; + if (vp->func && (flags & VNOFUNC) == 0) + (*vp->func)(varnull(s)); + out: return vp; } @@ -531,12 +531,12 @@ poplocalvars(void) vp->flags &= ~(VSTRFIXED|VREADONLY); unsetvar(vp->text); } else { - if (vp->func && !(vp->flags & VNOFUNC)) - (*vp->func)(varnull(lvp->text)); if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0) ckfree(vp->text); vp->flags = lvp->flags; vp->text = lvp->text; + if (vp->func && !(vp->flags & VNOFUNC)) + (*vp->func)(varnull(vp->text)); } ckfree(lvp); } From c5bf9702ea110bede687b57c0b5fa3fd0e15829e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Jun 2024 09:28:59 +0800 Subject: [PATCH 130/401] expand: Add multi-byte support to pmatch Add CTLMBCHAR support to pmatch. POSIX equivalence classes and collating symbols are not supported. Enable CTLMBCHAR generation in mbtodest. 
Signed-off-by: Herbert Xu --- src/eval.c | 3 +- src/expand.c | 365 +++++++++++++++++++++++++++++++-------------------- 2 files changed, 224 insertions(+), 144 deletions(-) diff --git a/src/eval.c b/src/eval.c index 1839623..140a734 100644 --- a/src/eval.c +++ b/src/eval.c @@ -457,7 +457,8 @@ evalcase(union node *n, int flags) lineno -= funcline - 1; arglist.lastp = &arglist.list; - expandarg(n->ncase.expr, &arglist, EXP_TILDE); + expandarg(n->ncase.expr, &arglist, FNMATCH_IS_ENABLED ? EXP_TILDE : + EXP_TILDE | EXP_MBCHAR); for (cp = n->ncase.cases ; cp && evalskip == 0 ; cp = cp->nclist.next) { for (patp = cp->nclist.pattern ; patp ; patp = patp->narg.next) { if (casematch(patp, arglist.list->text)) { diff --git a/src/expand.c b/src/expand.c index 2223045..ec408fd 100644 --- a/src/expand.c +++ b/src/expand.c @@ -86,7 +86,6 @@ #define RMESCAPE_GLOB 0x2 /* Add backslashes for glob */ #define RMESCAPE_GROW 0x8 /* Grow strings instead of stalloc */ #define RMESCAPE_HEAP 0x10 /* Malloc strings instead of stalloc */ -#define RMESCAPE_EMETA 0x20 /* Remove backslashes too */ /* Add CTLESC when necessary. 
*/ #define QUOTES_ESC (EXP_FULL | EXP_CASE) @@ -142,7 +141,7 @@ STATIC struct strlist *expsort(struct strlist *); STATIC struct strlist *msort(struct strlist *, int); STATIC void addfname(char *); STATIC int patmatch(char *, const char *); -STATIC int pmatch(const char *, const char *); +STATIC int pmatch(char *, const char *); static size_t cvtnum(intmax_t num, int flags); STATIC size_t esclen(const char *, const char *); STATIC void varunset(const char *, const char *, const char *, int) @@ -157,6 +156,11 @@ STATIC void varunset(const char *, const char *, const char *, int) STATIC inline char * preglob(const char *pattern, int flag) { + if (FNMATCH_IS_ENABLED) { + if (!flag) + flag = RMESCAPE_GROW; + flag |= RMESCAPE_ALLOC; + } flag |= RMESCAPE_GLOB; return _rmescapes((char *)pattern, flag); } @@ -583,28 +587,31 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend, loc = startp; loc2 = rmesc; do { - const char *s = loc2; + char *s = FNMATCH_IS_ENABLED ? loc2 : loc; unsigned mb; unsigned ml; int match; - c = *loc2; + c = *s; if (zero) { - *loc2 = '\0'; - s = rmesc; + *s = '\0'; + s = FNMATCH_IS_ENABLED ? rmesc : startp; } match = pmatch(str, s); - *loc2 = c; + *(FNMATCH_IS_ENABLED ? loc2 : loc) = c; if (match) - return quotes ? loc : loc2; + return FNMATCH_IS_ENABLED && quotes ? loc : loc2; if (!c) break; mb = mbnext(loc); loc += (mb & 0xff) + (mb >> 8); - ml = (mb >> 8) > 3 ? (mb >> 8) - 2 : 1; - loc2 += ml; + if (unlikely(FNMATCH_IS_ENABLED || !quotes)) { + ml = (mb >> 8) > 3 ? (mb >> 8) - 2 : 1; + loc2 += ml; + } else + loc2 = loc; } while (1); return 0; } @@ -617,21 +624,23 @@ static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend, char *loc; char *loc2; - for (loc = endp, loc2 = rmescend; loc >= startp; loc2--) { - const char *s = loc2; - char c = *loc2; + for (loc = endp, loc2 = rmescend;; + FNMATCH_IS_ENABLED ? loc2-- : (loc2 = loc)) { + char *s = FNMATCH_IS_ENABLED ? 
loc2 : loc; + char c = *s; unsigned ml; int match; if (zero) { - *loc2 = '\0'; + *s = '\0'; s = rmesc; } match = pmatch(str, s); - *loc2 = c; + *(FNMATCH_IS_ENABLED ? loc2 : loc) = c; if (match) - return quotes ? loc : loc2; - loc--; + return FNMATCH_IS_ENABLED && quotes ? loc : loc2; + if (--loc < startp) + break; if (!esc--) esc = esclen(startp, loc); if (esc % 2) { @@ -646,7 +655,8 @@ static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend, loc -= ml + 2; if (*loc == (char)CTLESC) loc--; - loc2 -= ml - 1; + if (FNMATCH_IS_ENABLED) + loc2 -= ml - 1; } return 0; } @@ -692,19 +702,21 @@ static char *subevalvar(char *start, char *str, int strloc, int startloc, #endif rmescend = stackblock() + strloc; - str = preglob(rmescend, FNMATCH_IS_ENABLED ? - RMESCAPE_ALLOC | RMESCAPE_GROW : 0); + str = preglob(rmescend, 0); if (FNMATCH_IS_ENABLED) { startp = stackblock() + startloc; rmescend = stackblock() + strloc; nstrloc = str - (char *)stackblock(); } - rmesc = _rmescapes(startp, RMESCAPE_ALLOC | RMESCAPE_GROW); - if (rmesc != startp) - rmescend = expdest; - startp = stackblock() + startloc; - str = stackblock() + nstrloc; + rmesc = startp; + if (FNMATCH_IS_ENABLED || !quotes) { + rmesc = _rmescapes(startp, RMESCAPE_ALLOC | RMESCAPE_GROW); + if (rmesc != startp) + rmescend = expdest; + startp = stackblock() + startloc; + str = stackblock() + nstrloc; + } rmescend--; /* zero = subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX */ @@ -895,12 +907,6 @@ static struct mbpair mbtodest(const char *p, char *q, const char *syntax, goto out; } - len = ml; - do { - q = chtodest((signed char)*p++, syntax, q); - } while (--len); - goto out; - if (syntax[CTLMBCHAR] == CCTL) { USTPUTC(CTLMBCHAR, q); USTPUTC(ml, q); @@ -1473,7 +1479,7 @@ static void expandmeta_glob(struct strlist *str) #endif INTOFF; - p = preglob(str->text, RMESCAPE_ALLOC | RMESCAPE_HEAP); + p = preglob(str->text, RMESCAPE_HEAP); i = glob64(p, GLOB_ALTDIRFUNC | GLOB_NOMAGIC, 0, &pglob); if (p != 
str->text) ckfree(p); @@ -1540,10 +1546,12 @@ expandmeta(struct strlist *str) savelastp = exparg.lastp; INTOFF; - p = str->text; + p = preglob(str->text, RMESCAPE_ALLOC | RMESCAPE_HEAP); len = strlen(p); expmeta(p, len, 0); + if (p != str->text) + ckfree(p); INTON; if (exparg.lastp == savelastp) { /* @@ -1585,9 +1593,26 @@ static char *addfnamealt(char *enddir, size_t expdir_len) return stnputs(name, expdir_len, enddir) - expdir_len; } -static void expmeta_rmescapes(char *enddir, char *name) +static char *expmeta_rmescapes(char *enddir, const char *name) { - preglob(strcpy(enddir, name), RMESCAPE_EMETA); + const char *p; + + if (!FNMATCH_IS_ENABLED) + return strchrnul(rmescapes(strcpy(enddir, name)), 0); + + p = name; + do { + char *q = strchrnul(p, '\\'); + + enddir = mempcpy(enddir, p, q - p + 1); + p = q; + if (!*p) + break; + if (*++p) + enddir[-1] = *p++; + } while (1); + + return enddir - 1; } static int skipesc(char *p) @@ -1602,8 +1627,7 @@ static int skipesc(char *p) esc = mb & 0xff; if (!esc && p[esc] == '\\' && p[esc + 1]) { - while (p[++esc] == (char)CTLQUOTEMARK) - ; + esc++; mb = mbnext(p + esc); esc += mb & 0xff; @@ -1639,7 +1663,6 @@ static char *expmeta(char *name, unsigned name_len, size_t expdir_len) int c; *(DIR *volatile *)&dirp = NULL; - *(char *volatile *)&pat = NULL; savehandler = handler; if (unlikely(err = setjmp(jmploc.loc))) goto out; @@ -1676,18 +1699,17 @@ static char *expmeta(char *name, unsigned name_len, size_t expdir_len) if (metaflag == 0) { /* we've reached the end of the file name */ if (!expdir_len) goto out_opendir; - expmeta_rmescapes(enddir, name); + enddir = expmeta_rmescapes(enddir, name); if (lstat64(cp, &statb) >= 0) - cp = addfnamealt(strchrnul(enddir, 0), expdir_len); + cp = addfnamealt(enddir, expdir_len); goto out_opendir; } endname = p; if (name < start) { c = *start; *start = 0; - expmeta_rmescapes(enddir, name); + enddir = expmeta_rmescapes(enddir, name); *start = c; - enddir += strlen(enddir); } *enddir = 0; 
expdir_len = enddir - cp; @@ -1702,21 +1724,28 @@ static char *expmeta(char *name, unsigned name_len, size_t expdir_len) } name_len -= endname - name; matchdot = 0; - *(char *volatile *)&pat = - preglob(start, RMESCAPE_ALLOC | RMESCAPE_HEAP); + pat = start; p = pat; - if (*p == '\\') + if (*p == (FNMATCH_IS_ENABLED ? '\\' : (char)CTLESC)) p++; if (*p == '.') matchdot++; while (! int_pending() && (dp = readdir64(dirp)) != NULL) { - if (dp->d_name[0] == '.' && ! matchdot) - continue; - if (pmatch(pat, dp->d_name)) { - len = strlen(dp->d_name) + 1; + char *dname = dp->d_name; + if (*dname == '.' && !matchdot) + continue; + len = strlen(dname) + 1; + p = dname; + if (!FNMATCH_IS_ENABLED) { + expdest = enddir; + memtodest(p, len, EXP_MBCHAR | EXP_KEEPNUL); + cp = stackblock(); enddir = cp + expdir_len; - enddir = stnputs(dp->d_name, len, enddir); + p = enddir; + } + if (pmatch(pat, p)) { + enddir = stnputs(dname, len, enddir); if (!c) cp = addfnamealt(enddir, expdir_len); else { @@ -1724,15 +1753,13 @@ static char *expmeta(char *name, unsigned name_len, size_t expdir_len) len += expdir_len; cp = expmeta(endname, name_len, len); } + enddir = cp + expdir_len; } } if (c) endname[-esc - 1] = c; out: - pat = *(char *volatile *)&pat; - if (pat != start) - ckfree(pat); closedir(*(DIR *volatile *)&dirp); out_opendir: handler = savehandler; @@ -1820,52 +1847,48 @@ msort(struct strlist *list, int len) STATIC inline int patmatch(char *pattern, const char *string) { - return pmatch(preglob(pattern, FNMATCH_IS_ENABLED ? 
- RMESCAPE_ALLOC | RMESCAPE_GROW : 0), - string); + return pmatch(preglob(pattern, 0), string); } -STATIC int ccmatch(const char *p, int chr, const char **r) +static __attribute__((noinline)) int ccmatch(char *p, const char *mbc, int ml, + char **r) { - static const struct class { - char name[10]; - int (*fn)(int); - } classes[] = { - { .name = ":alnum:]", .fn = isalnum }, - { .name = ":cntrl:]", .fn = iscntrl }, - { .name = ":lower:]", .fn = islower }, - { .name = ":space:]", .fn = isspace }, - { .name = ":alpha:]", .fn = isalpha }, - { .name = ":digit:]", .fn = isdigit }, - { .name = ":print:]", .fn = isprint }, - { .name = ":upper:]", .fn = isupper }, - { .name = ":blank:]", .fn = isblank }, - { .name = ":graph:]", .fn = isgraph }, - { .name = ":punct:]", .fn = ispunct }, - { .name = ":xdigit:]", .fn = isxdigit }, - }; - const struct class *class, *end; + mbstate_t mbst = {}; + wctype_t type; + wchar_t wc; + char *q; - end = classes + sizeof(classes) / sizeof(classes[0]); - for (class = classes; class < end; class++) { - const char *q; + *r = 0; - q = prefix(p, class->name); - if (!q) - continue; - *r = q; - return class->fn(chr); - } + if (*p++ != ':') + return 0; - *r = 0; - return 0; + q = strstr(p, ":]"); + if (!q) + return 0; + + *q = 0; + type = wctype(p); + *q = ':'; + + if (!type) + return 0; + + *r = q + 2; + + if (mbrtowc(&wc, mbc, ml, &mbst) != ml) + return 0; + + return iswctype(wc, type); } -STATIC int -pmatch(const char *pattern, const char *string) +static int pmatch(char *pattern, const char *string) { - const char *p, *q; + char stop[] = { 0, CTLESC, CTLMBCHAR }; + const char *q; + unsigned mb; + char *p; char c; if (FNMATCH_IS_ENABLED) @@ -1874,36 +1897,43 @@ pmatch(const char *pattern, const char *string) p = pattern; q = string; for (;;) { - switch (c = *p++) { + switch ((signed char)(c = *p++)) { case '\0': goto breakloop; - case '\\': - if (*p) { - c = *p++; - } - goto dft; + case CTLESC: + c = *p++; + break; case '?': - if (*q++ == '\0') + 
if (*q == '\0') return 0; - break; + mb = mbnext(q); + q += (mb >> 8) + (mb & 0xff); + continue; case '*': c = *p; while (c == '*') c = *++p; - if (c != '\\' && c != '?' && c != '*' && c != '[') { - while (*q != c) { - if (*q == '\0') + if (!c) + return 1; + stop[0] = CTLESC; + if (c != '?' && c != '[') + stop[0] = c; + for (;;) { + if (stop[0] != (char)CTLESC) { + q = strpbrk(q, stop); + if (!q) return 0; - q++; } - } - do { if (pmatch(p, q)) return 1; - } while (*q++ != '\0'); + if (!*q) + break; + mb = mbnext(q); + q += (mb >> 8) + (mb & 0xff); + } return 0; case '[': { - const char *startp; + char *startp; int invert, found; char chr; @@ -1914,48 +1944,85 @@ pmatch(const char *pattern, const char *string) p++; } found = 0; + mb = mbnext(q); + q += mb & 0xff; + mb >>= 8; chr = *q; if (chr == '\0') return 0; c = *p++; do { + unsigned mbp = 0; + const char *mbs = &c; + if (!c) { p = startp; c = '['; goto dft; } if (c == '[') { - const char *r; + char *r; - found |= !!ccmatch(p, chr, &r); + found |= !!ccmatch(p, q, mb > 1 ? 
+ mb - 2 : mb, + &r); if (r) { p = r; continue; } - } else if (c == '\\') + } else if (c == (char)CTLESC) c = *p++; + else if (c == (char)CTLMBCHAR) { + mbp = mbnext(--p); + p += mbp & 0xff; + mbs = p; + mbp >>= 8; + p += mbp; + } if (*p == '-' && p[1] != ']') { p++; - if (*p == '\\') + if (*p == (char)CTLESC) p++; - if (chr >= c && chr <= *p) + else if (*p == CTLMBCHAR) { + mbp = mbnext(p); + p += mbp & 0xff; + p += mbp >> 8; + continue; + } + if (!(mbp | (mb - 1)) && + chr >= c && chr <= *p) found = 1; p++; - } else { - if (chr == c) - found = 1; - } + } else if (!memcmp(mbs, q, mb)) + found = 1; } while ((c = *p++) != ']'); if (found == invert) return 0; - q++; - break; + q += mb; + continue; } -dft: default: - if (*q++ != c) + case CTLMBCHAR: + mb = mbnext(--p); + p += mb & 0xff; + mb = mbnext(q); + q += mb & 0xff; + mb >>= 8; + + if (memcmp(p - 1, q - 1, mb + 1)) return 0; - break; + + p += mb; + q += mb; + continue; } +dft: + mb = mbnext(q); + if ((mb >> 8) > 1) + return 0; + q += mb & 0xff; + if (*q != c) + return 0; + q += mb >> 8; } breakloop: if (*q != '\0') @@ -1976,7 +2043,6 @@ _rmescapes(char *str, int flag) int notescaped; int globbing; int inquotes; - int expmeta; p = strpbrk(str, cqchars); if (!p) { @@ -1985,7 +2051,6 @@ _rmescapes(char *str, int flag) q = p; r = str; globbing = flag & RMESCAPE_GLOB; - expmeta = (flag & RMESCAPE_EMETA) ? 
RMESCAPE_GLOB : 0; if (flag & RMESCAPE_ALLOC) { size_t len = p - str; @@ -2015,50 +2080,64 @@ _rmescapes(char *str, int flag) inquotes = 0; notescaped = globbing; while (*p) { + int c = (signed char)*p; + int newnesc = globbing; unsigned mb; unsigned ml; - int newnesc = globbing; - if (*p == (char)CTLQUOTEMARK) { + if (c == CTLQUOTEMARK) { p++; inquotes ^= globbing; continue; - } else if (*p == '\\') { + } else if (c == '\\') { /* naked back slash */ newnesc ^= notescaped; /* naked backslashes can only occur outside quotes */ inquotes = 0; - if (expmeta & ~newnesc) { - p++; - goto setnesc; + if (!FNMATCH_IS_ENABLED && notescaped) + c = CTLESC; + } else if (c == CTLESC) { + if ((notescaped ^ inquotes) & inquotes) { + if (FNMATCH_IS_ENABLED) + *q++ = '\\'; + else + q[-1] = '\\'; } - } else if (*p == (char)CTLMBCHAR) { + if (globbing) + *q++ = FNMATCH_IS_ENABLED ? '\\' : CTLESC; + + c = *++p; + } else if (c == CTLMBCHAR) { + unsigned tail = 2; + + if (!FNMATCH_IS_ENABLED && (globbing ^ notescaped)) + q--; + mb = mbnext(p); ml = mb >> 8; - ml -= 2; - p += mb & 0xff; + if (!globbing || FNMATCH_IS_ENABLED) { + p += mb & 0xff; + ml -= 2; + } else { + ml += mb & 0xff; + tail = 0; + } + q = mempcpy(q, p, ml); - p += ml + 2; + p += ml + tail; goto setnesc; - } else if (*p == (char)CTLESC) { - p++; - if (expmeta) - ; - else if (notescaped) - *q++ = '\\'; - else if (inquotes) { - *q++ = '\\'; - *q++ = '\\'; - } } - *q++ = *p++; + *q++ = c; + p++; setnesc: notescaped = newnesc; } + if (!FNMATCH_IS_ENABLED && (globbing ^ notescaped)) + q[-1] = '\\'; *q = '\0'; - if (flag & RMESCAPE_GROW) { + if (flag & (RMESCAPE_ALLOC | RMESCAPE_GROW)) { expdest = r; STADJUST(q - r + 1, expdest); } From 2c92409145d044427eba5f2db16b00b879897598 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Jun 2024 09:29:01 +0800 Subject: [PATCH 131/401] input: Allow MB_LEN_MAX calls to pungetc In order to parse multi-byte characters which may be up to MB_LEN_MAX bytes long, allow enough calls to pungetc 
to undo a single multi-byte character. Also add a function pungetn to do multiple pungetc calls in a row. Signed-off-by: Herbert Xu --- src/input.c | 58 ++++++++++++++++++++++++++++++++++------------------- src/input.h | 11 +++++----- 2 files changed, 42 insertions(+), 27 deletions(-) diff --git a/src/input.c b/src/input.c index 1c598b2..e17e067 100644 --- a/src/input.c +++ b/src/input.c @@ -56,7 +56,7 @@ #include "main.h" #include "myhistedit.h" -#define IBUFSIZ (BUFSIZ + 1) +#define IBUFSIZ (BUFSIZ + PUNGETC_MAX + 1) MKINIT struct parsefile basepf; /* top level input file */ @@ -83,13 +83,16 @@ INIT { } RESET { + int c; + /* clear input buffer */ popallfiles(); - basepf.unget = 0; - while (basepf.lastc[0] != '\n' && - basepf.lastc[0] != PEOF && - !int_pending()) - pgetc(); + + c = PEOF; + if (basepf.nextc - basebuf > basepf.unget) + c = basepf.nextc[-basepf.unget - 1]; + while (c != '\n' && c != PEOF && !int_pending()) + c = pgetc(); } FORKRESET { @@ -131,17 +134,20 @@ static int __pgetc(void) { int c; - if (parsefile->unget) - return parsefile->lastc[--parsefile->unget]; + if (parsefile->unget) { + long unget = -(long)(unsigned)parsefile->unget--; + + if (parsefile->nleft < 0) + return preadbuffer(); + + return parsefile->nextc[unget]; + } if (--parsefile->nleft >= 0) c = (signed char)*parsefile->nextc++; else c = preadbuffer(); - parsefile->lastc[1] = parsefile->lastc[0]; - parsefile->lastc[0] = c; - return c; } @@ -176,9 +182,16 @@ static int stdin_clear_nonblock(void) static int preadfd(void) { + char *buf = parsefile->buf; + int unget; int nr; - char *buf = parsefile->buf; - parsefile->nextc = buf; + + unget = parsefile->nextc - buf; + if (unget > PUNGETC_MAX) + unget = PUNGETC_MAX; + + memmove(buf, parsefile->nextc - unget, unget); + parsefile->nextc = buf += unget; retry: #ifndef SMALL @@ -196,8 +209,8 @@ preadfd(void) nr = 0; else { nr = el_len; - if (nr > IBUFSIZ - 1) - nr = IBUFSIZ - 1; + if (nr > BUFSIZ) + nr = BUFSIZ; memcpy(buf, rl_cp, nr); if (nr != 
el_len) { el_len -= nr; @@ -209,9 +222,9 @@ preadfd(void) } else #endif if (parsefile->fd) - nr = read(parsefile->fd, buf, IBUFSIZ - 1); + nr = read(parsefile->fd, buf, BUFSIZ); else { - unsigned len = IBUFSIZ - 1; + unsigned len = BUFSIZ; nr = 0; @@ -348,6 +361,11 @@ static int preadbuffer(void) return (signed char)*parsefile->nextc++; } +void pungetn(int n) +{ + parsefile->unget += n; +} + /* * Undo a call to pgetc. Only two characters may be pushed back. * PEOF may be pushed back. @@ -356,7 +374,7 @@ static int preadbuffer(void) void pungetc(void) { - parsefile->unget++; + pungetn(1); } /* @@ -383,7 +401,6 @@ pushstring(char *s, void *ap) sp->prevnleft = parsefile->nleft; sp->unget = parsefile->unget; sp->spfree = parsefile->spfree; - memcpy(sp->lastc, parsefile->lastc, sizeof(sp->lastc)); sp->ap = (struct alias *)ap; if (ap) { ((struct alias *)ap)->flag |= ALIASINUSE; @@ -413,7 +430,6 @@ static void popstring(void) parsefile->nextc = sp->prevstring; parsefile->nleft = sp->prevnleft; parsefile->unget = sp->unget; - memcpy(parsefile->lastc, sp->lastc, sizeof(sp->lastc)); /*dprintf("*** calling popstring: restoring to '%s'\n", parsenextc);*/ parsefile->strpush = sp->prev; parsefile->spfree = sp; @@ -457,7 +473,7 @@ setinputfd(int fd, int push) } parsefile->fd = fd; if (parsefile->buf == NULL) - parsefile->buf = ckmalloc(IBUFSIZ); + parsefile->nextc = parsefile->buf = ckmalloc(IBUFSIZ); input_set_lleft(parsefile, parsefile->nleft = 0); plinno = 1; } diff --git a/src/input.h b/src/input.h index 1ff5773..5b4a045 100644 --- a/src/input.h +++ b/src/input.h @@ -34,12 +34,16 @@ * @(#)input.h 8.2 (Berkeley) 5/4/95 */ +#include + #ifdef SMALL #define IS_DEFINED_SMALL 1 #else #define IS_DEFINED_SMALL 0 #endif +#define PUNGETC_MAX (MB_LEN_MAX > 16 ? MB_LEN_MAX : 16) + /* PEOF (the end of file marker) is defined in syntax.h */ enum { @@ -59,9 +63,6 @@ struct strpush { /* Delay freeing so we can stop nested aliases. 
*/ struct strpush *spfree; - /* Remember last two characters for pungetc. */ - int lastc[2]; - /* Number of outstanding calls to pungetc. */ int unget; }; @@ -87,9 +88,6 @@ struct parsefile { /* Delay freeing so we can stop nested aliases. */ struct strpush *spfree; - /* Remember last two characters for pungetc. */ - int lastc[2]; - /* Number of outstanding calls to pungetc. */ int unget; }; @@ -106,6 +104,7 @@ extern struct parsefile *parsefile; int pgetc(void); int pgetc2(void); void pungetc(void); +void pungetn(int); void pushstring(char *, void *); int setinputfile(const char *, int); void setinputstring(char *); From d6df48c626fbf3bf0c3197927f2ec740d7a8b7ac Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Jun 2024 09:29:04 +0800 Subject: [PATCH 132/401] input: Add pgetc_eoa This reintroduces PEOA in a limited way. Instead of allowing pgetc to return it, limit it to a new function pgetc_eoa so only specific callers need to deal with PEOA. Signed-off-by: Herbert Xu --- src/input.c | 8 +++++++- src/input.h | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/input.c b/src/input.c index e17e067..bedc581 100644 --- a/src/input.c +++ b/src/input.c @@ -157,7 +157,7 @@ static int __pgetc(void) * Nul characters in the input are silently discarded. */ -int pgetc(void) +int __attribute__((noinline)) pgetc(void) { struct strpush *sp = parsefile->spfree; @@ -167,6 +167,12 @@ int pgetc(void) return __pgetc(); } +int pgetc_eoa(void) +{ + return parsefile->strpush && parsefile->nleft == -1 && + parsefile->strpush->ap ? PEOA : pgetc(); +} + static int stdin_clear_nonblock(void) { int flags = fcntl(0, F_GETFL, 0); diff --git a/src/input.h b/src/input.h index 5b4a045..151b1c6 100644 --- a/src/input.h +++ b/src/input.h @@ -45,6 +45,7 @@ #define PUNGETC_MAX (MB_LEN_MAX > 16 ? 
MB_LEN_MAX : 16) /* PEOF (the end of file marker) is defined in syntax.h */ +#define PEOA ((PEOF) - 1) enum { INPUT_PUSH_FILE = 1, @@ -102,7 +103,7 @@ extern struct parsefile *parsefile; #define plinno (parsefile->linno) int pgetc(void); -int pgetc2(void); +int pgetc_eoa(void); void pungetc(void); void pungetn(int); void pushstring(char *, void *); From 6c44f4ee0c2b4e96745d83752c64402eaa360a1c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Jun 2024 09:29:06 +0800 Subject: [PATCH 133/401] parser: Add support for multi-byte characters Add the requisite markers for multi-byte characters so that the expansion code can recognise them. Also allow wide blank characters to terminate words. Signed-off-by: Herbert Xu --- src/expand.c | 19 ++++++++ src/mktokens | 1 + src/parser.c | 134 +++++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 128 insertions(+), 26 deletions(-) diff --git a/src/expand.c b/src/expand.c index ec408fd..cb579df 100644 --- a/src/expand.c +++ b/src/expand.c @@ -276,6 +276,7 @@ static char *argstr(char *p, int flag) CTLESC, CTLVAR, CTLBACKQ, + CTLMBCHAR, CTLARI, CTLENDARI, 0 @@ -300,6 +301,8 @@ static char *argstr(char *p, int flag) start: startloc = expdest - (char *)stackblock(); for (;;) { + unsigned ml; + unsigned mb; int end; length += strcspn(p + length, reject); @@ -362,6 +365,22 @@ static char *argstr(char *p, int flag) startloc++; } break; + case CTLMBCHAR: + c = (signed char)*p--; + mb = mbnext(p); + ml = (mb >> 8) - 2; + if (flag & QUOTES_ESC) { + length = (mb >> 8) + (mb & 0xff); + if (c == (char)CTLESC) + startloc += length; + break; + } + if (c == CTLESC) + startloc += ml; + p += mb & 0xff; + expdest = stnputs(p, ml, expdest); + p += mb >> 8; + break; case CTLESC: startloc++; length++; diff --git a/src/mktokens b/src/mktokens index 78055be..dcef676 100644 --- a/src/mktokens +++ b/src/mktokens @@ -41,6 +41,7 @@ cat > "${TMPDIR}"/ka$$ <<\! 
TEOF 1 end of file +TBLANK 0 blank TNL 0 newline TSEMI 0 ";" TBACKGND 0 "&" diff --git a/src/parser.c b/src/parser.c index 09b1cb8..1a2f5df 100644 --- a/src/parser.c +++ b/src/parser.c @@ -36,7 +36,11 @@ #include #endif +#include +#include #include +#include +#include #include "shell.h" #include "parser.h" @@ -801,6 +805,8 @@ xxreadtoken(void) setprompt(2); } for (;;) { /* until token or start of word found */ + int tok; + c = pgetc_eatbnl(); switch (c) { case ' ': case '\t': @@ -834,9 +840,10 @@ xxreadtoken(void) case ')': RETURN(TRP); } - break; + tok = readtoken1(c, BASESYNTAX, (char *)NULL, 0); + if (tok != TBLANK) + return tok; } - return readtoken1(c, BASESYNTAX, (char *)NULL, 0); #undef RETURN } @@ -876,7 +883,53 @@ static void synstack_pop(struct synstack **stack) *stack = (*stack)->next; } +static unsigned getmbc(int c, char *out, int mode) +{ + char *const start = out; + mbstate_t mbst = {}; + unsigned ml = 0; + size_t ml2; + wchar_t wc; + char *mbc; + + if (likely(c >= 0)) + return 0; + mbc = (mode & 3) < 2 ? out + 2 + (mode == 1) : out; + mbc[ml] = c; + while ((ml2 = mbrtowc(&wc, mbc + ml++, 1, &mbst)) == -2) { + if (ml >= MB_LEN_MAX) + break; + c = pgetc_eoa(); + if (c == PEOA || c == PEOF) + break; + mbc[ml] = c; + } + + if (ml2 == 1 && ml > 1) { + if (mode == 4 && iswblank(wc)) + return 1; + + if ((mode & 3) < 2) { + USTPUTC(CTLMBCHAR, out); + if (mode == 1) + USTPUTC(CTLESC, out); + USTPUTC(ml, out); + } + STADJUST(ml, out); + if ((mode & 3) < 2) { + USTPUTC(ml, out); + USTPUTC(CTLMBCHAR, out); + } + + return out - start; + } + + if (ml > 1) + pungetn(ml - 1); + + return 0; +} /* * If eofmark is NULL, read a word or a redirection symbol. 
If eofmark @@ -929,12 +982,29 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) } #endif CHECKEND(); /* set c to PEOF if at end of here document */ - for (;;) { /* until end of line or end of word */ - CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */ + /* Until end of line or end of word */ + for (;; c = pgetc_top(synstack)) { + int fieldsplitting; + unsigned ml; + + /* Permit max(MB_LEN_MAX, 23) calls to USTPUTC. */ + CHECKSTRSPACE((MB_LEN_MAX > 16 ? MB_LEN_MAX : 16) + 7, + out); + fieldsplitting = synstack->syntax == BASESYNTAX && + !synstack->varnest ? 4 : 0; + ml = getmbc(c, out, fieldsplitting); + if (ml == 1) { + if (out == stackblock()) + return TBLANK; + c = pgetc(); + break; + } + out += ml; + if (ml) + continue; switch(synstack->syntax[c]) { case CNL: /* '\n' */ - if (synstack->syntax == BASESYNTAX && - !synstack->varnest) + if (fieldsplitting) goto endword; /* exit outer loop */ USTPUTC(c, out); nlprompt(); @@ -956,26 +1026,33 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) USTPUTC(CTLESC, out); USTPUTC('\\', out); pungetc(); - } else { - if ( - synstack->dblquote && - c != '\\' && c != '`' && - c != '$' && ( - c != '"' || - (eofmark != NULL && - !synstack->varnest) - ) && ( - c != '}' || - !synstack->varnest - ) - ) { - USTPUTC(CTLESC, out); - USTPUTC('\\', out); - } + break; + } + + if ( + synstack->dblquote && + c != '\\' && c != '`' && + c != '$' && ( + c != '"' || + (eofmark != NULL && + !synstack->varnest) + ) && ( + c != '}' || + !synstack->varnest + ) + ) { USTPUTC(CTLESC, out); - USTPUTC(c, out); - quotef++; + USTPUTC('\\', out); } + quotef++; + + ml = getmbc(c, out, 1); + out += ml; + if (ml) + break; + + USTPUTC(CTLESC, out); + USTPUTC(c, out); break; case CSQUOTE: synstack->syntax = SQSYNTAX; @@ -1053,11 +1130,10 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) case CEOF: goto endword; /* exit outer loop */ default: - if (synstack->varnest == 0) + if 
(fieldsplitting) goto endword; /* exit outer loop */ USTPUTC(c, out); } - c = pgetc_top(synstack); } } endword: @@ -1384,6 +1460,7 @@ parsebackq: { size_t psavelen; size_t savelen; union node *n; + unsigned ml; char *pstr; char *str; @@ -1415,6 +1492,11 @@ parsebackq: { if (pc != '\\' && pc != '`' && pc != '$' && (!synstack->dblquote || pc != '"')) STPUTC('\\', pout); + CHECKSTRSPACE(MB_LEN_MAX, pout); + ml = getmbc(pc, pout, 2); + pout += ml; + if (ml) + continue; break; case PEOF: From 7105464252db7af6a4fd2eb09229ad2601d112a7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Jun 2024 09:29:08 +0800 Subject: [PATCH 134/401] input: Always push in setinputfile Push the input file even in the case of "sh file". This is because the base parsefile will be used for read(1). Signed-off-by: Herbert Xu --- src/input.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/input.c b/src/input.c index bedc581..1712e5f 100644 --- a/src/input.c +++ b/src/input.c @@ -61,6 +61,7 @@ MKINIT struct parsefile basepf; /* top level input file */ MKINIT char basebuf[IBUFSIZ]; /* buffer for top level input file */ +MKINIT struct parsefile *toppf = &basepf; struct parsefile *parsefile = &basepf; /* current input file */ int whichprompt; /* 1 == PS1, 2 == PS2 */ @@ -89,8 +90,8 @@ RESET { popallfiles(); c = PEOF; - if (basepf.nextc - basebuf > basepf.unget) - c = basepf.nextc[-basepf.unget - 1]; + if (toppf->nextc - toppf->buf > toppf->unget) + c = toppf->nextc[-toppf->unget - 1]; while (c != '\n' && c != PEOF && !int_pending()) c = pgetc(); } @@ -473,13 +474,11 @@ setinputfile(const char *fname, int flags) static void setinputfd(int fd, int push) { - if (push) { - pushfile(); - parsefile->buf = 0; - } + pushfile(); + if (!push) + toppf = parsefile; parsefile->fd = fd; - if (parsefile->buf == NULL) - parsefile->nextc = parsefile->buf = ckmalloc(IBUFSIZ); + parsefile->nextc = parsefile->buf = ckmalloc(IBUFSIZ); input_set_lleft(parsefile, 
parsefile->nleft = 0); plinno = 1; } @@ -560,5 +559,5 @@ void unwindfiles(struct parsefile *stop) void popallfiles(void) { - unwindfiles(&basepf); + unwindfiles(toppf); } From 1d072e9c3292281a7eee54c41fec117ff22723e5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Jun 2024 09:29:11 +0800 Subject: [PATCH 135/401] builtin: Use pgetc in read(1) Use pgetc instead of read(2) in read(1). This allows any future buffering in the input layer to be used by read(1). This also allows read(1) to call helpers in the parser that may use the input layer. Signed-off-by: Herbert Xu --- src/input.c | 40 +++++++++++++++++++++++++++------------- src/input.h | 1 + src/miscbltin.c | 39 +++++++++++++++++++-------------------- 3 files changed, 47 insertions(+), 33 deletions(-) diff --git a/src/input.c b/src/input.c index 1712e5f..6779069 100644 --- a/src/input.c +++ b/src/input.c @@ -42,19 +42,20 @@ * This file implements the input routines used by the parser. */ +#include "alias.h" +#include "error.h" #include "eval.h" -#include "shell.h" -#include "redir.h" -#include "syntax.h" #include "input.h" -#include "output.h" -#include "options.h" -#include "memalloc.h" -#include "error.h" -#include "alias.h" -#include "parser.h" #include "main.h" +#include "memalloc.h" #include "myhistedit.h" +#include "options.h" +#include "output.h" +#include "parser.h" +#include "redir.h" +#include "shell.h" +#include "syntax.h" +#include "trap.h" #define IBUFSIZ (BUFSIZ + PUNGETC_MAX + 1) @@ -258,7 +259,7 @@ preadfd(void) } if (nr < 0) { - if (errno == EINTR) + if (errno == EINTR && !(basepf.prev && pending_sig)) goto retry; } return nr; @@ -522,6 +523,13 @@ pushfile(void) parsefile = pf; } +void pushstdin(void) +{ + INTOFF; + basepf.prev = parsefile; + parsefile = &basepf; + INTON; +} void popfile(void) @@ -529,6 +537,11 @@ popfile(void) struct parsefile *pf = parsefile; INTOFF; + parsefile = pf->prev; + pf->prev = NULL; + if (pf == &basepf) + goto out; + if (pf->fd >= 0) close(pf->fd); if (pf->buf) 
@@ -539,15 +552,16 @@ popfile(void) popstring(); freestrings(parsefile->spfree); } - parsefile = pf->prev; ckfree(pf); + +out: INTON; } -void unwindfiles(struct parsefile *stop) +void __attribute__((noinline)) unwindfiles(struct parsefile *stop) { - while (parsefile != stop) + while (basepf.prev || parsefile != stop) popfile(); } diff --git a/src/input.h b/src/input.h index 151b1c6..c59d784 100644 --- a/src/input.h +++ b/src/input.h @@ -109,6 +109,7 @@ void pungetn(int); void pushstring(char *, void *); int setinputfile(const char *, int); void setinputstring(char *); +void pushstdin(void); void popfile(void); void unwindfiles(struct parsefile *); void popallfiles(void); diff --git a/src/miscbltin.c b/src/miscbltin.c index 8a0ddf4..10d256e 100644 --- a/src/miscbltin.c +++ b/src/miscbltin.c @@ -46,18 +46,20 @@ #include #include -#include "shell.h" -#include "options.h" -#include "var.h" -#include "output.h" -#include "memalloc.h" #include "error.h" +#include "expand.h" +#include "input.h" +#include "memalloc.h" #include "miscbltin.h" #include "mystring.h" #include "main.h" -#include "expand.h" +#include "options.h" +#include "output.h" #include "parser.h" +#include "shell.h" +#include "syntax.h" #include "trap.h" +#include "var.h" #undef rflag @@ -115,14 +117,13 @@ readcmd_handle_line(char *s, int ac, char **ap) int readcmd(int argc, char **argv) { - char **ap; - char c; - int rflag; char *prompt; - char *p; int startloc; int newloc; int status; + char **ap; + int rflag; + char *p; int i; rflag = 0; @@ -145,19 +146,17 @@ readcmd(int argc, char **argv) status = 0; STARTSTACKSTR(p); + pushstdin(); + goto start; for (;;) { - switch (read(0, &c, 1)) { - case 1: - break; - default: - if (errno == EINTR && !pending_sig) - continue; - /* fall through */ - case 0: + int c; + + c = pgetc(); + if (c == PEOF) { status = 1; - goto out; + break; } if (c == '\0') continue; @@ -186,7 +185,7 @@ readcmd(int argc, char **argv) newloc = startloc - 1; } } -out: + popfile(); 
recordregion(startloc, p - (char *)stackblock(), 0); STACKSTRNUL(p); readcmd_handle_line(p + 1, argc - (ap - argv), ap); From b12f136cc7042c5dfbd552e4307a57238e027c38 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Jun 2024 09:29:13 +0800 Subject: [PATCH 136/401] builtin: Process multi-byte characters in read(1) Add support for multi-byte characters in read(1) by using getmbc from the parser. Signed-off-by: Herbert Xu --- src/miscbltin.c | 19 +++++++++++++------ src/parser.c | 2 +- src/parser.h | 1 + 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/miscbltin.c b/src/miscbltin.c index 10d256e..5aa2b24 100644 --- a/src/miscbltin.c +++ b/src/miscbltin.c @@ -36,15 +36,16 @@ * Miscelaneous builtins. */ +#include +#include +#include +#include #include /* quad_t */ #include /* BSD4_4 */ #include #include #include #include -#include -#include -#include #include "error.h" #include "expand.h" @@ -151,8 +152,10 @@ readcmd(int argc, char **argv) goto start; for (;;) { + unsigned ml; int c; + CHECKSTRSPACE((MB_LEN_MAX > 16 ? 
MB_LEN_MAX : 16) + 4, p); c = pgetc(); if (c == PEOF) { status = 1; @@ -160,9 +163,14 @@ readcmd(int argc, char **argv) } if (c == '\0') continue; + ml = getmbc(c, p, 0); + if (ml) { + p += ml; + goto record; + } if (newloc >= startloc) { if (c == '\n') - goto resetbs; + goto record; goto put; } if (!rflag && c == '\\') { @@ -172,13 +180,12 @@ readcmd(int argc, char **argv) if (c == '\n') break; put: - CHECKSTRSPACE(2, p); if (strchr(qchars, c)) USTPUTC(CTLESC, p); USTPUTC(c, p); +record: if (newloc >= startloc) { -resetbs: recordregion(startloc, newloc, 0); start: startloc = p - (char *)stackblock(); diff --git a/src/parser.c b/src/parser.c index 1a2f5df..a4d3291 100644 --- a/src/parser.c +++ b/src/parser.c @@ -883,7 +883,7 @@ static void synstack_pop(struct synstack **stack) *stack = (*stack)->next; } -static unsigned getmbc(int c, char *out, int mode) +unsigned getmbc(int c, char *out, int mode) { char *const start = out; mbstate_t mbst = {}; diff --git a/src/parser.h b/src/parser.h index 14bfc4f..7a9605b 100644 --- a/src/parser.h +++ b/src/parser.h @@ -95,6 +95,7 @@ const char *getprompt(void *); const char *const *findkwd(const char *); char *endofname(const char *); const char *expandstr(const char *); +unsigned getmbc(int c, char *out, int mode); static inline int goodname(const char *p) From a065467315dc5f2f312f7e6f1d2e9d05a77b3cb7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Jun 2024 09:31:32 +0800 Subject: [PATCH 137/401] input: Move newline loop into preadbuffer As it stands preadfd tries to fetch a whole line when we're reading one byte at a time. However, this is wrong because how many bytes we're trying to read has nothing to do with whether we get a whole line or not. Move the loop into preadbuffer alongside the other history support code. 
Signed-off-by: Herbert Xu --- src/input.c | 47 +++++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/src/input.c b/src/input.c index 6779069..193235d 100644 --- a/src/input.c +++ b/src/input.c @@ -192,16 +192,27 @@ preadfd(void) { char *buf = parsefile->buf; int unget; + int pnr; int nr; + nr = input_get_lleft(parsefile); + unget = parsefile->nextc - buf; if (unget > PUNGETC_MAX) unget = PUNGETC_MAX; - memmove(buf, parsefile->nextc - unget, unget); - parsefile->nextc = buf += unget; + memmove(buf, parsefile->nextc - unget, unget + nr); + buf += unget; + parsefile->nextc = buf; + buf += nr; + + nr = BUFSIZ - nr; + if (!IS_DEFINED_SMALL && !nr) + return nr; + pnr = nr; retry: + nr = pnr; #ifndef SMALL if (parsefile->fd == 0 && el) { static const char *rl_cp; @@ -216,9 +227,8 @@ preadfd(void) if (rl_cp == NULL) nr = 0; else { - nr = el_len; - if (nr > BUFSIZ) - nr = BUFSIZ; + if (nr > el_len) + nr = el_len; memcpy(buf, rl_cp, nr); if (nr != el_len) { el_len -= nr; @@ -230,10 +240,8 @@ preadfd(void) } else #endif if (parsefile->fd) - nr = read(parsefile->fd, buf, BUFSIZ); + nr = read(parsefile->fd, buf, nr); else { - unsigned len = BUFSIZ; - nr = 0; do { @@ -255,7 +263,7 @@ preadfd(void) } nr++; - } while (!IS_DEFINED_SMALL && *buf++ != '\n' && --len); + } while (0); } if (nr < 0) { @@ -290,19 +298,26 @@ static int preadbuffer(void) return PEOF; flushall(); + q = parsefile->nextc; + something = !first; + more = input_get_lleft(parsefile); if (more <= 0) { + int nr; + again: - if ((more = preadfd()) <= 0) { + nr = q - parsefile->nextc; + more = preadfd(); + q = parsefile->nextc + nr; + if (more <= 0) { input_set_lleft(parsefile, parsefile->nleft = 0); + if (!IS_DEFINED_SMALL && nr > 0) + goto save; return PEOF; } } - q = parsefile->nextc; - /* delete nul characters */ - something = !first; for (;;) { int c; @@ -321,7 +336,6 @@ static int preadbuffer(void) switch (c) { case '\n': - parsefile->nleft = q - 
parsefile->nextc - 1; goto done; default: @@ -335,8 +349,7 @@ static int preadbuffer(void) check: if (more <= 0) { - parsefile->nleft = q - parsefile->nextc - 1; - if (parsefile->nleft < 0) + if (!IS_DEFINED_SMALL) goto again; break; } @@ -344,6 +357,8 @@ static int preadbuffer(void) done: input_set_lleft(parsefile, more); +save: + parsefile->nleft = q - parsefile->nextc - 1; if (!IS_DEFINED_SMALL) savec = *q; *q = '\0'; From b1864ee9f2418f84d918ef101dab972ecf4f852f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Jun 2024 09:31:35 +0800 Subject: [PATCH 138/401] input: Use lseek on stdin when possible For files that can be sought, use lseek instead of reading one byte at a time. Signed-off-by: Herbert Xu --- src/eval.c | 3 ++ src/init.h | 1 + src/input.c | 106 +++++++++++++++++++++++++++++++++++--------------- src/input.h | 4 ++ src/jobs.c | 3 ++ src/mkinit.c | 6 +++ src/options.c | 2 +- src/redir.c | 20 ++++++---- src/trap.c | 1 + 9 files changed, 107 insertions(+), 39 deletions(-) diff --git a/src/eval.c b/src/eval.c index 140a734..ad2aa03 100644 --- a/src/eval.c +++ b/src/eval.c @@ -907,6 +907,8 @@ evalcommand(union node *cmd, int flags) goto bail; default: + flush_input(); + /* Fork off a child process if necessary. */ if (!(flags & EV_EXIT) || have_traps()) { INTOFF; @@ -1142,6 +1144,7 @@ execcmd(int argc, char **argv) iflag = 0; /* exit on error */ mflag = 0; optschanged(); + flush_input(); shellexec(argv + 1, pathval(), 0); } return 0; diff --git a/src/init.h b/src/init.h index 4f98b5d..e117895 100644 --- a/src/init.h +++ b/src/init.h @@ -39,4 +39,5 @@ union node; void init(void); void exitreset(void); void forkreset(union node *); +void postexitreset(void); void reset(void); diff --git a/src/input.c b/src/input.c index 193235d..b84ecec 100644 --- a/src/input.c +++ b/src/input.c @@ -32,11 +32,13 @@ * SUCH DAMAGE. 
*/ -#include /* defines BUFSIZ */ #include -#include +#include +#include /* defines BUFSIZ */ #include #include +#include +#include /* * This file implements the input routines used by the parser. @@ -59,12 +61,21 @@ #define IBUFSIZ (BUFSIZ + PUNGETC_MAX + 1) +struct stdin_state { + tcflag_t canon; + off_t seekable; + struct termios tios; +}; MKINIT struct parsefile basepf; /* top level input file */ MKINIT char basebuf[IBUFSIZ]; /* buffer for top level input file */ MKINIT struct parsefile *toppf = &basepf; +MKINIT struct stdin_state stdin_state; struct parsefile *parsefile = &basepf; /* current input file */ int whichprompt; /* 1 == PS1, 2 == PS2 */ +int stdin_istty; + +MKINIT void input_init(void); STATIC void pushfile(void); static void popstring(void); @@ -74,6 +85,7 @@ static int preadbuffer(void); #ifdef mkinit INCLUDE +INCLUDE INCLUDE INCLUDE "input.h" INCLUDE "error.h" @@ -82,6 +94,8 @@ INCLUDE "syntax.h" INIT { basepf.nextc = basepf.buf = basebuf; basepf.linno = 1; + + input_init(); } RESET { @@ -104,8 +118,32 @@ FORKRESET { parsefile->fd = 0; } } + +POSTEXITRESET { + flush_input(); +} #endif +void input_init(void) +{ + struct stdin_state *st = &stdin_state; + int istty; + + istty = tcgetattr(0, &st->tios) + 1; + st->seekable = istty ? 0 : lseek(0, 0, SEEK_CUR) + 1; + st->canon = istty ? 
st->tios.c_lflag & ICANON : 0; + stdin_istty = istty; +} + +static bool stdin_bufferable(void) +{ + struct stdin_state *st = &stdin_state; + + if (stdin_istty < 0) + input_init(); + + return st->canon || st->seekable; +} static void freestrings(struct strpush *sp) { @@ -191,6 +229,7 @@ static int preadfd(void) { char *buf = parsefile->buf; + int fd = parsefile->fd; int unget; int pnr; int nr; @@ -214,7 +253,7 @@ preadfd(void) retry: nr = pnr; #ifndef SMALL - if (parsefile->fd == 0 && el) { + if (fd == 0 && el) { static const char *rl_cp; static int el_len; @@ -237,38 +276,23 @@ preadfd(void) rl_cp = 0; } - } else + return nr; + } #endif - if (parsefile->fd) - nr = read(parsefile->fd, buf, nr); - else { - nr = 0; - - do { - int err; - - err = read(0, buf, 1); - if (err <= 0) { - if (nr) - break; - - nr = err; - if (errno != EWOULDBLOCK) - break; - if (stdin_clear_nonblock() < 0) - break; - - out2str("sh: turning off NDELAY mode\n"); - goto retry; - } - nr++; - } while (0); - } + if (!fd && !stdin_bufferable()) + nr = 1; + + nr = read(fd, buf, nr); if (nr < 0) { if (errno == EINTR && !(basepf.prev && pending_sig)) goto retry; + if (fd == 0 && errno == EWOULDBLOCK && + stdin_clear_nonblock() >= 0) { + out2str("sh: turning off NDELAY mode\n"); + goto retry; + } } return nr; } @@ -302,6 +326,8 @@ static int preadbuffer(void) something = !first; more = input_get_lleft(parsefile); + + INTOFF; if (more <= 0) { int nr; @@ -313,6 +339,7 @@ static int preadbuffer(void) input_set_lleft(parsefile, parsefile->nleft = 0); if (!IS_DEFINED_SMALL && nr > 0) goto save; + INTON; return PEOF; } } @@ -365,11 +392,10 @@ static int preadbuffer(void) if (parsefile->fd == 0 && hist && something) { HistEvent he; - INTOFF; history(hist, &he, first ? 
H_ENTER : H_APPEND, parsefile->nextc); - INTON; } + INTON; if (vflag) { out2str(parsefile->nextc); @@ -590,3 +616,21 @@ popallfiles(void) { unwindfiles(toppf); } + +void __attribute__((noinline)) flush_input(void) +{ + int left = basepf.nleft + input_get_lleft(&basepf); + + if (stdin_state.seekable && left) { + INTOFF; + lseek(0, -left, SEEK_CUR); + input_set_lleft(&basepf, basepf.nleft = 0); + INTON; + } +} + +void reset_input(void) +{ + flush_input(); + stdin_istty = -1; +} diff --git a/src/input.h b/src/input.h index c59d784..af1c1be 100644 --- a/src/input.h +++ b/src/input.h @@ -35,6 +35,7 @@ */ #include +#include #ifdef SMALL #define IS_DEFINED_SMALL 1 @@ -94,6 +95,7 @@ struct parsefile { }; extern struct parsefile *parsefile; +extern int stdin_istty; /* * The input line number. Input.c just defines this variable, and saves @@ -113,6 +115,8 @@ void pushstdin(void); void popfile(void); void unwindfiles(struct parsefile *); void popallfiles(void); +void flush_input(void); +void reset_input(void); static inline int input_get_lleft(struct parsefile *pf) { diff --git a/src/jobs.c b/src/jobs.c index 02ec6f4..247fab5 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -969,6 +969,9 @@ forkshell(struct job *jp, union node *n, int mode) int pid; TRACE(("forkshell(%%%d, %p, %d) called\n", jobno(jp), n, mode)); + + flush_input(); + pid = fork(); if (pid == 0) forkchild(jp, n, mode); diff --git a/src/mkinit.c b/src/mkinit.c index 870b64d..2514ebf 100644 --- a/src/mkinit.c +++ b/src/mkinit.c @@ -119,6 +119,11 @@ char forkreset[] = "\ * This routine is called when we enter a subshell.\n\ */\n"; +char postexitreset[] = "\ +/*\n\ + * This routine is called in exitshell.\n\ + */\n"; + char reset[] = "\ /*\n\ * This routine is called when an error or an interrupt occurs in an\n\ @@ -130,6 +135,7 @@ struct event event[] = { {"INIT", "init", init}, {"EXITRESET", "exitreset", exitreset}, {"FORKRESET", "forkreset", forkreset, "union node *n"}, + {"POSTEXITRESET", "postexitreset", 
postexitreset}, {"RESET", "reset", reset}, {NULL, NULL} }; diff --git a/src/options.c b/src/options.c index 34e500a..c74e4fe 100644 --- a/src/options.c +++ b/src/options.c @@ -142,7 +142,7 @@ procargs(int argc, char **argv) sh_error("-c requires an argument"); sflag = 1; } - if (iflag == 2 && sflag == 1 && isatty(0) && isatty(1)) + if (iflag == 2 && sflag == 1 && stdin_istty && isatty(1)) iflag = 1; if (mflag == 2) mflag = iflag; diff --git a/src/redir.c b/src/redir.c index c57d745..8d1c8f6 100644 --- a/src/redir.c +++ b/src/redir.c @@ -46,16 +46,17 @@ * Code for dealing with input/output redirection. */ +#include "error.h" +#include "expand.h" +#include "input.h" +#include "jobs.h" #include "main.h" -#include "shell.h" +#include "memalloc.h" #include "nodes.h" -#include "jobs.h" #include "options.h" -#include "expand.h" -#include "redir.h" #include "output.h" -#include "memalloc.h" -#include "error.h" +#include "redir.h" +#include "shell.h" #include "system.h" #include "trap.h" @@ -142,6 +143,8 @@ redirect(union node *redir, int flags) continue; fd = n->nfile.fd; + if (fd == 0) + reset_input(); if (sv) { int closed; @@ -415,8 +418,11 @@ popredir(int drop) close(i); break; default: - if (!drop) + if (!drop) { + if (i == 0) + reset_input(); dup2(rp->renamed[i], i); + } close(rp->renamed[i]); break; } diff --git a/src/trap.c b/src/trap.c index 4eb9849..aebffa0 100644 --- a/src/trap.c +++ b/src/trap.c @@ -426,6 +426,7 @@ exitshell(void) } out: exitreset(); + postexitreset(); /* * Disable job control so that whoever had the foreground before we * started can get it back. From 44b15ea09a9ee5872cf477e4ffc6b42ef37d1e46 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 2 Jun 2024 09:31:37 +0800 Subject: [PATCH 139/401] input: Use tee(2) for stdin pipe Use tee(2) to peek at pipes in order to avoid reading one byte at a time. 
Signed-off-by: Herbert Xu --- configure.ac | 2 +- src/input.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++------ src/system.h | 7 ++++++ 3 files changed, 72 insertions(+), 8 deletions(-) diff --git a/configure.ac b/configure.ac index 9b4db72..338d5bd 100644 --- a/configure.ac +++ b/configure.ac @@ -89,7 +89,7 @@ dnl Checks for library functions. AC_CHECK_FUNCS(bsearch faccessat getpwnam getrlimit isalpha killpg \ memfd_create mempcpy \ sigsetmask stpcpy strchrnul strsignal strtod strtoimax \ - strtoumax sysconf) + strtoumax sysconf tee) dnl Check whether it's worth working around FreeBSD PR kern/125009. dnl The traditional behavior of access/faccessat is crazy, but diff --git a/src/input.c b/src/input.c index b84ecec..8f8c173 100644 --- a/src/input.c +++ b/src/input.c @@ -57,14 +57,18 @@ #include "redir.h" #include "shell.h" #include "syntax.h" +#include "system.h" #include "trap.h" #define IBUFSIZ (BUFSIZ + PUNGETC_MAX + 1) +MKINIT struct stdin_state { tcflag_t canon; off_t seekable; struct termios tios; + int pip[2]; + int pending; }; MKINIT struct parsefile basepf; /* top level input file */ @@ -85,6 +89,7 @@ static int preadbuffer(void); #ifdef mkinit INCLUDE +INCLUDE INCLUDE INCLUDE INCLUDE "input.h" @@ -117,6 +122,11 @@ FORKRESET { close(parsefile->fd); parsefile->fd = 0; } + if (stdin_state.pip[0]) { + close(stdin_state.pip[0]); + close(stdin_state.pip[1]); + memset(stdin_state.pip, 0, sizeof(stdin_state.pip)); + } } POSTEXITRESET { @@ -145,6 +155,43 @@ static bool stdin_bufferable(void) return st->canon || st->seekable; } +static void flush_tee(void *buf, int nr, int pending) +{ + while (pending > 0) { + int err; + + err = read(0, buf, nr > pending ? 
pending : nr); + if (err > 0) + pending -= err; + } +} + +static int stdin_tee(void *buf, int nr) +{ + int err; + + if (stdin_istty) + return 0; + + if (!stdin_state.pip[0]) { + err = pipe(stdin_state.pip); + if (err < 0) + return err; + if (stdin_state.pip[0] < 10) + stdin_state.pip[0] = savefd(stdin_state.pip[0], + stdin_state.pip[0]); + if (stdin_state.pip[1] < 10) + stdin_state.pip[1] = savefd(stdin_state.pip[1], + stdin_state.pip[1]); + } + + flush_tee(buf, nr, stdin_state.pending); + + err = tee(0, stdin_state.pip[1], nr, 0); + stdin_state.pending = err; + return err; +} + static void freestrings(struct strpush *sp) { INTOFF; @@ -280,10 +327,17 @@ preadfd(void) } #endif - if (!fd && !stdin_bufferable()) - nr = 1; + if (!fd && !stdin_bufferable()) { + nr = stdin_tee(buf, nr); + fd = stdin_state.pip[0]; + if (nr <= 0) { + fd = 0; + nr = 1; + } + } - nr = read(fd, buf, nr); + if (nr >= 0) + nr = read(fd, buf, nr); if (nr < 0) { if (errno == EINTR && !(basepf.prev && pending_sig)) @@ -621,12 +675,15 @@ void __attribute__((noinline)) flush_input(void) { int left = basepf.nleft + input_get_lleft(&basepf); - if (stdin_state.seekable && left) { - INTOFF; + INTOFF; + if (stdin_state.seekable && left) lseek(0, -left, SEEK_CUR); - input_set_lleft(&basepf, basepf.nleft = 0); - INTON; + else if (stdin_state.pending > left) { + flush_tee(basebuf, BUFSIZ, stdin_state.pending - left); + stdin_state.pending = 0; } + input_set_lleft(&basepf, basepf.nleft = 0); + INTON; } void reset_input(void) diff --git a/src/system.h b/src/system.h index 6b31d52..e7f968b 100644 --- a/src/system.h +++ b/src/system.h @@ -118,6 +118,13 @@ long sysconf(int) __attribute__((__noreturn__)); int isblank(int c); #endif +#ifndef HAVE_TEE +static inline ssize_t tee(int fd_in, int fd_out, size_t len, unsigned int flags) +{ + return -1; +} +#endif + #ifndef HAVE_FNMATCH static inline int fnmatch(const char *pattern, const char *string, int flags) { From 8d0eca2d9fb5f5ed747fbdbe64bf04ccad1568e8 Mon Sep 17 
00:00:00 2001 From: Herbert Xu Date: Sun, 2 Jun 2024 09:32:46 +0800 Subject: [PATCH 140/401] expand: Rewrite expmeta meta detection Remove the meta detection in expandmeta and rely on the detection in expmeta instead. Replace the open-coded meta detection with one based on strpbrk. This is slightly inaccurate with bracket expressions but the difference is minor (only affecting patterns with an unquoted ']'). Move int_pending to the end of the loop so that it is only executed after some work has been done. Signed-off-by: Herbert Xu --- src/expand.c | 116 +++++++++++++++++++-------------------------------- 1 file changed, 43 insertions(+), 73 deletions(-) diff --git a/src/expand.c b/src/expand.c index cb579df..345c498 100644 --- a/src/expand.c +++ b/src/expand.c @@ -143,7 +143,6 @@ STATIC void addfname(char *); STATIC int patmatch(char *, const char *); STATIC int pmatch(char *, const char *); static size_t cvtnum(intmax_t num, int flags); -STATIC size_t esclen(const char *, const char *); STATIC void varunset(const char *, const char *, const char *, int) __attribute__((__noreturn__)); @@ -166,16 +165,19 @@ preglob(const char *pattern, int flag) { } -STATIC size_t -esclen(const char *start, const char *p) { +static size_t mesclen(const char *start, const char *p, char mesc) { size_t esc = 0; - while (p > start && *--p == (char)CTLESC) { + while (p > start && *--p == mesc) { esc++; } return esc; } +static size_t esclen(const char *start, const char *p) { + return mesclen(start, p, CTLESC); +} + static __attribute__((noinline)) unsigned mbnext(const char *p) { unsigned start = 0; @@ -1544,9 +1546,6 @@ static void addglob(const glob64_t *pglob) STATIC void expandmeta(struct strlist *str) { - static const char metachars[] = { - '*', '?', '[', 0 - }; /* TODO - EXP_REDIR */ if (GLOB_IS_ENABLED) @@ -1560,7 +1559,7 @@ expandmeta(struct strlist *str) if (fflag) goto nometa; - if (!strpbrk(str->text, metachars)) + if (!strpbrk(str->text, "*?]")) goto nometa; savelastp = 
exparg.lastp; @@ -1634,51 +1633,29 @@ static char *expmeta_rmescapes(char *enddir, const char *name) return enddir - 1; } -static int skipesc(char *p) -{ - unsigned short mb; - int esc = 0; - - mb = mbnext(p); - if ((mb >> 8) > 3) - return (mb & 0xff) + (mb >> 8) - 1; - - esc = mb & 0xff; - - if (!esc && p[esc] == '\\' && p[esc + 1]) { - esc++; - mb = mbnext(p + esc); - esc += mb & 0xff; - - if ((mb >> 8) > 3) - esc += (mb >> 8) - 1; - } - - return esc; -} - /* * Do metacharacter (i.e. *, ?, [...]) expansion. */ static char *expmeta(char *name, unsigned name_len, size_t expdir_len) { + const char mesc = FNMATCH_IS_ENABLED ? '\\' : CTLESC; struct jmploc *volatile savehandler; struct jmploc jmploc; struct stat64 statb; struct dirent64 *dp; volatile int err; char *endname; + char *zeroedp; char *enddir; - int metaflag; int matchdot; + unsigned esc; char *start; size_t len; DIR *dirp; char *pat; char *cp; char *p; - int esc; int c; *(DIR *volatile *)&dirp = NULL; @@ -1690,32 +1667,16 @@ static char *expmeta(char *name, unsigned name_len, size_t expdir_len) cp = growstackto(len); enddir = cp + expdir_len; - metaflag = 0; - start = name; - for (p = name; esc = 0, *p; p += esc + 1) { - if (*p == '*' || *p == '?') - metaflag = 1; - else if (*p == '[') { - char *q = p + 1; - for (;;) { - q += skipesc(q); - if (*q == '/' || *q == '\0') - break; - if (*++q == ']') { - metaflag = 1; - break; - } - } - } else { - esc = skipesc(p); - if (p[esc] == '/') { - if (metaflag) - break; - start = p + esc + 1; - } - } - } - if (metaflag == 0) { /* we've reached the end of the file name */ + p = name; + esc = 0; + do { + p = strpbrk(p + esc, "*?]"); + if (!p) + break; + esc = mesclen(name, p, mesc) & 1; + } while (esc); + /* No meta characters */ + if (likely(!p)) { if (!expdir_len) goto out_opendir; enddir = expmeta_rmescapes(enddir, name); @@ -1723,37 +1684,44 @@ static char *expmeta(char *name, unsigned name_len, size_t expdir_len) cp = addfnamealt(enddir, expdir_len); goto 
out_opendir; } - endname = p; - if (name < start) { - c = *start; + start = memrchr(name, '/', p - name); + if (start) { + c = *++start; *start = 0; enddir = expmeta_rmescapes(enddir, name); *start = c; - } + expdir_len = enddir - cp; + } else + start = name; *enddir = 0; - expdir_len = enddir - cp; *(DIR *volatile *)&dirp = opendir(expdir_len ? cp : dotdir); if (!dirp) goto out_opendir; - c = *endname; - if (c) { - *endname = '\0'; - endname += esc + 1; + esc = 0; + p = strchrnul(p + 1, '/'); + zeroedp = p; + endname = p; + if (*p) { + esc = mesclen(name, p, mesc) & 1; + zeroedp -= esc; + endname++; } + c = *zeroedp; + *zeroedp = '\0'; name_len -= endname - name; matchdot = 0; pat = start; p = pat; - if (*p == (FNMATCH_IS_ENABLED ? '\\' : (char)CTLESC)) + if (*p == mesc) p++; if (*p == '.') matchdot++; - while (! int_pending() && (dp = readdir64(dirp)) != NULL) { + while ((dp = readdir64(dirp))) { char *dname = dp->d_name; if (*dname == '.' && !matchdot) - continue; + goto check_int; len = strlen(dname) + 1; p = dname; if (!FNMATCH_IS_ENABLED) { @@ -1774,9 +1742,11 @@ static char *expmeta(char *name, unsigned name_len, size_t expdir_len) } enddir = cp + expdir_len; } +check_int: + if (int_pending()) + break; } - if (c) - endname[-esc - 1] = c; + *zeroedp = c; out: closedir(*(DIR *volatile *)&dirp); From 636a856232767b1fdf2396e2073ba859924a7d7f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 8 Jun 2024 11:42:53 +0800 Subject: [PATCH 141/401] expand: Check d_type in expmeta before recursing If the directory pointer is not a directory, a symlink or an unknown entity, do not recurse into expmeta. Signed-off-by: Herbert Xu --- src/expand.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/expand.c b/src/expand.c index 345c498..6912e39 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1722,6 +1722,9 @@ static char *expmeta(char *name, unsigned name_len, size_t expdir_len) if (*dname == '.' 
&& !matchdot) goto check_int; + if (c && dp->d_type != DT_DIR && dp->d_type != DT_LNK && + dp->d_type != DT_UNKNOWN) + goto check_int; len = strlen(dname) + 1; p = dname; if (!FNMATCH_IS_ENABLED) { From 69786bc79f9bdacc7e37ae0bb226c31bb41440c3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 9 Jun 2024 16:18:40 +0800 Subject: [PATCH 142/401] input: Fix pungetc on PEOF Calling pungetc upon PEOF must cause the next pgetc call to return PEOF. This was broken by the multi-byte pungetc patch. Fix it by adding the EOF logic to pgetc. Note that pungetn will always disregard the PEOF. Fixes: 2c92409145d0 ("input: Allow MB_LEN_MAX calls to pungetc") Signed-off-by: Herbert Xu --- src/input.c | 32 ++++++++++++++------------------ src/input.h | 8 +++++--- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/src/input.c b/src/input.c index 8f8c173..6388b83 100644 --- a/src/input.c +++ b/src/input.c @@ -224,15 +224,13 @@ static int __pgetc(void) if (parsefile->unget) { long unget = -(long)(unsigned)parsefile->unget--; - if (parsefile->nleft < 0) - return preadbuffer(); - return parsefile->nextc[unget]; } - if (--parsefile->nleft >= 0) + if (parsefile->nleft > 0) { + parsefile->nleft--; c = (signed char)*parsefile->nextc++; - else + } else c = preadbuffer(); return c; @@ -372,8 +370,11 @@ static int preadbuffer(void) popstring(); return __pgetc(); } - if (parsefile->buf == NULL) + if (parsefile->eof & 2) { +eof: + parsefile->eof = 3; return PEOF; + } flushall(); q = parsefile->nextc; @@ -394,7 +395,7 @@ static int preadbuffer(void) if (!IS_DEFINED_SMALL && nr > 0) goto save; INTON; - return PEOF; + goto eof; } } @@ -477,7 +478,8 @@ void pungetn(int n) void pungetc(void) { - pungetn(1); + pungetn(1 - (parsefile->eof & 1)); + parsefile->eof &= ~1; } /* @@ -575,8 +577,6 @@ setinputfd(int fd, int push) toppf = parsefile; parsefile->fd = fd; parsefile->nextc = parsefile->buf = ckmalloc(IBUFSIZ); - input_set_lleft(parsefile, parsefile->nleft = 0); - plinno = 1; } @@ 
-591,8 +591,7 @@ setinputstring(char *string) pushfile(); parsefile->nextc = string; parsefile->nleft = strlen(string); - parsefile->buf = NULL; - plinno = 1; + parsefile->eof = 2; INTON; } @@ -609,12 +608,10 @@ pushfile(void) struct parsefile *pf; pf = (struct parsefile *)ckmalloc(sizeof (struct parsefile)); + memset(pf, 0, sizeof(*pf)); pf->prev = parsefile; + pf->linno = 1; pf->fd = -1; - pf->strpush = NULL; - pf->spfree = NULL; - pf->basestrpush.prev = NULL; - pf->unget = 0; parsefile = pf; } @@ -639,8 +636,7 @@ popfile(void) if (pf->fd >= 0) close(pf->fd); - if (pf->buf) - ckfree(pf->buf); + ckfree(pf->buf); if (parsefile->spfree) freestrings(parsefile->spfree); while (pf->strpush) { diff --git a/src/input.h b/src/input.h index af1c1be..706ac73 100644 --- a/src/input.h +++ b/src/input.h @@ -79,9 +79,7 @@ struct parsefile { int linno; /* current line */ int fd; /* file descriptor (or -1 if string) */ int nleft; /* number of chars left in this line */ -#ifndef SMALL - int lleft; /* number of chars left in this buffer */ -#endif + int eof; /* do not read again once we hit EOF */ char *nextc; /* next char in buffer */ char *buf; /* input buffer */ struct strpush *strpush; /* for pushing strings at this level */ @@ -90,6 +88,10 @@ struct parsefile { /* Delay freeing so we can stop nested aliases. */ struct strpush *spfree; +#ifndef SMALL + int lleft; /* number of chars left in this buffer */ +#endif + /* Number of outstanding calls to pungetc. */ int unget; }; From 5cf3bf8f0dd741b1c2a13aa5f03b5445efc856f5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 9 Jun 2024 16:19:08 +0800 Subject: [PATCH 143/401] parser: Fix PEOF infinite loop in getmbc Bail out of getmbc if the first character is PEOF. 
Fixes: 6c44f4ee0c2b ("parser: Add support for multi-byte characters") Signed-off-by: Herbert Xu --- src/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index a4d3291..3d21894 100644 --- a/src/parser.c +++ b/src/parser.c @@ -892,7 +892,7 @@ unsigned getmbc(int c, char *out, int mode) wchar_t wc; char *mbc; - if (likely(c >= 0)) + if (likely(c >= 0 || c <= PEOF)) return 0; mbc = (mode & 3) < 2 ? out + 2 + (mode == 1) : out; From ab6eaa3409654c377275faa1d0c65aebad2270ae Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 10 Jun 2024 14:45:06 +0800 Subject: [PATCH 144/401] parser: Move non-variable case in parsesub to end Move the rare case of a literal dollar sign to the end of the parsesub block. This eliminates a duplicate USTPUTC call. Signed-off-by: Herbert Xu --- src/parser.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/parser.c b/src/parser.c index 3d21894..b711d6c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1298,15 +1298,9 @@ parsesub: { char *p; static const char types[] = "}-+?="; - c = pgetc_eatbnl(); - if (c != '(' && c != '{' && !is_name(c) && !is_special(c)) { - USTPUTC('$', out); - pungetc(); - goto parsesub_return; - } - USTPUTC('$', out); + c = pgetc_eatbnl(); if (c == '(') { /* $(command) or $((arith)) */ USTPUTC(c, out); if (pgetc_eatbnl() == '(') { @@ -1315,7 +1309,7 @@ parsesub: { pungetc(); PARSEBACKQNEW(); } - } else { + } else if (c == '{' || is_name(c) || is_special(c)) { const char *newsyn = synstack->syntax; typeloc = out - (char *)stackblock(); @@ -1441,7 +1435,9 @@ parsesub: { *((char *)stackblock() + typeloc) = subtype | VSBIT; STPUTC('=', out); } - } + } else + pungetc(); + goto parsesub_return; } From f14979510908b7ce4ff5dd6b43fe445bc6d646a1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 10 Jun 2024 14:45:08 +0800 Subject: [PATCH 145/401] parser: Merge first and last chkeofmark branches in parsesub Eliminate the first chkeofmark
branch by moving the CTLVAR to the end of the parsesub block and always doing STADJUST. Signed-off-by: Herbert Xu --- src/parser.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/parser.c b/src/parser.c index b711d6c..2517721 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1293,10 +1293,9 @@ parseredir: { */ parsesub: { + static const char types[] = "}-+?="; int subtype; - int typeloc; char *p; - static const char types[] = "}-+?="; USTPUTC('$', out); @@ -1310,13 +1309,10 @@ parsesub: { PARSEBACKQNEW(); } } else if (c == '{' || is_name(c) || is_special(c)) { + int typeloc = out - (char *)stackblock(); const char *newsyn = synstack->syntax; - typeloc = out - (char *)stackblock(); - if (!chkeofmark) { - out[-1] = CTLVAR; - STADJUST(1, out); - } + STADJUST(!chkeofmark, out); subtype = VSNORMAL; if (likely(c == '{')) { if (chkeofmark) @@ -1432,7 +1428,10 @@ parsesub: { synstack->dqvarnest++; } if (!chkeofmark) { - *((char *)stackblock() + typeloc) = subtype | VSBIT; + char *p = stackblock(); + + p[typeloc - 1] = CTLVAR; + p[typeloc] = subtype | VSBIT; STPUTC('=', out); } } else From 776424a8f9158bfe9f53aa55f931af9f73437caf Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 10 Jun 2024 14:45:11 +0800 Subject: [PATCH 146/401] parser: Add dollar single quote Add support for $' quoting, including \u and \U. The code is shared with printf, so printf (both format and %b) will recognise the new escape codes (except \c) too. Signed-off-by: Herbert Xu --- src/bltin/printf.c | 156 +++++++++++++++++++++++++++++++++++---------- src/parser.c | 77 ++++++++++++++++++---- src/system.h | 3 + 3 files changed, 193 insertions(+), 43 deletions(-) diff --git a/src/bltin/printf.c b/src/bltin/printf.c index 7785735..2c18e93 100644 --- a/src/bltin/printf.c +++ b/src/bltin/printf.c @@ -29,8 +29,7 @@ * SUCH DAMAGE. 
*/ -#include - +#include #include #include #include @@ -38,10 +37,10 @@ #include #include #include +#include #include static int conv_escape_str(char *, char **); -static char *conv_escape(char *, int *); static int getchr(void); static double getdouble(void); static uintmax_t getuintmax(int); @@ -56,6 +55,7 @@ static char **gargv; #define octtobin(c) ((c) - '0') #include "bltin.h" +#include "parser.h" #include "system.h" #define PF(f, func) { \ @@ -164,13 +164,17 @@ int printfcmd(int argc, char *argv[]) int *param; if (ch == '\\') { - int c_ch; - fmt = conv_escape(fmt, &c_ch); - ch = c_ch; - goto pc; + unsigned ret; + char *cp; + + STARTSTACKSTR(cp); + CHECKSTRSPACE(4, cp); + ret = conv_escape(fmt, cp, false); + fmt += ret >> 4; + out1mem(cp, ret & 15); + continue; } if (ch != '%' || (*fmt == '%' && (++fmt || 1))) { -pc: putchar(ch); continue; } @@ -275,58 +279,69 @@ int printfcmd(int argc, char *argv[]) static int conv_escape_str(char *str, char **sp) { - int c; - int ch; char *cp; + int c; /* convert string into a temporary buffer... */ STARTSTACKSTR(cp); do { - c = ch = *str++; - if (ch != '\\') - continue; + unsigned ret; + int ch; + + CHECKSTRSPACE(4, cp); c = *str++; - if (c == 'c') { - /* \c as in SYSV echo - abort all processing.... */ - c = ch = 0x100; + if (c != '\\') { +putchar: + USTPUTC(c, cp); continue; } + ch = *str; + if (ch == 'c') { + /* \c as in SYSV echo - abort all processing.... */ + c = 0x100; + goto putchar; + } + /* * %b string octal constants are not like those in C. * They start with a \0, and are followed by 0, 1, 2, * or 3 octal digits. 
*/ - if (c == '0' && isodigit(*str)) + if (ch == '0' && isodigit(str[1])) str++; /* Finally test for sequences valid in the format string */ - str = conv_escape(str - 1, &c); - } while (STPUTC(c, cp), (char)ch); + ret = conv_escape(str, cp, false); + str += ret >> 4; + cp += ret & 15; + } while (c & 0xff); *sp = cp; - return ch; + return c; } /* * Print "standard" escape characters */ -static char * -conv_escape(char *str, int *conv_ch) +unsigned conv_escape(char *str0, char *out0, bool mbchar) { - int value; + char *out = out0; + char *str = str0; + unsigned value; int ch; ch = *str; switch (ch) { default: - if (!isodigit(*str)) { - value = '\\'; - goto out; + if (!isodigit(ch)) { + value = ch ?: '\\'; + str -= !ch; + break; } ch = 3; @@ -334,12 +349,88 @@ conv_escape(char *str, int *conv_ch) do { value <<= 3; value += octtobin(*str++); - } while (isodigit(*str) && --ch); - goto out; + } while (--ch && isodigit(*str)); + str--; + break; + + case 'x': + ch = 2; + +hex: + value = 0; + do { + int c = *++str; + int d; + + if (c >= '0' && c <= '9') + d = c - '0'; + else { + int cl; + + cl = c & ~0x20; + if (cl >= 'A' && cl <= 'F') + d = cl - 'A' + 10; + else { + str--; + break; + } + } + + value <<= 4; + value += d; + } while (--ch); + + if (value < 0x80) + break; + + if (value < 0x110000) { + int mboff = (mbchar - 1) * 2; + unsigned uni = value; + int len; + + value = 0x80 << 8 | (value & 0xfc0) << 2 | + 0x80 | (value & 0x3f); + + if (uni < 0x800) { + value |= 0x40 << 8; + len = 2; + } else { + value |= 0x80 << 16 | (uni & 0x3f000) << 4; + if (uni < 0x10000) { + value |= 0x60 << 16; + len = 3; + } else { + value |= 0xf0 << 24 | + (uni & ~0x3ffff) << 6; + len = 4; + } + } + + value = htonl(value << (4 - len) * 8); + + USTPUTC(CTLMBCHAR, out); + USTPUTC(len, out); + STADJUST(mboff, out); + *(uint32_t *)out = value; + STADJUST(len, out); + USTPUTC(len, out); + USTPUTC(CTLMBCHAR, out); + STADJUST(mboff, out); + } + + goto out_noput; + + case 'u': + ch = 4; + goto hex; + + 
case 'U': + ch = 8; + goto hex; - case '\\': value = '\\'; break; /* backslash */ case 'a': value = '\a'; break; /* alert */ case 'b': value = '\b'; break; /* backspace */ + case 'e': value = '\033'; break; /* */ case 'f': value = '\f'; break; /* form-feed */ case 'n': value = '\n'; break; /* newline */ case 'r': value = '\r'; break; /* carriage-return */ @@ -347,10 +438,11 @@ conv_escape(char *str, int *conv_ch) case 'v': value = '\v'; break; /* vertical-tab */ } + USTPUTC(value, out); + +out_noput: str++; -out: - *conv_ch = value; - return str; + return (out - out0) | (str - str0) << 4; } static char * diff --git a/src/parser.c b/src/parser.c index 2517721..d1bec58 100644 --- a/src/parser.c +++ b/src/parser.c @@ -931,6 +931,46 @@ unsigned getmbc(int c, char *out, int mode) return 0; } +static char *dollarsq_escape(char *out) +{ + /* 10 = length of UXXXXXXXX + NUL */ + char str[10]; + unsigned len; + char *p; + + for (len = 0; len < sizeof(str) - 1; len++) { + int c = pgetc(); + + if (c <= PEOF) + break; + + str[len] = c; + } + str[len] = 0; + + p = str; + if (*p != 'c') { + unsigned ret; + + ret = conv_escape(p, out, true); + p += ret >> 4; + out += ret & 15; + } else if (*++p) { + int conv_ch; + int c; + + c = (unsigned char)*p++; + + p += !((c ^ *p) | (c ^ '\\')); + + conv_ch = (c & ~((c & 0x40) >> 1)) ^ 0x40; + USTPUTC(conv_ch, out); + } + + pungetn(len - (p - str)); + return out; +} + /* * If eofmark is NULL, read a word or a redirection symbol. If eofmark * is not NULL, read a here document. 
In the latter case, eofmark is the @@ -953,21 +993,19 @@ unsigned getmbc(int c, char *out, int mode) STATIC int readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) { - int c = firstc; - char *out; - size_t len; - struct nodelist *bqlist; - int quotef; - int oldstyle; - /* syntax stack */ struct synstack synbase = { .syntax = syntax }; - struct synstack *synstack = &synbase; int chkeofmark = checkkwd & CHKEOFMARK; + struct synstack *synstack = &synbase; + struct nodelist *bqlist = NULL; + int dollarsq = 0; + int c = firstc; + int quotef = 0; + int oldstyle; + size_t len; + char *out; if (syntax == DQSYNTAX) synstack->dblquote = 1; - quotef = 0; - bqlist = NULL; STARTSTACKSTR(out); loop: { /* for each line, until end of word */ @@ -1014,6 +1052,10 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) USTPUTC(c, out); break; case CCTL: + if (c == dollarsq) { + out = dollarsq_escape(out); + break; + } if ((!eofmark) | synstack->dblquote | synstack->varnest) USTPUTC(CTLESC, out); @@ -1055,6 +1097,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) USTPUTC(c, out); break; case CSQUOTE: +csquote: synstack->syntax = SQSYNTAX; quotemark: if (eofmark == NULL) { @@ -1075,6 +1118,14 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) } if (synstack->dqvarnest == 0) { + if (likely(dollarsq)) { + char *p = stackblock(); + + *out = 0; + out = p + strlen(p); + dollarsq = 0; + } + synstack->syntax = BASESYNTAX; synstack->dblquote = 0; } @@ -1293,6 +1344,7 @@ parseredir: { */ parsesub: { + const char *newsyn = synstack->syntax; static const char types[] = "}-+?="; int subtype; char *p; @@ -1308,9 +1360,12 @@ parsesub: { pungetc(); PARSEBACKQNEW(); } + } else if (c == '\'' && newsyn['&']) { + STADJUST(-1, out); + dollarsq = '\\'; + goto csquote; } else if (c == '{' || is_name(c) || is_special(c)) { int typeloc = out - (char *)stackblock(); - const char *newsyn = synstack->syntax; 
STADJUST(!chkeofmark, out); subtype = VSNORMAL; diff --git a/src/system.h b/src/system.h index e7f968b..8cb4726 100644 --- a/src/system.h +++ b/src/system.h @@ -28,6 +28,7 @@ #include #include +#include #include #ifndef SSIZE_MAX @@ -188,3 +189,5 @@ static inline void globfree64(glob64_t *pglob) * code */ #define uninitialized_var(x) x = x + +unsigned conv_escape(char *str, char *out, bool mbchar); From 3e4868e03dd0934534022e0309315453f8953099 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 22 Jun 2024 20:19:22 +0800 Subject: [PATCH 147/401] expand: Fix leading white space regression in ifsbreakup When leading white spaces are detected in ifsbreakup ifsspc needs to be cleared. Reported-by: Martijn Dekker Fixes: c0674f487c7a ("expand: Support multi-byte characters during field splitting") Signed-off-by: Herbert Xu --- src/expand.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/expand.c b/src/expand.c index 6912e39..5285b79 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1282,7 +1282,7 @@ static char *ifsbreakup_slow(struct ifs_state *ifst, struct arglist *arglist, /* Ignore IFS whitespace at start */ if (q == ifst->start && ifsspc) { ifst->start = p; - return p; + goto out_zero_ifsspc; } if (ifst->maxargs > 0 && !--ifst->maxargs) { ifst->r = q; @@ -1297,6 +1297,7 @@ static char *ifsbreakup_slow(struct ifs_state *ifst, struct arglist *arglist, return p; } +out_zero_ifsspc: ifst->ifsspc = 0; return p; } From 89b85ee84448fe392f1faafee1635d5719e5e7e3 Mon Sep 17 00:00:00 2001 From: Martijn Dekker Date: Mon, 24 Jun 2024 17:23:54 +0100 Subject: [PATCH 148/401] shell: Fix build on systems without memrchr(3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Op 22-06-2024 om 15:25 schreef Martijn Dekker: > memrchr(3) is non-standard, and has been ported from glibc to FreeBSD, NetBSD > and OpenBSD, but not to macOS, at least as of 12.7.5. So we need a test for > it.
As far as I can tell, *name is a zero-terminated C string, so it should > work to use strrchr(3) as a fallback. Reading the code more closely, that's nonsense, because 'p' does not point to the end of the string if metacharacters are found. Guess the best we can do is provide a simple local fallback implementation of memrchr(3). Patch v2 attached. Signed-off-by: Herbert Xu --- configure.ac | 2 +- src/expand.c | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 338d5bd..ba4856a 100644 --- a/configure.ac +++ b/configure.ac @@ -87,7 +87,7 @@ AC_CHECK_DECL([PRIdMAX],, dnl Checks for library functions. AC_CHECK_FUNCS(bsearch faccessat getpwnam getrlimit isalpha killpg \ - memfd_create mempcpy \ + memfd_create memrchr mempcpy \ sigsetmask stpcpy strchrnul strsignal strtod strtoimax \ strtoumax sysconf tee) diff --git a/src/expand.c b/src/expand.c index 5285b79..d0a6a99 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1634,6 +1634,19 @@ static char *expmeta_rmescapes(char *enddir, const char *name) return enddir - 1; } +#ifndef HAVE_MEMRCHR +static void *memrchr(const void *s, int c, size_t n) +{ + const unsigned char *str = s; + const unsigned char *cp; + + for (cp = str + n - 1; cp >= str; cp--) + if (*cp == c) + return cp; + return NULL; +} +#endif + /* * Do metacharacter (i.e. *, ?, [...]) expansion. */ From 9c9fee9cbfc4487d188b8e1b37326745cda508ac Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 3 Jul 2024 20:51:34 +1000 Subject: [PATCH 149/401] expand: Preserve MBCHAR in argstr when EXP_MBCHAR is set MBCHAR should be preserved in argstr if the EXP_MBCHAR bit is set. This broke case statements. 
Reported-by: Martijn Dekker Fixes: 6c44f4ee0c2b ("parser: Add support for multi-byte characters") Signed-off-by: Herbert Xu --- src/expand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/expand.c b/src/expand.c index d0a6a99..d73f29c 100644 --- a/src/expand.c +++ b/src/expand.c @@ -371,7 +371,7 @@ static char *argstr(char *p, int flag) c = (signed char)*p--; mb = mbnext(p); ml = (mb >> 8) - 2; - if (flag & QUOTES_ESC) { + if (flag & (QUOTES_ESC | EXP_MBCHAR)) { length = (mb >> 8) + (mb & 0xff); if (c == (char)CTLESC) startloc += length; From 5f01c7705f3ca4fef31b515f5591f3867db6f292 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 6 Jul 2024 11:37:16 +1000 Subject: [PATCH 150/401] parser: Do not read past single quote in dollarsq_escape MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function dollarsq_escape may read past the current escape code in order to provide enough data to the underlying escape code processing function. This is OK because we will call unget to return any unused characters. However, if this occurs at the end of a quoted string, this may prompt the user for more input which is wrong. Fix this by terminating the loop whenever we see a single quote. Even if this is an escaped single quote and thus does not indicate the end of the whole quoted string, it's still OK because no single escape code can continue after a single quote. 
Reported-by: наб Fixes: 776424a8f915 ("parser: Add dollar single quote") Signed-off-by: Herbert Xu --- src/parser.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/parser.c b/src/parser.c index d1bec58..aecc18f 100644 --- a/src/parser.c +++ b/src/parser.c @@ -938,13 +938,16 @@ static char *dollarsq_escape(char *out) unsigned len; char *p; - for (len = 0; len < sizeof(str) - 1; len++) { + for (len = 0; len < sizeof(str) - 1;) { int c = pgetc(); if (c <= PEOF) break; - str[len] = c; + str[len++] = c; + + if (c == '\'') + break; } str[len] = 0; From 6574d63a1163d8f97e2e9588bee80e0067080924 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Mon, 8 Jul 2024 20:02:53 +0200 Subject: [PATCH 151/401] builtin: test = and != are strcmp, not strcoll 117067 s1 = s2 True if the strings s1 and s2 are identical; otherwise, false. 117068 s1 != s2 True if the strings s1 and s2 are not identical; otherwise, false. 117069 s1 > s2 True if s1 collates after s2 in the current locale; otherwise, false. 117070 s1 < s2 True if s1 collates before s2 in the current locale; otherwise, false. 
"identical" does not mean "collate equally"; this is the difference between sort | uniq and sort -u Fixes: 597850ae1fb9 ("shell: Use strcoll instead of strcmp where applicable") Signed-off-by: Herbert Xu --- src/bltin/test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bltin/test.c b/src/bltin/test.c index 2db4d0f..f7e9720 100644 --- a/src/bltin/test.c +++ b/src/bltin/test.c @@ -353,9 +353,9 @@ binop(void) /* NOTREACHED */ #endif case STREQ: - return strcoll(opnd1, opnd2) == 0; + return strcmp(opnd1, opnd2) == 0; case STRNE: - return strcoll(opnd1, opnd2) != 0; + return strcmp(opnd1, opnd2) != 0; case STRLT: return strcoll(opnd1, opnd2) < 0; case STRGT: From 1713d1d6592f09f23b5abfb4562cbe5a7615fb1a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 24 Jul 2024 18:13:17 +0800 Subject: [PATCH 152/401] eval: Reset input for pipelines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For everything but the first component of a pipeline, the input needs to be reset because it is no longer equal to that of the parent shell. Reported-by: arĉi Fixes: b1864ee9f241 ("input: Use lseek on stdin when possible") Signed-off-by: Herbert Xu --- src/eval.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/eval.c b/src/eval.c index ad2aa03..0f2a7ba 100644 --- a/src/eval.c +++ b/src/eval.c @@ -595,6 +595,7 @@ evalpipe(union node *n, int flags) close(pip[0]); } if (prevfd > 0) { + reset_input(); dup2(prevfd, 0); close(prevfd); } From 9e8cb45d7d1a6712ae5bdd316d1b982b9f395fea Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 24 Jul 2024 18:26:39 +0800 Subject: [PATCH 153/401] jobs: Add comment about reset_input in forkchild For background jobs where the stdin is redirected to /dev/null, a reset_input may be needed in future. For the time being there is no reason to do this as all possible states for stdin will work correctly with /dev/null. 
Signed-off-by: Herbert Xu --- src/jobs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/jobs.c b/src/jobs.c index 247fab5..51e6fa1 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -904,6 +904,9 @@ static void forkchild(struct job *jp, union node *n, int mode) if (jp->nprocs == 0) { close(0); sh_open(_PATH_DEVNULL, O_RDONLY, 0); + /* Should call reset_input here, but it's harmless + * for now. + */ } } if (!oldlvl && iflag) { From 60eeccd0324a904a5f5d79d7d9136da2060e90ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Sun, 28 Jul 2024 04:42:52 +0200 Subject: [PATCH 154/401] builtin: Align test -nt and -ot with POSIX.1-2024 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 117027 pathname1 −nt pathname2 117028 True if pathname1 resolves to an existing file and pathname2 cannot be resolved, or if 117029 both resolve to existing files and pathname1 is newer than pathname2 according to 117030 their last data modification timestamps; otherwise, false. 117031 pathname1 −ot pathname2 117032 True if pathname2 resolves to an existing file and pathname1 cannot be resolved, or if 117033 both resolve to existing files and pathname1 is older than pathname2 according to 117034 their last data modification timestamps; otherwise, false. The correct output is $ [ 2024 -nt 2023 ] && echo yes yes $ [ 2023 -nt 2024 ] && echo yes $ [ 2023 -nt ENOENT ] && echo yes yes $ [ ENOENT -nt 2024 ] && echo yes and $ [ 2024 -ot 2023 ] && echo yes $ [ 2023 -ot 2024 ] && echo yes yes $ [ 2023 -ot ENOENT ] && echo yes $ [ ENOENT -ot 2024 ] && echo yes yes but dash currently returned only the first yes out of both blocks. 
Signed-off-by: Herbert Xu --- src/bltin/test.c | 56 ++++++++++++++++++++++++------------------------ src/dash.1 | 14 ++++++++++-- 2 files changed, 40 insertions(+), 30 deletions(-) diff --git a/src/bltin/test.c b/src/bltin/test.c index f7e9720..6d844f1 100644 --- a/src/bltin/test.c +++ b/src/bltin/test.c @@ -8,17 +8,17 @@ * This program is in the Public Domain. */ -#include -#include - +#include "bltin.h" +#include "../exec.h" #include #include +#include +#include #include #include +#include +#include #include -#include -#include "bltin.h" -#include "../exec.h" /* test(1) accepts the following grammar: oexpr ::= aexpr | aexpr "-o" oexpr ; @@ -146,8 +146,8 @@ static int binop(void); static int filstat(char *, enum token); static enum token t_lex(char **); static int isoperand(char **); -static int newerf(const char *, const char *); -static int olderf(const char *, const char *); +static bool newerf(const char *, const char *); +static bool olderf(const char *, const char *); static int equalf(const char *, const char *); #ifdef HAVE_FACCESSAT @@ -466,39 +466,39 @@ static int isoperand(char **tp) return op && op->op_type == BINOP; } -static int -newerf (const char *f1, const char *f2) +static bool newerf(const char *f1, const char *f2) { struct stat64 b1, b2; + if (stat64(f1, &b1) != 0) + return false; + if (stat64(f2, &b2) != 0) + return true; + #ifdef HAVE_ST_MTIM - return (stat64(f1, &b1) == 0 && - stat64(f2, &b2) == 0 && - ( b1.st_mtim.tv_sec > b2.st_mtim.tv_sec || - (b1.st_mtim.tv_sec == b2.st_mtim.tv_sec && (b1.st_mtim.tv_nsec > b2.st_mtim.tv_nsec ))) - ); + return b1.st_mtim.tv_sec > b2.st_mtim.tv_sec || + (b1.st_mtim.tv_sec == b2.st_mtim.tv_sec && + b1.st_mtim.tv_nsec > b2.st_mtim.tv_nsec); #else - return (stat64(f1, &b1) == 0 && - stat64(f2, &b2) == 0 && - b1.st_mtime > b2.st_mtime); + return b1.st_mtime > b2.st_mtime; #endif } -static int -olderf (const char *f1, const char *f2) +static bool olderf(const char *f1, const char *f2) { struct stat64 b1, b2; + 
if (stat64(f2, &b2) != 0) + return false; + if (stat64(f1, &b1) != 0) + return true; + #ifdef HAVE_ST_MTIM - return (stat64(f1, &b1) == 0 && - stat64(f2, &b2) == 0 && - (b1.st_mtim.tv_sec < b2.st_mtim.tv_sec || - (b1.st_mtim.tv_sec == b2.st_mtim.tv_sec && (b1.st_mtim.tv_nsec < b2.st_mtim.tv_nsec ))) - ); + return b1.st_mtim.tv_sec < b2.st_mtim.tv_sec || + (b1.st_mtim.tv_sec == b2.st_mtim.tv_sec && + b1.st_mtim.tv_nsec < b2.st_mtim.tv_nsec); #else - return (stat64(f1, &b1) == 0 && - stat64(f2, &b2) == 0 && - b1.st_mtime < b2.st_mtime); + return b1.st_mtime < b2.st_mtime; #endif } diff --git a/src/dash.1 b/src/dash.1 index 6c4ee2d..96ce89e 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -2019,7 +2019,12 @@ and exist and .Ar file1 is newer than -.Ar file2 . +.Ar file2 , +or if +.Ar file1 +exists but +.Ar file2 +doesn't. .It Ar file1 Fl ot Ar file2 True if .Ar file1 @@ -2028,7 +2033,12 @@ and exist and .Ar file1 is older than -.Ar file2 . +.Ar file2 , +or if +.Ar file2 +exists but +.Ar file1 +doesn't. .It Ar file1 Fl ef Ar file2 True if .Ar file1 From 5627b5bbe34ba6f782d72b2c57a0145122d1ce3d Mon Sep 17 00:00:00 2001 From: Sebastien Peterson-Boudreau Date: Fri, 23 Aug 2024 19:23:19 -0300 Subject: [PATCH 155/401] histedit: Use EL_PROMPT_ESC As can be seen in the `man` page for `el_set`, using `EL_PROMPT_ESC` for the op is the same as `EL_PROMPT`, but it allows escape characters to be expanded in the prompt the same way they are when used with `echo` or `printf(1)`. As far as I know, this is not specified by POSIX, but neither is the emacs editing mode (please correct me if I am wrong), so I think this is a justified change to make it align with the behaviour of `echo` and `printf(1)`. Given that this is not specified by POSIX, there isn't much of a precedent for what the value of the start/stop character should be. From what I have seen, 0o001 is common, so that is what I have included in the patch, but it may not be the most fitting.
Taking a look at how ASCII defines its control characters, I believe any characters between 0o034 and 0o037 may be a more suitable choice, but this could be up for debate. Signed-off-by: Sebastien Peterson-Boudreau Signed-off-by: Herbert Xu --- src/histedit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/histedit.c b/src/histedit.c index 7692776..5ab718b 100644 --- a/src/histedit.c +++ b/src/histedit.c @@ -115,7 +115,7 @@ histedit(void) if (el != NULL) { if (hist) el_set(el, EL_HIST, history, hist); - el_set(el, EL_PROMPT, getprompt); + el_set(el, EL_PROMPT_ESC, getprompt, '\1'); } else { bad: out2str("sh: can't initialize editing\n"); From da680db665aa81b56e2ab4e6f060673f2eda3fec Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 21 Oct 2024 13:26:59 +0800 Subject: [PATCH 156/401] builtin: Keep backslash on undefined escape sequences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A lot of scripts (in particular, autoconf) rely on echo keeping undefined backslash sequences intact. Preserve this behaviour by only interpreting the few sequences required for dollar single quote.
Reported-by: Дилян Палаузов Fixes: 776424a8f915 ("parser: Add dollar single quote") Signed-off-by: Herbert Xu --- src/bltin/printf.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/bltin/printf.c b/src/bltin/printf.c index 2c18e93..46c6295 100644 --- a/src/bltin/printf.c +++ b/src/bltin/printf.c @@ -339,8 +339,8 @@ unsigned conv_escape(char *str0, char *out0, bool mbchar) switch (ch) { default: if (!isodigit(ch)) { - value = ch ?: '\\'; - str -= !ch; + value = '\\'; + str--; break; } @@ -428,6 +428,12 @@ unsigned conv_escape(char *str0, char *out0, bool mbchar) ch = 8; goto hex; + case '\\': + case '\"': + case '\'': + value = ch; + break; + case 'a': value = '\a'; break; /* alert */ case 'b': value = '\b'; break; /* backspace */ case 'e': value = '\033'; break; /* */ From b3e38adf6718801e7f06267b438c45caec9523bb Mon Sep 17 00:00:00 2001 From: Christoph Anton Mitterer Date: Wed, 23 Oct 2024 21:55:45 +0200 Subject: [PATCH 157/401] man: Clarify the `trap` condition `EXIT` Signed-off-by: Christoph Anton Mitterer Signed-off-by: Herbert Xu --- src/dash.1 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dash.1 b/src/dash.1 index 96ce89e..dbc34c9 100644 --- a/src/dash.1 +++ b/src/dash.1 @@ -2157,7 +2157,8 @@ is .Li 0 or .Li EXIT , -the action is executed when the shell exits. +the action is executed when the shell exits normally (that is not via an +unhandled signal like SIGINT). .Ar action may be empty .Li ( "''" ) , From 73e42427a99a9efc408503d37e59fdf542043e05 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 16 Nov 2024 19:21:46 +0800 Subject: [PATCH 158/401] builtin: Reduce size of conv_escape The jump table is unnecessarily large for a function that is not performance-critical. Move some of the cases out of the switch statement to reduce its size. Move the value = ch assignment to the common path. Merge the code for '\a', '\b' and '\f'.
Signed-off-by: Herbert Xu --- src/bltin/printf.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/bltin/printf.c b/src/bltin/printf.c index 46c6295..ff576ff 100644 --- a/src/bltin/printf.c +++ b/src/bltin/printf.c @@ -335,21 +335,29 @@ unsigned conv_escape(char *str0, char *out0, bool mbchar) int ch; ch = *str; + value = ch; switch (ch) { default: - if (!isodigit(ch)) { - value = '\\'; - str--; + if (ch == '"' || ch == '\'') break; + + if (ch == 'U') { + ch = 8; + goto hex; + } + + value = '\\'; + + if (isodigit(ch)) { + ch = 3; + value = 0; + do { + value <<= 3; + value += octtobin(*str++); + } while (--ch && isodigit(*str)); } - ch = 3; - value = 0; - do { - value <<= 3; - value += octtobin(*str++); - } while (--ch && isodigit(*str)); str--; break; @@ -424,20 +432,17 @@ unsigned conv_escape(char *str0, char *out0, bool mbchar) ch = 4; goto hex; - case 'U': - ch = 8; - goto hex; - case '\\': - case '\"': - case '\'': - value = ch; break; - case 'a': value = '\a'; break; /* alert */ - case 'b': value = '\b'; break; /* backspace */ + case 'a': /* alert */ + case 'b': /* backspace */ + case 'f': /* form-feed */ + value -= 'a'; + value += '\a'; + break; + case 'e': value = '\033'; break; /* */ - case 'f': value = '\f'; break; /* form-feed */ case 'n': value = '\n'; break; /* newline */ case 'r': value = '\r'; break; /* carriage-return */ case 't': value = '\t'; break; /* tab */ From a76c0f428e64d6ccc37c066ed4d47f49b52f9ae7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 17 Nov 2024 10:25:03 +0800 Subject: [PATCH 159/401] expand: Fix scanleft/right for !FNMATCH_IS_ENABLED && quotes When our own pmatch is used, loc2 is unused in scanleft/right when quotes is true. However, it is still needed when quotes is false. Fix the scanleft/right code so that loc2 is always updated (so it will be garbage when quotes is true) but only returned depending on the value of quotes. 
Fixes: c5bf9702ea11 ("expand: Add multi-byte support to pmatch") Reported-by: Johannes Altmanninger Signed-off-by: Herbert Xu --- src/expand.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/expand.c b/src/expand.c index d73f29c..7a30648 100644 --- a/src/expand.c +++ b/src/expand.c @@ -621,18 +621,15 @@ static char *scanleft(char *startp, char *endp, char *rmesc, char *rmescend, match = pmatch(str, s); *(FNMATCH_IS_ENABLED ? loc2 : loc) = c; if (match) - return FNMATCH_IS_ENABLED && quotes ? loc : loc2; + return quotes ? loc : loc2; if (!c) break; mb = mbnext(loc); loc += (mb & 0xff) + (mb >> 8); - if (unlikely(FNMATCH_IS_ENABLED || !quotes)) { - ml = (mb >> 8) > 3 ? (mb >> 8) - 2 : 1; - loc2 += ml; - } else - loc2 = loc; + ml = (mb >> 8) > 3 ? (mb >> 8) - 2 : 1; + loc2 += ml; } while (1); return 0; } @@ -645,8 +642,7 @@ static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend, char *loc; char *loc2; - for (loc = endp, loc2 = rmescend;; - FNMATCH_IS_ENABLED ? loc2-- : (loc2 = loc)) { + for (loc = endp, loc2 = rmescend;; loc2--) { char *s = FNMATCH_IS_ENABLED ? loc2 : loc; char c = *s; unsigned ml; @@ -659,7 +655,7 @@ static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend, match = pmatch(str, s); *(FNMATCH_IS_ENABLED ? loc2 : loc) = c; if (match) - return FNMATCH_IS_ENABLED && quotes ? loc : loc2; + return quotes ? 
loc : loc2; if (--loc < startp) break; if (!esc--) @@ -676,8 +672,7 @@ static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend, loc -= ml + 2; if (*loc == (char)CTLESC) loc--; - if (FNMATCH_IS_ENABLED) - loc2 -= ml - 1; + loc2 -= ml - 1; } return 0; } From db54e02c54874eeaef07009e25fe40c619606a55 Mon Sep 17 00:00:00 2001 From: Sam James Date: Sun, 17 Nov 2024 02:30:17 +0000 Subject: [PATCH 160/401] builtin: fix timescmd with C23 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With C23 and LTO, we get the following warning (or error if promoted to such): ``` src/builtins.c:28:5: error: type of ‘timescmd’ does not match original declaration [-Werror=lto-type-mismatch] 28 | int timescmd(int, char **); | ^ src/bltin/times.c:15:5: note: type mismatch in parameter 1 src/bltin/times.c:15:5: note: type ‘void’ should match type ‘int’ ``` Make the two consistent. This didn't show up before because pre-C23 had unprototyped functions. Signed-off-by: Herbert Xu --- src/bltin/times.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bltin/times.c b/src/bltin/times.c index 1166a68..252b084 100644 --- a/src/bltin/times.c +++ b/src/bltin/times.c @@ -12,7 +12,7 @@ #endif #include "system.h" -int timescmd() { +int timescmd(int argc, char *argv[]) { struct tms buf; long int clk_tck = sysconf(_SC_CLK_TCK); int mutime, mstime, mcutime, mcstime; From c1968b0620174371a678bbe58e44a47ae27cd257 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 9 Mar 2025 16:51:53 +0800 Subject: [PATCH 161/401] expand: Fix scanright zero handling when fnmatch is disabled Johannes Altmanninger wrote: > I noticed another regression in c5bf970 (expand: Add multi-byte > support to pmatch, 2024-06-02). > > This command now prints "abc-def" but used to print "ef". > > x=abc-def > y="${x##*d}" > echo "$y" Fix this by setting s to the correct value in scanright based on FNMATCH_IS_ENABLED. 
Fixes: c5bf9702ea11 ("expand: Add multi-byte support to pmatch") Signed-off-by: Herbert Xu --- src/expand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/expand.c b/src/expand.c index 7a30648..0066e1a 100644 --- a/src/expand.c +++ b/src/expand.c @@ -650,7 +650,7 @@ static char *scanright(char *startp, char *endp, char *rmesc, char *rmescend, if (zero) { *s = '\0'; - s = rmesc; + s = FNMATCH_IS_ENABLED ? rmesc : startp; } match = pmatch(str, s); *(FNMATCH_IS_ENABLED ? loc2 : loc) = c; From 20549a1a32cc3876f0f7eedf9c35916ed90aa675 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 9 Mar 2025 17:42:40 +0800 Subject: [PATCH 162/401] expand: Add bypass for literal "]" in expandmeta Jan Pechanec wrote: > > thank you for working on dash. I was testing it recently and it worked > really well. > > However, I noticed the dash code from github does filename pattern > matching even for code like "[ x = x ] && echo ok". I believe the > unquoted space after '[' should not trigger pattern matching but rather > only to invoke the test/[ utility, as before. It seems it works fine > though and only doing some extra unneeded work which may not be > immediatelly noticeable. > > dash installed on my Oracle Linux 9: > > janp:len49:~/_INST/dash$ strings /usr/bin/dash | grep dash > dash-0.5.11.5-4.el9.x86_64.debug > janp:len49:~/_INST/dash$ time dash -c 'i=0; while :; do : $((i=i+1)); [ $i -eq 500000 ] && break; done' > > real 0m0.752s > user 0m0.748s > sys 0m0.002s > > dash from github (commit b3e38adf6718801e7f06267b438c45caec9523bb) take > way more time to do the same thing: > > janp:len49:~/_INST/dash$ time ./src/dash -c 'i=0; while :; do : $((i=i+1)); [ $i -eq 500000 ] && break; done' > > real 0m4.202s > user 0m1.361s > sys 0m2.804s > > For the latter, strace shows open, fstat, getdents*, and close system > calls for each iteration and it depends on number of files in the > current directory. 
With more files, it takes more time: > > janp:len49:/etc$ time ~/_INST/dash/src/dash -c 'i=0; while :; do : $((i=i+1)); [ $i -eq 500000 ] && break; done' > real 0m15.591s > user 0m5.704s > sys 0m9.828s > > If I change [ to test, the dash github version behaves as before, and > possibly even faster: > > janp:len49:~/_INST/dash$ time ~/_INST/dash/src/dash -c 'i=0; while :; do : $((i=i+1)); test $i -eq 500000 && break; done' > > real 0m0.662s > user 0m0.659s > sys 0m0.002s > > Even bash would be faster than the current github version of dash: > > janp:len49:~/_INST/dash$ time bash -c 'i=0; while :; do : $((i=i+1)); [ $i -eq 500000 ] && break; done' > real 0m1.943s > user 0m1.939s > sys 0m0.002s Fix performance regression for idiomatic "[ ... ]" expression by adding a bypass for a literal "]" in pathname expansion. Reported-by: Jan Pechanec Fixes: 8d0eca2d9fb5 ("expand: Rewrite expmeta meta detection") Signed-off-by: Herbert Xu --- src/expand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/expand.c b/src/expand.c index 0066e1a..b9c6460 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1555,7 +1555,7 @@ expandmeta(struct strlist *str) if (fflag) goto nometa; - if (!strpbrk(str->text, "*?]")) + if (!strpbrk(str->text, "*?]") || !memcmp(str->text, "]", 2)) goto nometa; savelastp = exparg.lastp; From fd725b7a75836fdc52deeab5c7bb6c72b57ccd76 Mon Sep 17 00:00:00 2001 From: Zurab Kvachadze Date: Tue, 29 Apr 2025 23:47:32 +0200 Subject: [PATCH 163/401] expand: Fix buffer overread caused by passing array of chars as string in pmatch strpbrk() accepts two null-terminated string arguments. stop[] is char array that is not null-terminated but is still passed as a second argument to strpbrk. This causes buffer overread, which is detected by AddressSanitizer. This commit adds an explicit null-terminated to the end of the array. 
Signed-off-by: Zurab Kvachadze Signed-off-by: Herbert Xu --- src/expand.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/expand.c b/src/expand.c index b9c6460..1b9572d 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1886,7 +1886,9 @@ static __attribute__((noinline)) int ccmatch(char *p, const char *mbc, int ml, static int pmatch(char *pattern, const char *string) { - char stop[] = { 0, CTLESC, CTLMBCHAR }; + /* stop should be null-terminated as it passed as a string to + * strpbrk. */ + char stop[] = { 0, CTLESC, CTLMBCHAR, '\0' }; const char *q; unsigned mb; char *p; From e878137f63e61926bbdff7062a558f0143ed5872 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 3 May 2025 15:22:44 +0800 Subject: [PATCH 164/401] expand: Move stop array closer to strpbrk call Move the stop array closer to the strpbrk(3) call in pmatch. Signed-off-by: Herbert Xu --- src/expand.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/expand.c b/src/expand.c index 1b9572d..25df03c 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1886,9 +1886,6 @@ static __attribute__((noinline)) int ccmatch(char *p, const char *mbc, int ml, static int pmatch(char *pattern, const char *string) { - /* stop should be null-terminated as it passed as a string to - * strpbrk. */ - char stop[] = { 0, CTLESC, CTLMBCHAR, '\0' }; const char *q; unsigned mb; char *p; @@ -1918,11 +1915,17 @@ static int pmatch(char *pattern, const char *string) c = *++p; if (!c) return 1; - stop[0] = CTLESC; - if (c != '?' && c != '[') - stop[0] = c; + if (c == '?' || c != '[') + c = CTLESC; for (;;) { - if (stop[0] != (char)CTLESC) { + if (c != CTLESC) { + /* Stop should be null-terminated + * as it is passed as a string to + * strpbrk(3). 
+ */ + const char stop[] = { + c, CTLESC, CTLMBCHAR, 0, + }; q = strpbrk(q, stop); if (!q) return 0; From b4ef25d7466c2f85247c6271a47c6ccc064b4625 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 28 May 2025 11:00:51 +0800 Subject: [PATCH 165/401] input: Reset eof in reset_input Ensure that the EOF state is reset in reset_input as otherwise the new stdin may be treated as empty. Reported-by: Nathan Royce Fixes: 69786bc79f9b ("input: Fix pungetc on PEOF") Signed-off-by: Herbert Xu --- src/input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/input.c b/src/input.c index 6388b83..1aff3d4 100644 --- a/src/input.c +++ b/src/input.c @@ -684,6 +684,7 @@ void __attribute__((noinline)) flush_input(void) void reset_input(void) { - flush_input(); stdin_istty = -1; + basepf.eof = 0; + flush_input(); } From 0cc3211208a78c644edde37b0134538398e6df3e Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Fri, 22 Aug 2025 02:05:14 +0100 Subject: [PATCH 166/401] jobs: avoid blocking signals on vfork As pointed out by Denys Vlasenko, we can avoid blocking signals on vfork() by making the signal handler of a vfork child immediately return. This saves a syscall. 
Signed-off-by: Herbert Xu --- src/jobs.c | 5 +---- src/trap.c | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/jobs.c b/src/jobs.c index 51e6fa1..83d9694 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -991,20 +991,17 @@ struct job *vforkexec(union node *n, char **argv, const char *path, int idx) jp = makejob(1); - sigblockall(NULL); - vforked++; + vforked = getpid(); pid = vfork(); if (!pid) { forkchild(jp, n, FORK_FG); - sigclearmask(); shellexec(argv, path, idx); /* NOTREACHED */ } vforked = 0; - sigclearmask(); forkparent(jp, n, FORK_FG, pid); return jp; diff --git a/src/trap.c b/src/trap.c index aebffa0..23829a5 100644 --- a/src/trap.c +++ b/src/trap.c @@ -312,7 +312,7 @@ ignoresig(int signo) void onsig(int signo) { - if (vforked) + if (vforked && getpid() != vforked) return; if (signo == SIGCHLD) { From da72f6988ceb2e50dbdd894a45c40d02edca9293 Mon Sep 17 00:00:00 2001 From: "Kuhl, Brian" Date: Wed, 28 May 2025 21:47:10 +0000 Subject: [PATCH 167/401] shell: Add VxWorks support Recent versions of VxWorks support fork() and as result can support dash. For example, to cross compile for IA with this patch applied, and your VSB environment sourced (aka sysroot) ./configure --build=x86_64-pc-linux-gnu --host=x86_64-wrs-vxworks --prefix=/usr \ CC=wr-cc CXX=wr-c++ LD=wr-ld AR=wr-ar NM=wr-nm OBJCOPY=wr-objcopy OBJDUMP=wr-objdump RANLIB=wr-ranlib READELF=wr-readelf SIZE=wr-size STRIP= wr-strip \ ac_cv_func_faccessat=no \ CFLAGS="-DJOBS=0 " make install DESTDIR=${VSB}/usr/3pp/develop For other architectures update your appropriately. Signed-off-by: Herbert Xu --- configure.ac | 4 ++-- src/bltin/test.c | 2 ++ src/jobs.c | 4 ++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index ba4856a..bb09534 100644 --- a/configure.ac +++ b/configure.ac @@ -44,7 +44,7 @@ AC_ARG_ENABLE(glob, AS_HELP_STRING(--enable-glob, [Use glob(3) from libc])) dnl Checks for libraries. dnl Checks for header files. 
-AC_CHECK_HEADERS(alloca.h paths.h) +AC_CHECK_HEADERS(alloca.h paths.h sys/wait.h) dnl Check for declarations AC_CHECK_DECL([_PATH_BSHELL],,AC_DEFINE_UNQUOTED([_PATH_BSHELL], "/bin/sh", [Define to system shell path]),[ @@ -89,7 +89,7 @@ dnl Checks for library functions. AC_CHECK_FUNCS(bsearch faccessat getpwnam getrlimit isalpha killpg \ memfd_create memrchr mempcpy \ sigsetmask stpcpy strchrnul strsignal strtod strtoimax \ - strtoumax sysconf tee) + strtoumax sysconf tee wait3) dnl Check whether it's worth working around FreeBSD PR kern/125009. dnl The traditional behavior of access/faccessat is crazy, but diff --git a/src/bltin/test.c b/src/bltin/test.c index 6d844f1..0323944 100644 --- a/src/bltin/test.c +++ b/src/bltin/test.c @@ -418,8 +418,10 @@ filstat(char *nm, enum token mode) return (s.st_mode & S_ISUID) != 0; case FILSGID: return (s.st_mode & S_ISGID) != 0; +#ifdef S_ISVTX case FILSTCK: return (s.st_mode & S_ISVTX) != 0; +#endif case FILGZ: return !!s.st_size; case FILUID: diff --git a/src/jobs.c b/src/jobs.c index 83d9694..143f15c 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -1199,7 +1199,11 @@ waitproc(int block, int *status) do { gotsigchld = 0; do +#ifdef HAVE_WAIT3 err = wait3(status, flags, NULL); +#else + err = waitpid((pid_t)-1, status, flags, NULL); +#endif while (err < 0 && errno == EINTR); if (err || (err = -!block)) From 4e32f89a5c1a9231305fd2db4b6c39472b892008 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 3 Aug 2025 18:15:13 +0200 Subject: [PATCH 168/401] options: Simplify procargs procargs(int argc, char **argv) argc is used in just one place: if (argc > 0) xargv++; Trivially replaceable by if(xargv[0] != NULL), so can avoid passing this argument. char **xargv; xargv = argv; xargv is always equal to argv, so why having a separate variable? 
const char *xminusc; xminusc = minusc; Similar situation with xminusc being equal to minusc during the range where it is live, they diverge here: if (xminusc) { minusc = *xargv++; but after this, xminusc is not used. Signed-off-by: Denys Vlasenko Signed-off-by: Herbert Xu --- src/main.c | 2 +- src/options.c | 12 ++++-------- src/options.h | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/main.c b/src/main.c index 1e192f8..e9771fc 100644 --- a/src/main.c +++ b/src/main.c @@ -149,7 +149,7 @@ main(int argc, char **argv) rootpid = getpid(); init(); setstackmark(&smark); - login = procargs(argc, argv); + login = procargs(argv); if (login) { state = 1; read_profile("/etc/profile"); diff --git a/src/options.c b/src/options.c index c74e4fe..3e6c450 100644 --- a/src/options.c +++ b/src/options.c @@ -119,26 +119,22 @@ STATIC int getopts(char *, char *, char **); */ int -procargs(int argc, char **argv) +procargs(char **xargv) { int i; - const char *xminusc; - char **xargv; int login; - xargv = argv; login = xargv[0] && xargv[0][0] == '-'; arg0 = xargv[0]; - if (argc > 0) + if (xargv[0]) xargv++; for (i = 0; i < NOPTS; i++) optlist[i] = 2; argptr = xargv; login |= options(1); xargv = argptr; - xminusc = minusc; if (*xargv == NULL) { - if (xminusc) + if (minusc) sh_error("-c requires an argument"); sflag = 1; } @@ -153,7 +149,7 @@ procargs(int argc, char **argv) debug = 1; #endif /* POSIX 1003.2: first arg after -c cmd is $0, remainder $1... 
*/ - if (xminusc) { + if (minusc) { minusc = *xargv++; if (*xargv) goto setarg0; diff --git a/src/options.h b/src/options.h index f421316..0ad5535 100644 --- a/src/options.h +++ b/src/options.h @@ -76,7 +76,7 @@ extern char **argptr; /* argument list for builtin commands */ extern char *optionarg; /* set by nextopt */ extern char *optptr; /* used by nextopt */ -int procargs(int, char **); +int procargs(char **); void optschanged(void); void setparam(char **); void freeparam(volatile struct shparam *); From 6abb589582c62cf64b61c6725af17e853561e3df Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 24 Aug 2025 09:51:04 +0800 Subject: [PATCH 169/401] input: Call input_init on first use When the shell is used to execute another utility, it makes no sense to initialise stdin, which costs up to two system calls. Since the input layer can already handle first-use initialisation because of reset_input, simply make this always the case. Signed-off-by: Herbert Xu --- src/input.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/input.c b/src/input.c index 1aff3d4..c36d120 100644 --- a/src/input.c +++ b/src/input.c @@ -77,7 +77,7 @@ MKINIT struct parsefile *toppf = &basepf; MKINIT struct stdin_state stdin_state; struct parsefile *parsefile = &basepf; /* current input file */ int whichprompt; /* 1 == PS1, 2 == PS2 */ -int stdin_istty; +int stdin_istty = -1; MKINIT void input_init(void); @@ -99,8 +99,6 @@ INCLUDE "syntax.h" INIT { basepf.nextc = basepf.buf = basebuf; basepf.linno = 1; - - input_init(); } RESET { From d609f6c0195254cf8e781f79d9147242bd2f5517 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 24 Aug 2025 10:42:19 +0800 Subject: [PATCH 170/401] jobs: Cache getpid As the vfork path now requires the use of getpid, cache the current PID in a new global variable mypid.
Signed-off-by: Herbert Xu --- src/jobs.c | 7 +++++-- src/main.c | 3 ++- src/main.h | 2 ++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/jobs.c b/src/jobs.c index 143f15c..6622bdd 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -872,6 +872,7 @@ static void forkchild(struct job *jp, union node *n, int mode) lvforked = vforked; if (!lvforked) { + mypid = 0; shlvl++; forkreset(mode == FORK_NOJOB ? n : NULL); @@ -887,7 +888,7 @@ static void forkchild(struct job *jp, union node *n, int mode) pid_t pgrp; if (jp->nprocs == 0) - pgrp = getpid(); + mypid = pgrp = getpid(); else pgrp = jp->ps[0].pid; /* This can fail because we are doing it in the parent also */ @@ -991,7 +992,9 @@ struct job *vforkexec(union node *n, char **argv, const char *path, int idx) jp = makejob(1); - vforked = getpid(); + if (!mypid) + mypid = getpid(); + vforked = mypid; pid = vfork(); diff --git a/src/main.c b/src/main.c index e9771fc..5d25b8d 100644 --- a/src/main.c +++ b/src/main.c @@ -64,6 +64,7 @@ #define PROFILE 0 int rootpid; +int mypid; int shlvl; #ifdef __GLIBC__ int *dash_errno; @@ -146,7 +147,7 @@ main(int argc, char **argv) opentrace(); trputs("Shell args: "); trargs(argv); #endif - rootpid = getpid(); + mypid = rootpid = getpid(); init(); setstackmark(&smark); login = procargs(argv); diff --git a/src/main.h b/src/main.h index 19e4983..ef242a1 100644 --- a/src/main.h +++ b/src/main.h @@ -38,6 +38,8 @@ /* pid of main shell */ extern int rootpid; +/* pid of current shell */ +extern int mypid; /* shell level: 0 for the main shell, 1 for its children, and so on */ extern int shlvl; #define rootshell (!shlvl) From 2ef2074bf7a72db7c3e8cc58cab515d67668f527 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 27 Aug 2025 17:13:02 +0800 Subject: [PATCH 171/401] parser: Do not display prompts in expandstr Set needprompt to zero as it may have been set by the caller. It is safe to do so here as the only calls to expandstr within the parser set needprompt to zero already. 
Reported-by: Aleksander Ushakov Signed-off-by: Herbert Xu --- src/parser.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser.c b/src/parser.c index aecc18f..a3b004b 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1742,6 +1742,7 @@ expandstr(const char *ps) heredoclist = NULL; saveprompt = doprompt; doprompt = 0; + needprompt = 0; result = ps; savehandler = handler; if (unlikely(err = setjmp(jmploc.loc))) From 92ff9ac34980d30116c02f8a5748edffc8c1d29b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 28 Aug 2025 15:36:01 +0800 Subject: [PATCH 172/401] builtin: Fix argv overrun in aexpr and oexpr Abort aexpr and oexpr if t_wp hits a NULL at the start. Link: https://lore.kernel.org/dash/52d97ed4-7c78-44ec-8c1b-60569491aa31@astralinux.ru/ Reported-by: Aleksander Ushakov Signed-off-by: Herbert Xu --- src/bltin/test.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/bltin/test.c b/src/bltin/test.c index 0323944..ac479bd 100644 --- a/src/bltin/test.c +++ b/src/bltin/test.c @@ -250,6 +250,8 @@ oexpr(enum token n) for (;;) { res |= aexpr(n); + if (!*t_wp) + break; n = t_lex(t_wp + 1); if (n != BOR) break; @@ -266,6 +268,8 @@ aexpr(enum token n) for (;;) { if (!nexpr(n)) res = 0; + if (!*t_wp) + break; n = t_lex(t_wp + 1); if (n != BAND) break; From 98cc11a5dbbc41f71fe68df0766d6ed647e83445 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 28 Aug 2025 17:27:09 +0800 Subject: [PATCH 173/401] expand: Do not call rmescapes in expari Remove the rmescapes call from expari as the string produced by the recursive argstr call is not escaped. 
Link: https://lore.kernel.org/dash/fc8ed5da-3024-4d22-b6da-83dccabafe99@astralinux.ru/ Fixes: 3cd538634f71 ("expand: Do not reprocess data when expanding words") Reported-by: Aleksander Ushakov Signed-off-by: Herbert Xu --- src/expand.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/expand.c b/src/expand.c index 25df03c..35481d6 100644 --- a/src/expand.c +++ b/src/expand.c @@ -513,9 +513,6 @@ static char *expari(char *start, int flag) removerecordregions(begoff); - if (likely(flag & QUOTES_ESC)) - rmescapes(start); - pushstackmark(&sm, endoff); result = arith(start); popstackmark(&sm); From 64507de26e3ad91570eeb978e1813ad76f18af52 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 13 Sep 2025 17:59:28 +0800 Subject: [PATCH 174/401] parser: Skip parsing of old-style command substitution here-doc marker On Fri, Aug 29, 2025 at 07:40:16PM -0700, Nathan Mills wrote: > > * Crash #0 is a null-pointer-dereference. I haven't found the fix yet > but it is probably simple. > > redir->nhere.doc is NULL. The global variable **heredoclist** is also > NULL and parseheredoc does nothing when heredoclist is NULL. > > 1. readtoken1 > 2. parseheredoc (heredoclist equals heredoc which is the same as the > value in the next step) > 3. parseheredoc (heredoclist is NULL, heredoc is 0x5555560bb6a8 ) > 4. parsefname > 5. parseheredoc (heredoclist->here->nhere.doc->narg.text equals the > empty string) > 6. **SIGSEGV** openhere (redir->nhere.doc is **NULL**) > > ** Crash #0: null pointer dereference ** > > Base64'd > > dmVzjHdyPDwAAACAYAAAAHd3cjw8AAAAgGAAAACA/38zZGlsaQ== > > Minimized: > > src/dash -c $(echo -c "<<\`<<\0\`") This is caused by the unnecessary recursive parsing of old-style command substitution which then gets confused by the embedded here-document. Fix this by skipping the recursive parsing of command substitution if it is old-style and a here-document marker. 
Reported-by: Nathan Mills Fixes: 7a11b3e330a3 ("parser: Extend coverage of CHKEOFMARK") Signed-off-by: Herbert Xu --- src/parser.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/parser.c b/src/parser.c index a3b004b..975cf89 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1510,7 +1510,6 @@ parsebackq: { int uninitialized_var(saveprompt); struct heredoc *saveheredoclist; struct nodelist **nlpp; - size_t psavelen; size_t savelen; union node *n; unsigned ml; @@ -1521,24 +1520,28 @@ parsebackq: { STADJUST(oldstyle - 1, out); out[-1] = CTLBACKQ; } - str = stackblock(); - savelen = out - (char *)stackblock(); - grabstackblock(savelen); + if (!chkeofmark || !oldstyle) { + str = stackblock(); + savelen = out - (char *)stackblock(); + grabstackblock(savelen); + STARTSTACKSTR(out); + } if (oldstyle) { /* We must read until the closing backquote, giving special treatment to some slashes, and then push the string and reread it as input, interpreting it normally. */ - char *pout; + bool done = false; + char *pout = out; int pc; - STARTSTACKSTR(pout); - for (;;) { + while (!done) { if (needprompt) { setprompt(2); } switch (pc = pgetc_eatbnl()) { case '`': - goto done; + done = true; + break; case '\\': pc = pgetc(); @@ -1564,9 +1567,11 @@ parsebackq: { } STPUTC(pc, pout); } -done: - STPUTC('\0', pout); - psavelen = pout - (char *)stackblock(); + if (chkeofmark) { + out = pout; + goto parsebackq_oldreturn; + } + pout[-1] = 0; pstr = grabstackstr(pout); setinputstring(pstr); } @@ -1608,10 +1613,6 @@ parsebackq: { * parsing. 
*/ tokpushback = 0; - if (chkeofmark) { - pstr[psavelen - 1] = '`'; - out = stnputs(pstr, psavelen, out); - } goto parsebackq_oldreturn; } else { if (chkeofmark) { From 4595bd6549344c90ce7e8e90f1034cec724884db Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 13 Sep 2025 21:18:46 +0800 Subject: [PATCH 175/401] expand: Check for NUL before parsing range expression in pmatch Check for NUL before parsing range expression in pmatch. Reported-by: Nathan Mills Signed-off-by: Herbert Xu --- src/expand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/expand.c b/src/expand.c index 35481d6..d57373c 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1982,7 +1982,7 @@ static int pmatch(char *pattern, const char *string) mbp >>= 8; p += mbp; } - if (*p == '-' && p[1] != ']') { + if (*p == '-' && p[1] && p[1] != ']') { p++; if (*p == (char)CTLESC) p++; From 694f47f39bb67e9493a269e613bf6e80d8041a27 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 13 Sep 2025 21:45:39 +0800 Subject: [PATCH 176/401] parser: Fix USTPUTC overflow for new-style command substitution here-doc marker Use STPUTC after commandtextcont when parsing a new-style command substitution used as a here-document marker. Reported-by: Nathan Mills Fixes: 7a11b3e330a3 ("parser: Extend coverage of CHKEOFMARK") Signed-off-by: Herbert Xu --- src/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index 975cf89..eb402a7 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1617,7 +1617,7 @@ parsebackq: { } else { if (chkeofmark) { out = commandtextcont(n, out); - USTPUTC(')', out); + STPUTC(')', out); } goto parsebackq_newreturn; } From ca418018f6986297c3c7e6b69073f5c0031d9e2e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 14 Sep 2025 08:44:10 +0800 Subject: [PATCH 177/401] expand: Do not use memcmp on strings Using memcmp past the end of a string may crash if it hits a page boundary. Fix this by calling strcmp/strncmp instead.
Reported-by: Nathan Mills Reported-by: Harald van Dijk Signed-off-by: Herbert Xu --- src/expand.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/expand.c b/src/expand.c index d57373c..992cbe8 100644 --- a/src/expand.c +++ b/src/expand.c @@ -354,8 +354,7 @@ static char *argstr(char *p, int flag) continue; case CTLQUOTEMARK: /* "$@" syntax adherence hack */ - if (!inquotes && !memcmp(p, dolatstr + 1, - DOLATSTRLEN - 1)) { + if (!inquotes && !strcmp(p, dolatstr + 1)) { p = evalvar(p + 1, flag | EXP_QUOTED) + 1; goto start; } @@ -1552,7 +1551,7 @@ expandmeta(struct strlist *str) if (fflag) goto nometa; - if (!strpbrk(str->text, "*?]") || !memcmp(str->text, "]", 2)) + if (!strpbrk(str->text, "*?]") || !strcmp(str->text, "]")) goto nometa; savelastp = exparg.lastp; @@ -1996,7 +1995,7 @@ static int pmatch(char *pattern, const char *string) chr >= c && chr <= *p) found = 1; p++; - } else if (!memcmp(mbs, q, mb)) + } else if (!strncmp(mbs, q, mb)) found = 1; } while ((c = *p++) != ']'); if (found == invert) @@ -2011,7 +2010,7 @@ static int pmatch(char *pattern, const char *string) q += mb & 0xff; mb >>= 8; - if (memcmp(p - 1, q - 1, mb + 1)) + if (strncmp(p - 1, q - 1, mb + 1)) return 0; p += mb; From a8d4986223fd7349e2882b263cdf195957f39d31 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 14 Sep 2025 09:37:51 +0800 Subject: [PATCH 178/401] mystring: Ignore ERANGE errors in atomax MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove ERANGE check in atomax for consistency as other direct calls to strtoimax do not perform the range check. 
Reported-by: Szász Gergely Signed-off-by: Herbert Xu --- src/mystring.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/mystring.c b/src/mystring.c index ca0cd39..97e240c 100644 --- a/src/mystring.c +++ b/src/mystring.c @@ -129,9 +129,6 @@ intmax_t atomax(const char *s, int base) errno = 0; r = strtoimax(s, &p, base); - if (errno == ERANGE) - badnum(s); - /* * Disallow completely blank strings in non-arithmetic (base != 0) * contexts. From 6925b23f44d4c986f6b348b9f81355e90c460dfb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 14 Sep 2025 09:45:22 +0800 Subject: [PATCH 179/401] options: Check stderr instead of stdout for interactivity Steffen Nurpmeso wrote: > > In an email communication with kre@ on NetBSD's tech-userlevel we > came over > > commit a373e69a196cd8d45f2806d805e548fa65a982ba > Author: kre > AuthorDate: 2017-07-24 12:35:37 +0000 > Commit: kre > CommitDate: 2017-07-24 12:35:37 +0000 > > PR standards/52406 > > Absent other information, the shell should be interactive if reading > from stdin, and stdin and stderr are ttys, not stdin and stdout. > > and i had in mind that dash changed this already (i thought i saw > a patch flying by on the ML?), but looking at procargs() there is > > if (iflag == 2 && sflag == 1 && stdin_istty && isatty(1)) > iflag = 1; > > where that would require > > if (iflag == 2 && sflag == 1 && stdin_istty && isatty(2)) > iflag = 1; Check stderr instead of stdout in procargs for interactivity. 
Signed-off-by: Herbert Xu --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index 3e6c450..c318e0f 100644 --- a/src/options.c +++ b/src/options.c @@ -138,7 +138,7 @@ procargs(char **xargv) sh_error("-c requires an argument"); sflag = 1; } - if (iflag == 2 && sflag == 1 && stdin_istty && isatty(1)) + if (iflag == 2 && sflag == 1 && stdin_istty && isatty(2)) iflag = 1; if (mflag == 2) mflag = iflag; From 1365bb36a92656eb52b64cb04d249c47a1d3c4cf Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 21 Sep 2025 12:43:45 +0800 Subject: [PATCH 180/401] Release 0.5.13. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index bb09534..2a8622e 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([dash],[0.5.12]) +AC_INIT([dash],[0.5.13]) AM_INIT_AUTOMAKE([foreign subdir-objects]) AC_CONFIG_SRCDIR([src/main.c]) From 6dcc007a72f13c3e518a65bffef571795ad6678c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 23 Sep 2025 23:00:30 +0800 Subject: [PATCH 181/401] expand: Fix typo in pmatch for wildcard search Skip the optimisation for * only if the next character is ? or [, rather than ? or anything but [. Reported-by: Reilly Brogan Fixes: e878137f63e6 ("expand: Move stop array closer to strpbrk call") Signed-off-by: Herbert Xu --- src/expand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/expand.c b/src/expand.c index 992cbe8..1fab394 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1911,7 +1911,7 @@ static int pmatch(char *pattern, const char *string) c = *++p; if (!c) return 1; - if (c == '?' || c != '[') + if (c == '?'
|| c == '[') c = CTLESC; for (;;) { if (c != CTLESC) { From 85ae9ea3b7a9d5bc4e95d1bacf3446c545b6ed8b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 28 Sep 2025 11:45:18 +0800 Subject: [PATCH 182/401] expand: Fix off-by-one bug in expmeta Ensure the terminating NUL character is included when expanding path names. Reported-by: Reilly Brogan Fixes: a9012f4078be ("expand: Process multi-byte characters in expmeta") Signed-off-by: Herbert Xu --- src/expand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/expand.c b/src/expand.c index 1fab394..912384d 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1686,7 +1686,7 @@ static char *expmeta(char *name, unsigned name_len, size_t expdir_len) goto out_opendir; enddir = expmeta_rmescapes(enddir, name); if (lstat64(cp, &statb) >= 0) - cp = addfnamealt(enddir, expdir_len); + cp = addfnamealt(enddir + 1, expdir_len); goto out_opendir; } start = memrchr(name, '/', p - name); From 22377664e7bea4cbb1e1cd8139b9bff0cb56c9de Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 6 Oct 2025 11:26:02 +0800 Subject: [PATCH 183/401] builtin: Keep backslash on quotes outside of dollarsq Only dollar single quote should eat the backslash character before a quote. Make the skipping of the backslash conditional on mbchar in conv_escape. Reported-by: Juergen Daubert Fixes: 776424a8f915 ("parser: Add dollar single quote") Signed-off-by: Herbert Xu --- src/bltin/printf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bltin/printf.c b/src/bltin/printf.c index ff576ff..106aecd 100644 --- a/src/bltin/printf.c +++ b/src/bltin/printf.c @@ -339,7 +339,7 @@ unsigned conv_escape(char *str0, char *out0, bool mbchar) switch (ch) { default: - if (ch == '"' || ch == '\'') + if (mbchar && (ch == '"' || ch == '\'')) break; if (ch == 'U') { From a768fcc0aafbe50a37d2d06bba23eb6671d3a9b3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 13 Oct 2025 12:46:55 +0800 Subject: [PATCH 184/401] Release 0.5.13.1. 
--- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 2a8622e..5efd9e2 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([dash],[0.5.13]) +AC_INIT([dash],[0.5.13.1]) AM_INIT_AUTOMAKE([foreign subdir-objects]) AC_CONFIG_SRCDIR([src/main.c]) From 5139d6a27763fc0be386e15634db21e45598b299 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 21 Oct 2025 21:29:32 +0800 Subject: [PATCH 185/401] shell: Fix unsigned char promotion and truncation When a char is promoted to an int, it needs to be signed as otherwise comparisons on it may fail. Alternatively, an integer needs to be truncated to char before comparing it against another char. Reported-by: Juergen Daubert Fixes: e878137f63e6 ("expand: Move stop array closer to strpbrk call") Fixes: c5bf9702ea11 ("expand: Add multi-byte support to pmatch") Fixes: 8f01c3796f0f ("[PARSER] Add FAKEEOFMARK for expandstr") Signed-off-by: Herbert Xu --- src/expand.c | 4 ++-- src/parser.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/expand.c b/src/expand.c index 912384d..8c8bf0e 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1914,7 +1914,7 @@ static int pmatch(char *pattern, const char *string) if (c == '?' || c == '[') c = CTLESC; for (;;) { - if (c != CTLESC) { + if (c != (char)CTLESC) { /* Stop should be null-terminated * as it is passed as a string to * strpbrk(3).
@@ -1985,7 +1985,7 @@ static int pmatch(char *pattern, const char *string) p++; if (*p == (char)CTLESC) p++; - else if (*p == CTLMBCHAR) { + else if (*p == (char)CTLMBCHAR) { mbp = mbnext(p); p += mbp & 0xff; p += mbp >> 8; diff --git a/src/parser.c b/src/parser.c index eb402a7..5714958 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1240,7 +1240,7 @@ checkend: { markloc = out - (char *)stackblock(); for (p = eofmark; STPUTC(c, out), *p; p++) { - if (c != *p) + if (c != (signed char)*p) goto more_heredoc; c = pgetc(); From 74adb5aaa63a857d5db678bd873fcd6a193da8d1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 31 Oct 2025 13:10:00 +0800 Subject: [PATCH 186/401] options: Call input_init before using stdin_istty Call input_init before using stdin_istty as otherwise it will always be true. Reported-by: Juergen Daubert Fixes: 6abb589582c6 ("input: Call input_init on first use") Signed-off-by: Herbert Xu --- src/input.c | 2 -- src/input.h | 1 + src/options.c | 7 +++++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/input.c b/src/input.c index c36d120..c7236d0 100644 --- a/src/input.c +++ b/src/input.c @@ -79,8 +79,6 @@ struct parsefile *parsefile = &basepf; /* current input file */ int whichprompt; /* 1 == PS1, 2 == PS2 */ int stdin_istty = -1; -MKINIT void input_init(void); - STATIC void pushfile(void); static void popstring(void); static int preadfd(void); diff --git a/src/input.h b/src/input.h index 706ac73..59dc5a1 100644 --- a/src/input.h +++ b/src/input.h @@ -119,6 +119,7 @@ void unwindfiles(struct parsefile *); void popallfiles(void); void flush_input(void); void reset_input(void); +void input_init(void); static inline int input_get_lleft(struct parsefile *pf) { diff --git a/src/options.c b/src/options.c index c318e0f..c4eedeb 100644 --- a/src/options.c +++ b/src/options.c @@ -138,8 +138,11 @@ procargs(char **xargv) sh_error("-c requires an argument"); sflag = 1; } - if (iflag == 2 && sflag == 1 && stdin_istty && isatty(2)) - iflag = 1; 
+ if (iflag == 2 && sflag == 1) { + input_init(); + if (stdin_istty && isatty(2)) + iflag = 1; + } if (mflag == 2) mflag = iflag; for (i = 0; i < NOPTS; i++) From d22fb976bfac796635efc8eeeff1a8711dda24f9 Mon Sep 17 00:00:00 2001 From: Natanael Copa Date: Fri, 16 Jan 2026 21:55:39 +0100 Subject: [PATCH 187/401] expand: Fix SIGBUS due to unaligned access Replace direct uint64_t dereference with memcpy() to avoid SIGBUS on armv7 when p is not 8-byte aligned. Fixes crashes in dash during IFS splitting on strict-alignment architectures. Signed-off-by: Natanael Copa Signed-off-by: Herbert Xu --- src/expand.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/expand.c b/src/expand.c index 8c8bf0e..eed8150 100644 --- a/src/expand.c +++ b/src/expand.c @@ -961,13 +961,15 @@ static size_t memtodest(const char *p, size_t len, int flags) if (likely(!(flags & (expq >> 3 | expq >> 4 | expq >> 8) & (QUOTES_ESC | EXP_MBCHAR)))) { while (len >= 8) { - uint64_t x = *(uint64_t *)(p + count); + uint64_t x; + + memcpy(&x, p + count, sizeof(x)); if ((x | (x - 0x0101010101010101)) & 0x8080808080808080) break; - *(uint64_t *)(q + count) = x; + memcpy(q + count, &x, sizeof(x)); count += 8; len -= 8; @@ -1335,7 +1337,7 @@ ifsbreakup(char *string, int maxargs, struct arglist *arglist) unsigned char b[8]; } x; - x.qw = *(uint64_t *)p; + memcpy(&x.qw, p, sizeof(x.qw)); if ((x.qw & 0x8080808080808080)) break; From 67dbc1e463f901b053d82f622b66544e5c0baf11 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 28 Jan 2026 12:31:35 +0800 Subject: [PATCH 188/401] shell: Add support for building without tee(2) In order to test the fallback path when tee(2) is not available, allow tee(2) to be disabled with --disable-tee. 
Signed-off-by: Herbert Xu --- configure.ac | 7 +++++++ src/input.c | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 5efd9e2..89759c5 100644 --- a/configure.ac +++ b/configure.ac @@ -40,6 +40,7 @@ fi AC_ARG_ENABLE(fnmatch, AS_HELP_STRING(--enable-fnmatch, \ [Use fnmatch(3) from libc])) AC_ARG_ENABLE(glob, AS_HELP_STRING(--enable-glob, [Use glob(3) from libc])) +AC_ARG_ENABLE(tee, AS_HELP_STRING(--disable-tee, [Do not use tee(2)])) dnl Checks for libraries. @@ -131,6 +132,12 @@ if test "$use_fnmatch" = yes && test "$enable_glob" = yes; then AC_CHECK_FUNCS(glob) fi +if test "$enable_tee" != no; then + AC_DEFINE([USE_TEE], [1], [Non-zero if tee(2) should be used]) +else + AC_DEFINE([USE_TEE], [0], [Non-zero if tee(2) should be used]) +fi + dnl Check for klibc signal. AC_CHECK_FUNC(signal) if test "$ac_cv_func_signal" != yes; then diff --git a/src/input.c b/src/input.c index c7236d0..1a9f9b8 100644 --- a/src/input.c +++ b/src/input.c @@ -183,7 +183,7 @@ static int stdin_tee(void *buf, int nr) flush_tee(buf, nr, stdin_state.pending); - err = tee(0, stdin_state.pip[1], nr, 0); + err = USE_TEE ? tee(0, stdin_state.pip[1], nr, 0) : -1; stdin_state.pending = err; return err; } From c7f78932d63feb1cf3c5418369e09085e6812c24 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 3 Feb 2026 13:04:21 +0800 Subject: [PATCH 189/401] shell: Add support for disabling memfd_create(3) In order to test the fallback path for systems without memfd_create(3), add a --disable-memfd-create argument to the configure script. 
Signed-off-by: Herbert Xu --- configure.ac | 10 ++++++++++ src/redir.c | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 89759c5..cd22d28 100644 --- a/configure.ac +++ b/configure.ac @@ -41,6 +41,8 @@ AC_ARG_ENABLE(fnmatch, AS_HELP_STRING(--enable-fnmatch, \ [Use fnmatch(3) from libc])) AC_ARG_ENABLE(glob, AS_HELP_STRING(--enable-glob, [Use glob(3) from libc])) AC_ARG_ENABLE(tee, AS_HELP_STRING(--disable-tee, [Do not use tee(2)])) +AC_ARG_ENABLE(memfd_create, AS_HELP_STRING(--disable-memfd-create, + [Do not use memfd_create(3)])) dnl Checks for libraries. @@ -138,6 +140,14 @@ else AC_DEFINE([USE_TEE], [0], [Non-zero if tee(2) should be used]) fi +if test "$enable_memfd_create" != no; then + AC_DEFINE([USE_MEMFD_CREATE], [1], + [Non-zero if memfd_create(3) should be used]) +else + AC_DEFINE([USE_MEMFD_CREATE], [0], + [Non-zero if memfd_create(3) should be used]) +fi + dnl Check for klibc signal. AC_CHECK_FUNC(signal) if test "$ac_cv_func_signal" != yes; then diff --git a/src/redir.c b/src/redir.c index 8d1c8f6..e61d3da 100644 --- a/src/redir.c +++ b/src/redir.c @@ -331,7 +331,7 @@ static void dupredirect(union node *redir, int f) int sh_pipe(int pip[2], int memfd) { if (memfd) { - pip[0] = memfd_create("dash", 0); + pip[0] = USE_MEMFD_CREATE ? memfd_create("dash", 0) : -1; if (pip[0] >= 0) { pip[1] = sh_dup2(pip[0], -1, pip[0]); return 1; From 8347bea6fd658ae665582d6ea103d7cc1859e812 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 3 Feb 2026 13:36:03 +0800 Subject: [PATCH 190/401] jobs: Fix here-document crash When forkchild is called while expanding here-documents, the job pointer is set to NULL. If this is the case, there is no need to free the job pointer and other outstanding jobs. Simply return if this is the case. 
Reported-by: Hadrien LACOUR Fixes: 9881d00e939e ("jobs: Preserve parent jobs for simple commands") Signed-off-by: Herbert Xu --- src/jobs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/jobs.c b/src/jobs.c index 6622bdd..36c6261 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -919,6 +919,9 @@ static void forkchild(struct job *jp, union node *n, int mode) if (lvforked) return; + if (!jp) + return; + freejob(jp); if (issimplecmd(n, JOBSCMD->name)) From 079059a9eca0610cb495a1a8fe85acc2c38111d6 Mon Sep 17 00:00:00 2001 From: Tiago de Paula Date: Wed, 11 Feb 2026 23:29:44 -0800 Subject: [PATCH 191/401] input: Fix heap-buffer-overflow in preadbuffer on long lines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update lleft before calling preadfd so buffered data is preserved when refilling the buffer during long line scanning, preventing out-of-bounds reads. Reported-by: Jyri-Matti Lähteenmäki Fixes: a065467315dc ("input: Move newline loop into preadbuffer") Signed-off-by: Tiago de Paula Signed-off-by: Herbert Xu --- src/input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/input.c b/src/input.c index 1a9f9b8..71282fb 100644 --- a/src/input.c +++ b/src/input.c @@ -384,6 +384,7 @@ static int preadbuffer(void) again: nr = q - parsefile->nextc; + input_set_lleft(parsefile, nr); more = preadfd(); q = parsefile->nextc + nr; if (more <= 0) { From e8c0324af64c68f59d063ecfa473c0793d1c0c98 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 15 Mar 2026 14:44:53 +0900 Subject: [PATCH 192/401] parser: Catch bogus case patterns If the token in a case statement pattern is not a TWORD, do not insert it into the pattern list. Instead abort the parsing with a syntax error. 
Reported-by: Aleksander Ushakov Signed-off-by: Herbert Xu --- src/parser.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/parser.c b/src/parser.c index 5714958..bbee1e3 100644 --- a/src/parser.c +++ b/src/parser.c @@ -442,6 +442,8 @@ TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : "")); cp->type = NCLIST; app = &cp->nclist.pattern; for (;;) { + if (lasttoken != TWORD) + synexpect(TWORD); *app = ap = (union node *)stalloc(sizeof (struct narg)); ap->type = NARG; ap->narg.text = wordtext; From 53845fd6bfd3690a93e7ffb3a2220c7023590d5f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 15 Mar 2026 18:30:59 +0900 Subject: [PATCH 193/401] builtin: Fix read overflow in umaskcmd If the first character after ugw is NUL, emit an error immediately instead of continuing to read past the NUL. Reported-by: Aleksander Ushakov Signed-off-by: Herbert Xu --- src/miscbltin.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/miscbltin.c b/src/miscbltin.c index 5aa2b24..e66666c 100644 --- a/src/miscbltin.c +++ b/src/miscbltin.c @@ -274,7 +274,10 @@ umaskcmd(int argc, char **argv) } if (!positions) positions = 0111; /* default is a */ - if (!strchr("=+-", op = *ap)) + op = *ap; + if (!op) + goto error; + if (!strchr("=+-", op)) break; ap++; new_val = 0; @@ -314,6 +317,7 @@ umaskcmd(int argc, char **argv) break; } if (*ap) { +error: sh_error("Illegal mode: %s", *argptr); return 1; } From 57ed27014008dcb4668a5dcee5cc55f99f69fd2a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 15 Mar 2026 21:45:08 +0900 Subject: [PATCH 194/401] expand: Use memmove when copying multi-byte chars in rmescapes Use memmove instead of mempcpy in rmescapes as the two regions can indeed overlap when escapes are removed in place.
Fixes: 990bbd15346d ("expand: Process multi-byte characters in subevalvar") Reported-by: Aleksander Ushakov Signed-off-by: Herbert Xu --- src/expand.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/expand.c b/src/expand.c index eed8150..bbf8454 100644 --- a/src/expand.c +++ b/src/expand.c @@ -2128,7 +2128,8 @@ _rmescapes(char *str, int flag) tail = 0; } - q = mempcpy(q, p, ml); + memmove(q, p, ml); + q += ml; p += ml + tail; goto setnesc; } From d029fb8c56f600d14bd7dce327c6e9e3aab50227 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 16 Mar 2026 13:24:32 +0900 Subject: [PATCH 195/401] jobs: Make stoppedjobs trivial if JOBS == 0 When job control is disabled or unavailable, stoppedjobs doesn't need to do any work. Reported-by: Denys Vlasenko Signed-off-by: Herbert Xu --- src/jobs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/jobs.c b/src/jobs.c index 36c6261..4aa65b6 100644 --- a/src/jobs.c +++ b/src/jobs.c @@ -1237,6 +1237,8 @@ stoppedjobs(void) int retval; retval = 0; + if (!JOBS) + goto out; if (job_warning) goto out; jp = curjob; From b308564a1ff6530f9fd3e7dd7b19aabae32ed375 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 21 Mar 2026 17:59:26 +0900 Subject: [PATCH 196/401] Release 0.5.13.2. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index cd22d28..c37eefe 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([dash],[0.5.13.1]) +AC_INIT([dash],[0.5.13.2]) AM_INIT_AUTOMAKE([foreign subdir-objects]) AC_CONFIG_SRCDIR([src/main.c]) From 8341dbd6b4e3ef3c6e9939578e455f9ec39e6d06 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 12 Apr 2026 17:45:05 +0800 Subject: [PATCH 197/401] parser: Only reject non-word tokens in case pattern Allow keywords to be used as case patterns and only reject non-word tokens. 
Reported-by: Nicolas Cavallari Fixes: e8c0324af64c ("parser: Catch bogus case patterns") Signed-off-by: Herbert Xu --- src/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.c b/src/parser.c index bbee1e3..bea4148 100644 --- a/src/parser.c +++ b/src/parser.c @@ -442,7 +442,7 @@ TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : "")); cp->type = NCLIST; app = &cp->nclist.pattern; for (;;) { - if (lasttoken != TWORD) + if (lasttoken < TWORD) synexpect(TWORD); *app = ap = (union node *)stalloc(sizeof (struct narg)); ap->type = NARG; From 0034bfe185d3d875cebace8cb3ca5c9dabf9e0f3 Mon Sep 17 00:00:00 2001 From: Muchen Hou <996029583@qq.com> Date: Mon, 13 Apr 2026 10:28:29 +0800 Subject: [PATCH 198/401] arith: Fix CVE-2026-31323 INTMAX_MIN / -1 overflow Division and remainder currently guard against division by zero, but not against the signed overflow case INTMAX_MIN / -1. On affected systems this can trigger SIGFPE during arithmetic expansion. Add an explicit guard before evaluating division or remainder. Signed-off-by: Muchen Hou <996029583@qq.com> Merge the overflow check with the zero division check. Signed-off-by: Herbert Xu --- src/arith_yacc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arith_yacc.c b/src/arith_yacc.c index 1a087c3..b978ef0 100644 --- a/src/arith_yacc.c +++ b/src/arith_yacc.c @@ -98,8 +98,8 @@ static intmax_t do_binop(int op, intmax_t a, intmax_t b) default: case ARITH_REM: case ARITH_DIV: - if (!b) - yyerror("division by zero"); + if (!b || (a == INTMAX_MIN && b == -1)) + yyerror("division error"); return op == ARITH_REM ? 
a % b : a / b; case ARITH_MUL: return a * b; From 41457be81fa7b45af9d9ca402454fee0e555620f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 14 Apr 2026 07:45:08 +0800 Subject: [PATCH 199/401] input: Fix EINTR handling when reading from a pipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Errors from tee(2) should not lead to the fallback path in every case. In fact, only EINVAL should trigger an attempt to call read(2). Every other error (and zero == EOF) returned by tee(2) should be treated as if it came from read(2). Reported-by: Ignacy Gawędzki Signed-off-by: Herbert Xu --- configure.ac | 4 ++++ src/input.c | 11 ++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index c37eefe..d749440 100644 --- a/configure.ac +++ b/configure.ac @@ -135,6 +135,10 @@ if test "$use_fnmatch" = yes && test "$enable_glob" = yes; then fi if test "$enable_tee" != no; then + AC_CHECK_FUNCS(tee, use_tee=yes) +fi + +if test "$use_tee" = yes; then AC_DEFINE([USE_TEE], [1], [Non-zero if tee(2) should be used]) else AC_DEFINE([USE_TEE], [0], [Non-zero if tee(2) should be used]) diff --git a/src/input.c b/src/input.c index 71282fb..4e30010 100644 --- a/src/input.c +++ b/src/input.c @@ -183,7 +183,12 @@ static int stdin_tee(void *buf, int nr) flush_tee(buf, nr, stdin_state.pending); - err = USE_TEE ? 
tee(0, stdin_state.pip[1], nr, 0) : -1; + if (USE_TEE) + err = tee(0, stdin_state.pip[1], nr, 0); + else { + errno = EINVAL; + err = -1; + } stdin_state.pending = err; return err; } @@ -324,13 +329,13 @@ preadfd(void) if (!fd && !stdin_bufferable()) { nr = stdin_tee(buf, nr); fd = stdin_state.pip[0]; - if (nr <= 0) { + if (nr < 0 && errno == EINVAL) { fd = 0; nr = 1; } } - if (nr >= 0) + if (nr > 0) nr = read(fd, buf, nr); if (nr < 0) { From 601bc50bfc2858ab7a9ec327fe4e33a9c4877759 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 18 Apr 2026 11:08:13 +0800 Subject: [PATCH 200/401] Release 0.5.13.3. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index d749440..dd135e9 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([dash],[0.5.13.2]) +AC_INIT([dash],[0.5.13.3]) AM_INIT_AUTOMAKE([foreign subdir-objects]) AC_CONFIG_SRCDIR([src/main.c]) From e8c6952f14f332f6d0e7de3d0c9b3e91c50676f2 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 12 Dec 2017 10:36:17 -0800 Subject: [PATCH 201/401] another todo note about backticks --- Makefile.am | 8 ++++++++ src/error.c | 1 - src/input.c | 3 +-- src/input.h | 1 + src/main.c | 5 +++++ 5 files changed, 15 insertions(+), 3 deletions(-) diff --git a/Makefile.am b/Makefile.am index af437a6..4910c12 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1 +1,9 @@ SUBDIRS = src + +lib : libdash.a libdash.so.1.0.1 + +libdash.a : + ar rcs $@ src/*.o + +libdash.so.1.0.1 : + gcc -shared -o $@ src/*.o diff --git a/src/error.c b/src/error.c index 728ff88..66c79f8 100644 --- a/src/error.c +++ b/src/error.c @@ -78,7 +78,6 @@ static void exverror(int, const char *, va_list) void exraise(int e) { -#ifdef DEBUG if (handler == NULL) abort(); #endif diff --git a/src/input.c b/src/input.c index 4e30010..3a9758b 100644 --- a/src/input.c +++ b/src/input.c @@ -82,7 +82,6 @@ int stdin_istty = -1; STATIC void pushfile(void); static void popstring(void); static int 
preadfd(void); -static void setinputfd(int fd, int push); static int preadbuffer(void); #ifdef mkinit @@ -571,7 +570,7 @@ setinputfile(const char *fname, int flags) * interrupts off. */ -static void +void setinputfd(int fd, int push) { pushfile(); diff --git a/src/input.h b/src/input.h index 59dc5a1..3087a36 100644 --- a/src/input.h +++ b/src/input.h @@ -112,6 +112,7 @@ void pungetc(void); void pungetn(int); void pushstring(char *, void *); int setinputfile(const char *, int); +void setinputfd(int fd, int push); void setinputstring(char *); void pushstdin(void); void popfile(void); diff --git a/src/main.c b/src/main.c index 5d25b8d..dfbe719 100644 --- a/src/main.c +++ b/src/main.c @@ -78,7 +78,10 @@ MKINIT struct jmploc main_handler; STATIC void read_profile(const char *); STATIC char *find_dot_file(char *); static int cmdloop(int); + +#ifdef MAIN int main(int, char **); +#endif /* * Main routine. We initialize things, parse the arguments, execute @@ -88,6 +91,7 @@ int main(int, char **); * is used to figure out how far we had gotten. */ +#ifdef MAIN int main(int argc, char **argv) { @@ -194,6 +198,7 @@ main(int argc, char **argv) /* NOTREACHED */ } +#endif /* * Read and execute commands. 
"Top" is nonzero for the top level command From 8b6da2a3e589246e0b40c32f6e59b2e5bc5210e3 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 26 Sep 2018 09:36:07 -0400 Subject: [PATCH 202/401] expose hooks --- src/alias.c | 4 +--- src/alias.h | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/alias.c b/src/alias.c index 1b9b979..9da2da6 100644 --- a/src/alias.c +++ b/src/alias.c @@ -48,11 +48,9 @@ struct alias *atab[ATABSIZE]; -STATIC void setalias(const char *, const char *); STATIC struct alias *freealias(struct alias *); STATIC struct alias **__lookupalias(const char *); -STATIC void setalias(const char *name, const char *val) { @@ -187,7 +185,7 @@ unaliascmd(int argc, char **argv) return (i); } -STATIC struct alias * +struct alias * freealias(struct alias *ap) { struct alias *next; diff --git a/src/alias.h b/src/alias.h index fb841d6..d4ec646 100644 --- a/src/alias.h +++ b/src/alias.h @@ -45,6 +45,7 @@ struct alias { }; struct alias *lookupalias(const char *, int); +void setalias(const char *, const char *); int aliascmd(int, char **); int unaliascmd(int, char **); void rmaliases(void); From 48f5b8d34a593491787d3f3d761329ebca1319f4 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 26 Sep 2018 09:36:26 -0400 Subject: [PATCH 203/401] parser command with builtin exception handler --- src/parser.c | 24 ++++++++++++++++++++++++ src/parser.h | 3 +++ 2 files changed, 27 insertions(+) diff --git a/src/parser.c b/src/parser.c index bea4148..3065837 100644 --- a/src/parser.c +++ b/src/parser.c @@ -55,6 +55,7 @@ #include "var.h" #include "error.h" #include "memalloc.h" +#include "init.h" /* MMG 2018-09-25 for reset() */ #include "mystring.h" #include "alias.h" #include "show.h" @@ -164,9 +165,32 @@ parsecmd(int interact) if (doprompt) setprompt(doprompt); needprompt = 0; + return list(1); } +/* MMG 2018-09-25 manually install a handler here */ +union node * +parsecmd_safe(int interact) +{ + struct jmploc jmploc; + + tokpushback = 0; 
+ checkkwd = 0; + heredoclist = 0; + doprompt = interact; + if (doprompt) + setprompt(doprompt); + needprompt = 0; + + if (unlikely(setjmp(jmploc.loc))) { + reset(); + return NERR; + } + handler = &jmploc; + + return list(1); +} STATIC union node * list(int nlflag) diff --git a/src/parser.h b/src/parser.h index 7a9605b..b415aef 100644 --- a/src/parser.h +++ b/src/parser.h @@ -83,6 +83,8 @@ union node; extern int lasttoken; extern int tokpushback; #define NEOF ((union node *)&tokpushback) +/* MMG 2018-09-25 similar story for an error return value */ +#define NERR ((union node *)&lasttoken) extern int whichprompt; /* 1 == PS1, 2 == PS2 */ extern int checkkwd; @@ -90,6 +92,7 @@ extern int checkkwd; int isassignment(const char *p); int issimplecmd(union node *n, const char *name); union node *parsecmd(int); +union node *parsecmd_safe(int); void fixredir(union node *, const char *, int); const char *getprompt(void *); const char *const *findkwd(const char *); From 5d87991041b773656b5077ce2acbee2d932badc9 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 26 Sep 2018 09:44:35 -0400 Subject: [PATCH 204/401] fix POSIX compliance bug, per bug report of 2017-01-19 --- src/var.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/var.c b/src/var.c index eb4075f..0ebab25 100644 --- a/src/var.c +++ b/src/var.c @@ -327,7 +327,7 @@ lookupvar(const char *name) intmax_t lookupvarint(const char *name) { - return atomax(lookupvar(name) ?: nullstr, 0); + return atomax(lookupvar(name) ?: "0", 0); } From ae0450f4416edaa1333d2ff60c56aa2b31e4a410 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 26 Sep 2018 09:48:56 -0400 Subject: [PATCH 205/401] missing #ifdef --- src/error.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/error.c b/src/error.c index 66c79f8..728ff88 100644 --- a/src/error.c +++ b/src/error.c @@ -78,6 +78,7 @@ static void exverror(int, const char *, va_list) void exraise(int e) { +#ifdef DEBUG if (handler == NULL) abort(); 
#endif From 2334210820b2a1ffbb83b1f7bba8247948840551 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 26 Sep 2018 09:49:44 -0400 Subject: [PATCH 206/401] update gitignore --- src/bltin/.gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 src/bltin/.gitignore diff --git a/src/bltin/.gitignore b/src/bltin/.gitignore new file mode 100644 index 0000000..ec96903 --- /dev/null +++ b/src/bltin/.gitignore @@ -0,0 +1,2 @@ +.deps +.dirstamp From d496719a2460a773ee2a717f190c663962fec959 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 26 Sep 2018 09:54:28 -0400 Subject: [PATCH 207/401] readme --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..0c925df --- /dev/null +++ b/README.md @@ -0,0 +1,9 @@ +*libdash* is a fork of the Linux Kernel's `dash` shell that builds a linkable library with extra exposed interfaces. The primary use of libdash is to parse shell scripts, but it could be used for more. + +# How to use the parser + +The ideal interface to use is `parsecmd_safe` in `parser.c`. Parsing the POSIX shell is a complicated affair: beyond the usual locale issues, aliases affect the lexer, so one must use `setalias` and `unalias` to manage any aliases that ought to exist. + +# How work with the parsed nodes + +The general AST is described in `nodes.h`. There are some tricky invariants around the precise formatting of control codes; the OCaml code shows some examples of working with the `args` fields. 
From f8e8ee3c57f7fd74275c329c54c01720b8f9ffeb Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 5 May 2017 15:23:21 -0700 Subject: [PATCH 208/401] starting in on CLI --- .gitignore | 9 + Makefile | 17 ++ ast.ml | 399 +++++++++++++++++++++++++++++ compile.ml | 161 ++++++++++++ dash.ml | 520 ++++++++++++++++++++++++++++++++++++++ expansion.ml | 59 +++++ main.ml | 27 ++ round_trip.sh | 22 ++ test.c | 426 +++++++++++++++++++++++++++++++ tests/braces_amp.sh | 5 + tests/diverge.sh | 3 + tests/escaping | 1 + tests/for_spaces.sh | 8 + tests/grab_submissions.sh | 15 ++ tests/grade.sh | 60 +++++ tests/run_grader.sh | 26 ++ tests/run_lda.sh | 57 +++++ tests/send_emails.sh | 17 ++ tests/syntax | 35 +++ tests/test.sh | 5 + tests/timeout3 | 92 +++++++ 21 files changed, 1964 insertions(+) create mode 100644 Makefile create mode 100644 ast.ml create mode 100644 compile.ml create mode 100644 dash.ml create mode 100644 expansion.ml create mode 100644 main.ml create mode 100755 round_trip.sh create mode 100644 test.c create mode 100644 tests/braces_amp.sh create mode 100755 tests/diverge.sh create mode 100644 tests/escaping create mode 100755 tests/for_spaces.sh create mode 100755 tests/grab_submissions.sh create mode 100755 tests/grade.sh create mode 100755 tests/run_grader.sh create mode 100755 tests/run_lda.sh create mode 100755 tests/send_emails.sh create mode 100644 tests/syntax create mode 100644 tests/test.sh create mode 100755 tests/timeout3 diff --git a/.gitignore b/.gitignore index e349901..08ba06a 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,12 @@ Makefile .Spotlight* .Trash* *[Tt]humbs.db + +test +*.native +*~ +*.o +*.cmx +*.cmi +_build +test.err diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a1b894e --- /dev/null +++ b/Makefile @@ -0,0 +1,17 @@ +DASH=$(shell (cd ../dash; pwd)) +DASHSRC=$(DASH)/src + +TESTING : main.native $(wildcard tests/*) + @for f in tests/*; do \ + ./round_trip.sh ./main.native $$f 2>test.err; \ + done + 
+main.native : main.ml dash.ml ast.ml compile.ml + ocamlbuild -no-hygiene -pkg ctypes.foreign -lflags "-cclib -force_load $(DASH)/libdash.a" $@ + +test : test.c + gcc -Wall -I $(DASHSRC) -L $(DASH) -ldash $^ -o $@ + +clean : + rm -f *.o test *~ *.cmi *.cmx main.native + rm -rf _build diff --git a/ast.ml b/ast.ml new file mode 100644 index 0000000..ae91a2b --- /dev/null +++ b/ast.ml @@ -0,0 +1,399 @@ +type linno = int + +type t = + | Command of linno * assign list * args * redirection list (* assign, args, redir *) + | Pipe of bool * t list (* background?, commands *) + | Redir of linno * t * redirection list + | Background of linno * t * redirection list + | Subshell of linno * t * redirection list + | And of t * t + | Or of t * t + | Not of t + | Semi of t * t + | If of t * t * t (* cond, then, else *) + | While of t * t (* test, body *) (* until encoded as a While . Not *) + | For of linno * arg * t * string (* args, body, var *) + | Case of linno * arg * case list + | Defun of linno * string * t (* name, body *) + and assign = string * arg + and redirection = + | File of redir_type * int * arg + | Dup of dup_type * int * int + | Heredoc of heredoc_type * int * arg + and redir_type = To | Clobber | From | FromTo | Append + and dup_type = ToFD | FromFD + and heredoc_type = Here | XHere (* for when in a quote... not sure when this comes up *) + and args = arg list + and arg = arg_char list + and arg_char = + | C of char + | E of char (* escape... necessary for expansion *) + | A of arg (* arith *) + | V of var_type * bool (* VSNUL? 
*) * string * arg + | Q of arg (* quoted *) + | B of t (* backquote *) + and var_type = + | Normal + | Minus + | Plus + | Question + | Assign + | TrimR + | TrimRMax + | TrimL + | TrimLMax + | Length + and case = { cpattern : arg; cbody : t } + +let var_type = function + | 0x0 -> (* VSNORMAL ${var} *) Normal + | 0x2 -> (* VSMINUS ${var-text} *) Minus + | 0x3 -> (* VSPLUS ${var+text} *) Plus + | 0x4 -> (* VSQUESTION ${var?message} *) Question + | 0x5 -> (* VSASSIGN ${var=text} *) Assign + | 0x6 -> (* VSTRIMRIGHT ${var%pattern} *) TrimR + | 0x7 -> (* VSTRIMRIGHTMAX ${var%%pattern} *) TrimRMax + | 0x8 -> (* VSTRIMLEFT ${var#pattern} *) TrimL + | 0x9 -> (* VSTRIMLEFTMAX ${var##pattern} *) TrimLMax + | 0xa -> (* VSLENGTH ${#var}) *) Length + | vs -> failwith ("Unknown VSTYPE: " ^ string_of_int vs) + +let string_of_var_type = function + | Normal -> "" + | Minus -> "-" + | Plus -> "+" + | Question -> "?" + | Assign -> "=" + | TrimR -> "%" + | TrimRMax -> "%%" + | TrimL -> "#" + | TrimLMax -> "##" + | Length -> "#" + +(* Some possible further simplifications: + + * Drop bool from pipe + dash *always* forks for a pipe, but sometimes it waits + * Drop redirection from Command, etc. + Just use Redir... though this may affect subshell behavior. 
+ NCMD: expredir, pushredir, redirectsafe REDIR_PUSH|REDIR_SAVEFD2 + NREDIR: expredir, pushredir, redirectsafe REDIR_PUSH + NBACKGND: expredir, redirect 0 +*) + + +open Ctypes +open Foreign +open Dash + +let skip = Command (-1,[],[],[]) + +let rec of_node (n : node union ptr) : t = + match (n @-> node_type) with + (* NCMD *) + | 0 -> + let n = n @-> node_ncmd in + Command (getf n ncmd_linno, + to_assigns (getf n ncmd_assign), + to_args (getf n ncmd_args), + redirs (getf n ncmd_redirect)) + (* NPIPE *) + | 1 -> + let n = n @-> node_npipe in + Pipe (getf n npipe_backgnd <> 0, + List.map of_node (nodelist (getf n npipe_cmdlist))) + (* NREDIR *) + | 2 -> let (ty,fd,arg) = of_nredir n in Redir (ty,fd,arg) + (* NBACKGND *) + | 3 -> let (ty,fd,arg) = of_nredir n in Background (ty,fd,arg) + (* NSUBSHELL *) + | 4 -> let (ty,fd,arg) = of_nredir n in Subshell (ty,fd,arg) + (* NAND *) + | 5 -> let (l,r) = of_binary n in And (l,r) + (* NOR *) + | 6 -> let (l,r) = of_binary n in Or (l,r) + (* NSEMI *) + | 7 -> let (l,r) = of_binary n in Semi (l,r) + (* NIF *) + | 8 -> + let n = n @-> node_nif in + let else_part = getf n nif_elsepart in + If (of_node (getf n nif_test), + of_node (getf n nif_ifpart), + if nullptr else_part + then skip + else of_node else_part) + (* NWHILE *) + | 9 -> let (t,b) = of_binary n in While (t,b) + (* NUNTIL *) + | 10 -> let (t,b) = of_binary n in While (Not t,b) + (* NFOR *) + | 11 -> + let n = n @-> node_nfor in + For (getf n nfor_linno, + to_arg (getf n nfor_args @-> node_narg), + of_node (getf n nfor_body), + getf n nfor_var) + (* NCASE *) + | 12 -> + let n = n @-> node_ncase in + Case (getf n ncase_linno, + to_arg (getf n ncase_expr @-> node_narg), + List.map + (fun (pattern,body) -> + { cpattern = to_arg (pattern @-> node_narg); + cbody = of_node body}) + (caselist (getf n ncase_cases))) + (* NDEFUN *) + | 14 -> + let n = n @-> node_ndefun in + Defun (getf n ndefun_linno, + getf n ndefun_text, + of_node (getf n ndefun_body)) + (* NNOT *) + | 25 -> 
Not (of_node (getf (n @-> node_nnot) nnot_com)) + | nt -> failwith ("Unexpected top level node_type " ^ string_of_int nt) + +and of_nredir (n : node union ptr) = + let n = n @-> node_nredir in + (getf n nredir_linno, of_node (getf n nredir_n), redirs (getf n nredir_redirect)) + +and redirs (n : node union ptr) = + if nullptr n + then [] + else + let mk_file ty = + let n = n @-> node_nfile in + File (ty,getf n nfile_fd,to_arg (getf n nfile_fname @-> node_narg)) in + let mk_dup ty = + let n = n @-> node_ndup in + Dup (ty,getf n ndup_fd,getf n ndup_dupfd) in + let mk_here ty = + let n = n @-> node_nhere in + Heredoc (ty,getf n nhere_fd,to_arg (getf n nhere_doc @-> node_narg)) in + let h = match n @-> node_type with + (* NTO *) + | 16 -> mk_file To + (* NCLOBBER *) + | 17 -> mk_file Clobber + (* NFROM *) + | 18 -> mk_file From + (* NFROMTO *) + | 19 -> mk_file FromTo + (* NAPPEND *) + | 20 -> mk_file Append + (* NTOFD *) + | 21 -> mk_dup ToFD + (* NFROMFD *) + | 22 -> mk_dup FromFD + (* NHERE quoted heredoc---no expansion)*) + | 23 -> mk_here Here + (* NXHERE unquoted heredoc (param/command/arith expansion) *) + | 24 -> mk_here XHere + | nt -> failwith ("unexpected node_type in redirlist: " ^ string_of_int nt) + in + h :: redirs (getf (n @-> node_nfile) nfile_next) + +and of_binary (n : node union ptr) = + let n = n @-> node_nbinary in + (of_node (getf n nbinary_ch1), of_node (getf n nbinary_ch2)) + +and to_arg (n : narg structure) : arg = + let a,s,bqlist,stack = parse_arg (explode (getf n narg_text)) (getf n narg_backquote) [] in + (* we should have used up the string and have no backquotes left in our list *) + assert (s = []); + assert (nullptr bqlist); + assert (stack = []); + a + +and parse_arg (s : char list) (bqlist : nodelist structure ptr) stack = + match s,stack with + | [],[] -> [],[],bqlist,[] + | [],`CTLVar::_ -> failwith "End of string before CTLENDVAR" + | [],`CTLAri::_ -> failwith "End of string before CTLENDARI" + | [],`CTLQuo::_ -> failwith "End of 
string before CTLQUOTEMARK" + (* CTLESC *) + | '\129'::c::s,_ -> arg_char (E c) s bqlist stack + (* CTLVAR *) + | '\130'::t::s,_ -> + let var_name,s = split_at (fun c -> c = '=') s in + let t = int_of_char t in + let v,s,bqlist,stack = match t land 0x0f, s with + (* VSNORMAL and VSLENGTH get special treatment + + neither ever gets VSNUL + VSNORMAL is terminated just with the =, without a CTLENDVAR *) + (* VSNORMAL *) + | 0x1,'='::s -> + V (Normal,false,implode var_name,[]),s,bqlist,stack + (* VSLENGTH *) + | 0xa,'='::'\131'::s -> + V (Length,false,implode var_name,[]),s,bqlist,stack + | 0x1,c::_ | 0xa,c::_ -> + failwith ("Missing CTLENDVAR for VSNORMAL/VSLENGTH, found " ^ Char.escaped c) + (* every other VSTYPE takes mods before CTLENDVAR *) + | vstype,'='::s -> + let a,s,bqlist,stack' = parse_arg s bqlist (`CTLVar::stack) in + V (var_type vstype,t land 0x10 = 1,implode var_name,a), s, bqlist, stack' + | _,c::_ -> failwith ("Expected '=' terminating variable name, found " ^ Char.escaped c) + | _,[] -> failwith "Expected '=' terminating variable name, found EOF" + in + arg_char v s bqlist stack + (* CTLENDVAR *) + | '\131'::s,`CTLVar::stack' -> [],s,bqlist,stack' + | '\131'::_,`CTLAri::_ -> failwith "Saw CTLENDVAR before CTLENDARI" + | '\131'::_,`CTLQuo::_ -> failwith "Saw CTLENDVAR before CTLQUOTEMARK" + | '\131'::_,[] -> failwith "Saw CTLENDVAR outside of CTLVAR" + (* CTLBACKQ *) + | '\132'::s,_ -> + if nullptr bqlist + then failwith "Saw CTLBACKQ but bqlist was null" + else arg_char (B (of_node (bqlist @-> nodelist_n))) s (bqlist @-> nodelist_next) stack + (* CTLARI *) + | '\134'::s,_ -> + let a,s,bqlist,stack' = parse_arg s bqlist (`CTLAri::stack) in + assert (stack = stack'); + arg_char (A a) s bqlist stack' + (* CTLENDARI *) + | '\135'::s,`CTLAri::stack' -> [],s,bqlist,stack' + | '\135'::_,`CTLVar::_' -> failwith "Saw CTLENDARI before CTLENDVAR" + | '\135'::_,`CTLQuo::_' -> failwith "Saw CTLENDARI before CTLQUOTEMARK" + | '\135'::_,[] -> failwith "Saw 
CTLENDARI outside of CTLARI" + (* CTLQUOTEMARK *) + | '\136'::s,`CTLQuo::stack' -> [],s,bqlist,stack' + | '\136'::s,_ -> + let a,s,bqlist,stack' = parse_arg s bqlist (`CTLQuo::stack) in + assert (stack' = stack); + arg_char (Q a) s bqlist stack' + (* ordinary character *) + | c::s,_ -> + arg_char (C c) s bqlist stack + +and arg_char c s bqlist stack = + let a,s,bqlist,stack = parse_arg s bqlist stack in + (c::a,s,bqlist,stack) + +and to_assign v = function + | [] -> failwith ("Never found an '=' sign in assignment, got " ^ implode v) + | C '=' :: a -> (implode v,a) + | C c :: a -> to_assign (v @ [c]) a + | _ -> failwith "Unexpected special character in assignment" + +and to_assigns n = List.map (to_assign []) (to_args n) + +and to_args (n : node union ptr) : args = + if nullptr n + then [] + else (assert (n @-> node_type = 15); + let n = n @-> node_narg in + to_arg n::to_args (getf n narg_next)) + +let separated f l = intercalate " " (List.map f l) + +let show_unless expected actual = + if expected = actual + then "" + else string_of_int actual + +let braces s = "{ " ^ s ^ " ; }" + +let background s = "{ " ^ s ^ " & }" + +let rec to_string = function + | Command (_,assigns,cmds,redirs) -> + separated string_of_assign assigns ^ + (if List.length assigns = 0 || List.length cmds = 0 then "" else " ") ^ + separated string_of_arg cmds ^ string_of_redirs redirs + | Pipe (bg,ps) -> + let p = intercalate " | " (List.map to_string ps) in + if bg then background p else p + | Redir (_,a,redirs) -> + to_string a ^ string_of_redirs redirs + | Background (_,a,redirs) -> + (* we translate + cmds... & + to + { cmds & } + this avoids issues with parsing; in particular, + cmd1 & ; cmd2 & ; cmd3 + doesn't parse; it must be: + cmd1 & cmd2 & cmd3 + it's a little too annoying to track "was the last thing + backgrounded?" so the braces resolve the issue. testing + indicates that they're semantically equivalent. 
+ *) + background (to_string a ^ string_of_redirs redirs) + | Subshell (_,a,redirs) -> + parens (to_string a ^ string_of_redirs redirs) + | And (a1,a2) -> to_string a1 ^ " && " ^ to_string a2 + | Or (a1,a2) -> to_string a1 ^ " || " ^ to_string a2 + | Not a -> "! " ^ braces (to_string a) + | Semi (a1,a2) -> to_string a1 ^ " ; " ^ to_string a2 + | If (c,t,e) -> string_of_if c t e + | While (Not t,b) -> + "until " ^ to_string t ^ "; do " ^ to_string b ^ "; done " + | While (t,b) -> + "while " ^ to_string t ^ "; do " ^ to_string b ^ "; done " + | For (_,a,body,var) -> + "for " ^ var ^ " in " ^ string_of_arg a ^ "; do " ^ + to_string body ^ "; done" + | Case (_,a,cs) -> + "case " ^ string_of_arg a ^ " in " ^ + separated string_of_case cs ^ " esac" + | Defun (_,name,body) -> name ^ "() {\n" ^ to_string body ^ "\n}" + +and string_of_if c t e = + "if " ^ to_string c ^ + "; then " ^ to_string t ^ + (match e with + | Command (-1,[],[],[]) -> "; fi" (* one-armed if *) + | If (c,t,e) -> "; el" ^ string_of_if c t e + | _ -> "; else " ^ to_string e ^ "; fi") + +and string_of_arg_char = function + | E '\'' -> "\\'" + | E '\"' -> "\\\"" + | E '(' -> "\\(" + | E ')' -> "\\)" + | E '{' -> "\\{" + | E '}' -> "\\}" + | E '$' -> "\\$" + | E '!' -> "\\!" 
+ | E '&' -> "\\&" + | E '|' -> "\\|" + | E ';' -> "\\;" + | C c -> String.make 1 c + | E c -> Char.escaped c + | A a -> "$((" ^ string_of_arg a ^ "))" + | V (Length,_,name,_) -> "${#" ^ name ^ "}" + | V (vt,nul,name,a) -> + "${" ^ name ^ (if nul then ":" else "") ^ string_of_var_type vt ^ string_of_arg a ^ "}" + | Q a -> "\"" ^ string_of_arg a ^ "\"" + | B t -> "$(" ^ to_string t ^ ")" + +and string_of_arg = function + | [] -> "" + | c :: a -> string_of_arg_char c ^ string_of_arg a + +and string_of_assign (v,a) = v ^ "=" ^ string_of_arg a + +and string_of_case c = + string_of_arg c.cpattern ^ ") " ^ to_string c.cbody ^ ";;" + +and string_of_redir = function + | File (To,fd,a) -> show_unless 1 fd ^ ">" ^ string_of_arg a + | File (Clobber,fd,a) -> show_unless 1 fd ^ ">|" ^ string_of_arg a + | File (From,fd,a) -> show_unless 0 fd ^ "<" ^ string_of_arg a + | File (FromTo,fd,a) -> show_unless 0 fd ^ "<>" ^ string_of_arg a + | File (Append,fd,a) -> show_unless 1 fd ^ ">>" ^ string_of_arg a + | Dup (ToFD,fd,tgt) -> show_unless 1 fd ^ ">&" ^ string_of_int tgt + | Dup (FromFD,fd,tgt) -> show_unless 0 fd ^ "<&" ^ string_of_int tgt + | Heredoc (t,fd,a) -> + let heredoc = string_of_arg a in + let marker = fresh_marker (lines heredoc) "EOF" in + show_unless 0 fd ^ "<<" ^ + (if t = XHere then marker else "'" ^ marker ^ "'") ^ "\n" ^ heredoc ^ marker ^ "\n" + +and string_of_redirs rs = + let ss = List.map string_of_redir rs in + (if List.length ss > 0 then " " else "") ^ intercalate " " ss diff --git a/compile.ml b/compile.ml new file mode 100644 index 0000000..2c0e36a --- /dev/null +++ b/compile.ml @@ -0,0 +1,161 @@ +type var = string + +type pat = string + +type expr = + | Var of var (* : whatever the var is *) + + | Int of int (* : int *) + | Eq of expr * expr (* : bool *) + | Not of expr (* : int *) + + + | Bind of var * expr * expr + | If of expr * expr * expr + + | Lookup of string (* : string *) + | Assign of string * expr (* : () *) + | Execve of string * string list (* 
: int/pid *) + | Match of string * pat (* : bool *) + | Defun of string * expr (* : () *) + | PushRedir (* : () *) + | PopRedir (* : () *) + | Fork of expr (* : int/pid *) + | Wait of expr (* : int/status code *) + | Pipe of expr list (* : int/status code *) + | Capture of expr (* : string *) + + | Arith of expr (* : string *) + + | Str of string (* : string *) + | Concat of expr list (* : string *) + | Length of expr (* : int *) + +let bind x e1 e2 = Bind(x,e1,e2) + +let cond i t e = If(i,t,e) + +let fresh : var -> var * expr = + let ctr = ref 0 in + fun base -> let s = base ^ string_of_int !ctr in incr ctr; (s,Var s) + +let set_status (run : expr) : expr = + let (ec,ec_var) = fresh "ec" in + bind ec run (bind "_" (Assign ("?",ec_var)) ec_var) + +let rec setup_redir (r : Ast.redirection) : expr = failwith "setup_redir" + +let with_redirects (rs : Ast.redirection list) (c : expr) : expr = + List.fold_right (fun r cmd -> bind "_" (setup_redir r) cmd) rs c + +let save_fds (c : expr) : expr = + let (status,status_var) = fresh "status" in + bind "_" PushRedir + (bind status c + (bind "_" PopRedir status_var)) + +let split_fields (arg : expr list) : expr = failwith "split_fields" + +let expand_paths (fields : expr) : expr = failwith "expand_paths" + +let remove_quotes (fields : expr) : expr = failwith "remove_quotes" + +(* TODO + +$?, etc. -- shell state explicitly or implicitly carried? + +pipes -- open followed by dup2/fcntl + +break, continue, return -- evalskip/break count +source, eval -- builtin? 
analyses just have to take account +trap -- need abstract signal handlers + +aliases -- just keep a table somewhere + +*) + +(* invariant: returns status code *) +let rec compile (c : Ast.t) : expr = + set_status + (begin match c with + | Ast.Command (_,assigns,args,redirs) -> + (* expand assignments + expand arguments + determine if it's a command or a built in + execve command/run builtin/call defun, or error on bad lookup + *) + with_redirects redirs (failwith "command") + | Ast.Pipe (bg,cs) -> + let (pid,pid_var) = fresh "pid" in + bind pid (Fork (Pipe (List.map compile cs))) + (if bg then (Int 0) else Wait pid_var) + | Ast.Redir (_,c,redirs) -> + save_fds (with_redirects redirs (compile c)) + | Ast.Background (_,c,redirs) -> + let (pid,pid_var) = fresh "pid" in + bind pid (Fork (with_redirects redirs (compile c))) (Int 0) + | Ast.Subshell (_,c,redirs) -> + let (pid,pid_var) = fresh "pid" in + bind pid (Fork (with_redirects redirs (compile c))) (Wait pid_var) + | Ast.And (c1,c2) -> + let (status,status_var) = fresh "status" in + bind status (compile c1) (cond status_var (compile c2) status_var) + | Ast.Or (c1,c2) -> + let (status,status_var) = fresh "status" in + bind status (compile c1) (cond status_var status_var (compile c2)) + | Ast.Not c -> + let (status,status_var) = fresh "status" in + bind status (compile c) (Not status_var) + | Ast.Semi (c1,c2) -> + bind "_" (compile c1) (compile c2) + | Ast.If (c_cond,c_then,c_else) -> + let (status,status_var) = fresh "status" in + bind status (compile c_cond) (cond status_var (compile c_then) (compile c_else)) + | Ast.While (cond,body) -> failwith "while" + | Ast.For (_,args,body,var) -> failwith "for" + | Ast.Case (_,args,cases) -> failwith "cases" + | Ast.Defun (_,name,body) -> Defun (name,compile body) + end) + +(* invariant: returns expanded string *) +and expands (quoted : bool) (args : Ast.args) : expr = + Concat (List.map (expand quoted) args) (* TODO insert field separations *) + +(* invariant: returns 
expanded string *) +and expand (quoted : bool) (arg : Ast.arg) : expr = + (* TODO field splitting, path expansion, quote removal *) + remove_quotes (expand_paths (split_fields (List.map (expand_char quoted) arg))) + +(* invariant: returns expanded string *) +and expand_char (quoted : bool) (a : Ast.arg_char) : expr = + match a with + | Ast.C chr -> Str (String.make 1 chr) + | Ast.E esc -> Str (String.make 1 esc) + | Ast.A ari -> Arith (expand quoted ari) + | Ast.V(fmt,nul,var,arg) -> expand_var quoted fmt nul var arg + | Ast.Q arg -> expand true arg + | Ast.B cmd -> Capture (compile cmd) + +and expand_var (quoted : bool) (fmt : Ast.var_type) (nul : bool) (var : string) (arg : Ast.arg) : expr = + match var with + | "@" -> failwith "$@" + | "*" -> failwith "$*" + | "?" -> failwith "$?" + | "$" -> failwith "$$" + | "#" -> failwith "$#" + | "!" -> failwith "$!" + | "-" -> failwith "$-" + | _ -> let (res,res_var) = fresh "res" in + bind res (Lookup var) + (begin match fmt with + | Ast.Normal -> res_var + | Ast.Minus -> failwith "${-}" + | Ast.Plus -> failwith "${+}" + | Ast.Question -> failwith "${?}" + | Ast.Assign -> failwith "${=}" + | Ast.TrimR -> failwith "${%}" + | Ast.TrimRMax -> failwith "${%%}" + | Ast.TrimL -> failwith "${#}" + | Ast.TrimLMax -> failwith "${##}" + | Ast.Length -> Length res_var + end) diff --git a/dash.ml b/dash.ml new file mode 100644 index 0000000..03cd126 --- /dev/null +++ b/dash.ml @@ -0,0 +1,520 @@ +open Printf +open Ctypes +open Ctypes_types +open Foreign + +type stackmark + +let stackmark : stackmark structure typ = structure "stackmark" +let stackp = field stackmark "stackp" (ptr void) +let nxt = field stackmark "nxt" string +let size = field stackmark "stacknleft" PosixTypes.size_t +let () = seal stackmark + +let init_stack () = + let stack = make stackmark in (* ??? 
do we want to save this *) + foreign "setstackmark" (ptr stackmark @-> returning void) (addr stack) + +let init : unit -> unit = foreign "init" (void @-> returning void) + +let initialize () = + init (); + init_stack () + +let setinputstring : string -> unit = + foreign "setinputstring" (string @-> returning void) + +let setinputtostdin () : unit = + foreign "setinputfd" (int @-> int @-> returning void) 0 0 (* don't both pushing the file *) + +let setinputfile (s : string) : unit = + let _ = foreign "setinputfile" (string @-> int @-> returning int) s 0 in + () + +(* first, we define the node type... *) + +type node +let node : node union typ = union "node" +let node_type = field node "type" int +(* but we don't seal it yet! *) + +type nodelist +let nodelist : nodelist structure typ = structure "nodelist" +let nodelist_next = field nodelist "next" (ptr nodelist) +let nodelist_n = field nodelist "n" (ptr node) +let () = seal nodelist + +type ncmd + +let ncmd : ncmd structure typ = structure "ncmd" +let ncmd_type = field ncmd "type" int +let ncmd_linno = field ncmd "linno" int +let ncmd_assign = field ncmd "assign" (ptr node) +let ncmd_args = field ncmd "args" (ptr node) +let ncmd_redirect = field ncmd "redirect" (ptr node) +let () = seal ncmd + +let node_ncmd = field node "ncmd" ncmd + +type npipe + +let npipe : npipe structure typ = structure "npipe" +let npipe_type = field npipe "type" int +let npipe_backgnd = field npipe "backgnd" int +let npipe_cmdlist = field npipe "cmdlist" (ptr nodelist) +let () = seal npipe + +let node_npipe = field node "npipe" npipe + +type nredir + +let nredir : nredir structure typ = structure "nredir" +let nredir_type = field nredir "type" int +let nredir_linno = field nredir "linno" int +let nredir_n = field nredir "n" (ptr node) +let nredir_redirect = field nredir "redirect" (ptr node) +let () = seal nredir + +let node_nredir = field node "nredir" nredir + +type nbinary + +let nbinary : nbinary structure typ = structure "nbinary" +let 
nbinary_type = field nbinary "type" int +let nbinary_ch1 = field nbinary "ch1" (ptr node) +let nbinary_ch2 = field nbinary "ch2" (ptr node) +let () = seal nbinary + +let node_nbinary = field node "nbinary" nbinary + +type nif + +let nif : nif structure typ = structure "nif" +let nif_type = field nif "type" int +let nif_test = field nif "test" (ptr node) +let nif_ifpart = field nif "ifpart" (ptr node) +let nif_elsepart = field nif "elsepart" (ptr node) +let () = seal nif + +let node_nif = field node "nif" nif + +type nfor + +let nfor : nfor structure typ = structure "nfor" +let nfor_type = field nfor "type" int +let nfor_linno = field nfor "linno" int +let nfor_args = field nfor "args" (ptr node) +let nfor_body = field nfor "body" (ptr node) +let nfor_var = field nfor "var" string +let () = seal nfor + +let node_nfor = field node "nfor" nfor + +type ncase + +let ncase : ncase structure typ = structure "ncase" +let ncase_type = field ncase "type" int +let ncase_linno = field ncase "linno" int +let ncase_expr = field ncase "expr" (ptr node) +let ncase_cases = field ncase "cases" (ptr node) +let () = seal ncase + +let node_ncase = field node "ncase" ncase + +type nclist + +let nclist : nclist structure typ = structure "nclist" +let nclist_type = field nclist "type" int +let nclist_next = field nclist "next" (ptr node) +let nclist_pattern = field nclist "pattern" (ptr node) +let nclist_body = field nclist "body" (ptr node) +let () = seal nclist + +let node_nclist = field node "nclist" nclist + +type ndefun + +let ndefun : ndefun structure typ = structure "ndefun" +let ndefun_type = field ndefun "type" int +let ndefun_linno = field ndefun "linno" int +let ndefun_text = field ndefun "text" string +let ndefun_body = field ndefun "body" (ptr node) +let () = seal ndefun + +let node_ndefun = field node "ndefun" ndefun + +type narg + +let narg : narg structure typ = structure "narg" +let narg_type = field narg "type" int +let narg_next = field narg "next" (ptr node) +let 
narg_text = field narg "text" string +let narg_backquote = field narg "backquote" (ptr nodelist) +let () = seal narg + +let node_narg = field node "narg" narg + +type nfile + +let nfile : nfile structure typ = structure "nfile" +let nfile_type = field nfile "type" int +let nfile_next = field nfile "next" (ptr node) +let nfile_fd = field nfile "fd" int +let nfile_fname = field nfile "fname" (ptr node) +let nfile_expfname = field nfile "expfname" string +let () = seal nfile + +let node_nfile = field node "nfile" nfile + +type ndup + +let ndup : ndup structure typ = structure "ndup" +let ndup_type = field ndup "type" int +let ndup_next = field ndup "next" (ptr node) +let ndup_fd = field ndup "fd" int +let ndup_dupfd = field ndup "dupfd" int +let ndup_vname = field ndup "vname" (ptr node) +let () = seal ndup + +let node_ndup = field node "ndup" ndup + +type nhere + +let nhere : nhere structure typ = structure "nhere" +let nhere_type = field nhere "type" int +let nhere_next = field nhere "next" (ptr node) +let nhere_fd = field nhere "fd" int +let nhere_doc = field nhere "doc" (ptr node) +let () = seal nhere + +let node_nhere = field node "nhere" nhere + +type nnot + +let nnot : nnot structure typ = structure "nnot" +let nnot_type = field nnot "type" int +let nnot_com = field nnot "com" (ptr node) +let () = seal nnot + +let node_nnot = field node "nnot" nnot +let () = seal node + +let parsecmd : int -> node union ptr = + foreign "parsecmd" (int @-> returning (ptr node)) + +let parse s = + setinputstring s; (* TODO set stack mark? *) + parsecmd 0 + +let neof : node union ptr = foreign_value "tokpushback" node + +let addrof p = raw_address_of_ptr (to_voidp p) + +let eqptr p1 p2 = addrof p1 = addrof p2 + +let nullptr (p : 'a ptr) = addrof p = Nativeint.zero + +let parse_next () = + let n = parsecmd 0 in + if eqptr n neof + then `Done + else if nullptr n + then `Null (* comment or blank line ... 
*) + else `Parsed n + +let rec parse_all () : (node union ptr) list = + match parse_next () with + | `Done -> [] + | `Null -> parse_all () + | `Parsed n -> n::parse_all () + +let (@->) (s : ('b, 'c) structured ptr) (f : ('a, ('b, 'c) structured) field) = + getf (!@ s) f + +let rec arglist (n : narg structure) : (narg structure) list = + let next = getf n narg_next in + if nullptr next + then [n] + else + (assert (next @-> node_type = 15); + n::arglist (next @-> node_narg)) + +let rec nodelist (n : nodelist structure ptr) : (node union ptr) list = + if nullptr n + then [] + else (n @-> nodelist_n)::nodelist (n @-> nodelist_next) + +let rec redirlist (n : node union ptr) = + if nullptr n + then [] + else + let h = match n @-> node_type with + (* NTO *) + | 16 -> `File (1,">",n @-> node_nfile) + (* NCLOBBER *) + | 17 -> `File (1,">|",n @-> node_nfile) + (* NFROM *) + | 18 -> `File (0,"<",n @-> node_nfile) + (* NFROMTO *) + | 19 -> `File (0,"<>",n @-> node_nfile) + (* NAPPEND *) + | 20 -> `File (1,">>",n @-> node_nfile) + (* NTOFD *) + | 21 -> `Dup (1,">&",n @-> node_ndup) + (* NFROMFD *) + | 22 -> `Dup (0,"<&",n @-> node_ndup) + (* NHERE quoted heredoc---no expansion)*) + | 23 -> `Here (0,"<<",false,n @-> node_nhere) + (* NXHERE unquoted heredoc (param/command/arith expansion) *) + | 24 -> `Here (0,"<<",true,n @-> node_nhere) + | nt -> failwith ("unexpected node_type in redirlist: " ^ string_of_int nt) + in + h :: redirlist (getf (n @-> node_nfile) nfile_next) + +let rec caselist (n : node union ptr) = + if nullptr n + then [] + else + let n = n @-> node_nclist in + assert (getf n nclist_type = 13); (* NCLIST *) + (getf n nclist_pattern, getf n nclist_body)::caselist (getf n nclist_next) + +let explode s = + let rec exp i l = + if i < 0 then l else exp (i - 1) (s.[i] :: l) in + exp (String.length s - 1) [] + +let implode l = + let s = Bytes.create (List.length l) in + let rec imp i l = + match l with + | [] -> () + | (c::l) -> (Bytes.set s i c; imp (i+1) l) + in + imp 
0 l; + Bytes.unsafe_to_string s + +let rec intercalate p ss = + match ss with + | [] -> "" + | [s] -> s + | s::ss -> s ^ p ^ intercalate p ss + +let lines = Str.split (Str.regexp "[\n\r]+") + +let rec fresh_marker ls s = + if List.mem s ls + then fresh_marker ls (s ^ (String.sub s (String.length s - 1) 1)) + else s + +let rec split_at p xs = + match xs with + | [] -> ([],[]) + | x::xs -> + if p x + then ([],x::xs) + else let (xs,ys) = split_at p xs in + (x::xs, ys) + +let string_of_vs = function + | 0x1 -> (* VSNORMAL ${var} *) [] + | 0x2 -> (* VSMINUS ${var-text} *) ['-'] + | 0x3 -> (* VSPLUS ${var+text} *) ['+'] + | 0x4 -> (* VSQUESTION ${var?message} *) ['?'] + | 0x5 -> (* VSASSIGN ${var=text} *) ['='] + | 0x6 -> (* VSTRIMRIGHT ${var%pattern} *) ['%'] + | 0x7 -> (* VSTRIMRIGHTMAX ${var%%pattern} *) ['%';'%'] + | 0x8 -> (* VSTRIMLEFT ${var#pattern} *) ['#'] + | 0x9 -> (* VSTRIMLEFTMAX ${var##pattern} *) ['#';'#'] + | vs -> failwith ("Unknown VSTYPE: " ^ string_of_int vs) + +let braces s = "{ " ^ s ^ " ; }" +let parens s = "( " ^ s ^ " )" + +let rec show (n : node union ptr) : string = + match (n @-> node_type) with + (* NCMD *) + | 0 -> + let n = n @-> node_ncmd in + let raw_cmd = intercalate " " (List.map sharg (arglist (getf n ncmd_args @-> node_narg))) in + let vars = if nullptr (getf n ncmd_assign) then "" else intercalate " " (List.map sharg (arglist (getf n ncmd_assign @-> node_narg))) ^ " " in + vars ^ raw_cmd ^ shredir (getf n ncmd_redirect) + (* NPIPE *) + | 1 -> + let n = n @-> node_npipe in + let cmds = nodelist (getf n npipe_cmdlist) in + intercalate " | " (List.map show cmds) ^ if (getf n npipe_backgnd) = 0 then "" else " &" + (* NREDIR *) + | 2 -> shnredir braces n + (* NBACKGND *) + | 3 -> shnredir braces n ^ " &" + (* NSUBSHELL *) + | 4 -> shnredir parens n + (* NAND *) + | 5 -> shbinary "&&" (n @-> node_nbinary) + (* NOR *) + | 6 -> shbinary "||" (n @-> node_nbinary) + (* NSEMI *) + | 7 -> shbinary ";" (n @-> node_nbinary) + (* NIF *) + | 8 -> 
shif (n @-> node_nif) + (* NWHILE *) + | 9 -> + let n = n @-> node_nbinary in + "while " ^ show (getf n nbinary_ch1) ^ "; do " ^ show (getf n nbinary_ch2) ^ "; done" + (* NUNTIL *) + | 10 -> + let n = n @-> node_nbinary in + "until " ^ show (getf n nbinary_ch1) ^ "; do " ^ show (getf n nbinary_ch2) ^ "; done" + (* NFOR *) + | 11 -> + let n = n @-> node_nfor in + "for " ^ (getf n nfor_var) ^ " in " ^ sharg (getf n nfor_args @-> node_narg) ^ "; do " ^ show (getf n nfor_body) ^ "; done" + (* NCASE *) + | 12 -> + let n = n @-> node_ncase in + "case " ^ sharg (getf n ncase_expr @-> node_narg) ^ " in " ^ shclist (getf n ncase_cases) ^ " esac" + (* NDEFUN *) + | 14 -> + let n = n @-> node_ndefun in + (getf n ndefun_text) ^ "() " ^ braces (show (getf n ndefun_body)) + (* NARG *) + | 15 -> failwith "Didn't expect narg at the top-level" + (* NNOT *) + | 25 -> "! { " ^ show (getf (n @-> node_nnot) nnot_com) ^ " }" + | nt -> failwith ("unexpected node_type " ^ string_of_int nt) + +and shbinary (op : string) (n : nbinary structure) : string = + show (getf n nbinary_ch1) ^ " " ^ op ^ " " ^ show (getf n nbinary_ch2) + +and shnredir parenthesize n = + let nr = n @-> node_nredir in + parenthesize (show (getf nr nredir_n)) ^ shredir (getf nr nredir_redirect) + +and shif n = + "if " ^ show (getf n nif_test) ^ + "; then " ^ show (getf n nif_ifpart) ^ + (let else_part = getf n nif_elsepart in + if nullptr else_part + then "; fi" + else if (else_part @-> node_type = 8) + then "; el" ^ shif (else_part @-> node_nif) + else "; else " ^ show else_part ^ "; fi") + +and shclist clist = intercalate " " (List.map shcase (caselist clist)) (* handles NCLIST = 13 *) + +and shcase (pat,body) = + assert (pat @-> node_type = 15); + sharg (pat @-> node_narg) ^ ") " ^ show body ^ ";;" + +and shredir (n : node union ptr) : string = + let redirs = redirlist n in + if redirs = [] + then "" + else " " ^ intercalate " " (List.map show_redir redirs) +and show_redir n : string = + match n with + | `File 
(src,sym,f) -> show_redir_src (getf f nfile_fd) src ^ sym ^ sharg ((getf f nfile_fname) @-> node_narg) + | `Dup (src,sym,d) -> show_redir_src (getf d ndup_fd) src ^ sym ^ string_of_int (getf d ndup_dupfd) + | `Here (src,sym,exp,h) -> + let heredoc = sharg ((getf h nhere_doc) @-> node_narg) in + let marker = fresh_marker (lines heredoc) "EOF" in + show_redir_src (getf h nhere_fd) src ^ sym ^ (if exp then marker else "'" ^ marker ^ "'") ^ "\n" ^ heredoc ^ marker +and show_redir_src actual expected = + if actual = expected + then "" + else string_of_int actual + +and sharg (n : narg structure) : string = + let str,s',bqlist,stack = show_arg (explode (getf n narg_text)) (getf n narg_backquote) [] in + (* we should have used up the string and have no backquotes left in our list *) + assert (s' = []); + assert (nullptr bqlist); + assert (stack = []); + str +and show_arg (s : char list) (bqlist : nodelist structure ptr) stack = + (* we have to look at the string and interpret control characters... 
*) + match s,stack with + | [],[] -> "",[],bqlist,[] + | [],`CTLVar::stack' -> failwith "End of string before CTLENDVAR" + | [],`CTLAri::stack' -> failwith "End of string before CTLENDARI" + | [],`CTLQuo::stack' -> failwith "End of string before CTLQUOTEMARK" + (* CTLESC *) + | '\129'::c::s',_ -> + let str,s'',bqlist',stack' = show_arg s' bqlist stack in + let c' = match c with + | '\'' -> "\\'" + | '\"' -> "\\\"" + | _ -> Char.escaped c + in + c' ^ str,s'',bqlist',stack' + (* CTLVAR *) + | '\130'::t::s',_ -> + let v,s'',bqlist',stack' = show_var (int_of_char t) s' bqlist stack in + assert (stack = stack'); + let str,s''',bqlist'',stack'' = show_arg s'' bqlist' stack' in + "${" ^ v ^ "}" ^ str, s''', bqlist'', stack'' + (* CTLENDVAR *) + | '\131'::s',`CTLVar::stack' -> "",[],bqlist,stack' (* s' gets handled by CTLVAR *) + | '\131'::s',`CTLAri::stack' -> failwith "Saw CTLENDVAR before CTLENDARI" + | '\131'::s',`CTLQuo::stack' -> failwith "Saw CTLENDVAR before CTLQUOTEMARK" + | '\131'::s',[] -> failwith "Saw CTLENDVAR outside of CTLVAR" + (* CTLBACKQ *) + | '\132'::s',_ -> + if nullptr bqlist + then failwith "Saw CTLBACKQ but bqlist was null" + else + let n = bqlist @-> nodelist_n in + (* MMG: !!! dash has a bug in its sharg function... it doesn't advance the list! 
*) + let bqlist' = bqlist @-> nodelist_next in + let str,s'',bqlist'',stack' = show_arg s' bqlist' stack in + "$(" ^ show n ^ ")" ^ str,s'',bqlist'',stack' + (* CTLARI *) + | '\134'::s',_ -> + let ari,s'',bqlist',stack' = show_arg s' bqlist (`CTLAri::stack) in + assert (stack = stack'); + let str,s''',bqlist'',stack'' = show_arg s'' bqlist' stack' in + "$((" ^ ari ^ "))" ^ str, s''', bqlist'', stack'' + (* CTLENDARI *) + | '\135'::s',`CTLAri::stack' -> "",s',bqlist,stack' + | '\135'::s',`CTLVar::stack' -> failwith "Saw CTLENDARI before CTLENDVAR" + | '\135'::s',`CTLQuo::stack' -> failwith "Saw CTLENDARI before CTLQUOTEMARK" + | '\135'::s',[] -> failwith "Saw CTLENDARI outside of CTLARI" + (* CTLQUOTEMARK *) + | '\136'::s',[`CTLQuo] -> "",s',bqlist,[] + | '\136'::s',_ -> + let quoted,s'',bqlist',stack' = show_arg s' bqlist [`CTLQuo] in + assert (stack' = []); + let str,s''',bqlist'',stack'' = show_arg s'' bqlist' stack in + "\"" ^ quoted ^ "\"" ^ str, s''', bqlist'', stack'' + (* ordinary character *) + | c::s',_ -> + let str,s',bqlist',stack' = show_arg s' bqlist stack in + let c' = match c with + | '\'' -> "\\'" + | '\"' -> "\\\"" + | _ -> String.make 1 c + in + c' ^ str,s',bqlist',stack' +and show_var (t : int) (s : char list) (bqlist : nodelist structure ptr) stack = + let var_name,s' = split_at (fun c -> c = '=') s in + (* mask out VSNUL, check VSTYPE *) + match t land 0x0f, s' with + (* VSNORMAL and VSLENGTH get special treatment + + neither ever gets VSNUL + VSNORMAL is terminated just with the =, without a CTLENDVAR *) + (* VSNORMAL *) + | 0x1,'='::s'' -> implode var_name, s'', bqlist, stack + (* VSLENGTH *) + | 0xa,'='::'\131'::s'' -> implode (['#'] @ var_name), s'', bqlist, stack + | 0x1,c::_ | 0xa,c::_ -> failwith ("Missing CTLENDVAR for VSNORMAL/VSLENGTH, found " ^ Char.escaped c) + (* every other VSTYPE takes mods before CTLENDVAR *) + | vstype,'='::s' -> + (* check VSNUL *) + let vsnul = if t land 0x10 = 1 then [] else [':'] in + let 
mods,s'',bqlist',stack' = show_arg s' bqlist (`CTLVar::stack) in + implode (var_name @ vsnul @ string_of_vs vstype) ^ mods, s'', bqlist', stack' + | _,c::s' -> failwith ("Expected '=' terminating variable name, found " ^ Char.escaped c) + | _,[] -> failwith "Expected '=' terminating variable name, found EOF" + diff --git a/expansion.ml b/expansion.ml new file mode 100644 index 0000000..caf460b --- /dev/null +++ b/expansion.ml @@ -0,0 +1,59 @@ +(* + + + + +*) + +type param = Param of string + +type op = + | Hyp + | ColonHyp + | Eq + | ColonEq + | Ques + | ColonQues + | Plus + | ColonPlus + | Percent + | PercentPercent + | Hash + | HashHash + +type word + = WLiteral of string + | WParam of param + | WArith of string + | WTilde of string + | WSubst of param * op * word + | WLength of param + | WCommand of string + | WDoubleQuote of string + | WCat of word * word + +let cat (w1 : word) (w2: word) = match (w1, w2) with + | WLiteral str1, WLiteral str2 = WLiteral (str1 ^ str2) + | _, _ -> WCat (w1, w2) + +let rec parse_word (str : char list) : word = str match + (* Section 2.3, bullet 1 *) + | [] -> WLiteral "" + (* Section 2.2.1 *) + | '\' :: '\n' :: rest -> parse_word rest + | '\' :: ch :: rest -> cat (WLiteral (Char.to_string ch)) (parse_word rest) + (* Section 2.2.2 *) + | '\'' :: rest -> parse_word_in_single_quotes rest [] + (* Section 2.3, bullet 5 *) + | '$' :: '(' :: '(' :: rest -> parse_arith_word rest + +and parse_arith_word (str : char list) : word = str match + | ')' :: ')' :: rest + +(* Section 2.2.2 *) +and parse_word_in_single_quotes (str : char list) (chars : char list): word = + match chars with + | '\'' :: rest -> cat (WLiteral (List.of_char_list (List.rev chars))) parse_word rest + | ch :: rest -> parse_word_in_single_quotes rest (ch :: chars) + | [] -> failwith "EOF reading a single-quoted string" + diff --git a/main.ml b/main.ml new file mode 100644 index 0000000..715a0c3 --- /dev/null +++ b/main.ml @@ -0,0 +1,27 @@ +open Dash + +let verbose = ref 
false +let input_src : string option ref = ref None + +let set_input_src () = + match !input_src with + | None -> setinputtostdin () + | Some f -> setinputfile f + +let parse_args () = + Arg.parse + ["-v",Arg.Set verbose,"verbose mode"] + (function | "-" -> input_src := None | f -> input_src := Some f) + "Final argument should be either a filename or - (for STDIN); only the last such argument is used" +;; + initialize (); + parse_args (); + set_input_src (); + let ns = parse_all () in + let cs = List.map Ast.of_node ns in + let try_compile c = + try ignore (Compile.compile c) + with _ -> prerr_endline "couldn't compile"; () in + List.iter try_compile cs; + List.map + (fun c -> print_endline (Ast.to_string c)) cs diff --git a/round_trip.sh b/round_trip.sh new file mode 100755 index 0000000..fa85b3b --- /dev/null +++ b/round_trip.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +if [ $# -ne 2 ]; then + echo "Usage: ${0##*/} program target" + exit 2 +fi + +p=$1 +tgt=$2 + +orig=$(${p} ${tgt}) +rt=$(${p} ${tgt} | ${p}) + +if [ "${orig}" = "${rt}" ]; +then echo ${tgt} OK; exit 0 +else + echo ${tgt} FAILED + echo ${orig} + echo ========== + echo ${rt} + exit 1 +fi diff --git a/test.c b/test.c new file mode 100644 index 0000000..a00ebe3 --- /dev/null +++ b/test.c @@ -0,0 +1,426 @@ +#include +#include +#include +#include + +#define DEBUG +#include "shell.h" +#include "parser.h" +#include "nodes.h" +#include "mystring.h" +#include "show.h" +#include "options.h" +#include "init.h" +#include "input.h" + +/* This file is a mash-up of dash/src/show.c, compiling out a special + version of dash into a shared library. The end goal is pulling out + the dash parser so we can work with it in Ocaml. + + I mostly wrote this file to make sure I built the library + correctly. Next step is getting the C AST into Ocaml. 
+*/ + +static void shtree(union node *, int, char *, FILE*); +static void shcmd(union node *, FILE *); +static void sharg(union node *, FILE *); +static void indent(int, char *, FILE *); +static void trstring(char *); + +int main(int argc, char** argv) { + if (argc != 2) { + fprintf(stderr, "Usage: %s [command to evaluate]\n", argv[0]); + return -1; + } + + char *cmd = strdup(argv[1]); + + /* debugging unsigned char issues */ + // printf("CTLESC = %hhu\n", (unsigned char) CTLESC); + // printf("CTLVAR = %hhu\n", (unsigned char) CTLVAR); + // printf("CTLENDVAR = %hhu\n", (unsigned char) CTLENDVAR); + // printf("CTLBACKQ = %hhu\n", (unsigned char) CTLBACKQ); + // printf("CTLARI = %hhu\n", (unsigned char) CTLARI); + // printf("CTLENDARI = %hhu\n", (unsigned char) CTLENDARI); + // printf("CTLQUOTEMARK = %hhu\n", (unsigned char) CTLQUOTEMARK); + + init(); + + setinputstring(cmd); + + union node *n = parsecmd(0); + if (n != (union node *) EOF) { + showtree(n); + } else { + fprintf(stderr, "Hit eof..."); + + } + + popfile(); + free(cmd); + + return 0; +} + +void +showtree(union node *n) +{ + trputs("showtree called\n"); + shtree(n, 1, NULL, stdout); +} + + +static void +shtree(union node *n, int ind, char *pfx, FILE *fp) +{ + struct nodelist *lp; + const char *s; + + if (n == NULL) + return; + + indent(ind, pfx, fp); + switch(n->type) { + case NSEMI: + s = "; "; + goto binop; + case NAND: + s = " && "; + goto binop; + case NOR: + s = " || "; +binop: + shtree(n->nbinary.ch1, ind, NULL, fp); + /* if (ind < 0) */ + fputs(s, fp); + shtree(n->nbinary.ch2, ind, NULL, fp); + break; + case NCMD: + shcmd(n, fp); + if (ind >= 0) + putc('\n', fp); + break; + case NPIPE: + for (lp = n->npipe.cmdlist ; lp ; lp = lp->next) { + shcmd(lp->n, fp); + if (lp->next) + fputs(" | ", fp); + } + if (n->npipe.backgnd) + fputs(" &", fp); + if (ind >= 0) + putc('\n', fp); + break; + case NWHILE: + fprintf(fp, "while\n"); + shtree(n->nbinary.ch1, ind+1, NULL, fp); + indent(ind, pfx, fp); + 
fprintf(fp, "do;\n"); + shtree(n->nbinary.ch2, ind+1, NULL, fp); + indent(ind, pfx, fp); + fprintf(fp,"done\n"); + break; + default: + fprintf(fp, "", n->type); + if (ind >= 0) + putc('\n', fp); + break; + } +} + + + +static void +shcmd(union node *cmd, FILE *fp) +{ + union node *np; + int first; + const char *s; + int dftfd; + + first = 1; + for (np = cmd->ncmd.args ; np ; np = np->narg.next) { + if (! first) + putchar(' '); + sharg(np, fp); + first = 0; + } + for (np = cmd->ncmd.redirect ; np ; np = np->nfile.next) { + if (! first) + putchar(' '); + switch (np->nfile.type) { + case NTO: s = ">"; dftfd = 1; break; + case NCLOBBER: s = ">|"; dftfd = 1; break; + case NAPPEND: s = ">>"; dftfd = 1; break; + case NTOFD: s = ">&"; dftfd = 1; break; + case NFROM: s = "<"; dftfd = 0; break; + case NFROMFD: s = "<&"; dftfd = 0; break; + case NFROMTO: s = "<>"; dftfd = 0; break; + default: s = "*error*"; dftfd = 0; break; + } + if (np->nfile.fd != dftfd) + fprintf(fp, "%d", np->nfile.fd); + fputs(s, fp); + if (np->nfile.type == NTOFD || np->nfile.type == NFROMFD) { + fprintf(fp, "%d", np->ndup.dupfd); + } else { + sharg(np->nfile.fname, fp); + } + first = 0; + } +} + + + +static void +sharg(union node *arg, FILE *fp) +{ + char *p; + struct nodelist *bqlist; + int subtype; + + if (arg->type != NARG) { + printf("\n", arg->type); + abort(); + } + bqlist = arg->narg.backquote; + for (p = arg->narg.text ; *p ; p++) { + switch ((signed char)*p) { + case CTLESC: + putc(*++p, fp); + break; + case CTLVAR: + putc('$', fp); + putc('{', fp); + subtype = *++p; + if (subtype == VSLENGTH) + putc('#', fp); + + while (*p != '=') + putc(*p++, fp); + + if (subtype & VSNUL) + putc(':', fp); + + switch (subtype & VSTYPE) { + case VSNORMAL: + putc('}', fp); + break; + case VSMINUS: + putc('-', fp); + break; + case VSPLUS: + putc('+', fp); + break; + case VSQUESTION: + putc('?', fp); + break; + case VSASSIGN: + putc('=', fp); + break; + case VSTRIMLEFT: + putc('#', fp); + break; + case 
VSTRIMLEFTMAX: + putc('#', fp); + putc('#', fp); + break; + case VSTRIMRIGHT: + putc('%', fp); + break; + case VSTRIMRIGHTMAX: + putc('%', fp); + putc('%', fp); + break; + case VSLENGTH: + break; + default: + printf("", subtype); + } + break; + case CTLENDVAR: + putc('}', fp); + break; + case CTLBACKQ: + putc('$', fp); + putc('(', fp); + shtree(bqlist->n, -1, NULL, fp); + putc(')', fp); + break; + default: + putc(*p, fp); + break; + } + } +} + + +static void +indent(int amount, char *pfx, FILE *fp) +{ + int i; + + for (i = 0 ; i < amount ; i++) { + if (pfx && i == amount - 1) + fputs(pfx, fp); + putc('\t', fp); + } +} + + + +/* + * Debugging stuff. + */ + + +FILE *tracefile; + + +void +trputc(int c) +{ + if (debug != 1) + return; + putc(c, tracefile); +} + +void +trace(const char *fmt, ...) +{ + va_list va; + + if (debug != 1) + return; + va_start(va, fmt); + (void) vfprintf(tracefile, fmt, va); + va_end(va); +} + +void +tracev(const char *fmt, va_list va) +{ + if (debug != 1) + return; + (void) vfprintf(tracefile, fmt, va); +} + + +void +trputs(const char *s) +{ + if (debug != 1) + return; + fputs(s, tracefile); +} + + +static void +trstring(char *s) +{ + char *p; + char c; + + if (debug != 1) + return; + putc('"', tracefile); + for (p = s ; *p ; p++) { + switch ((signed char)*p) { + case '\n': c = 'n'; goto backslash; + case '\t': c = 't'; goto backslash; + case '\r': c = 'r'; goto backslash; + case '"': c = '"'; goto backslash; + case '\\': c = '\\'; goto backslash; + case CTLESC: c = 'e'; goto backslash; + case CTLVAR: c = 'v'; goto backslash; + case CTLBACKQ: c = 'q'; goto backslash; +backslash: putc('\\', tracefile); + putc(c, tracefile); + break; + default: + if (*p >= ' ' && *p <= '~') + putc(*p, tracefile); + else { + putc('\\', tracefile); + putc(*p >> 6 & 03, tracefile); + putc(*p >> 3 & 07, tracefile); + putc(*p & 07, tracefile); + } + break; + } + } + putc('"', tracefile); +} + + +void +trargs(char **ap) +{ + if (debug != 1) + return; + while (*ap) { + 
trstring(*ap++); + if (*ap) + putc(' ', tracefile); + else + putc('\n', tracefile); + } +} + + +void +opentrace(void) +{ + char s[100]; +#ifdef O_APPEND + int flags; +#endif + + if (debug != 1) { + if (tracefile) + fflush(tracefile); + /* leave open because libedit might be using it */ + return; + } +#ifdef not_this_way + { + char *p; + if ((p = getenv(homestr)) == NULL) { + if (geteuid() == 0) + p = "/"; + else + p = "/tmp"; + } + scopy(p, s); + strcat(s, "/trace"); + } +#else + scopy("./trace", s); +#endif /* not_this_way */ + if (tracefile) { +#ifndef __KLIBC__ + if (!freopen(s, "a", tracefile)) { +#else + if (!(!fclose(tracefile) && (tracefile = fopen(s, "a")))) { +#endif /* __KLIBC__ */ + fprintf(stderr, "Can't re-open %s\n", s); + debug = 0; + return; + } + } else { + if ((tracefile = fopen(s, "a")) == NULL) { + fprintf(stderr, "Can't open %s\n", s); + debug = 0; + return; + } + } +#ifdef O_APPEND + if ((flags = fcntl(fileno(tracefile), F_GETFL, 0)) >= 0) + fcntl(fileno(tracefile), F_SETFL, flags | O_APPEND); +#endif +#ifndef __KLIBC__ + setlinebuf(tracefile); +#endif /* __KLIBC__ */ + fputs("\nTracing started.\n", tracefile); +} + diff --git a/tests/braces_amp.sh b/tests/braces_amp.sh new file mode 100644 index 0000000..fc82664 --- /dev/null +++ b/tests/braces_amp.sh @@ -0,0 +1,5 @@ +for x in foo; do a & b & c & d & done +echo a & echo b +for y in foo; do a & b & done +while false; do a & b & done +until true; do forever & ever & ever & done diff --git a/tests/diverge.sh b/tests/diverge.sh new file mode 100755 index 0000000..1cf8ce5 --- /dev/null +++ b/tests/diverge.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +while true; do true; done diff --git a/tests/escaping b/tests/escaping new file mode 100644 index 0000000..9ea9a1d --- /dev/null +++ b/tests/escaping @@ -0,0 +1 @@ +${x=;|&!~*\}\{()\$\' "this is a \"quoted\" string"} \ No newline at end of file diff --git a/tests/for_spaces.sh b/tests/for_spaces.sh new file mode 100755 index 0000000..ed36e7a --- /dev/null +++ 
b/tests/for_spaces.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +oldifs=$IFS +IFS=$(echo -e "\t") +for f in `ls *`; do + echo $f +done +IFS=$oldifs diff --git a/tests/grab_submissions.sh b/tests/grab_submissions.sh new file mode 100755 index 0000000..3d2370c --- /dev/null +++ b/tests/grab_submissions.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +if [ "$#" != "1" ]; then + echo "Usage: $0 [hwXX]" + exit 1 +fi + +if [ -d "$1" ]; then + echo "Grading directory already exists" + exit 2 +fi + +mkdir $1 +mkdir $1/submissions +cp ../dropbox/$1/* $1/submissions diff --git a/tests/grade.sh b/tests/grade.sh new file mode 100755 index 0000000..a0f2ad2 --- /dev/null +++ b/tests/grade.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +score=0 +total=0 + +if [ -d output ]; then + echo "output directory already exists, aborting" + exit 1 +fi + +mkdir output + +echo "LEXER/PARSER AUTOGRADER RESULTS" +echo + +# check success cases +for i in right/*.lc; do + file=$(basename $i) + output=$(mktemp output/$file.XXXX) + + echo -n "$file: " + + ./Main $i >$output 2>&1 + if [ $? -eq 0 ] + then + let score+=1 + echo "1/1" + else + echo "0/1" + fi + + let total+=1 +done + +# check failure cases +for i in wrong/*.lc; do + file=$(basename $i) + output=$(mktemp output/$file.XXXX) + + echo -n "$file: " + + ./Main $i >$output 2>&1 + if [ $? -eq 1 ] + then + let score+=1 + echo "1/1" + else + echo "0/1" + fi + + let total+=1 +done + +echo +echo "TOTAL: $score / $total" +echo +echo "PROBLEM 1: XXX / 5" +echo +let total=total+5 +echo "FINAL GRADE: $score + XXX / $total" diff --git a/tests/run_grader.sh b/tests/run_grader.sh new file mode 100755 index 0000000..10017b9 --- /dev/null +++ b/tests/run_grader.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +if [ "$#" != "1" ]; then + echo "Usage: $0 [hwXX]" + exit 1 +fi + +if [ ! -d "$1/grading" ]; then + echo "Couldn't find grading directory (looked in $1/grading)" + exit 2 +fi + +cd $1/grading + +errors="" +for s in `ls`; do + echo "GRADING $s" + (cd $s; make) + if [ "$?" 
!= "0" ]; then + errors+=" $s" + fi +done + +echo +echo "There were errors for the following students:${errors}" +echo ${errors} >"$1/grading/errors.log" diff --git a/tests/run_lda.sh b/tests/run_lda.sh new file mode 100755 index 0000000..a2e8698 --- /dev/null +++ b/tests/run_lda.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +export PYTHONIOENCODING=utf8 + +if test $# -ne 0; +then + KS="$*"; +else + KS="50 75 100 125 150 175 200" +fi + +DIR=`date "+%Y-%m-%d_%H:%M"` +START=`date "+%Y-%m-%d %H:%M"` + +# TODO error handling + +echo "SETTING UP" +mkdir ${DIR} + +echo "PARSING" +python parse.py + +for dat in abstracts.dat vocab.dat docs.dat; do + mv ${dat} ${DIR} +done + +# we don't want to lose this one! +cp stopwords.dat ${DIR} + +echo "RUNNING LDA" + +ABS=${DIR}/abstracts.dat + +for k in ${KS}; do + lda est 1/50 ${k} settings.txt ${ABS} seeded ${DIR}/lda${k} & + echo lda${k} >>${DIR}/.gitignore +done + +wait +echo "PROCESSING TOPICS" + +for k in ${KS}; do + python debug_topics.py ${DIR} ${k} > ${DIR}/lda${k}_topics.txt +done + +echo "GENERATING CSV" + +for i in ${DIR}/lda*; do + test -d ${i} && python post.py ${i}/final.gamma ${DIR}/docs.dat > ${i}.csv + test -d ${i} && python by_year.py ${i}/final.gamma ${DIR}/docs.dat > ${i}_by_year.csv +done + +echo "MOVING TO OUTPUT DIRECTORY" +mv ${DIR} ../out + +echo "DONE" +echo All done. Started at ${START}, done at `date "+%Y-%m-%d %H:%M"`. diff --git a/tests/send_emails.sh b/tests/send_emails.sh new file mode 100755 index 0000000..9e3515f --- /dev/null +++ b/tests/send_emails.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +if [ "$#" != "1" ]; then + echo "Usage: $0 [hwXX]" + exit 1 +fi + +if [ ! 
-d "$1/mail" ]; then + echo "Couldn't find mail directory (looked in $1/grading)" + exit 2 +fi + +cd $1/mail + +for s in `ls`; do + ../../mail.scpt "[cs131] $1 grade" $s +done diff --git a/tests/syntax b/tests/syntax new file mode 100644 index 0000000..55fb8ae --- /dev/null +++ b/tests/syntax @@ -0,0 +1,35 @@ +ls * ${x:-$(foo)} ${#foo}seven $x "foo\"${x}"${y} $((x + ${x})) `ls 1` $(bq1)x$(bq2) >foo 2>&1 </dev/null +mv /tmp/foo /tmp/bar & +foo | bar | baz | quux +foo | bar | baz | quux & +if /bin/true; then always; else never; fi +if [ -x some_file ]; then maybe; elif [ -d some_dir ]; then otherwise; fi +if something; then we are looking for the one-armed bandit; fi +while [ ! -x some_file ]; do try to make some_file; done +until [ -x some_file ]; do seriously make that file; done +while { ! a && ! b ; } ; do certainly not c; done +for x in a b c d; do something to those letters; done +for x; do something to those arguments implicitly; done +case "$1" in start) echo starting ;; stop) oh noes ;; *) blargh ;; esac +case "$1" in start) echo starting ;; stop) oh noes ;; *) blargh;; esac +function foo { echo $*; export x=$((x + 1)) } +function foo { echo $*; export x=$((x + 1)) } +foo() { echo $*; export x=$((x + 1)) ; } +this < + +scriptName="${0##*/}" + +declare -i DEFAULT_TIMEOUT=9 +declare -i DEFAULT_INTERVAL=1 +declare -i DEFAULT_DELAY=1 + +# Timeout. +declare -i timeout=DEFAULT_TIMEOUT +# Interval between checks if the process is still alive. +declare -i interval=DEFAULT_INTERVAL +# Delay between posting the SIGTERM signal and destroying the process by SIGKILL. 
+declare -i delay=DEFAULT_DELAY + +printUsage() { + cat < /dev/null & + +exec "${@}" + From b3beb7ca1ee3e83e59b775ab4d8e6b1d9e58cd47 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 8 May 2017 10:17:16 -0700 Subject: [PATCH 209/401] first cut on command-line tool --- .gitignore | 4 ++++ Makefile | 21 ++++++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 08ba06a..3c66d6e 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,10 @@ Makefile .Trash* *[Tt]humbs.db + +*.a +*.cmxa + test *.native *~ diff --git a/Makefile b/Makefile index a1b894e..b1945e9 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,27 @@ DASH=$(shell (cd ../dash; pwd)) DASHSRC=$(DASH)/src -TESTING : main.native $(wildcard tests/*) +OCAMLLIB=$(shell opam config var lib) +OCAMLINCLUDES=-I $(OCAMLLIB)/bytes -I $(OCAMLLIB)/ctypes -I /usr/local/lib/ocaml +OCAMLLIBS=unix.cmxa bigarray.cmxa str.cmxa ctypes.cmxa ctypes-foreign-base.cmxa ctypes-foreign-unthreaded.cmxa + +test : main.native $(wildcard tests/*) @for f in tests/*; do \ ./round_trip.sh ./main.native $$f 2>test.err; \ done -main.native : main.ml dash.ml ast.ml compile.ml - ocamlbuild -no-hygiene -pkg ctypes.foreign -lflags "-cclib -force_load $(DASH)/libdash.a" $@ -test : test.c - gcc -Wall -I $(DASHSRC) -L $(DASH) -ldash $^ -o $@ +#/usr/local/bin/ocamlopt.opt -cclib -force_load /Users/mgree/fsh/dash/libdash.a -I /Users/mgree/.opam/coq-8.4/lib/bytes -I /Users/mgree/.opam/coq-8.4/lib/ctypes -I /usr/local/lib/ocaml /usr/local/lib/ocaml/unix.cmxa /usr/local/lib/ocaml/bigarray.cmxa /usr/local/lib/ocaml/str.cmxa /Users/mgree/.opam/coq-8.4/lib/ctypes/ctypes.cmxa /Users/mgree/.opam/coq-8.4/lib/ctypes/ctypes-foreign-base.cmxa /Users/mgree/.opam/coq-8.4/lib/ctypes/ctypes-foreign-unthreaded.cmxa dash.cmx ast.cmx compile.cmx main.cmx -o main.native + +main.native : dash.cmx ast.cmx compile.cmx main.cmx + ocamlopt.opt -cclib -force_load $(DASH)/libdash.a $(OCAMLINCLUDES) $(OCAMLLIBS) $^ -o 
$@ +# ocamlbuild -log build.log -no-hygiene -pkg ctypes.foreign -lflags "-cclib -force_load $(DASH)/libdash.a" $@ + +dash.cmxa : dash.cmx ast.cmx compile.cmx + ocamlopt.opt -cclib -force_load $(DASH)/libdash.a $(OCAMLINCLUDES) $^ -a -o $@ + +%.cmx : %.ml + ocamlopt.opt $(OCAMLINCLUDES) -c -o $@ $< clean : rm -f *.o test *~ *.cmi *.cmx main.native From b7568c032a520fdf076a9e31f347dc0a7a69b1f8 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 8 May 2017 11:06:44 -0700 Subject: [PATCH 210/401] tiny fixes to makefiles, works with latest ctypes --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b1945e9..b9d82a7 100644 --- a/Makefile +++ b/Makefile @@ -24,5 +24,5 @@ dash.cmxa : dash.cmx ast.cmx compile.cmx ocamlopt.opt $(OCAMLINCLUDES) -c -o $@ $< clean : - rm -f *.o test *~ *.cmi *.cmx main.native + rm -f *.o test *~ *.cmi *.cmx main.native dash.a dash.cmxa rm -rf _build From 1ddf3ed21af7b783492fafcbb2bc4bb0eea7cdef Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 21 Jun 2017 09:45:31 -0700 Subject: [PATCH 211/401] add tildes to ast parsing --- ast.ml | 22 ++++++++++++++++++++++ compile.ml | 2 ++ 2 files changed, 24 insertions(+) diff --git a/ast.ml b/ast.ml index ae91a2b..f894170 100644 --- a/ast.ml +++ b/ast.ml @@ -28,6 +28,7 @@ type t = and arg_char = | C of char | E of char (* escape... necessary for expansion *) + | T of string option (* tilde *) | A of arg (* arith *) | V of var_type * bool (* VSNUL? 
*) * string * arg | Q of arg (* quoted *) @@ -265,10 +266,29 @@ and parse_arg (s : char list) (bqlist : nodelist structure ptr) stack = let a,s,bqlist,stack' = parse_arg s bqlist (`CTLQuo::stack) in assert (stack' = stack); arg_char (Q a) s bqlist stack' + (* tildes *) + | '~'::s,stack -> + let uname,s' = parse_tilde [] s in + arg_char (T uname) s' bqlist stack (* ordinary character *) | c::s,_ -> arg_char (C c) s bqlist stack +and parse_tilde acc = + let ret = if acc = [] then None else Some (implode acc) in + function + | [] -> (ret , []) + (* CTLESC *) + | '\129'::_ as s -> None, s + (* CTLQUOTEMARK *) + | '\136'::_ as s -> None, s + (* terminal: CTLENDVAR, /, : *) + | '\131'::_ as s -> ret, s + | ':'::_ as s -> ret, s + | '/'::_ as s -> ret, s + (* ordinary char *) + | c::s' -> parse_tilde (acc @ [c]) s' + and arg_char c s bqlist stack = let a,s,bqlist,stack = parse_arg s bqlist stack in (c::a,s,bqlist,stack) @@ -364,6 +384,8 @@ and string_of_arg_char = function | E ';' -> "\\;" | C c -> String.make 1 c | E c -> Char.escaped c + | T None -> "~" + | T (Some u) -> "~" ^ u | A a -> "$((" ^ string_of_arg a ^ "))" | V (Length,_,name,_) -> "${#" ^ name ^ "}" | V (vt,nul,name,a) -> diff --git a/compile.ml b/compile.ml index 2c0e36a..e620fb6 100644 --- a/compile.ml +++ b/compile.ml @@ -131,6 +131,8 @@ and expand_char (quoted : bool) (a : Ast.arg_char) : expr = match a with | Ast.C chr -> Str (String.make 1 chr) | Ast.E esc -> Str (String.make 1 esc) + | Ast.T None -> Str "~" + | Ast.T (Some usr) -> Str ("~" ^ usr) | Ast.A ari -> Arith (expand quoted ari) | Ast.V(fmt,nul,var,arg) -> expand_var quoted fmt nul var arg | Ast.Q arg -> expand true arg From 2950595153d85599f87678e4f091c6774895852c Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 23 Jun 2017 13:58:26 -0700 Subject: [PATCH 212/401] fix VSNUL reading bug, manage todos, tiny fix in expansion --- ast.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ast.ml b/ast.ml index 
f894170..c7859e3 100644 --- a/ast.ml +++ b/ast.ml @@ -235,7 +235,7 @@ and parse_arg (s : char list) (bqlist : nodelist structure ptr) stack = (* every other VSTYPE takes mods before CTLENDVAR *) | vstype,'='::s -> let a,s,bqlist,stack' = parse_arg s bqlist (`CTLVar::stack) in - V (var_type vstype,t land 0x10 = 1,implode var_name,a), s, bqlist, stack' + V (var_type vstype,t land 0x10 = 0x10,implode var_name,a), s, bqlist, stack' | _,c::_ -> failwith ("Expected '=' terminating variable name, found " ^ Char.escaped c) | _,[] -> failwith "Expected '=' terminating variable name, found EOF" in From 9415d0f7a63ed56f6f5003b47b4216668ea06e3e Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 15 Sep 2017 11:59:54 -0700 Subject: [PATCH 213/401] working on using setjmp to setup an error handler for dash, but no luck actually having things get called --- dash.ml | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/dash.ml b/dash.ml index 03cd126..44992ed 100644 --- a/dash.ml +++ b/dash.ml @@ -14,7 +14,31 @@ let () = seal stackmark let init_stack () = let stack = make stackmark in (* ??? 
do we want to save this *) foreign "setstackmark" (ptr stackmark @-> returning void) (addr stack) - + +(* on OS X x86_64 *) +let jmp_buf_t : 'a Ctypes_static.carray typ = array 18 int + +type jmploc +let jmploc : jmploc structure typ = structure "jmploc" +let jmp_buf = field jmploc "jmp_buf" jmp_buf_t +let () = seal jmploc + +let setjmp : int ptr -> int = foreign "setjmp" (ptr int @-> returning int) + +let with_handler (k : int -> 'a) : 'a = + let jmptgt = make jmploc in + let r = setjmp (CArray.start (getf jmptgt jmp_buf)) in + if r = 0 + then (* normal return *) + let handler = foreign_value "handler" (ptr jmploc) in + handler <-@ addr jmptgt; + k 0 + else (* coming from a longjmp *) + begin + fprintf stderr "dash raised exception %d\n" r; + k r + end + let init : unit -> unit = foreign "init" (void @-> returning void) let initialize () = From c23cb88564ddc3bac10ba8432cf4ea1312559109 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 15 Sep 2017 14:37:42 -0700 Subject: [PATCH 214/401] remove handler call --- dash.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/dash.ml b/dash.ml index 44992ed..b304f66 100644 --- a/dash.ml +++ b/dash.ml @@ -34,6 +34,7 @@ let with_handler (k : int -> 'a) : 'a = handler <-@ addr jmptgt; k 0 else (* coming from a longjmp *) + (* TODO we're never actually landing here, for some reason... 
*) begin fprintf stderr "dash raised exception %d\n" r; k r From 0915e9930c56ba06b07e4590cb425de2476b4e60 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 26 Jan 2018 10:21:54 -0800 Subject: [PATCH 215/401] fix Makefiles to not look in global spots---just use opam --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b9d82a7..428cda4 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ DASH=$(shell (cd ../dash; pwd)) DASHSRC=$(DASH)/src OCAMLLIB=$(shell opam config var lib) -OCAMLINCLUDES=-I $(OCAMLLIB)/bytes -I $(OCAMLLIB)/ctypes -I /usr/local/lib/ocaml +OCAMLINCLUDES=-I $(OCAMLLIB)/bytes -I $(OCAMLLIB)/ctypes OCAMLLIBS=unix.cmxa bigarray.cmxa str.cmxa ctypes.cmxa ctypes-foreign-base.cmxa ctypes-foreign-unthreaded.cmxa test : main.native $(wildcard tests/*) From ed452e3971b2ceb9ee998db22c8b1a32f70463a0 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 13 Aug 2018 10:56:50 -0400 Subject: [PATCH 216/401] fix case statements to allow for | in patterns --- ast.ml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ast.ml b/ast.ml index c7859e3..c398297 100644 --- a/ast.ml +++ b/ast.ml @@ -44,7 +44,7 @@ type t = | TrimL | TrimLMax | Length - and case = { cpattern : arg; cbody : t } + and case = { cpattern : arg list; cbody : t } let var_type = function | 0x0 -> (* VSNORMAL ${var} *) Normal @@ -142,7 +142,7 @@ let rec of_node (n : node union ptr) : t = to_arg (getf n ncase_expr @-> node_narg), List.map (fun (pattern,body) -> - { cpattern = to_arg (pattern @-> node_narg); + { cpattern = to_args pattern; cbody = of_node body}) (caselist (getf n ncase_cases))) (* NDEFUN *) @@ -400,7 +400,8 @@ and string_of_arg = function and string_of_assign (v,a) = v ^ "=" ^ string_of_arg a and string_of_case c = - string_of_arg c.cpattern ^ ") " ^ to_string c.cbody ^ ";;" + let pats = List.map string_of_arg c.cpattern in + intercalate "|" pats ^ ") " ^ to_string c.cbody ^ ";;" and string_of_redir 
= function | File (To,fd,a) -> show_unless 1 fd ^ ">" ^ string_of_arg a From a9fb6ac170de4905fbe74cd021ca0bd2dfb5683e Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 20 Aug 2018 13:32:52 -0400 Subject: [PATCH 217/401] basic env initialization, lots more working --- dash.ml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dash.ml b/dash.ml index b304f66..467d70e 100644 --- a/dash.ml +++ b/dash.ml @@ -55,6 +55,10 @@ let setinputtostdin () : unit = let setinputfile (s : string) : unit = let _ = foreign "setinputfile" (string @-> int @-> returning int) s 0 in () + +let setvar (x : string) (v : string) : unit = + let _ = foreign "setvar" (string @-> string @-> int @-> returning (ptr void)) x v 0 in + () (* first, we define the node type... *) From 5250271bbf48c843c417c6a58d366a74ad6d4c5e Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 5 Sep 2018 12:10:54 -0400 Subject: [PATCH 218/401] implemented eval --- dash.ml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dash.ml b/dash.ml index 467d70e..3a67592 100644 --- a/dash.ml +++ b/dash.ml @@ -45,6 +45,9 @@ let init : unit -> unit = foreign "init" (void @-> returning void) let initialize () = init (); init_stack () + +let popfile : unit -> unit = + foreign "popfile" (void @-> returning void) let setinputstring : string -> unit = foreign "setinputstring" (string @-> returning void) From 6e63eaa8623021e45a855d73c9290e599f577589 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 5 Sep 2018 13:32:57 -0400 Subject: [PATCH 219/401] implement source/., add eval tests, fix how exit works --- dash.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dash.ml b/dash.ml index 3a67592..9687fbf 100644 --- a/dash.ml +++ b/dash.ml @@ -55,8 +55,8 @@ let setinputstring : string -> unit = let setinputtostdin () : unit = foreign "setinputfd" (int @-> int @-> returning void) 0 0 (* don't both pushing the file *) -let setinputfile (s : string) : unit = - let _ = foreign "setinputfile" 
(string @-> int @-> returning int) s 0 in +let setinputfile ?push:(push=false) (s : string) : unit = + let _ = foreign "setinputfile" (string @-> int @-> returning int) s (if push then 1 else 0) in () let setvar (x : string) (v : string) : unit = From 2602ddfd95bc0a5f6a2635ca2d29f0e2984a22d6 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 25 Sep 2018 10:33:42 -0400 Subject: [PATCH 220/401] fix handling of PS1 and PS2 --- Makefile | 2 ++ dash.ml | 12 ++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 428cda4..346f498 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,8 @@ OCAMLLIB=$(shell opam config var lib) OCAMLINCLUDES=-I $(OCAMLLIB)/bytes -I $(OCAMLLIB)/ctypes OCAMLLIBS=unix.cmxa bigarray.cmxa str.cmxa ctypes.cmxa ctypes-foreign-base.cmxa ctypes-foreign-unthreaded.cmxa +all : main.native dash.cmxa + test : main.native $(wildcard tests/*) @for f in tests/*; do \ ./round_trip.sh ./main.native $$f 2>test.err; \ diff --git a/dash.ml b/dash.ml index 9687fbf..2755eec 100644 --- a/dash.ml +++ b/dash.ml @@ -246,19 +246,19 @@ let eqptr p1 p2 = addrof p1 = addrof p2 let nullptr (p : 'a ptr) = addrof p = Nativeint.zero -let parse_next () = - let n = parsecmd 0 in +let parse_next ?interactive:(i=false) () = + let n = parsecmd (if i then 1 else 0) in if eqptr n neof then `Done else if nullptr n then `Null (* comment or blank line ... 
*) else `Parsed n -let rec parse_all () : (node union ptr) list = - match parse_next () with +let rec parse_all ?interactive:(i=false) () : (node union ptr) list = + match parse_next ~interactive:i () with | `Done -> [] - | `Null -> parse_all () - | `Parsed n -> n::parse_all () + | `Null -> parse_all ~interactive:i () + | `Parsed n -> n::parse_all ~interactive:i () let (@->) (s : ('b, 'c) structured ptr) (f : ('a, ('b, 'c) structured) field) = getf (!@ s) f From 8a6f520850cd5fef70949c5f393a57350bdd0131 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 25 Sep 2018 13:14:44 -0400 Subject: [PATCH 221/401] knock off TODOs in web interface, cleaner error handling --- Makefile | 2 +- dash.ml | 28 ++++++++++++++++++---------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 346f498..d81c901 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ main.native : dash.cmx ast.cmx compile.cmx main.cmx ocamlopt.opt -cclib -force_load $(DASH)/libdash.a $(OCAMLINCLUDES) $(OCAMLLIBS) $^ -o $@ # ocamlbuild -log build.log -no-hygiene -pkg ctypes.foreign -lflags "-cclib -force_load $(DASH)/libdash.a" $@ -dash.cmxa : dash.cmx ast.cmx compile.cmx +dash.cmxa : dash.cmx ast.cmx compile.cmx $(DASH)/libdash.a ocamlopt.opt -cclib -force_load $(DASH)/libdash.a $(OCAMLINCLUDES) $^ -a -o $@ %.cmx : %.ml diff --git a/dash.ml b/dash.ml index 2755eec..c18e9aa 100644 --- a/dash.ml +++ b/dash.ml @@ -231,14 +231,15 @@ let () = seal nnot let node_nnot = field node "nnot" nnot let () = seal node -let parsecmd : int -> node union ptr = - foreign "parsecmd" (int @-> returning (ptr node)) +let parsecmd_safe : int -> node union ptr = + foreign "parsecmd_safe" (int @-> returning (ptr node)) let parse s = setinputstring s; (* TODO set stack mark? 
*) - parsecmd 0 + parsecmd_safe 0 let neof : node union ptr = foreign_value "tokpushback" node +let nerr : node union ptr = foreign_value "lasttoken" node let addrof p = raw_address_of_ptr (to_voidp p) @@ -246,19 +247,26 @@ let eqptr p1 p2 = addrof p1 = addrof p2 let nullptr (p : 'a ptr) = addrof p = Nativeint.zero +type parse_result = Done | Error | Null | Parsed of (node union ptr) + +exception Parse_error + let parse_next ?interactive:(i=false) () = - let n = parsecmd (if i then 1 else 0) in + let n = parsecmd_safe (if i then 1 else 0) in if eqptr n neof - then `Done + then Done + else if eqptr n nerr + then Error else if nullptr n - then `Null (* comment or blank line ... *) - else `Parsed n + then Null (* comment or blank line or error ... *) + else Parsed n let rec parse_all ?interactive:(i=false) () : (node union ptr) list = match parse_next ~interactive:i () with - | `Done -> [] - | `Null -> parse_all ~interactive:i () - | `Parsed n -> n::parse_all ~interactive:i () + | Done -> [] + | Error -> raise Parse_error + | Null -> parse_all ~interactive:i () + | Parsed n -> n::parse_all ~interactive:i () let (@->) (s : ('b, 'c) structured ptr) (f : ('a, ('b, 'c) structured) field) = getf (!@ s) f From b1d5d24e766273c81b88a9d97073018c39070b4e Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 25 Sep 2018 14:55:10 -0400 Subject: [PATCH 222/401] support aliases --- Makefile | 5 +++++ dash.ml | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/Makefile b/Makefile index d81c901..e5ca608 100644 --- a/Makefile +++ b/Makefile @@ -28,3 +28,8 @@ dash.cmxa : dash.cmx ast.cmx compile.cmx $(DASH)/libdash.a clean : rm -f *.o test *~ *.cmi *.cmx main.native dash.a dash.cmxa rm -rf _build + +# ocamldep output +ast.cmx : dash.cmx +compile.cmx : ast.cmx +main.cmx : dash.cmx compile.cmx ast.cmx diff --git a/dash.ml b/dash.ml index c18e9aa..e8d0df0 100644 --- a/dash.ml +++ b/dash.ml @@ -63,6 +63,12 @@ let setvar (x : string) (v : string) : unit = let _ = foreign 
"setvar" (string @-> string @-> int @-> returning (ptr void)) x v 0 in () +let setalias (name : string) (mapping : string) : unit = + foreign "setalias" (string @-> string @-> returning void) name mapping + +let unalias (name : string) : unit = + foreign "unalias" (string @-> returning void) name + (* first, we define the node type... *) type node From 53cec04e525ecd5b23894c71451f4fba01877586 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 26 Sep 2018 10:01:04 -0400 Subject: [PATCH 223/401] move ocaml bindings to a separate directory --- Makefile => ocaml/Makefile | 0 ast.ml => ocaml/ast.ml | 0 compile.ml => ocaml/compile.ml | 0 dash.ml => ocaml/dash.ml | 0 expansion.ml => ocaml/expansion.ml | 0 main.ml => ocaml/main.ml | 0 round_trip.sh => ocaml/round_trip.sh | 0 test.c => ocaml/test.c | 0 {tests => ocaml/tests}/braces_amp.sh | 0 {tests => ocaml/tests}/diverge.sh | 0 {tests => ocaml/tests}/escaping | 0 {tests => ocaml/tests}/for_spaces.sh | 0 {tests => ocaml/tests}/grab_submissions.sh | 0 {tests => ocaml/tests}/grade.sh | 0 {tests => ocaml/tests}/run_grader.sh | 0 {tests => ocaml/tests}/run_lda.sh | 0 {tests => ocaml/tests}/send_emails.sh | 0 {tests => ocaml/tests}/syntax | 0 {tests => ocaml/tests}/test.sh | 0 {tests => ocaml/tests}/timeout3 | 0 20 files changed, 0 insertions(+), 0 deletions(-) rename Makefile => ocaml/Makefile (100%) rename ast.ml => ocaml/ast.ml (100%) rename compile.ml => ocaml/compile.ml (100%) rename dash.ml => ocaml/dash.ml (100%) rename expansion.ml => ocaml/expansion.ml (100%) rename main.ml => ocaml/main.ml (100%) rename round_trip.sh => ocaml/round_trip.sh (100%) rename test.c => ocaml/test.c (100%) rename {tests => ocaml/tests}/braces_amp.sh (100%) rename {tests => ocaml/tests}/diverge.sh (100%) rename {tests => ocaml/tests}/escaping (100%) rename {tests => ocaml/tests}/for_spaces.sh (100%) rename {tests => ocaml/tests}/grab_submissions.sh (100%) rename {tests => ocaml/tests}/grade.sh (100%) rename {tests => 
ocaml/tests}/run_grader.sh (100%) rename {tests => ocaml/tests}/run_lda.sh (100%) rename {tests => ocaml/tests}/send_emails.sh (100%) rename {tests => ocaml/tests}/syntax (100%) rename {tests => ocaml/tests}/test.sh (100%) rename {tests => ocaml/tests}/timeout3 (100%) diff --git a/Makefile b/ocaml/Makefile similarity index 100% rename from Makefile rename to ocaml/Makefile diff --git a/ast.ml b/ocaml/ast.ml similarity index 100% rename from ast.ml rename to ocaml/ast.ml diff --git a/compile.ml b/ocaml/compile.ml similarity index 100% rename from compile.ml rename to ocaml/compile.ml diff --git a/dash.ml b/ocaml/dash.ml similarity index 100% rename from dash.ml rename to ocaml/dash.ml diff --git a/expansion.ml b/ocaml/expansion.ml similarity index 100% rename from expansion.ml rename to ocaml/expansion.ml diff --git a/main.ml b/ocaml/main.ml similarity index 100% rename from main.ml rename to ocaml/main.ml diff --git a/round_trip.sh b/ocaml/round_trip.sh similarity index 100% rename from round_trip.sh rename to ocaml/round_trip.sh diff --git a/test.c b/ocaml/test.c similarity index 100% rename from test.c rename to ocaml/test.c diff --git a/tests/braces_amp.sh b/ocaml/tests/braces_amp.sh similarity index 100% rename from tests/braces_amp.sh rename to ocaml/tests/braces_amp.sh diff --git a/tests/diverge.sh b/ocaml/tests/diverge.sh similarity index 100% rename from tests/diverge.sh rename to ocaml/tests/diverge.sh diff --git a/tests/escaping b/ocaml/tests/escaping similarity index 100% rename from tests/escaping rename to ocaml/tests/escaping diff --git a/tests/for_spaces.sh b/ocaml/tests/for_spaces.sh similarity index 100% rename from tests/for_spaces.sh rename to ocaml/tests/for_spaces.sh diff --git a/tests/grab_submissions.sh b/ocaml/tests/grab_submissions.sh similarity index 100% rename from tests/grab_submissions.sh rename to ocaml/tests/grab_submissions.sh diff --git a/tests/grade.sh b/ocaml/tests/grade.sh similarity index 100% rename from tests/grade.sh rename 
to ocaml/tests/grade.sh diff --git a/tests/run_grader.sh b/ocaml/tests/run_grader.sh similarity index 100% rename from tests/run_grader.sh rename to ocaml/tests/run_grader.sh diff --git a/tests/run_lda.sh b/ocaml/tests/run_lda.sh similarity index 100% rename from tests/run_lda.sh rename to ocaml/tests/run_lda.sh diff --git a/tests/send_emails.sh b/ocaml/tests/send_emails.sh similarity index 100% rename from tests/send_emails.sh rename to ocaml/tests/send_emails.sh diff --git a/tests/syntax b/ocaml/tests/syntax similarity index 100% rename from tests/syntax rename to ocaml/tests/syntax diff --git a/tests/test.sh b/ocaml/tests/test.sh similarity index 100% rename from tests/test.sh rename to ocaml/tests/test.sh diff --git a/tests/timeout3 b/ocaml/tests/timeout3 similarity index 100% rename from tests/timeout3 rename to ocaml/tests/timeout3 From 38d03df1725f13612549fbb0673c9f414c8c17c1 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 26 Sep 2018 10:22:59 -0400 Subject: [PATCH 224/401] building static library and executable --- .gitignore | 5 ++++- configure.ac | 7 +++++++ src/Makefile.am | 6 ++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 3c66d6e..4abc1f9 100644 --- a/.gitignore +++ b/.gitignore @@ -41,7 +41,6 @@ Makefile .Trash* *[Tt]humbs.db - *.a *.cmxa @@ -53,3 +52,7 @@ test *.cmi _build test.err + +ar-lib +config.* +src/libdash.a diff --git a/configure.ac b/configure.ac index dd135e9..bf6afe7 100644 --- a/configure.ac +++ b/configure.ac @@ -8,8 +8,15 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)]) dnl Checks for programs. 
AC_PROG_CC + AC_USE_SYSTEM_EXTENSIONS +dnl AC_GNU_SOURCE +dnl AC_PROG_YACC +dnl MMG 2018-09-26 support building the library +AC_PROG_RANLIB +AM_PROG_AR + AC_MSG_CHECKING([for build system compiler]) if test "$cross_compiling" = yes; then CC_FOR_BUILD=${CC_FOR_BUILD-cc} diff --git a/src/Makefile.am b/src/Makefile.am index 1732465..d8432c6 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -16,6 +16,10 @@ COMPILE_FOR_BUILD = \ bin_PROGRAMS = dash +dash_CFLAGS = -DMAIN + +noinst_LIBRARIES = libdash.a + dash_CFILES = \ alias.c arith_yacc.c arith_yylex.c cd.c error.c eval.c exec.c expand.c \ histedit.c input.c jobs.c mail.c main.c memalloc.c miscbltin.c \ @@ -30,6 +34,8 @@ dash_SOURCES = \ show.h system.h trap.h var.h dash_LDADD = builtins.o init.o nodes.o signames.o syntax.o +libdash_a_SOURCES = $(dash_SOURCES) + HELPERS = mkinit mksyntax mknodes mksignames BUILT_SOURCES = builtins.h nodes.h syntax.h token.h token_vars.h From 9c61c4e565fef82e7ac6c33ca8f082410b876982 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 26 Sep 2018 10:54:25 -0400 Subject: [PATCH 225/401] clean build of everything --- ocaml/Makefile | 25 +-- ocaml/ast.ml | 2 - ocaml/ast.mli | 55 ++++++ ocaml/compile.ml | 163 ----------------- ocaml/dash.ml | 4 +- ocaml/dash.mli | 220 +++++++++++++++++++++++ ocaml/expansion.ml | 59 ------- ocaml/main.ml | 12 +- ocaml/test.c | 426 --------------------------------------------- src/Makefile.am | 2 + 10 files changed, 297 insertions(+), 671 deletions(-) create mode 100644 ocaml/ast.mli delete mode 100644 ocaml/compile.ml create mode 100644 ocaml/dash.mli delete mode 100644 ocaml/expansion.ml delete mode 100644 ocaml/test.c diff --git a/ocaml/Makefile b/ocaml/Makefile index e5ca608..3f5e8b7 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -1,5 +1,4 @@ -DASH=$(shell (cd ../dash; pwd)) -DASHSRC=$(DASH)/src +DASH=.. 
OCAMLLIB=$(shell opam config var lib) OCAMLINCLUDES=-I $(OCAMLLIB)/bytes -I $(OCAMLLIB)/ctypes @@ -13,23 +12,25 @@ test : main.native $(wildcard tests/*) done -#/usr/local/bin/ocamlopt.opt -cclib -force_load /Users/mgree/fsh/dash/libdash.a -I /Users/mgree/.opam/coq-8.4/lib/bytes -I /Users/mgree/.opam/coq-8.4/lib/ctypes -I /usr/local/lib/ocaml /usr/local/lib/ocaml/unix.cmxa /usr/local/lib/ocaml/bigarray.cmxa /usr/local/lib/ocaml/str.cmxa /Users/mgree/.opam/coq-8.4/lib/ctypes/ctypes.cmxa /Users/mgree/.opam/coq-8.4/lib/ctypes/ctypes-foreign-base.cmxa /Users/mgree/.opam/coq-8.4/lib/ctypes/ctypes-foreign-unthreaded.cmxa dash.cmx ast.cmx compile.cmx main.cmx -o main.native +main.native : dash.cmx ast.cmx main.cmx + ocamlopt.opt -cclib -force_load $(DASH)/src/libdash.a $(OCAMLINCLUDES) $(OCAMLLIBS) $^ -o $@ -main.native : dash.cmx ast.cmx compile.cmx main.cmx - ocamlopt.opt -cclib -force_load $(DASH)/libdash.a $(OCAMLINCLUDES) $(OCAMLLIBS) $^ -o $@ -# ocamlbuild -log build.log -no-hygiene -pkg ctypes.foreign -lflags "-cclib -force_load $(DASH)/libdash.a" $@ -dash.cmxa : dash.cmx ast.cmx compile.cmx $(DASH)/libdash.a - ocamlopt.opt -cclib -force_load $(DASH)/libdash.a $(OCAMLINCLUDES) $^ -a -o $@ +dash.cmxa : dash.cmx ast.cmx $(DASH)/src/libdash.a + ocamlopt.opt -cclib -force_load $(DASH)/src/libdash.a $(OCAMLINCLUDES) $^ -a -o $@ %.cmx : %.ml - ocamlopt.opt $(OCAMLINCLUDES) -c -o $@ $< + ocamlopt.opt $(OCAMLINCLUDES) -c $< -o $@ + +%.cmi : %.mli + ocamlopt.opt $(OCAMLINCLUDES) -c $< -o $@ clean : rm -f *.o test *~ *.cmi *.cmx main.native dash.a dash.cmxa rm -rf _build # ocamldep output -ast.cmx : dash.cmx -compile.cmx : ast.cmx -main.cmx : dash.cmx compile.cmx ast.cmx +ast.cmx : dash.cmx ast.cmi +ast.cmi : dash.cmi +dash.cmx : dash.cmi +main.cmx : dash.cmx ast.cmx diff --git a/ocaml/ast.ml b/ocaml/ast.ml index c398297..814b0ba 100644 --- a/ocaml/ast.ml +++ b/ocaml/ast.ml @@ -315,8 +315,6 @@ let show_unless expected actual = then "" else string_of_int actual -let braces s 
= "{ " ^ s ^ " ; }" - let background s = "{ " ^ s ^ " & }" let rec to_string = function diff --git a/ocaml/ast.mli b/ocaml/ast.mli new file mode 100644 index 0000000..d64fe25 --- /dev/null +++ b/ocaml/ast.mli @@ -0,0 +1,55 @@ +type linno = int + +type t = + Command of linno * assign list * args * redirection list + | Pipe of bool * t list + | Redir of linno * t * redirection list + | Background of linno * t * redirection list + | Subshell of linno * t * redirection list + | And of t * t + | Or of t * t + | Not of t + | Semi of t * t + | If of t * t * t + | While of t * t + | For of linno * arg * t * string + | Case of linno * arg * case list + | Defun of linno * string * t +and assign = string * arg +and redirection = + File of redir_type * int * arg + | Dup of dup_type * int * int + | Heredoc of heredoc_type * int * arg +and redir_type = To | Clobber | From | FromTo | Append +and dup_type = ToFD | FromFD +and heredoc_type = Here | XHere +and args = arg list +and arg = arg_char list +and arg_char = + C of char + | E of char + | T of string option + | A of arg + | V of var_type * bool * string * arg + | Q of arg + | B of t +and var_type = + Normal + | Minus + | Plus + | Question + | Assign + | TrimR + | TrimRMax + | TrimL + | TrimLMax + | Length +and case = { cpattern : arg list; cbody : t; } + +val of_node : Dash.node Ctypes.union Ctypes.ptr -> t + +(* command that does nothing *) +val skip : t + +(* render to string *) +val to_string : t -> string diff --git a/ocaml/compile.ml b/ocaml/compile.ml deleted file mode 100644 index e620fb6..0000000 --- a/ocaml/compile.ml +++ /dev/null @@ -1,163 +0,0 @@ -type var = string - -type pat = string - -type expr = - | Var of var (* : whatever the var is *) - - | Int of int (* : int *) - | Eq of expr * expr (* : bool *) - | Not of expr (* : int *) - - - | Bind of var * expr * expr - | If of expr * expr * expr - - | Lookup of string (* : string *) - | Assign of string * expr (* : () *) - | Execve of string * string list (* : 
int/pid *) - | Match of string * pat (* : bool *) - | Defun of string * expr (* : () *) - | PushRedir (* : () *) - | PopRedir (* : () *) - | Fork of expr (* : int/pid *) - | Wait of expr (* : int/status code *) - | Pipe of expr list (* : int/status code *) - | Capture of expr (* : string *) - - | Arith of expr (* : string *) - - | Str of string (* : string *) - | Concat of expr list (* : string *) - | Length of expr (* : int *) - -let bind x e1 e2 = Bind(x,e1,e2) - -let cond i t e = If(i,t,e) - -let fresh : var -> var * expr = - let ctr = ref 0 in - fun base -> let s = base ^ string_of_int !ctr in incr ctr; (s,Var s) - -let set_status (run : expr) : expr = - let (ec,ec_var) = fresh "ec" in - bind ec run (bind "_" (Assign ("?",ec_var)) ec_var) - -let rec setup_redir (r : Ast.redirection) : expr = failwith "setup_redir" - -let with_redirects (rs : Ast.redirection list) (c : expr) : expr = - List.fold_right (fun r cmd -> bind "_" (setup_redir r) cmd) rs c - -let save_fds (c : expr) : expr = - let (status,status_var) = fresh "status" in - bind "_" PushRedir - (bind status c - (bind "_" PopRedir status_var)) - -let split_fields (arg : expr list) : expr = failwith "split_fields" - -let expand_paths (fields : expr) : expr = failwith "expand_paths" - -let remove_quotes (fields : expr) : expr = failwith "remove_quotes" - -(* TODO - -$?, etc. -- shell state explicitly or implicitly carried? - -pipes -- open followed by dup2/fcntl - -break, continue, return -- evalskip/break count -source, eval -- builtin? 
analyses just have to take account -trap -- need abstract signal handlers - -aliases -- just keep a table somewhere - -*) - -(* invariant: returns status code *) -let rec compile (c : Ast.t) : expr = - set_status - (begin match c with - | Ast.Command (_,assigns,args,redirs) -> - (* expand assignments - expand arguments - determine if it's a command or a built in - execve command/run builtin/call defun, or error on bad lookup - *) - with_redirects redirs (failwith "command") - | Ast.Pipe (bg,cs) -> - let (pid,pid_var) = fresh "pid" in - bind pid (Fork (Pipe (List.map compile cs))) - (if bg then (Int 0) else Wait pid_var) - | Ast.Redir (_,c,redirs) -> - save_fds (with_redirects redirs (compile c)) - | Ast.Background (_,c,redirs) -> - let (pid,pid_var) = fresh "pid" in - bind pid (Fork (with_redirects redirs (compile c))) (Int 0) - | Ast.Subshell (_,c,redirs) -> - let (pid,pid_var) = fresh "pid" in - bind pid (Fork (with_redirects redirs (compile c))) (Wait pid_var) - | Ast.And (c1,c2) -> - let (status,status_var) = fresh "status" in - bind status (compile c1) (cond status_var (compile c2) status_var) - | Ast.Or (c1,c2) -> - let (status,status_var) = fresh "status" in - bind status (compile c1) (cond status_var status_var (compile c2)) - | Ast.Not c -> - let (status,status_var) = fresh "status" in - bind status (compile c) (Not status_var) - | Ast.Semi (c1,c2) -> - bind "_" (compile c1) (compile c2) - | Ast.If (c_cond,c_then,c_else) -> - let (status,status_var) = fresh "status" in - bind status (compile c_cond) (cond status_var (compile c_then) (compile c_else)) - | Ast.While (cond,body) -> failwith "while" - | Ast.For (_,args,body,var) -> failwith "for" - | Ast.Case (_,args,cases) -> failwith "cases" - | Ast.Defun (_,name,body) -> Defun (name,compile body) - end) - -(* invariant: returns expanded string *) -and expands (quoted : bool) (args : Ast.args) : expr = - Concat (List.map (expand quoted) args) (* TODO insert field separations *) - -(* invariant: returns 
expanded string *) -and expand (quoted : bool) (arg : Ast.arg) : expr = - (* TODO field splitting, path expansion, quote removal *) - remove_quotes (expand_paths (split_fields (List.map (expand_char quoted) arg))) - -(* invariant: returns expanded string *) -and expand_char (quoted : bool) (a : Ast.arg_char) : expr = - match a with - | Ast.C chr -> Str (String.make 1 chr) - | Ast.E esc -> Str (String.make 1 esc) - | Ast.T None -> Str "~" - | Ast.T (Some usr) -> Str ("~" ^ usr) - | Ast.A ari -> Arith (expand quoted ari) - | Ast.V(fmt,nul,var,arg) -> expand_var quoted fmt nul var arg - | Ast.Q arg -> expand true arg - | Ast.B cmd -> Capture (compile cmd) - -and expand_var (quoted : bool) (fmt : Ast.var_type) (nul : bool) (var : string) (arg : Ast.arg) : expr = - match var with - | "@" -> failwith "$@" - | "*" -> failwith "$*" - | "?" -> failwith "$?" - | "$" -> failwith "$$" - | "#" -> failwith "$#" - | "!" -> failwith "$!" - | "-" -> failwith "$-" - | _ -> let (res,res_var) = fresh "res" in - bind res (Lookup var) - (begin match fmt with - | Ast.Normal -> res_var - | Ast.Minus -> failwith "${-}" - | Ast.Plus -> failwith "${+}" - | Ast.Question -> failwith "${?}" - | Ast.Assign -> failwith "${=}" - | Ast.TrimR -> failwith "${%}" - | Ast.TrimRMax -> failwith "${%%}" - | Ast.TrimL -> failwith "${#}" - | Ast.TrimLMax -> failwith "${##}" - | Ast.Length -> Length res_var - end) diff --git a/ocaml/dash.ml b/ocaml/dash.ml index e8d0df0..13027b2 100644 --- a/ocaml/dash.ml +++ b/ocaml/dash.ml @@ -40,10 +40,10 @@ let with_handler (k : int -> 'a) : 'a = k r end -let init : unit -> unit = foreign "init" (void @-> returning void) +let dash_init : unit -> unit = foreign "init" (void @-> returning void) let initialize () = - init (); + dash_init (); init_stack () let popfile : unit -> unit = diff --git a/ocaml/dash.mli b/ocaml/dash.mli new file mode 100644 index 0000000..9dbc816 --- /dev/null +++ b/ocaml/dash.mli @@ -0,0 +1,220 @@ +(* dash internals + + call initialize before doing 
anything! +*) + +val initialize : unit -> unit + +val popfile : unit -> unit +val setinputstring : string -> unit +val setinputtostdin : unit -> unit +val setinputfile : ?push:bool -> string -> unit + +val setvar : string -> string -> unit +val setalias : string -> string -> unit +val unalias : string -> unit + +(* Ctypes mappings of the node types *) +type node +val node : node Ctypes.union Ctypes.typ +val node_type : (int, node Ctypes.union) Ctypes.field + +type nodelist +val nodelist_next : + (nodelist Ctypes.structure Ctypes_static.ptr, nodelist Ctypes.structure) + Ctypes.field +val nodelist_n : + (node Ctypes.union Ctypes_static.ptr, nodelist Ctypes.structure) + Ctypes.field + +type ncmd +val ncmd : ncmd Ctypes.structure Ctypes.typ +val ncmd_type : (int, ncmd Ctypes.structure) Ctypes.field +val ncmd_linno : (int, ncmd Ctypes.structure) Ctypes.field +val ncmd_assign : + (node Ctypes.union Ctypes_static.ptr, ncmd Ctypes.structure) Ctypes.field +val ncmd_args : + (node Ctypes.union Ctypes_static.ptr, ncmd Ctypes.structure) Ctypes.field +val ncmd_redirect : + (node Ctypes.union Ctypes_static.ptr, ncmd Ctypes.structure) Ctypes.field +val node_ncmd : (ncmd Ctypes.structure, node Ctypes.union) Ctypes.field + +type npipe +val npipe : npipe Ctypes.structure Ctypes.typ +val npipe_type : (int, npipe Ctypes.structure) Ctypes.field +val npipe_backgnd : (int, npipe Ctypes.structure) Ctypes.field +val npipe_cmdlist : + (nodelist Ctypes.structure Ctypes_static.ptr, npipe Ctypes.structure) + Ctypes.field +val node_npipe : (npipe Ctypes.structure, node Ctypes.union) Ctypes.field + +type nredir +val nredir : nredir Ctypes.structure Ctypes.typ +val nredir_type : (int, nredir Ctypes.structure) Ctypes.field +val nredir_linno : (int, nredir Ctypes.structure) Ctypes.field +val nredir_n : + (node Ctypes.union Ctypes_static.ptr, nredir Ctypes.structure) Ctypes.field +val nredir_redirect : + (node Ctypes.union Ctypes_static.ptr, nredir Ctypes.structure) Ctypes.field +val node_nredir : 
(nredir Ctypes.structure, node Ctypes.union) Ctypes.field + +type nbinary +val nbinary : nbinary Ctypes.structure Ctypes.typ +val nbinary_type : (int, nbinary Ctypes.structure) Ctypes.field +val nbinary_ch1 : + (node Ctypes.union Ctypes_static.ptr, nbinary Ctypes.structure) + Ctypes.field +val nbinary_ch2 : + (node Ctypes.union Ctypes_static.ptr, nbinary Ctypes.structure) + Ctypes.field +val node_nbinary : (nbinary Ctypes.structure, node Ctypes.union) Ctypes.field + +type nif +val nif : nif Ctypes.structure Ctypes.typ +val nif_type : (int, nif Ctypes.structure) Ctypes.field +val nif_test : + (node Ctypes.union Ctypes_static.ptr, nif Ctypes.structure) Ctypes.field +val nif_ifpart : + (node Ctypes.union Ctypes_static.ptr, nif Ctypes.structure) Ctypes.field +val nif_elsepart : + (node Ctypes.union Ctypes_static.ptr, nif Ctypes.structure) Ctypes.field +val node_nif : (nif Ctypes.structure, node Ctypes.union) Ctypes.field + +type nfor +val nfor : nfor Ctypes.structure Ctypes.typ +val nfor_type : (int, nfor Ctypes.structure) Ctypes.field +val nfor_linno : (int, nfor Ctypes.structure) Ctypes.field +val nfor_args : + (node Ctypes.union Ctypes_static.ptr, nfor Ctypes.structure) Ctypes.field +val nfor_body : + (node Ctypes.union Ctypes_static.ptr, nfor Ctypes.structure) Ctypes.field +val nfor_var : (string, nfor Ctypes.structure) Ctypes.field +val node_nfor : (nfor Ctypes.structure, node Ctypes.union) Ctypes.field + +type ncase +val ncase : ncase Ctypes.structure Ctypes.typ +val ncase_type : (int, ncase Ctypes.structure) Ctypes.field +val ncase_linno : (int, ncase Ctypes.structure) Ctypes.field +val ncase_expr : + (node Ctypes.union Ctypes_static.ptr, ncase Ctypes.structure) Ctypes.field +val ncase_cases : + (node Ctypes.union Ctypes_static.ptr, ncase Ctypes.structure) Ctypes.field +val node_ncase : (ncase Ctypes.structure, node Ctypes.union) Ctypes.field + +type nclist +val nclist : nclist Ctypes.structure Ctypes.typ +val nclist_type : (int, nclist Ctypes.structure) 
Ctypes.field +val nclist_next : + (node Ctypes.union Ctypes_static.ptr, nclist Ctypes.structure) Ctypes.field +val nclist_pattern : + (node Ctypes.union Ctypes_static.ptr, nclist Ctypes.structure) Ctypes.field +val nclist_body : + (node Ctypes.union Ctypes_static.ptr, nclist Ctypes.structure) Ctypes.field +val node_nclist : (nclist Ctypes.structure, node Ctypes.union) Ctypes.field + +type ndefun +val ndefun : ndefun Ctypes.structure Ctypes.typ +val ndefun_type : (int, ndefun Ctypes.structure) Ctypes.field +val ndefun_linno : (int, ndefun Ctypes.structure) Ctypes.field +val ndefun_text : (string, ndefun Ctypes.structure) Ctypes.field +val ndefun_body : + (node Ctypes.union Ctypes_static.ptr, ndefun Ctypes.structure) Ctypes.field +val node_ndefun : (ndefun Ctypes.structure, node Ctypes.union) Ctypes.field + +type narg +val narg : narg Ctypes.structure Ctypes.typ +val narg_type : (int, narg Ctypes.structure) Ctypes.field +val narg_next : + (node Ctypes.union Ctypes_static.ptr, narg Ctypes.structure) Ctypes.field +val narg_text : (string, narg Ctypes.structure) Ctypes.field +val narg_backquote : + (nodelist Ctypes.structure Ctypes_static.ptr, narg Ctypes.structure) + Ctypes.field +val node_narg : (narg Ctypes.structure, node Ctypes.union) Ctypes.field + +type nfile +val nfile : nfile Ctypes.structure Ctypes.typ +val nfile_type : (int, nfile Ctypes.structure) Ctypes.field +val nfile_next : + (node Ctypes.union Ctypes_static.ptr, nfile Ctypes.structure) Ctypes.field +val nfile_fd : (int, nfile Ctypes.structure) Ctypes.field +val nfile_fname : + (node Ctypes.union Ctypes_static.ptr, nfile Ctypes.structure) Ctypes.field +val nfile_expfname : (string, nfile Ctypes.structure) Ctypes.field +val node_nfile : (nfile Ctypes.structure, node Ctypes.union) Ctypes.field + +type ndup +val ndup : ndup Ctypes.structure Ctypes.typ +val ndup_type : (int, ndup Ctypes.structure) Ctypes.field +val ndup_next : + (node Ctypes.union Ctypes_static.ptr, ndup Ctypes.structure) Ctypes.field +val 
ndup_fd : (int, ndup Ctypes.structure) Ctypes.field +val ndup_dupfd : (int, ndup Ctypes.structure) Ctypes.field +val ndup_vname : + (node Ctypes.union Ctypes_static.ptr, ndup Ctypes.structure) Ctypes.field +val node_ndup : (ndup Ctypes.structure, node Ctypes.union) Ctypes.field + +type nhere +val nhere : nhere Ctypes.structure Ctypes.typ +val nhere_type : (int, nhere Ctypes.structure) Ctypes.field +val nhere_next : + (node Ctypes.union Ctypes_static.ptr, nhere Ctypes.structure) Ctypes.field +val nhere_fd : (int, nhere Ctypes.structure) Ctypes.field +val nhere_doc : + (node Ctypes.union Ctypes_static.ptr, nhere Ctypes.structure) Ctypes.field +val node_nhere : (nhere Ctypes.structure, node Ctypes.union) Ctypes.field + +type nnot +val nnot : nnot Ctypes.structure Ctypes.typ +val nnot_type : (int, nnot Ctypes.structure) Ctypes.field +val nnot_com : + (node Ctypes.union Ctypes_static.ptr, nnot Ctypes.structure) Ctypes.field +val node_nnot : (nnot Ctypes.structure, node Ctypes.union) Ctypes.field + +val ( @-> ) : + ('b, 'c) Ctypes.structured Ctypes.ptr -> + ('a, ('b, 'c) Ctypes.structured) Ctypes.field -> 'a +val arglist : narg Ctypes.structure -> narg Ctypes.structure list +val nodelist : + nodelist Ctypes.structure Ctypes.ptr -> node Ctypes.union Ctypes.ptr list +val redirlist : + node Ctypes.union Ctypes.ptr -> + [> `Dup of int * string * ndup Ctypes.structure + | `File of int * string * nfile Ctypes.structure + | `Here of int * string * bool * nhere Ctypes.structure ] + list +val caselist : + node Ctypes.union Ctypes.ptr -> + (node Ctypes.union Ctypes_static.ptr * node Ctypes.union Ctypes_static.ptr) + list + +(* useful functions for working with the Ctypes AST *) +val addrof : 'a Ctypes.ptr -> nativeint +val eqptr : 'a Ctypes.ptr -> 'b Ctypes.ptr -> bool +val nullptr : 'a Ctypes.ptr -> bool + +(* useful functions for pretty printing *) +val explode : string -> char list +val implode : char list -> string +val intercalate : string -> string list -> string +val lines 
: string -> string list +val split_at : ('a -> bool) -> 'a list -> 'a list * 'a list + +(* shell-specific functions for pretty printing *) +val braces : string -> string +val parens : string -> string +val fresh_marker : string list -> string -> string + +(* parser *) +type parse_result = + Done + | Error + | Null + | Parsed of node Ctypes.union Ctypes.ptr +exception Parse_error +val parse_next : ?interactive:bool -> unit -> parse_result +val parse_all : + ?interactive:bool -> unit -> node Ctypes.union Ctypes.ptr list + +(* native pretty printer *) +val show : node Ctypes.union Ctypes.ptr -> string diff --git a/ocaml/expansion.ml b/ocaml/expansion.ml deleted file mode 100644 index caf460b..0000000 --- a/ocaml/expansion.ml +++ /dev/null @@ -1,59 +0,0 @@ -(* - - - - -*) - -type param = Param of string - -type op = - | Hyp - | ColonHyp - | Eq - | ColonEq - | Ques - | ColonQues - | Plus - | ColonPlus - | Percent - | PercentPercent - | Hash - | HashHash - -type word - = WLiteral of string - | WParam of param - | WArith of string - | WTilde of string - | WSubst of param * op * word - | WLength of param - | WCommand of string - | WDoubleQuote of string - | WCat of word * word - -let cat (w1 : word) (w2: word) = match (w1, w2) with - | WLiteral str1, WLiteral str2 = WLiteral (str1 ^ str2) - | _, _ -> WCat (w1, w2) - -let rec parse_word (str : char list) : word = str match - (* Section 2.3, bullet 1 *) - | [] -> WLiteral "" - (* Section 2.2.1 *) - | '\' :: '\n' :: rest -> parse_word rest - | '\' :: ch :: rest -> cat (WLiteral (Char.to_string ch)) (parse_word rest) - (* Section 2.2.2 *) - | '\'' :: rest -> parse_word_in_single_quotes rest [] - (* Section 2.3, bullet 5 *) - | '$' :: '(' :: '(' :: rest -> parse_arith_word rest - -and parse_arith_word (str : char list) : word = str match - | ')' :: ')' :: rest - -(* Section 2.2.2 *) -and parse_word_in_single_quotes (str : char list) (chars : char list): word = - match chars with - | '\'' :: rest -> cat (WLiteral 
(List.of_char_list (List.rev chars))) parse_word rest - | ch :: rest -> parse_word_in_single_quotes rest (ch :: chars) - | [] -> failwith "EOF reading a single-quoted string" - diff --git a/ocaml/main.ml b/ocaml/main.ml index 715a0c3..ae67f7b 100644 --- a/ocaml/main.ml +++ b/ocaml/main.ml @@ -13,15 +13,13 @@ let parse_args () = ["-v",Arg.Set verbose,"verbose mode"] (function | "-" -> input_src := None | f -> input_src := Some f) "Final argument should be either a filename or - (for STDIN); only the last such argument is used" -;; + +let main () = initialize (); parse_args (); set_input_src (); let ns = parse_all () in let cs = List.map Ast.of_node ns in - let try_compile c = - try ignore (Compile.compile c) - with _ -> prerr_endline "couldn't compile"; () in - List.iter try_compile cs; - List.map - (fun c -> print_endline (Ast.to_string c)) cs + List.map (fun c -> print_endline (Ast.to_string c)) cs;; + +main () diff --git a/ocaml/test.c b/ocaml/test.c deleted file mode 100644 index a00ebe3..0000000 --- a/ocaml/test.c +++ /dev/null @@ -1,426 +0,0 @@ -#include -#include -#include -#include - -#define DEBUG -#include "shell.h" -#include "parser.h" -#include "nodes.h" -#include "mystring.h" -#include "show.h" -#include "options.h" -#include "init.h" -#include "input.h" - -/* This file is a mash-up of dash/src/show.c, compiling out a special - version of dash into a shared library. The end goal is pulling out - the dash parser so we can work with it in Ocaml. - - I mostly wrote this file to make sure I built the library - correctly. Next step is getting the C AST into Ocaml. 
-*/ - -static void shtree(union node *, int, char *, FILE*); -static void shcmd(union node *, FILE *); -static void sharg(union node *, FILE *); -static void indent(int, char *, FILE *); -static void trstring(char *); - -int main(int argc, char** argv) { - if (argc != 2) { - fprintf(stderr, "Usage: %s [command to evaluate]\n", argv[0]); - return -1; - } - - char *cmd = strdup(argv[1]); - - /* debugging unsigned char issues */ - // printf("CTLESC = %hhu\n", (unsigned char) CTLESC); - // printf("CTLVAR = %hhu\n", (unsigned char) CTLVAR); - // printf("CTLENDVAR = %hhu\n", (unsigned char) CTLENDVAR); - // printf("CTLBACKQ = %hhu\n", (unsigned char) CTLBACKQ); - // printf("CTLARI = %hhu\n", (unsigned char) CTLARI); - // printf("CTLENDARI = %hhu\n", (unsigned char) CTLENDARI); - // printf("CTLQUOTEMARK = %hhu\n", (unsigned char) CTLQUOTEMARK); - - init(); - - setinputstring(cmd); - - union node *n = parsecmd(0); - if (n != (union node *) EOF) { - showtree(n); - } else { - fprintf(stderr, "Hit eof..."); - - } - - popfile(); - free(cmd); - - return 0; -} - -void -showtree(union node *n) -{ - trputs("showtree called\n"); - shtree(n, 1, NULL, stdout); -} - - -static void -shtree(union node *n, int ind, char *pfx, FILE *fp) -{ - struct nodelist *lp; - const char *s; - - if (n == NULL) - return; - - indent(ind, pfx, fp); - switch(n->type) { - case NSEMI: - s = "; "; - goto binop; - case NAND: - s = " && "; - goto binop; - case NOR: - s = " || "; -binop: - shtree(n->nbinary.ch1, ind, NULL, fp); - /* if (ind < 0) */ - fputs(s, fp); - shtree(n->nbinary.ch2, ind, NULL, fp); - break; - case NCMD: - shcmd(n, fp); - if (ind >= 0) - putc('\n', fp); - break; - case NPIPE: - for (lp = n->npipe.cmdlist ; lp ; lp = lp->next) { - shcmd(lp->n, fp); - if (lp->next) - fputs(" | ", fp); - } - if (n->npipe.backgnd) - fputs(" &", fp); - if (ind >= 0) - putc('\n', fp); - break; - case NWHILE: - fprintf(fp, "while\n"); - shtree(n->nbinary.ch1, ind+1, NULL, fp); - indent(ind, pfx, fp); - 
fprintf(fp, "do;\n"); - shtree(n->nbinary.ch2, ind+1, NULL, fp); - indent(ind, pfx, fp); - fprintf(fp,"done\n"); - break; - default: - fprintf(fp, "", n->type); - if (ind >= 0) - putc('\n', fp); - break; - } -} - - - -static void -shcmd(union node *cmd, FILE *fp) -{ - union node *np; - int first; - const char *s; - int dftfd; - - first = 1; - for (np = cmd->ncmd.args ; np ; np = np->narg.next) { - if (! first) - putchar(' '); - sharg(np, fp); - first = 0; - } - for (np = cmd->ncmd.redirect ; np ; np = np->nfile.next) { - if (! first) - putchar(' '); - switch (np->nfile.type) { - case NTO: s = ">"; dftfd = 1; break; - case NCLOBBER: s = ">|"; dftfd = 1; break; - case NAPPEND: s = ">>"; dftfd = 1; break; - case NTOFD: s = ">&"; dftfd = 1; break; - case NFROM: s = "<"; dftfd = 0; break; - case NFROMFD: s = "<&"; dftfd = 0; break; - case NFROMTO: s = "<>"; dftfd = 0; break; - default: s = "*error*"; dftfd = 0; break; - } - if (np->nfile.fd != dftfd) - fprintf(fp, "%d", np->nfile.fd); - fputs(s, fp); - if (np->nfile.type == NTOFD || np->nfile.type == NFROMFD) { - fprintf(fp, "%d", np->ndup.dupfd); - } else { - sharg(np->nfile.fname, fp); - } - first = 0; - } -} - - - -static void -sharg(union node *arg, FILE *fp) -{ - char *p; - struct nodelist *bqlist; - int subtype; - - if (arg->type != NARG) { - printf("\n", arg->type); - abort(); - } - bqlist = arg->narg.backquote; - for (p = arg->narg.text ; *p ; p++) { - switch ((signed char)*p) { - case CTLESC: - putc(*++p, fp); - break; - case CTLVAR: - putc('$', fp); - putc('{', fp); - subtype = *++p; - if (subtype == VSLENGTH) - putc('#', fp); - - while (*p != '=') - putc(*p++, fp); - - if (subtype & VSNUL) - putc(':', fp); - - switch (subtype & VSTYPE) { - case VSNORMAL: - putc('}', fp); - break; - case VSMINUS: - putc('-', fp); - break; - case VSPLUS: - putc('+', fp); - break; - case VSQUESTION: - putc('?', fp); - break; - case VSASSIGN: - putc('=', fp); - break; - case VSTRIMLEFT: - putc('#', fp); - break; - case 
VSTRIMLEFTMAX: - putc('#', fp); - putc('#', fp); - break; - case VSTRIMRIGHT: - putc('%', fp); - break; - case VSTRIMRIGHTMAX: - putc('%', fp); - putc('%', fp); - break; - case VSLENGTH: - break; - default: - printf("", subtype); - } - break; - case CTLENDVAR: - putc('}', fp); - break; - case CTLBACKQ: - putc('$', fp); - putc('(', fp); - shtree(bqlist->n, -1, NULL, fp); - putc(')', fp); - break; - default: - putc(*p, fp); - break; - } - } -} - - -static void -indent(int amount, char *pfx, FILE *fp) -{ - int i; - - for (i = 0 ; i < amount ; i++) { - if (pfx && i == amount - 1) - fputs(pfx, fp); - putc('\t', fp); - } -} - - - -/* - * Debugging stuff. - */ - - -FILE *tracefile; - - -void -trputc(int c) -{ - if (debug != 1) - return; - putc(c, tracefile); -} - -void -trace(const char *fmt, ...) -{ - va_list va; - - if (debug != 1) - return; - va_start(va, fmt); - (void) vfprintf(tracefile, fmt, va); - va_end(va); -} - -void -tracev(const char *fmt, va_list va) -{ - if (debug != 1) - return; - (void) vfprintf(tracefile, fmt, va); -} - - -void -trputs(const char *s) -{ - if (debug != 1) - return; - fputs(s, tracefile); -} - - -static void -trstring(char *s) -{ - char *p; - char c; - - if (debug != 1) - return; - putc('"', tracefile); - for (p = s ; *p ; p++) { - switch ((signed char)*p) { - case '\n': c = 'n'; goto backslash; - case '\t': c = 't'; goto backslash; - case '\r': c = 'r'; goto backslash; - case '"': c = '"'; goto backslash; - case '\\': c = '\\'; goto backslash; - case CTLESC: c = 'e'; goto backslash; - case CTLVAR: c = 'v'; goto backslash; - case CTLBACKQ: c = 'q'; goto backslash; -backslash: putc('\\', tracefile); - putc(c, tracefile); - break; - default: - if (*p >= ' ' && *p <= '~') - putc(*p, tracefile); - else { - putc('\\', tracefile); - putc(*p >> 6 & 03, tracefile); - putc(*p >> 3 & 07, tracefile); - putc(*p & 07, tracefile); - } - break; - } - } - putc('"', tracefile); -} - - -void -trargs(char **ap) -{ - if (debug != 1) - return; - while (*ap) { - 
trstring(*ap++); - if (*ap) - putc(' ', tracefile); - else - putc('\n', tracefile); - } -} - - -void -opentrace(void) -{ - char s[100]; -#ifdef O_APPEND - int flags; -#endif - - if (debug != 1) { - if (tracefile) - fflush(tracefile); - /* leave open because libedit might be using it */ - return; - } -#ifdef not_this_way - { - char *p; - if ((p = getenv(homestr)) == NULL) { - if (geteuid() == 0) - p = "/"; - else - p = "/tmp"; - } - scopy(p, s); - strcat(s, "/trace"); - } -#else - scopy("./trace", s); -#endif /* not_this_way */ - if (tracefile) { -#ifndef __KLIBC__ - if (!freopen(s, "a", tracefile)) { -#else - if (!(!fclose(tracefile) && (tracefile = fopen(s, "a")))) { -#endif /* __KLIBC__ */ - fprintf(stderr, "Can't re-open %s\n", s); - debug = 0; - return; - } - } else { - if ((tracefile = fopen(s, "a")) == NULL) { - fprintf(stderr, "Can't open %s\n", s); - debug = 0; - return; - } - } -#ifdef O_APPEND - if ((flags = fcntl(fileno(tracefile), F_GETFL, 0)) >= 0) - fcntl(fileno(tracefile), F_SETFL, flags | O_APPEND); -#endif -#ifndef __KLIBC__ - setlinebuf(tracefile); -#endif /* __KLIBC__ */ - fputs("\nTracing started.\n", tracefile); -} - diff --git a/src/Makefile.am b/src/Makefile.am index d8432c6..6e98086 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -36,6 +36,8 @@ dash_LDADD = builtins.o init.o nodes.o signames.o syntax.o libdash_a_SOURCES = $(dash_SOURCES) +libdash_a_LIBADD = builtins.o init.o nodes.o signames.o syntax.o + HELPERS = mkinit mksyntax mknodes mksignames BUILT_SOURCES = builtins.h nodes.h syntax.h token.h token_vars.h From b8b1123b661678bcffda95c49901e0a9fcda4ec3 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 26 Sep 2018 11:05:47 -0400 Subject: [PATCH 226/401] update readme --- README.md | 32 +++++++++++++++++++++++++++++++- ocaml/Makefile | 10 +++++----- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 0c925df..ea8cc30 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,39 @@ *libdash* is 
a fork of the Linux Kernel's `dash` shell that builds a linkable library with extra exposed interfaces. The primary use of libdash is to parse shell scripts, but it could be used for more. +# What are the dependencies? + +The C code for dash should build on a wide variety of systems. The library may not build on platforms with esoteric linkers; it's been tested on OS X. + +The OCaml code relies on `ctypes-0.11.5` and `ctypes-foreign`; everything else should be in `base`. + +# How to build it + +In the root directory run: + +``` +./autogen.sh && ./configure && make +``` + +This should construct an executable `src/dash` and a static library `src/libdash.a`. + +Then run: + +``` +cd ocaml; make +``` + +This will build the OCaml library `ocaml/dash.mxa` along with a tester, `ocaml/test.native`. You can then run (still in the `ocaml` directory): + +``` +make test +``` + +Which will use `ocaml/round_trip.sh` to ensure that every tester file in `ocaml/tests` round-trips correctly through parsing and pretty printing. + # How to use the parser The ideal interface to use is `parsecmd_safe` in `parser.c`. Parsing the POSIX shell is a complicated affair: beyond the usual locale issues, aliases affect the lexer, so one must use `setalias` and `unalias` to manage any aliases that ought to exist. # How work with the parsed nodes -The general AST is described in `nodes.h`. There are some tricky invariants around the precise formatting of control codes; the OCaml code shows some examples of working with the `args` fields. +The general AST is described in `nodes.h`. There are some tricky invariants around the precise formatting of control codes; the OCaml code shows some examples of working with the `args` fields in `ocaml/ast.ml`, which converts the C AST to an OCaml AST. 
diff --git a/ocaml/Makefile b/ocaml/Makefile index 3f5e8b7..898ac6a 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -4,15 +4,15 @@ OCAMLLIB=$(shell opam config var lib) OCAMLINCLUDES=-I $(OCAMLLIB)/bytes -I $(OCAMLLIB)/ctypes OCAMLLIBS=unix.cmxa bigarray.cmxa str.cmxa ctypes.cmxa ctypes-foreign-base.cmxa ctypes-foreign-unthreaded.cmxa -all : main.native dash.cmxa +all : test.native dash.cmxa -test : main.native $(wildcard tests/*) +test : test.native $(wildcard tests/*) @for f in tests/*; do \ - ./round_trip.sh ./main.native $$f 2>test.err; \ + ./round_trip.sh ./test.native $$f 2>test.err; \ done -main.native : dash.cmx ast.cmx main.cmx +test.native : dash.cmx ast.cmx main.cmx ocamlopt.opt -cclib -force_load $(DASH)/src/libdash.a $(OCAMLINCLUDES) $(OCAMLLIBS) $^ -o $@ @@ -26,7 +26,7 @@ dash.cmxa : dash.cmx ast.cmx $(DASH)/src/libdash.a ocamlopt.opt $(OCAMLINCLUDES) -c $< -o $@ clean : - rm -f *.o test *~ *.cmi *.cmx main.native dash.a dash.cmxa + rm -f *.o test *~ *.cmi *.cmx test.native dash.a dash.cmxa rm -rf _build # ocamldep output From cdc7aa65ed09934ed41fba5c4fa9042faf95cc13 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 26 Sep 2018 11:47:00 -0400 Subject: [PATCH 227/401] change build to keep libdash in working directory. gross, but avoids absolute paths (which are getting put in by -force-load). 
--- ocaml/Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ocaml/Makefile b/ocaml/Makefile index 898ac6a..d87ab9a 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -13,11 +13,15 @@ test : test.native $(wildcard tests/*) test.native : dash.cmx ast.cmx main.cmx - ocamlopt.opt -cclib -force_load $(DASH)/src/libdash.a $(OCAMLINCLUDES) $(OCAMLLIBS) $^ -o $@ + ocamlopt.opt -cclib -force_load libdash.a $(OCAMLINCLUDES) $(OCAMLLIBS) $^ -o $@ -dash.cmxa : dash.cmx ast.cmx $(DASH)/src/libdash.a - ocamlopt.opt -cclib -force_load $(DASH)/src/libdash.a $(OCAMLINCLUDES) $^ -a -o $@ +dash.cmxa : dash.cmx ast.cmx + $(MAKE) libdash.a + ocamlopt.opt -cclib -force_load libdash.a $(OCAMLINCLUDES) $^ -a -o $@ + +libdash.a : $(DASH)/src/libdash.a + cp $< $@ %.cmx : %.ml ocamlopt.opt $(OCAMLINCLUDES) -c $< -o $@ From eb41a89fc297fc1877e394b0247f95634c403276 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 26 Sep 2018 12:23:25 -0400 Subject: [PATCH 228/401] fix build deps and clean target --- ocaml/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ocaml/Makefile b/ocaml/Makefile index d87ab9a..65c1e58 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -13,9 +13,9 @@ test : test.native $(wildcard tests/*) test.native : dash.cmx ast.cmx main.cmx + $(MAKE) dash.cmxa ocamlopt.opt -cclib -force_load libdash.a $(OCAMLINCLUDES) $(OCAMLLIBS) $^ -o $@ - dash.cmxa : dash.cmx ast.cmx $(MAKE) libdash.a ocamlopt.opt -cclib -force_load libdash.a $(OCAMLINCLUDES) $^ -a -o $@ @@ -30,7 +30,7 @@ libdash.a : $(DASH)/src/libdash.a ocamlopt.opt $(OCAMLINCLUDES) -c $< -o $@ clean : - rm -f *.o test *~ *.cmi *.cmx test.native dash.a dash.cmxa + rm -f *.o test *~ *.cmi *.cmx test.native libdash.a dash.cmxa rm -rf _build # ocamldep output From 475e2482076864fcffcb146959087b636da7f6a4 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 27 Sep 2018 12:58:15 -0400 Subject: [PATCH 229/401] working on getting shared libraries built 
--- configure.ac | 5 +++-- ocaml/Makefile | 6 ++++-- src/Makefile.am | 4 ++++ 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index bf6afe7..16c846d 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,6 @@ AC_INIT([dash],[0.5.13.3]) AM_INIT_AUTOMAKE([foreign subdir-objects]) AC_CONFIG_SRCDIR([src/main.c]) - AC_CONFIG_HEADERS(config.h) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)]) @@ -14,8 +13,10 @@ AC_USE_SYSTEM_EXTENSIONS dnl AC_GNU_SOURCE dnl AC_PROG_YACC dnl MMG 2018-09-26 support building the library -AC_PROG_RANLIB AM_PROG_AR +AC_PROG_RANLIB +dnl AC_PROG_LIBTOOL + AC_MSG_CHECKING([for build system compiler]) if test "$cross_compiling" = yes; then diff --git a/ocaml/Makefile b/ocaml/Makefile index 65c1e58..bf599f4 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -1,5 +1,7 @@ DASH=.. +FORCE_LOAD=$(if $(shell gcc -v 2>&1 | grep gcc),--force-link,-force_load) + OCAMLLIB=$(shell opam config var lib) OCAMLINCLUDES=-I $(OCAMLLIB)/bytes -I $(OCAMLLIB)/ctypes OCAMLLIBS=unix.cmxa bigarray.cmxa str.cmxa ctypes.cmxa ctypes-foreign-base.cmxa ctypes-foreign-unthreaded.cmxa @@ -14,11 +16,11 @@ test : test.native $(wildcard tests/*) test.native : dash.cmx ast.cmx main.cmx $(MAKE) dash.cmxa - ocamlopt.opt -cclib -force_load libdash.a $(OCAMLINCLUDES) $(OCAMLLIBS) $^ -o $@ + ocamlopt.opt -cclib $(FORCE_LOAD) libdash.a $(OCAMLINCLUDES) $(OCAMLLIBS) $^ -o $@ dash.cmxa : dash.cmx ast.cmx $(MAKE) libdash.a - ocamlopt.opt -cclib -force_load libdash.a $(OCAMLINCLUDES) $^ -a -o $@ + ocamlopt.opt -cclib $(FORCE_LOAD) libdash.a $(OCAMLINCLUDES) $^ -a -o $@ libdash.a : $(DASH)/src/libdash.a cp $< $@ diff --git a/src/Makefile.am b/src/Makefile.am index 6e98086..ad870e7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -38,6 +38,10 @@ libdash_a_SOURCES = $(dash_SOURCES) libdash_a_LIBADD = builtins.o init.o nodes.o signames.o syntax.o +#lib_LTLIBRARIES = libdash.la +#libdash_la_SOURCES = $(dash_SOURCES) +#libdash_la_CFLAGS = 
$(AM_CFLAGS) + HELPERS = mkinit mksyntax mknodes mksignames BUILT_SOURCES = builtins.h nodes.h syntax.h token.h token_vars.h From 9b54b001df53ccb1542ce4425575f3efa832ff7c Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 27 Sep 2018 14:27:55 -0400 Subject: [PATCH 230/401] correctly building in both docker and my computer --- .gitignore | 4 ++++ Makefile.am | 2 ++ configure.ac | 8 ++++++-- ocaml/Makefile | 8 +++++--- ocaml/round_trip.sh | 7 +++++++ src/.gitignore | 2 ++ src/Makefile.am | 21 +++++++++++++-------- 7 files changed, 39 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index 4abc1f9..670b1f9 100644 --- a/.gitignore +++ b/.gitignore @@ -56,3 +56,7 @@ test.err ar-lib config.* src/libdash.a +*.lo +m4 +libtool +ltmain.sh diff --git a/Makefile.am b/Makefile.am index 4910c12..73345fe 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,3 +1,5 @@ +ACLOCAL_AMFLAGS=-I m4 + SUBDIRS = src lib : libdash.a libdash.so.1.0.1 diff --git a/configure.ac b/configure.ac index 16c846d..01ac722 100644 --- a/configure.ac +++ b/configure.ac @@ -2,6 +2,7 @@ AC_INIT([dash],[0.5.13.3]) AM_INIT_AUTOMAKE([foreign subdir-objects]) AC_CONFIG_SRCDIR([src/main.c]) AC_CONFIG_HEADERS(config.h) +AC_CONFIG_MACRO_DIRS([m4]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)]) @@ -14,8 +15,6 @@ dnl AC_GNU_SOURCE dnl AC_PROG_YACC dnl MMG 2018-09-26 support building the library AM_PROG_AR -AC_PROG_RANLIB -dnl AC_PROG_LIBTOOL AC_MSG_CHECKING([for build system compiler]) @@ -236,5 +235,10 @@ AC_ARG_ENABLE(lineno, AS_HELP_STRING(--disable-lineno, \ if test "$enable_lineno" != "no"; then AC_DEFINE([WITH_LINENO], 1, [Define if you build with -DWITH_LINENO]) fi + +dnl MMG 2018-09-27 support building the shared library +AC_PROG_LIBTOOL +LT_INIT + AC_CONFIG_FILES([Makefile src/Makefile]) AC_OUTPUT diff --git a/ocaml/Makefile b/ocaml/Makefile index bf599f4..f8d2537 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -1,6 +1,8 @@ DASH=.. 
-FORCE_LOAD=$(if $(shell gcc -v 2>&1 | grep gcc),--force-link,-force_load) +DYNLINK=-ccopt '-Wl,-L../src/.libs/ -ldash' +STALINK=-cclib -force_load libdash.a +LINK=$(if $(shell gcc -v 2>&1 | grep gcc),$(DYNLINK),$(STALINK)) OCAMLLIB=$(shell opam config var lib) OCAMLINCLUDES=-I $(OCAMLLIB)/bytes -I $(OCAMLLIB)/ctypes @@ -16,11 +18,11 @@ test : test.native $(wildcard tests/*) test.native : dash.cmx ast.cmx main.cmx $(MAKE) dash.cmxa - ocamlopt.opt -cclib $(FORCE_LOAD) libdash.a $(OCAMLINCLUDES) $(OCAMLLIBS) $^ -o $@ + ocamlopt.opt $(LINK) $(OCAMLINCLUDES) $(OCAMLLIBS) $^ -o $@ dash.cmxa : dash.cmx ast.cmx $(MAKE) libdash.a - ocamlopt.opt -cclib $(FORCE_LOAD) libdash.a $(OCAMLINCLUDES) $^ -a -o $@ + ocamlopt.opt $(LINK) $(OCAMLINCLUDES) $^ -a -o $@ libdash.a : $(DASH)/src/libdash.a cp $< $@ diff --git a/ocaml/round_trip.sh b/ocaml/round_trip.sh index fa85b3b..57a7e86 100755 --- a/ocaml/round_trip.sh +++ b/ocaml/round_trip.sh @@ -9,7 +9,14 @@ p=$1 tgt=$2 orig=$(${p} ${tgt}) +if [ "$?" -ne 0 ]; +then echo ${tgt} FAILED, couldn\'t run; exit 2 +fi + rt=$(${p} ${tgt} | ${p}) +if [ "$?" 
-ne 0 ]; +then echo ${tgt} FAILED round trip, couldn\'t run; exit 3 +fi if [ "${orig}" = "${rt}" ]; then echo ${tgt} OK; exit 0 diff --git a/src/.gitignore b/src/.gitignore index 644eccb..38733ad 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -11,3 +11,5 @@ nodes.[ch] signames.c syntax.[ch] token.h +.libs +libdash.la diff --git a/src/Makefile.am b/src/Makefile.am index ad870e7..124c2ee 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -18,7 +18,17 @@ bin_PROGRAMS = dash dash_CFLAGS = -DMAIN -noinst_LIBRARIES = libdash.a +lib_LIBRARIES = libdash.a +lib_LTLIBRARIES = libdash.la + +libdash_la_SOURCES = \ + alias.c arith_yacc.c arith_yylex.c cd.c error.c eval.c exec.c expand.c \ + histedit.c input.c jobs.c mail.c main.c memalloc.c miscbltin.c \ + mystring.c options.c parser.c redir.c show.c trap.c output.c \ + bltin/printf.c system.c bltin/test.c bltin/times.c var.c \ + builtins.c init.c nodes.c signames.c syntax.c +libdash_la_CFLAGS = $(AM_CFLAGS) +libdash_la_LDFLAGS = -module -shared dash_CFILES = \ alias.c arith_yacc.c arith_yylex.c cd.c error.c eval.c exec.c expand.c \ @@ -34,13 +44,8 @@ dash_SOURCES = \ show.h system.h trap.h var.h dash_LDADD = builtins.o init.o nodes.o signames.o syntax.o -libdash_a_SOURCES = $(dash_SOURCES) - -libdash_a_LIBADD = builtins.o init.o nodes.o signames.o syntax.o - -#lib_LTLIBRARIES = libdash.la -#libdash_la_SOURCES = $(dash_SOURCES) -#libdash_la_CFLAGS = $(AM_CFLAGS) +libdash_a_SOURCES = $(dash_CFILES) +libdash_a_LIBADD = $(dash_LDADD) HELPERS = mkinit mksyntax mknodes mksignames From 0a103aa16d106f25791ca167d194af1a2acdda35 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 28 Sep 2018 09:15:19 -0400 Subject: [PATCH 231/401] break out tests, building properly as a library and installing correctly --- Makefile.am | 7 ---- autogen.sh | 3 +- ocaml/META | 4 ++ ocaml/Makefile | 46 ++++------------------- {ocaml => test}/round_trip.sh | 0 ocaml/main.ml => test/test.ml | 0 {ocaml => test}/tests/braces_amp.sh | 0 {ocaml => 
test}/tests/diverge.sh | 0 {ocaml => test}/tests/escaping | 0 {ocaml => test}/tests/for_spaces.sh | 0 {ocaml => test}/tests/grab_submissions.sh | 0 {ocaml => test}/tests/grade.sh | 0 {ocaml => test}/tests/run_grader.sh | 0 {ocaml => test}/tests/run_lda.sh | 0 {ocaml => test}/tests/send_emails.sh | 0 {ocaml => test}/tests/syntax | 0 {ocaml => test}/tests/test.sh | 0 {ocaml => test}/tests/timeout3 | 0 18 files changed, 13 insertions(+), 47 deletions(-) create mode 100644 ocaml/META rename {ocaml => test}/round_trip.sh (100%) rename ocaml/main.ml => test/test.ml (100%) rename {ocaml => test}/tests/braces_amp.sh (100%) rename {ocaml => test}/tests/diverge.sh (100%) rename {ocaml => test}/tests/escaping (100%) rename {ocaml => test}/tests/for_spaces.sh (100%) rename {ocaml => test}/tests/grab_submissions.sh (100%) rename {ocaml => test}/tests/grade.sh (100%) rename {ocaml => test}/tests/run_grader.sh (100%) rename {ocaml => test}/tests/run_lda.sh (100%) rename {ocaml => test}/tests/send_emails.sh (100%) rename {ocaml => test}/tests/syntax (100%) rename {ocaml => test}/tests/test.sh (100%) rename {ocaml => test}/tests/timeout3 (100%) diff --git a/Makefile.am b/Makefile.am index 73345fe..cb1807d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -2,10 +2,3 @@ ACLOCAL_AMFLAGS=-I m4 SUBDIRS = src -lib : libdash.a libdash.so.1.0.1 - -libdash.a : - ar rcs $@ src/*.o - -libdash.so.1.0.1 : - gcc -shared -o $@ src/*.o diff --git a/autogen.sh b/autogen.sh index 9879c53..bbc5667 100755 --- a/autogen.sh +++ b/autogen.sh @@ -1,6 +1,7 @@ #!/bin/sh -aclocal \ +libtoolize \ +&& aclocal \ && autoheader \ && automake --add-missing \ && autoconf diff --git a/ocaml/META b/ocaml/META new file mode 100644 index 0000000..f8fd69e --- /dev/null +++ b/ocaml/META @@ -0,0 +1,4 @@ +description = "bindings to the dash shell as a library" +requires = "ctypes,ctypes.foreign" +version = "0.1" +archive(native) = "dash.cmxa" diff --git a/ocaml/Makefile b/ocaml/Makefile index f8d2537..aa4a246 100644 --- 
a/ocaml/Makefile +++ b/ocaml/Makefile @@ -1,44 +1,12 @@ -DASH=.. +.PHONY : all install clean -DYNLINK=-ccopt '-Wl,-L../src/.libs/ -ldash' -STALINK=-cclib -force_load libdash.a -LINK=$(if $(shell gcc -v 2>&1 | grep gcc),$(DYNLINK),$(STALINK)) +all : dash.cmxa -OCAMLLIB=$(shell opam config var lib) -OCAMLINCLUDES=-I $(OCAMLLIB)/bytes -I $(OCAMLLIB)/ctypes -OCAMLLIBS=unix.cmxa bigarray.cmxa str.cmxa ctypes.cmxa ctypes-foreign-base.cmxa ctypes-foreign-unthreaded.cmxa +install : all + ocamlfind install dash META dash.cmxa dash.a dash.mli dash.cmi dash.cmx ast.mli ast.cmi ast.cmx -all : test.native dash.cmxa - -test : test.native $(wildcard tests/*) - @for f in tests/*; do \ - ./round_trip.sh ./test.native $$f 2>test.err; \ - done - - -test.native : dash.cmx ast.cmx main.cmx - $(MAKE) dash.cmxa - ocamlopt.opt $(LINK) $(OCAMLINCLUDES) $(OCAMLLIBS) $^ -o $@ - -dash.cmxa : dash.cmx ast.cmx - $(MAKE) libdash.a - ocamlopt.opt $(LINK) $(OCAMLINCLUDES) $^ -a -o $@ - -libdash.a : $(DASH)/src/libdash.a - cp $< $@ - -%.cmx : %.ml - ocamlopt.opt $(OCAMLINCLUDES) -c $< -o $@ - -%.cmi : %.mli - ocamlopt.opt $(OCAMLINCLUDES) -c $< -o $@ +dash.cmxa : dash.mli dash.ml ast.mli ast.ml + ocamlfind ocamlmklib -package ctypes,ctypes.foreign -linkpkg -cclib -ldash $^ -o dash clean : - rm -f *.o test *~ *.cmi *.cmx test.native libdash.a dash.cmxa - rm -rf _build - -# ocamldep output -ast.cmx : dash.cmx ast.cmi -ast.cmi : dash.cmi -dash.cmx : dash.cmi -main.cmx : dash.cmx ast.cmx + rm -f *.o *.cmi *.cmx dash.a dash.cmxa diff --git a/ocaml/round_trip.sh b/test/round_trip.sh similarity index 100% rename from ocaml/round_trip.sh rename to test/round_trip.sh diff --git a/ocaml/main.ml b/test/test.ml similarity index 100% rename from ocaml/main.ml rename to test/test.ml diff --git a/ocaml/tests/braces_amp.sh b/test/tests/braces_amp.sh similarity index 100% rename from ocaml/tests/braces_amp.sh rename to test/tests/braces_amp.sh diff --git a/ocaml/tests/diverge.sh b/test/tests/diverge.sh similarity 
index 100% rename from ocaml/tests/diverge.sh rename to test/tests/diverge.sh diff --git a/ocaml/tests/escaping b/test/tests/escaping similarity index 100% rename from ocaml/tests/escaping rename to test/tests/escaping diff --git a/ocaml/tests/for_spaces.sh b/test/tests/for_spaces.sh similarity index 100% rename from ocaml/tests/for_spaces.sh rename to test/tests/for_spaces.sh diff --git a/ocaml/tests/grab_submissions.sh b/test/tests/grab_submissions.sh similarity index 100% rename from ocaml/tests/grab_submissions.sh rename to test/tests/grab_submissions.sh diff --git a/ocaml/tests/grade.sh b/test/tests/grade.sh similarity index 100% rename from ocaml/tests/grade.sh rename to test/tests/grade.sh diff --git a/ocaml/tests/run_grader.sh b/test/tests/run_grader.sh similarity index 100% rename from ocaml/tests/run_grader.sh rename to test/tests/run_grader.sh diff --git a/ocaml/tests/run_lda.sh b/test/tests/run_lda.sh similarity index 100% rename from ocaml/tests/run_lda.sh rename to test/tests/run_lda.sh diff --git a/ocaml/tests/send_emails.sh b/test/tests/send_emails.sh similarity index 100% rename from ocaml/tests/send_emails.sh rename to test/tests/send_emails.sh diff --git a/ocaml/tests/syntax b/test/tests/syntax similarity index 100% rename from ocaml/tests/syntax rename to test/tests/syntax diff --git a/ocaml/tests/test.sh b/test/tests/test.sh similarity index 100% rename from ocaml/tests/test.sh rename to test/tests/test.sh diff --git a/ocaml/tests/timeout3 b/test/tests/timeout3 similarity index 100% rename from ocaml/tests/timeout3 rename to test/tests/timeout3 From 00ea42cfbc0bdd71c1f25fbd3ef74463cf949150 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 28 Sep 2018 16:42:28 -0400 Subject: [PATCH 232/401] fixup gitignores --- ocaml/.gitignore | 9 +++++++++ test/.gitignore | 9 +++++++++ 2 files changed, 18 insertions(+) create mode 100644 ocaml/.gitignore create mode 100644 test/.gitignore diff --git a/ocaml/.gitignore b/ocaml/.gitignore new file mode 
100644 index 0000000..a13843d --- /dev/null +++ b/ocaml/.gitignore @@ -0,0 +1,9 @@ +*.a +*.cmxa +test +*.native +*~ +*.o +*.cmx +*.cmi + diff --git a/test/.gitignore b/test/.gitignore new file mode 100644 index 0000000..8570da4 --- /dev/null +++ b/test/.gitignore @@ -0,0 +1,9 @@ +*.a +*.cmxa +test +*.native +*~ +*.o +*.cmx +*.cmi +test.err From 36e914f19247bf25b3e0ae435f99fe5814912269 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 28 Sep 2018 16:42:40 -0400 Subject: [PATCH 233/401] better builds on OS X: same script works on docker --- src/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile.am b/src/Makefile.am index 124c2ee..0e18ec5 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -28,7 +28,7 @@ libdash_la_SOURCES = \ bltin/printf.c system.c bltin/test.c bltin/times.c var.c \ builtins.c init.c nodes.c signames.c syntax.c libdash_la_CFLAGS = $(AM_CFLAGS) -libdash_la_LDFLAGS = -module -shared +libdash_la_LDFLAGS = -shared -dynamic dash_CFILES = \ alias.c arith_yacc.c arith_yylex.c cd.c error.c eval.c exec.c expand.c \ From f34f478b0f6d7a5f53fa06de98bd19c57657e7df Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 5 Oct 2018 17:51:12 -0400 Subject: [PATCH 234/401] cleanup escaping --- ocaml/.gitignore | 3 ++- ocaml/Makefile | 1 + ocaml/dash.ml | 5 ++++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/ocaml/.gitignore b/ocaml/.gitignore index a13843d..5e14717 100644 --- a/ocaml/.gitignore +++ b/ocaml/.gitignore @@ -6,4 +6,5 @@ test *.o *.cmx *.cmi - +*.cmo +*.cma diff --git a/ocaml/Makefile b/ocaml/Makefile index aa4a246..c1e23e4 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -3,6 +3,7 @@ all : dash.cmxa install : all + if ocamlfind query dash; then ocamlfind remove dash; fi ocamlfind install dash META dash.cmxa dash.a dash.mli dash.cmi dash.cmx ast.mli ast.cmi ast.cmx dash.cmxa : dash.mli dash.ml ast.mli ast.ml diff --git a/ocaml/dash.ml b/ocaml/dash.ml index 13027b2..8c606df 100644 
--- a/ocaml/dash.ml +++ b/ocaml/dash.ml @@ -491,7 +491,10 @@ and show_arg (s : char list) (bqlist : nodelist structure ptr) stack = let c' = match c with | '\'' -> "\\'" | '\"' -> "\\\"" - | _ -> Char.escaped c + | '\\' -> "\\" + | '$' -> "$" + | '`' -> "`" + | _ -> "\\" ^ String.make 1 c in c' ^ str,s'',bqlist',stack' (* CTLVAR *) From 015a1767d3eeecdeec42d73c955573907b11aa2a Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 5 Oct 2018 18:03:58 -0400 Subject: [PATCH 235/401] better fix --- ocaml/dash.ml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ocaml/dash.ml b/ocaml/dash.ml index 8c606df..b1843be 100644 --- a/ocaml/dash.ml +++ b/ocaml/dash.ml @@ -491,10 +491,7 @@ and show_arg (s : char list) (bqlist : nodelist structure ptr) stack = let c' = match c with | '\'' -> "\\'" | '\"' -> "\\\"" - | '\\' -> "\\" - | '$' -> "$" - | '`' -> "`" - | _ -> "\\" ^ String.make 1 c + | _ -> String.make 1 c in c' ^ str,s'',bqlist',stack' (* CTLVAR *) From e3fe597be95b9710ad2b87aac1dd72a94691280e Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 9 Oct 2018 09:02:05 -0400 Subject: [PATCH 236/401] add missing makefile --- test/Makefile | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 test/Makefile diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..3204153 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,14 @@ +.PHONY : all test clean + +all : test.native + +test : test.native $(wildcard tests/*) + @for f in tests/*; do \ + ./round_trip.sh ./test.native $$f 2>test.err; \ + done + +test.native : test.ml + ocamlfind ocamlopt -package dash -linkpkg test.ml -o test.native + +clean : + rm -f *.o *.cmi *.cmx test.native test.err From d08fcf96edd99a72200b600af2c44cfc02881013 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 23 Oct 2018 15:39:32 -0400 Subject: [PATCH 237/401] properly handle null commands---which can show up in an empty case, not just an empty if --- ocaml/ast.ml | 8 
++++---- test/tests/empty_case | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 test/tests/empty_case diff --git a/ocaml/ast.ml b/ocaml/ast.ml index 814b0ba..c3e7cfe 100644 --- a/ocaml/ast.ml +++ b/ocaml/ast.ml @@ -90,6 +90,9 @@ open Dash let skip = Command (-1,[],[],[]) let rec of_node (n : node union ptr) : t = + if nullptr n + then skip + else match (n @-> node_type) with (* NCMD *) | 0 -> @@ -118,12 +121,9 @@ let rec of_node (n : node union ptr) : t = (* NIF *) | 8 -> let n = n @-> node_nif in - let else_part = getf n nif_elsepart in If (of_node (getf n nif_test), of_node (getf n nif_ifpart), - if nullptr else_part - then skip - else of_node else_part) + of_node (getf n nif_elsepart)) (* NWHILE *) | 9 -> let (t,b) = of_binary n in While (t,b) (* NUNTIL *) diff --git a/test/tests/empty_case b/test/tests/empty_case new file mode 100644 index 0000000..7d18b23 --- /dev/null +++ b/test/tests/empty_case @@ -0,0 +1,6 @@ +case foo in + bar) + ;; + *) + echo hi;; +esac \ No newline at end of file From c08bc75b1f5b7296e996ee426916534c19a34f8f Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 15 Nov 2018 10:01:17 -0500 Subject: [PATCH 238/401] setting up CI --- .dockerignore | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++ .travis.yml | 14 +++++++++ Dockerfile | 36 ++++++++++++++++++++++ 3 files changed, 132 insertions(+) create mode 100644 .dockerignore create mode 100644 .travis.yml create mode 100644 Dockerfile diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..1675b35 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,82 @@ +# compiled code anywhere +**/*.native +**/*.o +**/*.lo +**/*.cmx +**/*.cmo +**/*.cmi +**/*.a +**/*.cmxa +**/*.dSYM/ +**/_build + +# system crap +Dockerfile +Dockerfile.test +Dockerfile.web +.git +.gitmodules +**/.gitignore +**/*~ +**/.#* +**/.DS_Store +**/.DS_Store? 
+**/._* +**/.Spotlight* +**/.Trash* +**/*[Tt]humbs.db +**/*~ +**/*.bak +**/*.orig +**/*.rej + +################################################################################ +# libdash ignores + +# geneated by libtool +ltmain.sh + +# generated by autogen.sh +Makefile.in +aclocal.m4 +autom4te.cache/ +compile +config.h.in +configure +depcomp +install-sh +missing + +# generated by configure +Makefile +config.cache +config.h +config.log +config.status +src/.deps/ +stamp-h1 + +# generated by make +src/token_vars.h + +# generated files +ar-lib +config.* +src/libdash.a + +src/builtins.[ch] +src/builtins.def +src/dash +src/init.c +src/mkinit +src/mknodes +src/mksignames +src/mksyntax +src/nodes.[ch] +src/signames.c +src/syntax.[ch] +src/token.h + +src/.libs +src/.deps + diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..0ee90e7 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,14 @@ +sudo: required + +language: generic + +services: + - docker + +before_install: + - docker pull ocaml/opam2:debian-9 + - docker build -t libdash . + +script: + - docker run libdash make -C libdash/test test + diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..552c51a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,36 @@ +# start with a reasonable image. 
Debian 9 stretch is what's on the POSIX testing VM +FROM ocaml/opam2:debian-9 + +# silence apt +# TODO this still isn't silencing it :( +ENV DEBIAN_FRONTEND=noninteractive + +# system support for libdash; libgmp for zarith for lem +RUN sudo apt-get install -y autoconf autotools-dev libtool pkg-config libffi-dev + +# make sure we have ocamlfind and ocamlbuild +RUN opam install ocamlfind ocamlbuild + +# set up FFI for libdash; num library for lem; extunix for shell syscalls +RUN opam pin add ctypes 0.11.5 +RUN opam install ctypes-foreign +RUN opam install extunix + +WORKDIR /home/opam + +# copy in repo files for libdash to the WORKDIR (should be /home/opam) +# we do this as late as possible so we don't have to redo the slow stuff above +ADD --chown=opam:opam . libdash + +# build libdash, expose shared object +RUN cd libdash; ./autogen.sh && ./configure --prefix=/usr --libdir=/usr/lib/x86_64-linux-gnu +RUN cd libdash; make +RUN cd libdash; sudo make install +# build ocaml bindings +RUN cd libdash/ocaml; opam config exec -- make && opam config exec -- make install + +# system test +RUN cd libdash/test; opam config exec -- make && opam config exec make test + +ENTRYPOINT [ "opam", "config", "exec", "--" ] +CMD [ "bash" ] From 204b27dd3e458decd3caf5d652757569c17fe394 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 15 Nov 2018 10:01:26 -0500 Subject: [PATCH 239/401] more careful stack discipline --- ocaml/dash.ml | 18 ++++++------------ ocaml/dash.mli | 13 ++++++++++--- test/Makefile | 2 +- test/test.ml | 30 ++++++++++++++++++++++-------- 4 files changed, 39 insertions(+), 24 deletions(-) diff --git a/ocaml/dash.ml b/ocaml/dash.ml index b1843be..564bfac 100644 --- a/ocaml/dash.ml +++ b/ocaml/dash.ml @@ -13,7 +13,11 @@ let () = seal stackmark let init_stack () = let stack = make stackmark in (* ??? 
do we want to save this *) - foreign "setstackmark" (ptr stackmark @-> returning void) (addr stack) + foreign "setstackmark" (ptr stackmark @-> returning void) (addr stack); + stack + +let pop_stack stack = + foreign "popstackmark" (ptr stackmark @-> returning void) (addr stack) (* on OS X x86_64 *) let jmp_buf_t : 'a Ctypes_static.carray typ = array 18 int @@ -43,8 +47,7 @@ let with_handler (k : int -> 'a) : 'a = let dash_init : unit -> unit = foreign "init" (void @-> returning void) let initialize () = - dash_init (); - init_stack () + dash_init () let popfile : unit -> unit = foreign "popfile" (void @-> returning void) @@ -255,8 +258,6 @@ let nullptr (p : 'a ptr) = addrof p = Nativeint.zero type parse_result = Done | Error | Null | Parsed of (node union ptr) -exception Parse_error - let parse_next ?interactive:(i=false) () = let n = parsecmd_safe (if i then 1 else 0) in if eqptr n neof @@ -266,13 +267,6 @@ let parse_next ?interactive:(i=false) () = else if nullptr n then Null (* comment or blank line or error ... 
*) else Parsed n - -let rec parse_all ?interactive:(i=false) () : (node union ptr) list = - match parse_next ~interactive:i () with - | Done -> [] - | Error -> raise Parse_error - | Null -> parse_all ~interactive:i () - | Parsed n -> n::parse_all ~interactive:i () let (@->) (s : ('b, 'c) structured ptr) (f : ('a, ('b, 'c) structured) field) = getf (!@ s) f diff --git a/ocaml/dash.mli b/ocaml/dash.mli index 9dbc816..e29eec3 100644 --- a/ocaml/dash.mli +++ b/ocaml/dash.mli @@ -5,6 +5,16 @@ val initialize : unit -> unit +(* stackmark discipline: + + (init_stack parse_next [process AST] pop_stack[deallocates dash AST])* + + see libdash/test/test.ml for an example usage in parse_all +*) +type stackmark +val init_stack : unit -> stackmark Ctypes.structure +val pop_stack : stackmark Ctypes.structure -> unit + val popfile : unit -> unit val setinputstring : string -> unit val setinputtostdin : unit -> unit @@ -211,10 +221,7 @@ type parse_result = | Error | Null | Parsed of node Ctypes.union Ctypes.ptr -exception Parse_error val parse_next : ?interactive:bool -> unit -> parse_result -val parse_all : - ?interactive:bool -> unit -> node Ctypes.union Ctypes.ptr list (* native pretty printer *) val show : node Ctypes.union Ctypes.ptr -> string diff --git a/test/Makefile b/test/Makefile index 3204153..eef7279 100644 --- a/test/Makefile +++ b/test/Makefile @@ -8,7 +8,7 @@ test : test.native $(wildcard tests/*) done test.native : test.ml - ocamlfind ocamlopt -package dash -linkpkg test.ml -o test.native + ocamlfind ocamlopt -package dash,ctypes,ctypes.foreign -linkpkg test.ml -o test.native clean : rm -f *.o *.cmi *.cmx test.native test.err diff --git a/test/test.ml b/test/test.ml index ae67f7b..518a72e 100644 --- a/test/test.ml +++ b/test/test.ml @@ -1,12 +1,10 @@ -open Dash - let verbose = ref false let input_src : string option ref = ref None let set_input_src () = match !input_src with - | None -> setinputtostdin () - | Some f -> setinputfile f + | None -> Dash.setinputtostdin 
() + | Some f -> Dash.setinputfile f let parse_args () = Arg.parse @@ -14,12 +12,28 @@ let parse_args () = (function | "-" -> input_src := None | f -> input_src := Some f) "Final argument should be either a filename or - (for STDIN); only the last such argument is used" +exception Parse_error + +let rec parse_all () : Ast.t list = + let stackmark = Dash.init_stack () in + match Dash.parse_next ~interactive:false () with + | Dash.Done -> Dash.pop_stack stackmark; [] + | Dash.Error -> Dash.pop_stack stackmark; raise Parse_error + | Dash.Null -> Dash.pop_stack stackmark; parse_all () + | Dash.Parsed n -> + (* translate to our AST *) + let c = Ast.of_node n in + (* deallocate *) + Dash.pop_stack stackmark; + (* keep calm and carry on *) + c::parse_all () + let main () = - initialize (); + Dash.initialize (); parse_args (); set_input_src (); - let ns = parse_all () in - let cs = List.map Ast.of_node ns in - List.map (fun c -> print_endline (Ast.to_string c)) cs;; + let cs = parse_all () in + List.map (fun c -> print_endline (Ast.to_string c)) cs +;; main () From 28cc465dab6c617d47d65b2e0e0488305856bb60 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 15 Nov 2018 10:04:01 -0500 Subject: [PATCH 240/401] add Travis badge [ci skip] --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index ea8cc30..c3b8a9b 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +[![Build Status](https://travis-ci.com/mgree/libdash.svg?branch=master)](https://travis-ci.com/mgree/libdash) + *libdash* is a fork of the Linux Kernel's `dash` shell that builds a linkable library with extra exposed interfaces. The primary use of libdash is to parse shell scripts, but it could be used for more. # What are the dependencies? 
From 69e5fd649d82885f3209d65743b67ff5608bcae8 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 15 Nov 2018 11:14:20 -0500 Subject: [PATCH 241/401] update instructions [ci skip] --- README.md | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index c3b8a9b..92da6b2 100644 --- a/README.md +++ b/README.md @@ -8,29 +8,33 @@ The C code for dash should build on a wide variety of systems. The library may n The OCaml code relies on `ctypes-0.11.5` and `ctypes-foreign`; everything else should be in `base`. -# How to build it +# How to build and test it -In the root directory run: +You should be able to simply run `docker build -t libdash .` to get a runnable environment. Everything will be in `/home/opam/libdash`. + +## How to build it locally + +Broadly: crib from the `Dockerfile`. More concretely, in the root directory run: ``` -./autogen.sh && ./configure && make +./autogen.sh && ./configure && make && sudo make install ``` -This should construct an executable `src/dash` and a static library `src/libdash.a`. +This should construct an executable `src/dash` and a static library `src/libdash.a`. They will need to be installed globally for things to work well. Then run: ``` -cd ocaml; make +cd ocaml; make && make install ``` -This will build the OCaml library `ocaml/dash.mxa` along with a tester, `ocaml/test.native`. You can then run (still in the `ocaml` directory): +This will build the OCaml library and install it in your OPAM repository. There are tests in another directory; they will only build when libdash is actually installed. ``` -make test +cd test; make test ``` -Which will use `ocaml/round_trip.sh` to ensure that every tester file in `ocaml/tests` round-trips correctly through parsing and pretty printing. +The tests use `ocaml/round_trip.sh` to ensure that every tester file in `ocaml/tests` round-trips correctly through parsing and pretty printing. 
# How to use the parser From bf62307d438b4947fba0d006058383c285f1e6d6 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 16 Nov 2018 15:12:35 -0500 Subject: [PATCH 242/401] trim up unnecessary/broken setjmp. prevent the ocaml GC from seeing the stack strings --- ocaml/dash.ml | 51 +++++++++++++++++++++----------------------------- ocaml/dash.mli | 5 ++++- 2 files changed, 25 insertions(+), 31 deletions(-) diff --git a/ocaml/dash.ml b/ocaml/dash.ml index 564bfac..e9ebb30 100644 --- a/ocaml/dash.ml +++ b/ocaml/dash.ml @@ -12,48 +12,31 @@ let size = field stackmark "stacknleft" PosixTypes.size_t let () = seal stackmark let init_stack () = - let stack = make stackmark in (* ??? do we want to save this *) + let stack = make stackmark in foreign "setstackmark" (ptr stackmark @-> returning void) (addr stack); stack let pop_stack stack = foreign "popstackmark" (ptr stackmark @-> returning void) (addr stack) -(* on OS X x86_64 *) -let jmp_buf_t : 'a Ctypes_static.carray typ = array 18 int - -type jmploc -let jmploc : jmploc structure typ = structure "jmploc" -let jmp_buf = field jmploc "jmp_buf" jmp_buf_t -let () = seal jmploc - -let setjmp : int ptr -> int = foreign "setjmp" (ptr int @-> returning int) - -let with_handler (k : int -> 'a) : 'a = - let jmptgt = make jmploc in - let r = setjmp (CArray.start (getf jmptgt jmp_buf)) in - if r = 0 - then (* normal return *) - let handler = foreign_value "handler" (ptr jmploc) in - handler <-@ addr jmptgt; - k 0 - else (* coming from a longjmp *) - (* TODO we're never actually landing here, for some reason... 
*) - begin - fprintf stderr "dash raised exception %d\n" r; - k r - end - +let alloc_stack_string = + foreign "sstrdup" (string @-> returning (ptr char)) + +let free_stack_string s = + foreign "stunalloc" (ptr char @-> returning void) s + let dash_init : unit -> unit = foreign "init" (void @-> returning void) +let root_stackmark = ref None let initialize () = - dash_init () + dash_init (); + root_stackmark := Some (init_stack ()) let popfile : unit -> unit = foreign "popfile" (void @-> returning void) -let setinputstring : string -> unit = - foreign "setinputstring" (string @-> returning void) +let setinputstring : char ptr -> unit = + foreign "setinputstring" (ptr char @-> returning void) let setinputtostdin () : unit = foreign "setinputfd" (int @-> int @-> returning void) 0 0 (* don't both pushing the file *) @@ -263,7 +246,15 @@ let parse_next ?interactive:(i=false) () = if eqptr n neof then Done else if eqptr n nerr - then Error + then + begin + begin + match !root_stackmark with + | None -> failwith "!!! missing root stackmark" + | Some smark -> pop_stack smark + end; + Error + end else if nullptr n then Null (* comment or blank line or error ... 
*) else Parsed n diff --git a/ocaml/dash.mli b/ocaml/dash.mli index e29eec3..547bebc 100644 --- a/ocaml/dash.mli +++ b/ocaml/dash.mli @@ -15,8 +15,11 @@ type stackmark val init_stack : unit -> stackmark Ctypes.structure val pop_stack : stackmark Ctypes.structure -> unit +val alloc_stack_string : string -> (char Ctypes.ptr) +val free_stack_string : (char Ctypes.ptr) -> unit + val popfile : unit -> unit -val setinputstring : string -> unit +val setinputstring : (char Ctypes.ptr) -> unit val setinputtostdin : unit -> unit val setinputfile : ?push:bool -> string -> unit From fa64fab9fbf37c362f32d7116203672d5625e23b Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 7 Dec 2018 15:06:41 -0500 Subject: [PATCH 243/401] add test with comments; not actually a problem, but might as well keep it --- test/tests/builtin.trap.exitcode.test | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 test/tests/builtin.trap.exitcode.test diff --git a/test/tests/builtin.trap.exitcode.test b/test/tests/builtin.trap.exitcode.test new file mode 100644 index 0000000..4b832a6 --- /dev/null +++ b/test/tests/builtin.trap.exitcode.test @@ -0,0 +1,4 @@ +# https://www.spinics.net/lists/dash/msg01770.html + +trap 'set -o bad@option' INT +kill -s INT $$ From ab91d5989a1a7593e8a4d215ee0f15a85d251cdd Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 7 Dec 2018 15:38:33 -0500 Subject: [PATCH 244/401] marked in alias --- src/alias.c | 2 ++ src/alias.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/alias.c b/src/alias.c index 9da2da6..e4fbf88 100644 --- a/src/alias.c +++ b/src/alias.c @@ -51,6 +51,7 @@ struct alias *atab[ATABSIZE]; STATIC struct alias *freealias(struct alias *); STATIC struct alias **__lookupalias(const char *); +// libdash void setalias(const char *name, const char *val) { @@ -185,6 +186,7 @@ unaliascmd(int argc, char **argv) return (i); } +// libdash struct alias * freealias(struct alias *ap) { struct alias *next; diff --git 
a/src/alias.h b/src/alias.h index d4ec646..3aec80d 100644 --- a/src/alias.h +++ b/src/alias.h @@ -45,7 +45,7 @@ struct alias { }; struct alias *lookupalias(const char *, int); -void setalias(const char *, const char *); +void setalias(const char *, const char *); // libdash int aliascmd(int, char **); int unaliascmd(int, char **); void rmaliases(void); From 7569873013dec186f45f9eb0c11e47a04602f528 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 7 Dec 2018 15:39:22 -0500 Subject: [PATCH 245/401] more in alias --- src/alias.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/alias.c b/src/alias.c index e4fbf88..1b5863b 100644 --- a/src/alias.c +++ b/src/alias.c @@ -48,10 +48,11 @@ struct alias *atab[ATABSIZE]; +// STATIC void setalias(const char *, const char *); // libdash STATIC struct alias *freealias(struct alias *); STATIC struct alias **__lookupalias(const char *); -// libdash +// STATIC // libdash void setalias(const char *name, const char *val) { @@ -186,7 +187,7 @@ unaliascmd(int argc, char **argv) return (i); } -// libdash +// STATIC // libdash struct alias * freealias(struct alias *ap) { struct alias *next; From 3d4f912f3e61f4c028fd6c6b317a8d5304cdea29 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 7 Dec 2018 15:45:08 -0500 Subject: [PATCH 246/401] more marking --- src/input.c | 2 ++ src/input.h | 2 +- src/main.c | 8 ++++---- src/parser.c | 5 +++-- src/parser.h | 4 ++-- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/input.c b/src/input.c index 3a9758b..efec027 100644 --- a/src/input.c +++ b/src/input.c @@ -82,6 +82,7 @@ int stdin_istty = -1; STATIC void pushfile(void); static void popstring(void); static int preadfd(void); +// static void setinputfd(int fd, int push); // libdash static int preadbuffer(void); #ifdef mkinit @@ -570,6 +571,7 @@ setinputfile(const char *fname, int flags) * interrupts off. 
*/ +// static void // libdash void setinputfd(int fd, int push) { diff --git a/src/input.h b/src/input.h index 3087a36..18fe2b4 100644 --- a/src/input.h +++ b/src/input.h @@ -112,7 +112,7 @@ void pungetc(void); void pungetn(int); void pushstring(char *, void *); int setinputfile(const char *, int); -void setinputfd(int fd, int push); +void setinputfd(int fd, int push); // libdash void setinputstring(char *); void pushstdin(void); void popfile(void); diff --git a/src/main.c b/src/main.c index dfbe719..8127d91 100644 --- a/src/main.c +++ b/src/main.c @@ -79,9 +79,9 @@ STATIC void read_profile(const char *); STATIC char *find_dot_file(char *); static int cmdloop(int); -#ifdef MAIN +#ifdef MAIN // libdash int main(int, char **); -#endif +#endif //MAIN // libdash /* * Main routine. We initialize things, parse the arguments, execute @@ -91,7 +91,7 @@ int main(int, char **); * is used to figure out how far we had gotten. */ -#ifdef MAIN +#ifdef MAIN //libdash int main(int argc, char **argv) { @@ -198,7 +198,7 @@ main(int argc, char **argv) /* NOTREACHED */ } -#endif +#endif // MAIN // libdash /* * Read and execute commands. 
"Top" is nonzero for the top level command diff --git a/src/parser.c b/src/parser.c index 3065837..1f78809 100644 --- a/src/parser.c +++ b/src/parser.c @@ -55,7 +55,7 @@ #include "var.h" #include "error.h" #include "memalloc.h" -#include "init.h" /* MMG 2018-09-25 for reset() */ +#include "init.h" /* defines reset() */ // libdash #include "mystring.h" #include "alias.h" #include "show.h" @@ -169,7 +169,8 @@ parsecmd(int interact) return list(1); } -/* MMG 2018-09-25 manually install a handler here */ +// libdash +/* 2018-09-25 manually install a handler here */ union node * parsecmd_safe(int interact) { diff --git a/src/parser.h b/src/parser.h index b415aef..dd9d85c 100644 --- a/src/parser.h +++ b/src/parser.h @@ -83,7 +83,7 @@ union node; extern int lasttoken; extern int tokpushback; #define NEOF ((union node *)&tokpushback) -/* MMG 2018-09-25 similar story for an error return value */ +/* 2018-09-25 similar story for an error return value */ // libdash #define NERR ((union node *)&lasttoken) extern int whichprompt; /* 1 == PS1, 2 == PS2 */ extern int checkkwd; @@ -92,7 +92,7 @@ extern int checkkwd; int isassignment(const char *p); int issimplecmd(union node *n, const char *name); union node *parsecmd(int); -union node *parsecmd_safe(int); +union node *parsecmd_safe(int); // libdash void fixredir(union node *, const char *, int); const char *getprompt(void *); const char *const *findkwd(const char *); From 071a5b94187213141746db0a5feff501ac8ec954 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 7 Dec 2018 15:46:53 -0500 Subject: [PATCH 247/401] cleaner diffs --- src/alias.c | 12 +++++++++--- src/input.c | 10 +++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/alias.c b/src/alias.c index 1b5863b..d5fe20c 100644 --- a/src/alias.c +++ b/src/alias.c @@ -48,11 +48,15 @@ struct alias *atab[ATABSIZE]; -// STATIC void setalias(const char *, const char *); // libdash +/* +STATIC void setalias(const char *, const char *); +*/ // libdash 
STATIC struct alias *freealias(struct alias *); STATIC struct alias **__lookupalias(const char *); -// STATIC // libdash +/* +STATIC +*/ // libdash void setalias(const char *name, const char *val) { @@ -187,7 +191,9 @@ unaliascmd(int argc, char **argv) return (i); } -// STATIC // libdash +/* +STATIC +*/ // libdash struct alias * freealias(struct alias *ap) { struct alias *next; diff --git a/src/input.c b/src/input.c index efec027..13a8e09 100644 --- a/src/input.c +++ b/src/input.c @@ -82,7 +82,9 @@ int stdin_istty = -1; STATIC void pushfile(void); static void popstring(void); static int preadfd(void); -// static void setinputfd(int fd, int push); // libdash +/* +static void setinputfd(int fd, int push); +*/ // libdash static int preadbuffer(void); #ifdef mkinit @@ -571,8 +573,10 @@ setinputfile(const char *fname, int flags) * interrupts off. */ -// static void // libdash -void +/* +static void +*/ +void // libdash setinputfd(int fd, int push) { pushfile(); From fe53ebe4bf0f0dde5f44c01e5f0e9ac34062ee05 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 7 Dec 2018 15:47:48 -0500 Subject: [PATCH 248/401] still cleaner --- src/alias.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alias.c b/src/alias.c index d5fe20c..bce509c 100644 --- a/src/alias.c +++ b/src/alias.c @@ -192,7 +192,7 @@ unaliascmd(int argc, char **argv) } /* -STATIC +STATIC struct alias * */ // libdash struct alias * freealias(struct alias *ap) { From 1f3b596b7f265c844c1ef241db9d983a7c2e1a61 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 12 Dec 2018 14:50:24 -0500 Subject: [PATCH 249/401] expose a function for getting fresh fds >= 10 --- ocaml/dash.ml | 11 +++++++++++ ocaml/dash.mli | 2 ++ src/redir.c | 10 +++++++++- src/redir.h | 2 +- 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/ocaml/dash.ml b/ocaml/dash.ml index e9ebb30..fd0eaf2 100644 --- a/ocaml/dash.ml +++ b/ocaml/dash.ml @@ -3,6 +3,8 @@ open Ctypes open Ctypes_types open Foreign +(* 
First, some dash trivia. *) + type stackmark let stackmark : stackmark structure typ = structure "stackmark" @@ -55,6 +57,15 @@ let setalias (name : string) (mapping : string) : unit = let unalias (name : string) : unit = foreign "unalias" (string @-> returning void) name +(* Next, a utility function that isn't in Unix or ExtUnix. *) + +let freshfd_ge10 (fd : int) : int option = + let newfd = foreign "freshfd_ge10" (int @-> returning int) fd in + if newfd < 0 + then None + else Some newfd + +(* Actual AST stuff begins here. *) (* first, we define the node type... *) type node diff --git a/ocaml/dash.mli b/ocaml/dash.mli index 547bebc..2ffee25 100644 --- a/ocaml/dash.mli +++ b/ocaml/dash.mli @@ -27,6 +27,8 @@ val setvar : string -> string -> unit val setalias : string -> string -> unit val unalias : string -> unit +val freshfd_ge10 : int -> int option + (* Ctypes mappings of the node types *) type node val node : node Ctypes.union Ctypes.typ diff --git a/src/redir.c b/src/redir.c index e61d3da..ce8777c 100644 --- a/src/redir.c +++ b/src/redir.c @@ -453,7 +453,15 @@ FORKRESET { #endif - +/* + * Just a convenience because fcntl isn't well exposed in OCaml. + */ +// libdash +int +freshfd_ge10(int fd) +{ + return fcntl(fd, F_DUPFD, 10); +} /* * Move a file descriptor to > 10. 
Invokes sh_error on error unless diff --git a/src/redir.h b/src/redir.h index 0be5f1a..e3945e4 100644 --- a/src/redir.h +++ b/src/redir.h @@ -45,10 +45,10 @@ struct redirtab; union node; void redirect(union node *, int); void popredir(int); +int freshfd_ge10(int); // libdash int savefd(int, int); int redirectsafe(union node *, int); void unwindredir(struct redirtab *stop); struct redirtab *pushredir(union node *redir); int sh_open(const char *pathname, int flags, int mayfail); int sh_pipe(int pip[2], int memfd); - From 19f896069a87f8c00e9f76c285887e483c2603be Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 14 Dec 2018 10:57:51 -0500 Subject: [PATCH 250/401] better interface for freshfd_ge10, so i can nab EBADF --- ocaml/dash.ml | 7 ++----- ocaml/dash.mli | 3 ++- src/redir.c | 10 +++++++++- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/ocaml/dash.ml b/ocaml/dash.ml index fd0eaf2..4acd027 100644 --- a/ocaml/dash.ml +++ b/ocaml/dash.ml @@ -59,11 +59,8 @@ let unalias (name : string) : unit = (* Next, a utility function that isn't in Unix or ExtUnix. *) -let freshfd_ge10 (fd : int) : int option = - let newfd = foreign "freshfd_ge10" (int @-> returning int) fd in - if newfd < 0 - then None - else Some newfd +let freshfd_ge10 (fd : int) : int = + foreign "freshfd_ge10" (int @-> returning int) fd (* Actual AST stuff begins here. *) (* first, we define the node type... 
*) diff --git a/ocaml/dash.mli b/ocaml/dash.mli index 2ffee25..f18c119 100644 --- a/ocaml/dash.mli +++ b/ocaml/dash.mli @@ -27,7 +27,8 @@ val setvar : string -> string -> unit val setalias : string -> string -> unit val unalias : string -> unit -val freshfd_ge10 : int -> int option +(* returns -1 when fd was closed; -2 on other errors *) +val freshfd_ge10 : int -> int (* Ctypes mappings of the node types *) type node diff --git a/src/redir.c b/src/redir.c index ce8777c..c74ab7a 100644 --- a/src/redir.c +++ b/src/redir.c @@ -460,7 +460,15 @@ FORKRESET { int freshfd_ge10(int fd) { - return fcntl(fd, F_DUPFD, 10); + int newfd; + + newfd = fcntl(fd, F_DUPFD, 10); + + if (newfd < 0) { + return errno == EBADF ? -1 : -2; + } else { + return newfd; + } } /* From 5dc2a0bb6e4b476eeaf05d1ed6fa9426589a4113 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 19 Dec 2018 10:58:31 -0500 Subject: [PATCH 251/401] fix to errno when running on glibc, make fcntl code simpler --- ocaml/dash.ml | 3 +++ src/main.c | 9 +++++++++ src/main.h | 1 + src/redir.c | 18 +++++++++++------- 4 files changed, 24 insertions(+), 7 deletions(-) diff --git a/ocaml/dash.ml b/ocaml/dash.ml index 4acd027..a0d6879 100644 --- a/ocaml/dash.ml +++ b/ocaml/dash.ml @@ -28,9 +28,12 @@ let free_stack_string s = foreign "stunalloc" (ptr char @-> returning void) s let dash_init : unit -> unit = foreign "init" (void @-> returning void) +let initialize_dash_errno : unit -> unit = + foreign "initialize_dash_errno" (void @-> returning void) let root_stackmark = ref None let initialize () = + initialize_dash_errno (); dash_init (); root_stackmark := Some (init_stack ()) diff --git a/src/main.c b/src/main.c index 8127d91..35a5532 100644 --- a/src/main.c +++ b/src/main.c @@ -79,6 +79,15 @@ STATIC void read_profile(const char *); STATIC char *find_dot_file(char *); static int cmdloop(int); +//libdash +void +initialize_dash_errno() +{ +#ifdef __GLIBC__ + dash_errno = __errno_location(); +#endif +} + #ifdef MAIN // 
libdash int main(int, char **); #endif //MAIN // libdash diff --git a/src/main.h b/src/main.h index ef242a1..c88ab53 100644 --- a/src/main.h +++ b/src/main.h @@ -50,6 +50,7 @@ extern int *dash_errno; #undef errno #define errno (*dash_errno) #endif +void initialize_dash_errno(); // libdash void readcmdfile(char *); int dotcmd(int, char **); diff --git a/src/redir.c b/src/redir.c index c74ab7a..5dd990c 100644 --- a/src/redir.c +++ b/src/redir.c @@ -461,14 +461,18 @@ int freshfd_ge10(int fd) { int newfd; - - newfd = fcntl(fd, F_DUPFD, 10); - - if (newfd < 0) { - return errno == EBADF ? -1 : -2; - } else { - return newfd; + int err; + + newfd = fcntl(fd, F_DUPFD_CLOEXEC, 10); + + err = newfd < 0 ? errno : 0; + if (err == EBADF) { + newfd = -1; + } else if (err) { + newfd = -2; } + + return newfd; } /* From 8be27f597d852cfb74ec409fc045ee473c558a94 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 20 Dec 2018 10:01:37 -0500 Subject: [PATCH 252/401] build .so file to support bytecode compilation (needed for profiling) --- .dockerignore | 1 + ocaml/META | 2 ++ ocaml/Makefile | 6 +++--- src/.gitignore | 1 + src/Makefile.am | 11 ++++++++++- test/.gitignore | 2 ++ test/Makefile | 15 ++++++++++++--- 7 files changed, 31 insertions(+), 7 deletions(-) diff --git a/.dockerignore b/.dockerignore index 1675b35..75eeee1 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,5 +1,6 @@ # compiled code anywhere **/*.native +**/*.byte **/*.o **/*.lo **/*.cmx diff --git a/ocaml/META b/ocaml/META index f8fd69e..d9e5dcd 100644 --- a/ocaml/META +++ b/ocaml/META @@ -2,3 +2,5 @@ description = "bindings to the dash shell as a library" requires = "ctypes,ctypes.foreign" version = "0.1" archive(native) = "dash.cmxa" +archive(byte) = "dash.cma" +linkopts="-cclib -ldash" \ No newline at end of file diff --git a/ocaml/Makefile b/ocaml/Makefile index c1e23e4..13bb91e 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -1,13 +1,13 @@ .PHONY : all install clean -all : dash.cmxa +all : dash.cmxa 
dash.cma install : all if ocamlfind query dash; then ocamlfind remove dash; fi - ocamlfind install dash META dash.cmxa dash.a dash.mli dash.cmi dash.cmx ast.mli ast.cmi ast.cmx + ocamlfind install dash META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx dash.cmxa : dash.mli dash.ml ast.mli ast.ml - ocamlfind ocamlmklib -package ctypes,ctypes.foreign -linkpkg -cclib -ldash $^ -o dash + ocamlfind ocamlmklib -package ctypes,ctypes.foreign -cclib -ldash $^ -o dash clean : rm -f *.o *.cmi *.cmx dash.a dash.cmxa diff --git a/src/.gitignore b/src/.gitignore index 38733ad..2f5860e 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -13,3 +13,4 @@ syntax.[ch] token.h .libs libdash.la +dlldash.la diff --git a/src/Makefile.am b/src/Makefile.am index 0e18ec5..1871997 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -19,7 +19,7 @@ bin_PROGRAMS = dash dash_CFLAGS = -DMAIN lib_LIBRARIES = libdash.a -lib_LTLIBRARIES = libdash.la +lib_LTLIBRARIES = libdash.la dlldash.la libdash_la_SOURCES = \ alias.c arith_yacc.c arith_yylex.c cd.c error.c eval.c exec.c expand.c \ @@ -30,6 +30,15 @@ libdash_la_SOURCES = \ libdash_la_CFLAGS = $(AM_CFLAGS) libdash_la_LDFLAGS = -shared -dynamic +dlldash_la_SOURCES = \ + alias.c arith_yacc.c arith_yylex.c cd.c error.c eval.c exec.c expand.c \ + histedit.c input.c jobs.c mail.c main.c memalloc.c miscbltin.c \ + mystring.c options.c parser.c redir.c show.c trap.c output.c \ + bltin/printf.c system.c bltin/test.c bltin/times.c var.c \ + builtins.c init.c nodes.c signames.c syntax.c +dlldash_la_CFLAGS = $(AM_CFLAGS) +dlldash_la_LDFLAGS = -shared -dynamic -module + dash_CFILES = \ alias.c arith_yacc.c arith_yylex.c cd.c error.c eval.c exec.c expand.c \ histedit.c input.c jobs.c mail.c main.c memalloc.c miscbltin.c \ diff --git a/test/.gitignore b/test/.gitignore index 8570da4..ed32e6c 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -7,3 +7,5 @@ test *.cmx *.cmi test.err +test.byte +test.cmo diff 
--git a/test/Makefile b/test/Makefile index eef7279..f6935d9 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,14 +1,23 @@ .PHONY : all test clean -all : test.native +all : test.native test.byte -test : test.native $(wildcard tests/*) +test : test.native test.byte $(wildcard tests/*) + @echo "TESTING test.native" @for f in tests/*; do \ ./round_trip.sh ./test.native $$f 2>test.err; \ done + @echo "TESTING test.byte" + @for f in tests/*; do \ + ./round_trip.sh ./test.byte $$f 2>test.err; \ + done + test.native : test.ml ocamlfind ocamlopt -package dash,ctypes,ctypes.foreign -linkpkg test.ml -o test.native +test.byte : test.ml + ocamlfind ocamlc -package dash,ctypes,ctypes.foreign -linkpkg test.ml -o test.byte + clean : - rm -f *.o *.cmi *.cmx test.native test.err + rm -f *.o *.cmo *.cmi *.cmx test.native test.byte test.err From c010021dc0394e69099b1d7563504e57658650da Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 20 Dec 2018 13:00:41 -0500 Subject: [PATCH 253/401] add debug info, tests passing --- .gitignore | 1 + ocaml/Makefile | 4 ++-- test/Makefile | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 670b1f9..a510e90 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,4 @@ src/libdash.a m4 libtool ltmain.sh +ocamlprof.dump diff --git a/ocaml/Makefile b/ocaml/Makefile index 13bb91e..f225468 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -7,7 +7,7 @@ install : all ocamlfind install dash META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx dash.cmxa : dash.mli dash.ml ast.mli ast.ml - ocamlfind ocamlmklib -package ctypes,ctypes.foreign -cclib -ldash $^ -o dash + ocamlfind ocamlmklib -g -package ctypes,ctypes.foreign -cclib -ldash $^ -o dash clean : - rm -f *.o *.cmi *.cmx dash.a dash.cmxa + rm -f *.o *.cmo *.cmi *.cmx dash.a dash.cma dash.cmxa diff --git a/test/Makefile b/test/Makefile index f6935d9..729f19c 100644 --- a/test/Makefile +++ 
b/test/Makefile @@ -14,10 +14,10 @@ test : test.native test.byte $(wildcard tests/*) test.native : test.ml - ocamlfind ocamlopt -package dash,ctypes,ctypes.foreign -linkpkg test.ml -o test.native + ocamlfind ocamlopt -g -package dash,ctypes,ctypes.foreign -linkpkg $^ -o test.native -test.byte : test.ml - ocamlfind ocamlc -package dash,ctypes,ctypes.foreign -linkpkg test.ml -o test.byte +test.byte : test.ml + ocamlfind ocamlcp -p a -package dash,ctypes,ctypes.foreign -linkpkg $^ -o test.byte clean : rm -f *.o *.cmo *.cmi *.cmx test.native test.byte test.err From 7a9766045e321e158d532bb79fb1c49409345c93 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 3 Jan 2019 15:20:01 -0500 Subject: [PATCH 254/401] fix bug in tildes in arithmetic mode --- ocaml/ast.ml | 9 +++++++-- test/tests/tilde_arith | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 test/tests/tilde_arith diff --git a/ocaml/ast.ml b/ocaml/ast.ml index c3e7cfe..72010c4 100644 --- a/ocaml/ast.ml +++ b/ocaml/ast.ml @@ -268,8 +268,12 @@ and parse_arg (s : char list) (bqlist : nodelist structure ptr) stack = arg_char (Q a) s bqlist stack' (* tildes *) | '~'::s,stack -> - let uname,s' = parse_tilde [] s in - arg_char (T uname) s' bqlist stack + if List.exists (fun m -> m = `CTLQuo || m = `CTLAri) stack + then (* we're in arithmetic or double quotes, so tilde is ignored *) + arg_char (C '~') s bqlist stack + else + let uname,s' = parse_tilde [] s in + arg_char (T uname) s' bqlist stack (* ordinary character *) | c::s,_ -> arg_char (C c) s bqlist stack @@ -287,6 +291,7 @@ and parse_tilde acc = | ':'::_ as s -> ret, s | '/'::_ as s -> ret, s (* ordinary char *) + (* TODO 2019-01-03 only characters from the portable character set *) | c::s' -> parse_tilde (acc @ [c]) s' and arg_char c s bqlist stack = diff --git a/test/tests/tilde_arith b/test/tests/tilde_arith new file mode 100644 index 0000000..8eba810 --- /dev/null +++ b/test/tests/tilde_arith @@ -0,0 +1 @@ +echo $((~10)) From 
3c24161bd61195ced635b30a29d9ebec07f4d97a Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 24 Apr 2019 14:07:07 -0400 Subject: [PATCH 255/401] remove call to reset() that was breaking parse error handling --- src/parser.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/parser.c b/src/parser.c index 1f78809..5a6b2e8 100644 --- a/src/parser.c +++ b/src/parser.c @@ -170,7 +170,7 @@ parsecmd(int interact) } // libdash -/* 2018-09-25 manually install a handler here */ +/* 2018-09-25 manually install a handler here so we can return an appropriate error code */ union node * parsecmd_safe(int interact) { @@ -185,7 +185,6 @@ parsecmd_safe(int interact) needprompt = 0; if (unlikely(setjmp(jmploc.loc))) { - reset(); return NERR; } handler = &jmploc; From 5a41da92cf89c9bc7a5d9513f4993fb96cad0a41 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 25 Apr 2019 15:41:03 -0400 Subject: [PATCH 256/401] fix handling of empty aliases --- src/parser.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/parser.c b/src/parser.c index 5a6b2e8..531ecbc 100644 --- a/src/parser.c +++ b/src/parser.c @@ -508,6 +508,9 @@ TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? 
wordtext : "")); break; case TWORD: case TREDIR: +// libdash +/* 2019-04-25 to allow for proper handling of empty aliases */ + case TNL: tokpushback++; return simplecmd(); } @@ -750,6 +753,9 @@ readtoken(void) kwd |= checkkwd; checkkwd = 0; +// libdash +/* 2019-04-25 to handle empty aliases */ +ignorenl: if (t != TWORD || quoteflag) { goto out; } @@ -770,10 +776,15 @@ readtoken(void) if (kwd & CHKALIAS) { struct alias *ap; if ((ap = lookupalias(wordtext, 1)) != NULL) { +// libdash +/* 2019-04-25 to handle empty aliases */ if (*ap->val) { pushstring(ap->val, ap); - } - goto top; + goto top; + } else { + t = xxreadtoken(); + goto ignorenl; + } } } out: From bd68b02392e2dc2deca3dfc623c1f22eb3cc79ab Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 24 Jun 2019 09:43:25 -0400 Subject: [PATCH 257/401] fix ocaml version at 4.07 --- Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile b/Dockerfile index 552c51a..314cea6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,6 +8,9 @@ ENV DEBIAN_FRONTEND=noninteractive # system support for libdash; libgmp for zarith for lem RUN sudo apt-get install -y autoconf autotools-dev libtool pkg-config libffi-dev +# because extunix needs camlp4, which isn't ready yet :( 2019-06-24 +RUN opam switch 4.07 + # make sure we have ocamlfind and ocamlbuild RUN opam install ocamlfind ocamlbuild From 92d2961013650ea1ae728e84bf79d98a60e7608c Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 8 Jul 2019 12:54:24 -0400 Subject: [PATCH 258/401] fix redir parsing --- ocaml/ast.ml | 14 ++++++++++---- ocaml/ast.mli | 2 +- ocaml/dash.ml | 9 ++++++++- test/tests/redir_indirect | 1 + 4 files changed, 20 insertions(+), 6 deletions(-) create mode 100644 test/tests/redir_indirect diff --git a/ocaml/ast.ml b/ocaml/ast.ml index 72010c4..14a8241 100644 --- a/ocaml/ast.ml +++ b/ocaml/ast.ml @@ -18,7 +18,7 @@ type t = and assign = string * arg and redirection = | File of redir_type * int * arg - | Dup of dup_type * int * int + | 
Dup of dup_type * int * arg | Heredoc of heredoc_type * int * arg and redir_type = To | Clobber | From | FromTo | Append and dup_type = ToFD | FromFD @@ -168,7 +168,13 @@ and redirs (n : node union ptr) = File (ty,getf n nfile_fd,to_arg (getf n nfile_fname @-> node_narg)) in let mk_dup ty = let n = n @-> node_ndup in - Dup (ty,getf n ndup_fd,getf n ndup_dupfd) in + let vname = getf n ndup_vname in + let tgt= + if nullptr vname + then List.map (fun c -> C c) (explode (string_of_int (getf n ndup_dupfd))) + else to_arg (vname @-> node_narg) + in + Dup (ty,getf n ndup_fd,tgt) in let mk_here ty = let n = n @-> node_nhere in Heredoc (ty,getf n nhere_fd,to_arg (getf n nhere_doc @-> node_narg)) in @@ -412,8 +418,8 @@ and string_of_redir = function | File (From,fd,a) -> show_unless 0 fd ^ "<" ^ string_of_arg a | File (FromTo,fd,a) -> show_unless 0 fd ^ "<>" ^ string_of_arg a | File (Append,fd,a) -> show_unless 1 fd ^ ">>" ^ string_of_arg a - | Dup (ToFD,fd,tgt) -> show_unless 1 fd ^ ">&" ^ string_of_int tgt - | Dup (FromFD,fd,tgt) -> show_unless 0 fd ^ "<&" ^ string_of_int tgt + | Dup (ToFD,fd,tgt) -> show_unless 1 fd ^ ">&" ^ string_of_arg tgt + | Dup (FromFD,fd,tgt) -> show_unless 0 fd ^ "<&" ^ string_of_arg tgt | Heredoc (t,fd,a) -> let heredoc = string_of_arg a in let marker = fresh_marker (lines heredoc) "EOF" in diff --git a/ocaml/ast.mli b/ocaml/ast.mli index d64fe25..bbb765b 100644 --- a/ocaml/ast.mli +++ b/ocaml/ast.mli @@ -18,7 +18,7 @@ type t = and assign = string * arg and redirection = File of redir_type * int * arg - | Dup of dup_type * int * int + | Dup of dup_type * int * arg | Heredoc of heredoc_type * int * arg and redir_type = To | Clobber | From | FromTo | Append and dup_type = ToFD | FromFD diff --git a/ocaml/dash.ml b/ocaml/dash.ml index a0d6879..991ee21 100644 --- a/ocaml/dash.ml +++ b/ocaml/dash.ml @@ -457,7 +457,14 @@ and shredir (n : node union ptr) : string = and show_redir n : string = match n with | `File (src,sym,f) -> show_redir_src (getf f 
nfile_fd) src ^ sym ^ sharg ((getf f nfile_fname) @-> node_narg) - | `Dup (src,sym,d) -> show_redir_src (getf d ndup_fd) src ^ sym ^ string_of_int (getf d ndup_dupfd) + | `Dup (src,sym,d) -> + let vname = getf d ndup_vname in + let tgt = + if nullptr vname + then string_of_int (getf d ndup_dupfd) + else sharg (vname @-> node_narg) + in + show_redir_src (getf d ndup_fd) src ^ sym ^ tgt | `Here (src,sym,exp,h) -> let heredoc = sharg ((getf h nhere_doc) @-> node_narg) in let marker = fresh_marker (lines heredoc) "EOF" in diff --git a/test/tests/redir_indirect b/test/tests/redir_indirect new file mode 100644 index 0000000..16e2052 --- /dev/null +++ b/test/tests/redir_indirect @@ -0,0 +1 @@ +x=1; echo msg 2>&$x From e308612dd4d29752ca2a74cfa274feade7195efa Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 8 Jul 2019 12:57:20 -0400 Subject: [PATCH 259/401] fix spacing [ci skip] --- ocaml/ast.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/ast.ml b/ocaml/ast.ml index 14a8241..860ae25 100644 --- a/ocaml/ast.ml +++ b/ocaml/ast.ml @@ -169,7 +169,7 @@ and redirs (n : node union ptr) = let mk_dup ty = let n = n @-> node_ndup in let vname = getf n ndup_vname in - let tgt= + let tgt = if nullptr vname then List.map (fun c -> C c) (explode (string_of_int (getf n ndup_dupfd))) else to_arg (vname @-> node_narg) From d12e01a66985c048e24b96e9f4af1314b8f3a5b3 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 8 Jul 2019 12:58:53 -0400 Subject: [PATCH 260/401] add back in logic for closed fds --- ocaml/ast.ml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ocaml/ast.ml b/ocaml/ast.ml index 860ae25..9e62b02 100644 --- a/ocaml/ast.ml +++ b/ocaml/ast.ml @@ -171,7 +171,10 @@ and redirs (n : node union ptr) = let vname = getf n ndup_vname in let tgt = if nullptr vname - then List.map (fun c -> C c) (explode (string_of_int (getf n ndup_dupfd))) + then let dupfd = getf n ndup_dupfd in + if dupfd = -1 + then [C '-'] + else 
List.map (fun c -> C c) (explode (string_of_int dupfd)) else to_arg (vname @-> node_narg) in Dup (ty,getf n ndup_fd,tgt) in From 8d128aca12b20994ebaab800dad236fb7e82cc79 Mon Sep 17 00:00:00 2001 From: tucak Date: Fri, 24 Jan 2020 22:01:15 +0100 Subject: [PATCH 261/401] Remove the root stackmark. Stop keeping track of the root stackmark and let the user handle all stack operations. Trying to manage the root stackmark in the library just leads to errors where the user ends up with stackmarks that point to places that we have already popped. --- ocaml/dash.ml | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/ocaml/dash.ml b/ocaml/dash.ml index 991ee21..1283651 100644 --- a/ocaml/dash.ml +++ b/ocaml/dash.ml @@ -31,11 +31,9 @@ let dash_init : unit -> unit = foreign "init" (void @-> returning void) let initialize_dash_errno : unit -> unit = foreign "initialize_dash_errno" (void @-> returning void) -let root_stackmark = ref None let initialize () = initialize_dash_errno (); - dash_init (); - root_stackmark := Some (init_stack ()) + dash_init () let popfile : unit -> unit = foreign "popfile" (void @-> returning void) @@ -257,15 +255,7 @@ let parse_next ?interactive:(i=false) () = if eqptr n neof then Done else if eqptr n nerr - then - begin - begin - match !root_stackmark with - | None -> failwith "!!! missing root stackmark" - | Some smark -> pop_stack smark - end; - Error - end + then Error else if nullptr n then Null (* comment or blank line or error ... 
*) else Parsed n From ab5158509ad11c9e8aec6d4875b6d676e015ea39 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 31 Jan 2020 11:32:51 -0800 Subject: [PATCH 262/401] pushing on tests after tiny fix---ast.ml is out of sync :( --- ocaml/ast.ml | 8 ++++++-- test/failing/backslash | 1 + test/tests/aaaa | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 test/failing/backslash create mode 100644 test/tests/aaaa diff --git a/ocaml/ast.ml b/ocaml/ast.ml index 9e62b02..adb9d91 100644 --- a/ocaml/ast.ml +++ b/ocaml/ast.ml @@ -88,7 +88,11 @@ open Foreign open Dash let skip = Command (-1,[],[],[]) - + +let special_chars : char list = explode "|&;<>()$`\\\"'" + +let needs_escaping c = List.mem c special_chars + let rec of_node (n : node union ptr) : t = if nullptr n then skip @@ -393,7 +397,7 @@ and string_of_arg_char = function | E '!' -> "\\!" | E '&' -> "\\&" | E '|' -> "\\|" - | E ';' -> "\\;" + | E ';' -> "\\;" | C c -> String.make 1 c | E c -> Char.escaped c | T None -> "~" diff --git a/test/failing/backslash b/test/failing/backslash new file mode 100644 index 0000000..2cb4253 --- /dev/null +++ b/test/failing/backslash @@ -0,0 +1 @@ +printf %s\\n foobar\|\&\;\<\>\(\)\$\`\\\"\'\ \?\*\[\ diff --git a/test/tests/aaaa b/test/tests/aaaa new file mode 100644 index 0000000..04d8190 --- /dev/null +++ b/test/tests/aaaa @@ -0,0 +1 @@ +eval "\"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" From 4897bfdfb4ef872afaa27f68f4525a7e16f7b181 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 31 Jan 
2020 11:43:29 -0800 Subject: [PATCH 263/401] another broken test --- test/failing/aaaa_single | 1 + 1 file changed, 1 insertion(+) create mode 100644 test/failing/aaaa_single diff --git a/test/failing/aaaa_single b/test/failing/aaaa_single new file mode 100644 index 0000000..e75bf90 --- /dev/null +++ b/test/failing/aaaa_single @@ -0,0 +1 @@ +eval '"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA' From cc18fa23aed65ba19149e5d04b3048258691fcd9 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 31 Jan 2020 14:55:58 -0800 Subject: [PATCH 264/401] basic opam file working --- libdash.opam | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 libdash.opam diff --git a/libdash.opam b/libdash.opam new file mode 100644 index 0000000..54b0c47 --- /dev/null +++ b/libdash.opam @@ -0,0 +1,25 @@ +opam-version: "2.0" +name: "libdash" +version: "0.1" +synopsis: "Bindings to the dash shell's parser" +maintainer: "Michael Greenberg " +authors: "Michael Greenberg " +license: "BSD" +homepage: "https://github.com/mgree/libdash" +bug-reports: "https://github.com/mgree/libdash/issues" +depends: [ + "ocaml" {>= "4.0.7"} + "ctypes" {= "0.11.5"} + "ctypes-foreign" {>= "0.4.0"} +] +build: [ + ["./autogen.sh"] + ["./configure" "--prefix=%{prefix}%" "--libdir=%{lib}%"] + [make] +] +install: [ + [make "install"] + [make "-C" "ocaml"] + [make "-C" "ocaml" "install"] +] +dev-repo: "git+https:///github.com/mgree/libdash" From f3efe5246a74141d6405d43f61a03e6ab2b1777a Mon Sep 17 00:00:00 2001 From: Michael Greenberg 
Date: Fri, 31 Jan 2020 15:19:26 -0800 Subject: [PATCH 265/401] add url info --- libdash.opam | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libdash.opam b/libdash.opam index 54b0c47..0417ecc 100644 --- a/libdash.opam +++ b/libdash.opam @@ -23,3 +23,7 @@ install: [ [make "-C" "ocaml" "install"] ] dev-repo: "git+https:///github.com/mgree/libdash" +url { + src: "https://github.com/mgree/libdash/archive/0.1.tar.gz" + checksum: "md5=23db9e4b424fc293a7b57545befc6e66" +} From f97b53ae331afd7a2a161f8c5a62fa7bcbaff4aa Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 17 Feb 2020 10:55:04 -0800 Subject: [PATCH 266/401] rpath to find installed libdash files seems to be working --- ocaml/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/Makefile b/ocaml/Makefile index f225468..f8d39c5 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -7,7 +7,7 @@ install : all ocamlfind install dash META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx dash.cmxa : dash.mli dash.ml ast.mli ast.ml - ocamlfind ocamlmklib -g -package ctypes,ctypes.foreign -cclib -ldash $^ -o dash + ocamlfind ocamlmklib -g -package ctypes,ctypes.foreign -ccopt -Wl,-rpath=`ocamlfind query libdash` -cclib -ldash $^ -o dash clean : rm -f *.o *.cmo *.cmi *.cmx dash.a dash.cma dash.cmxa From 6987b66605bad3d7fc469c0fd3b85c55bd4b6045 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 3 Feb 2020 09:43:17 -0800 Subject: [PATCH 267/401] working on opam file --- Dockerfile | 17 +++++++++-------- libdash.opam | 4 ++++ ocaml/Makefile | 2 +- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index 314cea6..7f73b3c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,9 +15,8 @@ RUN opam switch 4.07 RUN opam install ocamlfind ocamlbuild # set up FFI for libdash; num library for lem; extunix for shell syscalls -RUN opam pin add ctypes 0.11.5 -RUN opam install ctypes-foreign -RUN opam install extunix 
+RUN opam pin add -n ctypes 0.11.5 +RUN opam install ctypes-foreign ctypes WORKDIR /home/opam @@ -26,14 +25,16 @@ WORKDIR /home/opam ADD --chown=opam:opam . libdash # build libdash, expose shared object -RUN cd libdash; ./autogen.sh && ./configure --prefix=/usr --libdir=/usr/lib/x86_64-linux-gnu -RUN cd libdash; make -RUN cd libdash; sudo make install +#RUN cd libdash; ./autogen.sh && ./configure --prefix=/usr --libdir=/usr/lib/x86_64-linux-gnu +#RUN cd libdash; make +#RUN cd libdash; sudo make install # build ocaml bindings -RUN cd libdash/ocaml; opam config exec -- make && opam config exec -- make install +#RUN cd libdash/ocaml; opam config exec -- make && opam config exec -- make install # system test -RUN cd libdash/test; opam config exec -- make && opam config exec make test +#RUN cd libdash/test; opam config exec -- make && opam config exec make test + +RUN (cd libdash; opam install .) ENTRYPOINT [ "opam", "config", "exec", "--" ] CMD [ "bash" ] diff --git a/libdash.opam b/libdash.opam index 0417ecc..05dacfa 100644 --- a/libdash.opam +++ b/libdash.opam @@ -9,6 +9,7 @@ homepage: "https://github.com/mgree/libdash" bug-reports: "https://github.com/mgree/libdash/issues" depends: [ "ocaml" {>= "4.0.7"} + "ocamlfind" {>= "1.8.0"} "ctypes" {= "0.11.5"} "ctypes-foreign" {>= "0.4.0"} ] @@ -22,6 +23,9 @@ install: [ [make "-C" "ocaml"] [make "-C" "ocaml" "install"] ] +remove: [ + ["ocamlfind" "remove" "libdash"] +] dev-repo: "git+https:///github.com/mgree/libdash" url { src: "https://github.com/mgree/libdash/archive/0.1.tar.gz" diff --git a/ocaml/Makefile b/ocaml/Makefile index f8d39c5..281f5fd 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -4,7 +4,7 @@ all : dash.cmxa dash.cma install : all if ocamlfind query dash; then ocamlfind remove dash; fi - ocamlfind install dash META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx + ocamlfind install libdash META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx 
ast.mli ast.cmi ast.cmo ast.cmx dash.cmxa : dash.mli dash.ml ast.mli ast.ml ocamlfind ocamlmklib -g -package ctypes,ctypes.foreign -ccopt -Wl,-rpath=`ocamlfind query libdash` -cclib -ldash $^ -o dash From b0c4524a08ec181edf36ff78db79637cd91e0603 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 3 Feb 2020 11:22:06 -0800 Subject: [PATCH 268/401] flailing --- Dockerfile | 9 +++++---- libdash.opam | 6 ++++-- ocaml/Makefile | 6 +++--- test/Makefile | 4 ++-- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7f73b3c..ddc11fc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # start with a reasonable image. Debian 9 stretch is what's on the POSIX testing VM -FROM ocaml/opam2:debian-9 +FROM ocaml/opam2:debian-stable # silence apt # TODO this still isn't silencing it :( @@ -8,7 +8,8 @@ ENV DEBIAN_FRONTEND=noninteractive # system support for libdash; libgmp for zarith for lem RUN sudo apt-get install -y autoconf autotools-dev libtool pkg-config libffi-dev -# because extunix needs camlp4, which isn't ready yet :( 2019-06-24 +RUN opam update + RUN opam switch 4.07 # make sure we have ocamlfind and ocamlbuild @@ -34,7 +35,7 @@ ADD --chown=opam:opam . libdash # system test #RUN cd libdash/test; opam config exec -- make && opam config exec make test -RUN (cd libdash; opam install .) +RUN (cd libdash; opam exec -- opam install .) 
-ENTRYPOINT [ "opam", "config", "exec", "--" ] +ENTRYPOINT [ "opam", "exec", "--" ] CMD [ "bash" ] diff --git a/libdash.opam b/libdash.opam index 05dacfa..6304b92 100644 --- a/libdash.opam +++ b/libdash.opam @@ -12,14 +12,16 @@ depends: [ "ocamlfind" {>= "1.8.0"} "ctypes" {= "0.11.5"} "ctypes-foreign" {>= "0.4.0"} +# "conf-autoconf" {build} +# "conf-libtool" {build} ] build: [ ["./autogen.sh"] - ["./configure" "--prefix=%{prefix}%" "--libdir=%{lib}%"] + ["./configure" "--prefix=%{prefix}%" "--libdir=%{stublibs}%"] [make] + [make "install"] ] install: [ - [make "install"] [make "-C" "ocaml"] [make "-C" "ocaml" "install"] ] diff --git a/ocaml/Makefile b/ocaml/Makefile index 281f5fd..ea02c1a 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -3,11 +3,11 @@ all : dash.cmxa dash.cma install : all - if ocamlfind query dash; then ocamlfind remove dash; fi - ocamlfind install libdash META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx + if ocamlfind query libdash; then ocamlfind remove libdash; fi + ocamlfind install libdash META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx libdash.a dash.cmxa : dash.mli dash.ml ast.mli ast.ml - ocamlfind ocamlmklib -g -package ctypes,ctypes.foreign -ccopt -Wl,-rpath=`ocamlfind query libdash` -cclib -ldash $^ -o dash + ocamlfind ocamlmklib -g -package ctypes,ctypes.foreign -linkall libdash.a $^ -o dash clean : rm -f *.o *.cmo *.cmi *.cmx dash.a dash.cma dash.cmxa diff --git a/test/Makefile b/test/Makefile index 729f19c..1789759 100644 --- a/test/Makefile +++ b/test/Makefile @@ -14,10 +14,10 @@ test : test.native test.byte $(wildcard tests/*) test.native : test.ml - ocamlfind ocamlopt -g -package dash,ctypes,ctypes.foreign -linkpkg $^ -o test.native + ocamlfind ocamlopt -g -package libdash -linkpkg $^ -o test.native test.byte : test.ml - ocamlfind ocamlcp -p a -package dash,ctypes,ctypes.foreign -linkpkg $^ -o test.byte + ocamlfind ocamlcp -p a 
-package libdash -linkpkg $^ -o test.byte clean : rm -f *.o *.cmo *.cmi *.cmx test.native test.byte test.err From 3cdd87223b49c087f47084c6449e34fbeb828e52 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 3 Feb 2020 12:10:49 -0800 Subject: [PATCH 269/401] more flailing --- .dockerignore | 2 ++ Dockerfile | 2 +- libdash.opam | 2 +- ocaml/META | 1 - ocaml/Makefile | 4 ++-- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.dockerignore b/.dockerignore index 75eeee1..3380f3a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -17,6 +17,7 @@ Dockerfile.test Dockerfile.web .git .gitmodules +.dockerignore **/.gitignore **/*~ **/.#* @@ -34,6 +35,7 @@ Dockerfile.web ################################################################################ # libdash ignores +libtool # geneated by libtool ltmain.sh diff --git a/Dockerfile b/Dockerfile index ddc11fc..2044b8d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,7 +35,7 @@ ADD --chown=opam:opam . libdash # system test #RUN cd libdash/test; opam config exec -- make && opam config exec make test -RUN (cd libdash; opam exec -- opam install .) +RUN (cd libdash; eval $(opam env); opam install .) 
ENTRYPOINT [ "opam", "exec", "--" ] CMD [ "bash" ] diff --git a/libdash.opam b/libdash.opam index 6304b92..6de0233 100644 --- a/libdash.opam +++ b/libdash.opam @@ -20,9 +20,9 @@ build: [ ["./configure" "--prefix=%{prefix}%" "--libdir=%{stublibs}%"] [make] [make "install"] + [make "-C" "ocaml"] ] install: [ - [make "-C" "ocaml"] [make "-C" "ocaml" "install"] ] remove: [ diff --git a/ocaml/META b/ocaml/META index d9e5dcd..1bb89ad 100644 --- a/ocaml/META +++ b/ocaml/META @@ -3,4 +3,3 @@ requires = "ctypes,ctypes.foreign" version = "0.1" archive(native) = "dash.cmxa" archive(byte) = "dash.cma" -linkopts="-cclib -ldash" \ No newline at end of file diff --git a/ocaml/Makefile b/ocaml/Makefile index ea02c1a..500201d 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -4,10 +4,10 @@ all : dash.cmxa dash.cma install : all if ocamlfind query libdash; then ocamlfind remove libdash; fi - ocamlfind install libdash META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx libdash.a + ocamlfind install libdash META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx dash.cmxa : dash.mli dash.ml ast.mli ast.ml - ocamlfind ocamlmklib -g -package ctypes,ctypes.foreign -linkall libdash.a $^ -o dash + ocamlfind ocamlmklib -g -package ctypes,ctypes.foreign $^ -o dash clean : rm -f *.o *.cmo *.cmi *.cmx dash.a dash.cma dash.cmxa From fdc1dbe39cf70efe1e1f938138b63a05f5d84979 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 5 Feb 2020 11:35:34 -0800 Subject: [PATCH 270/401] everything builds, LD_LIBRARY_PATH issues remain --- libdash.opam | 1 + ocaml/Makefile | 5 ++++- ocaml/mk_meta.sh | 10 ++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100755 ocaml/mk_meta.sh diff --git a/libdash.opam b/libdash.opam index 6de0233..5f3671b 100644 --- a/libdash.opam +++ b/libdash.opam @@ -24,6 +24,7 @@ build: [ ] install: [ [make "-C" "ocaml" "install"] + [make "-C" "test" "test"] 
{with-test} ] remove: [ ["ocamlfind" "remove" "libdash"] diff --git a/ocaml/Makefile b/ocaml/Makefile index 500201d..6a16175 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -2,10 +2,13 @@ all : dash.cmxa dash.cma -install : all +install : all META if ocamlfind query libdash; then ocamlfind remove libdash; fi ocamlfind install libdash META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx +META : mk_meta.sh + ./mk_meta.sh + dash.cmxa : dash.mli dash.ml ast.mli ast.ml ocamlfind ocamlmklib -g -package ctypes,ctypes.foreign $^ -o dash diff --git a/ocaml/mk_meta.sh b/ocaml/mk_meta.sh new file mode 100755 index 0000000..3234f9f --- /dev/null +++ b/ocaml/mk_meta.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +cat >META < Date: Wed, 5 Feb 2020 21:41:33 -0800 Subject: [PATCH 271/401] META is generated --- ocaml/META | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 ocaml/META diff --git a/ocaml/META b/ocaml/META deleted file mode 100644 index 1bb89ad..0000000 --- a/ocaml/META +++ /dev/null @@ -1,5 +0,0 @@ -description = "bindings to the dash shell as a library" -requires = "ctypes,ctypes.foreign" -version = "0.1" -archive(native) = "dash.cmxa" -archive(byte) = "dash.cma" From 3d5df38bf9ecf2ddcf9100e11d410165f7f573d7 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 7 Feb 2020 11:23:29 -0800 Subject: [PATCH 272/401] use _:lib to avoid permission problem (???) 
--- libdash.opam | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdash.opam b/libdash.opam index 5f3671b..b79b967 100644 --- a/libdash.opam +++ b/libdash.opam @@ -17,7 +17,7 @@ depends: [ ] build: [ ["./autogen.sh"] - ["./configure" "--prefix=%{prefix}%" "--libdir=%{stublibs}%"] + ["./configure" "--prefix=%{prefix}%" "--libdir=%{_:lib}%"] [make] [make "install"] [make "-C" "ocaml"] From 4cb3a100bbfced0f2ca5e932c5a6cd588f77358f Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 7 Feb 2020 11:28:05 -0800 Subject: [PATCH 273/401] revert --- libdash.opam | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdash.opam b/libdash.opam index b79b967..5f3671b 100644 --- a/libdash.opam +++ b/libdash.opam @@ -17,7 +17,7 @@ depends: [ ] build: [ ["./autogen.sh"] - ["./configure" "--prefix=%{prefix}%" "--libdir=%{_:lib}%"] + ["./configure" "--prefix=%{prefix}%" "--libdir=%{stublibs}%"] [make] [make "install"] [make "-C" "ocaml"] From 8aa50f8b416abb882587c0e955f26a4e3c1e16cc Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 17 Feb 2020 13:20:59 -0800 Subject: [PATCH 274/401] pushing --- libdash.opam | 2 +- ocaml/Makefile | 2 +- ocaml/mk_meta.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libdash.opam b/libdash.opam index 5f3671b..b79b967 100644 --- a/libdash.opam +++ b/libdash.opam @@ -17,7 +17,7 @@ depends: [ ] build: [ ["./autogen.sh"] - ["./configure" "--prefix=%{prefix}%" "--libdir=%{stublibs}%"] + ["./configure" "--prefix=%{prefix}%" "--libdir=%{_:lib}%"] [make] [make "install"] [make "-C" "ocaml"] diff --git a/ocaml/Makefile b/ocaml/Makefile index 6a16175..24d7cb6 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -10,7 +10,7 @@ META : mk_meta.sh ./mk_meta.sh dash.cmxa : dash.mli dash.ml ast.mli ast.ml - ocamlfind ocamlmklib -g -package ctypes,ctypes.foreign $^ -o dash + ocamlfind ocamlmklib -g -package ctypes,ctypes.foreign -dllpath `opam var lib`/libdash $^ -o dash clean : rm -f *.o *.cmo 
*.cmi *.cmx dash.a dash.cma dash.cmxa diff --git a/ocaml/mk_meta.sh b/ocaml/mk_meta.sh index 3234f9f..2cb1243 100755 --- a/ocaml/mk_meta.sh +++ b/ocaml/mk_meta.sh @@ -6,5 +6,5 @@ requires = "ctypes,ctypes.foreign" version = "0.1" archive(native) = "dash.cmxa" archive(byte) = "dash.cma" -linkopts="-ccopt -L$(opam var stublibs) -cclib -ldash" +inkopts="-ccopt -L$(ocamlfind query libdash) -cclib -Wl,-rpath=$(ocamlfind query libdash) -cclib -ldash" EOF From d1b7efe6648cb914728c609c87e77305c2725713 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 17 Feb 2020 15:47:42 -0800 Subject: [PATCH 275/401] actually working, for real --- Dockerfile | 6 +++--- ocaml/mk_meta.sh | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2044b8d..44e5d4a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,10 +32,10 @@ ADD --chown=opam:opam . libdash # build ocaml bindings #RUN cd libdash/ocaml; opam config exec -- make && opam config exec -- make install -# system test -#RUN cd libdash/test; opam config exec -- make && opam config exec make test - RUN (cd libdash; eval $(opam env); opam install .) 
+# system test +RUN cd libdash/test; opam config exec -- make && opam config exec make test + ENTRYPOINT [ "opam", "exec", "--" ] CMD [ "bash" ] diff --git a/ocaml/mk_meta.sh b/ocaml/mk_meta.sh index 2cb1243..d8d10c8 100755 --- a/ocaml/mk_meta.sh +++ b/ocaml/mk_meta.sh @@ -6,5 +6,6 @@ requires = "ctypes,ctypes.foreign" version = "0.1" archive(native) = "dash.cmxa" archive(byte) = "dash.cma" -inkopts="-ccopt -L$(ocamlfind query libdash) -cclib -Wl,-rpath=$(ocamlfind query libdash) -cclib -ldash" +linkopts(byte) ="-ccopt -L$(opam var lib)/libdash -cclib -ldash -dllpath $(opam var lib)/libdash" EOF + From 83777d4a60eb7e1e22206df14f427dafd3291d1f Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 17 Feb 2020 15:53:09 -0800 Subject: [PATCH 276/401] minimal linker options --- ocaml/mk_meta.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/mk_meta.sh b/ocaml/mk_meta.sh index d8d10c8..ffcc0cb 100755 --- a/ocaml/mk_meta.sh +++ b/ocaml/mk_meta.sh @@ -6,6 +6,6 @@ requires = "ctypes,ctypes.foreign" version = "0.1" archive(native) = "dash.cmxa" archive(byte) = "dash.cma" -linkopts(byte) ="-ccopt -L$(opam var lib)/libdash -cclib -ldash -dllpath $(opam var lib)/libdash" +linkopts(byte) = "-dllpath $(opam var lib)/libdash" EOF From 103299ec723df0f8e2ba70fa9f561e09e3f286ec Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 17 Feb 2020 19:35:34 -0800 Subject: [PATCH 277/401] remove need for explicit TMPDIR on macOS --- src/mktokens | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/mktokens b/src/mktokens index dcef676..818f73c 100644 --- a/src/mktokens +++ b/src/mktokens @@ -37,11 +37,10 @@ # token marks the end of a list. The third column is the name to print in # error messages. -: "${TMPDIR:=/tmp}" +: ${TMPDIR=/tmp} -cat > "${TMPDIR}"/ka$$ <<\! +cat > $TMPDIR/ka$$ <<\! 
TEOF 1 end of file -TBLANK 0 blank TNL 0 newline TSEMI 0 ";" TBACKGND 0 "&" @@ -71,28 +70,28 @@ TWHILE 0 "while" TBEGIN 0 "{" TEND 1 "}" ! -nl=`wc -l "${TMPDIR}"/ka$$` +nl=`wc -l ${TMPDIR}/ka$$` exec > token.h -awk '{print "#define " $1 " " NR-1}' "${TMPDIR}"/ka$$ +awk '{print "#define " $1 " " NR-1}' ${TMPDIR}/ka$$ exec > token_vars.h echo ' /* Array indicating which tokens mark the end of a list */ static const char tokendlist[] = {' -awk '{print "\t" $2 ","}' "${TMPDIR}"/ka$$ +awk '{print "\t" $2 ","}' ${TMPDIR}/ka$$ echo '}; static const char *const tokname[] = {' sed -e 's/"/\\"/g' \ -e 's/[^ ]*[ ][ ]*[^ ]*[ ][ ]*\(.*\)/ "\1",/' \ - "${TMPDIR}"/ka$$ + ${TMPDIR}/ka$$ echo '}; ' -sed 's/"//g' "${TMPDIR}"/ka$$ | awk ' +sed 's/"//g' ${TMPDIR}/ka$$ | awk ' /TNOT/{print "#define KWDOFFSET " NR-1; print ""; print "static const char *const parsekwd[] = {"} /TNOT/,/neverfound/{if (last) print " \"" last "\","; last = $3} END{print " \"" last "\"\n};"}' -rm "${TMPDIR}"/ka$$ +rm ${TMPDIR}/ka$$ From 3cec3fa6eb0d96046b6088298504a67b486e591f Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 17 Feb 2020 19:38:35 -0800 Subject: [PATCH 278/401] fixup unset or null --- src/mktokens | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mktokens b/src/mktokens index 818f73c..3ab7bc5 100644 --- a/src/mktokens +++ b/src/mktokens @@ -37,7 +37,7 @@ # token marks the end of a list. The third column is the name to print in # error messages. -: ${TMPDIR=/tmp} +: ${TMPDIR:=/tmp} cat > $TMPDIR/ka$$ <<\! TEOF 1 end of file From ffcce12471ca244eff865ffa5346c86a6531f999 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 17 Feb 2020 20:06:35 -0800 Subject: [PATCH 279/401] fix install ordering, everything works!!!!!! 
--- libdash.opam | 2 +- ocaml/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libdash.opam b/libdash.opam index b79b967..23e592a 100644 --- a/libdash.opam +++ b/libdash.opam @@ -19,10 +19,10 @@ build: [ ["./autogen.sh"] ["./configure" "--prefix=%{prefix}%" "--libdir=%{_:lib}%"] [make] - [make "install"] [make "-C" "ocaml"] ] install: [ + [make "install"] [make "-C" "ocaml" "install"] [make "-C" "test" "test"] {with-test} ] diff --git a/ocaml/Makefile b/ocaml/Makefile index 24d7cb6..663bd78 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -3,7 +3,7 @@ all : dash.cmxa dash.cma install : all META - if ocamlfind query libdash; then ocamlfind remove libdash; fi +# if ocamlfind query libdash; then ocamlfind remove libdash; fi ocamlfind install libdash META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx META : mk_meta.sh From ebbc61551370114a3afd5693c620c54cfcb5c85c Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 19 Feb 2020 08:40:19 -0800 Subject: [PATCH 280/401] add str dep to build --- ocaml/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/Makefile b/ocaml/Makefile index 663bd78..06865fb 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -10,7 +10,7 @@ META : mk_meta.sh ./mk_meta.sh dash.cmxa : dash.mli dash.ml ast.mli ast.ml - ocamlfind ocamlmklib -g -package ctypes,ctypes.foreign -dllpath `opam var lib`/libdash $^ -o dash + ocamlfind ocamlmklib -g -package str,ctypes,ctypes.foreign -dllpath `opam var lib`/libdash $^ -o dash clean : rm -f *.o *.cmo *.cmi *.cmx dash.a dash.cma dash.cmxa From a94c2397aa1e16bbf71ef9c0c414d9b96bed9772 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 19 Feb 2020 09:23:58 -0800 Subject: [PATCH 281/401] bump to 0.2 --- libdash.opam | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libdash.opam b/libdash.opam index 23e592a..adc0362 100644 --- a/libdash.opam +++ b/libdash.opam @@ -1,6 +1,6 
@@ opam-version: "2.0" name: "libdash" -version: "0.1" +version: "0.2" synopsis: "Bindings to the dash shell's parser" maintainer: "Michael Greenberg " authors: "Michael Greenberg " @@ -31,6 +31,5 @@ remove: [ ] dev-repo: "git+https:///github.com/mgree/libdash" url { - src: "https://github.com/mgree/libdash/archive/0.1.tar.gz" - checksum: "md5=23db9e4b424fc293a7b57545befc6e66" + src: "https://github.com/mgree/libdash/archive/0.2.tar.gz" } From c8cb8286c2ba18830e7b77f5a63def9edecd5188 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 19 Feb 2020 09:26:31 -0800 Subject: [PATCH 282/401] add checksum of new release --- libdash.opam | 1 + 1 file changed, 1 insertion(+) diff --git a/libdash.opam b/libdash.opam index adc0362..def3e02 100644 --- a/libdash.opam +++ b/libdash.opam @@ -32,4 +32,5 @@ remove: [ dev-repo: "git+https:///github.com/mgree/libdash" url { src: "https://github.com/mgree/libdash/archive/0.2.tar.gz" + checksum: "md5=a1781ceb16d45c493d8aacf510a7a4ae" } From accff6b433ec7117070631e7b1054afdce0d5177 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 19 Feb 2020 11:05:33 -0800 Subject: [PATCH 283/401] update OPAM file to use the right local libtoolize --- libdash.opam | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/libdash.opam b/libdash.opam index def3e02..82b546d 100644 --- a/libdash.opam +++ b/libdash.opam @@ -10,13 +10,18 @@ bug-reports: "https://github.com/mgree/libdash/issues" depends: [ "ocaml" {>= "4.0.7"} "ocamlfind" {>= "1.8.0"} - "ctypes" {= "0.11.5"} + "ctypes" {>= "0.11.5"} "ctypes-foreign" {>= "0.4.0"} # "conf-autoconf" {build} # "conf-libtool" {build} ] build: [ - ["./autogen.sh"] + ["libtoolize"] {os != "macos"} + ["glibtoolize"] {os = "macos"} + ["aclocal"] + ["autoheader"] + ["automake" "--add-missing"] + ["autoconf"] ["./configure" "--prefix=%{prefix}%" "--libdir=%{_:lib}%"] [make] [make "-C" "ocaml"] From b96136cf2781b74a24a24ee6c2d9934c6b8db0b8 Mon Sep 17 00:00:00 2001 From: Michael 
Greenberg Date: Wed, 19 Feb 2020 11:36:35 -0800 Subject: [PATCH 284/401] version bump, add str dependency in test --- libdash.opam | 5 ++--- test/Makefile | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/libdash.opam b/libdash.opam index 82b546d..b2ddec6 100644 --- a/libdash.opam +++ b/libdash.opam @@ -1,6 +1,6 @@ opam-version: "2.0" name: "libdash" -version: "0.2" +version: "0.2.1" synopsis: "Bindings to the dash shell's parser" maintainer: "Michael Greenberg " authors: "Michael Greenberg " @@ -36,6 +36,5 @@ remove: [ ] dev-repo: "git+https:///github.com/mgree/libdash" url { - src: "https://github.com/mgree/libdash/archive/0.2.tar.gz" - checksum: "md5=a1781ceb16d45c493d8aacf510a7a4ae" + src: "https://github.com/mgree/libdash/archive/0.2.1.tar.gz" } diff --git a/test/Makefile b/test/Makefile index 1789759..1408b96 100644 --- a/test/Makefile +++ b/test/Makefile @@ -14,10 +14,10 @@ test : test.native test.byte $(wildcard tests/*) test.native : test.ml - ocamlfind ocamlopt -g -package libdash -linkpkg $^ -o test.native + ocamlfind ocamlopt -g -package str,libdash -linkpkg $^ -o test.native test.byte : test.ml - ocamlfind ocamlcp -p a -package libdash -linkpkg $^ -o test.byte + ocamlfind ocamlcp -p a -package str,libdash -linkpkg $^ -o test.byte clean : rm -f *.o *.cmo *.cmi *.cmx test.native test.byte test.err From 24789e86a1844f9367e9cff4b7d9e505bd9d3b5b Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 19 Feb 2020 11:38:09 -0800 Subject: [PATCH 285/401] add back checksum (this is ridiculous) --- libdash.opam | 1 + 1 file changed, 1 insertion(+) diff --git a/libdash.opam b/libdash.opam index b2ddec6..eb3f4c1 100644 --- a/libdash.opam +++ b/libdash.opam @@ -37,4 +37,5 @@ remove: [ dev-repo: "git+https:///github.com/mgree/libdash" url { src: "https://github.com/mgree/libdash/archive/0.2.1.tar.gz" + checksum: "md5=8d71b366655871125286402f51d5225c" } From 13eaf255f8f43cf4ef99b28f8d2cde3d17da5135 Mon Sep 17 00:00:00 2001 From: Michael 
Greenberg Date: Thu, 20 Feb 2020 09:22:16 -0800 Subject: [PATCH 286/401] building into custom directory... not _quite_ there --- Dockerfile | 2 +- libdash.opam | 24 +++++++++--------------- mk_dot_install.sh | 22 ++++++++++++++++++++++ ocaml/Makefile | 4 ++-- ocaml/mk_meta.sh | 2 ++ 5 files changed, 36 insertions(+), 18 deletions(-) create mode 100755 mk_dot_install.sh diff --git a/Dockerfile b/Dockerfile index 44e5d4a..eccbb0b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,7 +32,7 @@ ADD --chown=opam:opam . libdash # build ocaml bindings #RUN cd libdash/ocaml; opam config exec -- make && opam config exec -- make install -RUN (cd libdash; eval $(opam env); opam install .) +RUN (cd libdash; eval $(opam env); opam install -v -t .) # system test RUN cd libdash/test; opam config exec -- make && opam config exec make test diff --git a/libdash.opam b/libdash.opam index eb3f4c1..e4958b3 100644 --- a/libdash.opam +++ b/libdash.opam @@ -1,6 +1,6 @@ opam-version: "2.0" name: "libdash" -version: "0.2.1" +version: "0.2.2" synopsis: "Bindings to the dash shell's parser" maintainer: "Michael Greenberg " authors: "Michael Greenberg " @@ -12,7 +12,7 @@ depends: [ "ocamlfind" {>= "1.8.0"} "ctypes" {>= "0.11.5"} "ctypes-foreign" {>= "0.4.0"} -# "conf-autoconf" {build} + "conf-autoconf" {build} # "conf-libtool" {build} ] build: [ @@ -22,20 +22,14 @@ build: [ ["autoheader"] ["automake" "--add-missing"] ["autoconf"] - ["./configure" "--prefix=%{prefix}%" "--libdir=%{_:lib}%"] + ["mkdir" "_build"] + ["./configure" "--prefix=%{build}%/_build"] [make] - [make "-C" "ocaml"] + [make "install"] # into _build + [make "-C" "ocaml" "all"] + ["./mk_dot_install.sh"] # PICK UP HERE: don't leave it to OPAM to install these :( ] -install: [ - [make "install"] - [make "-C" "ocaml" "install"] - [make "-C" "test" "test"] {with-test} -] -remove: [ - ["ocamlfind" "remove" "libdash"] +run-test: [ + [make "-C" "test" "test"] ] dev-repo: "git+https:///github.com/mgree/libdash" -url { - src: 
"https://github.com/mgree/libdash/archive/0.2.1.tar.gz" - checksum: "md5=8d71b366655871125286402f51d5225c" -} diff --git a/mk_dot_install.sh b/mk_dot_install.sh new file mode 100755 index 0000000..0ac9473 --- /dev/null +++ b/mk_dot_install.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +set -e + +libdash_files=$(ls _build/lib) +bindings_files="META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx" + +files= +for f in ${libdash_files} +do + files="${files} \"_build/lib/${f}\"" +done + +for f in ${bindings_files} +do + files="${files} \"ocaml/${f}\"" +done + +cat >libdash.install <META < Date: Thu, 20 Feb 2020 09:23:58 -0800 Subject: [PATCH 287/401] tests don't QUITE work --- libdash.opam | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libdash.opam b/libdash.opam index e4958b3..5710f75 100644 --- a/libdash.opam +++ b/libdash.opam @@ -29,7 +29,7 @@ build: [ [make "-C" "ocaml" "all"] ["./mk_dot_install.sh"] # PICK UP HERE: don't leave it to OPAM to install these :( ] -run-test: [ - [make "-C" "test" "test"] -] +#run-test: [ +# [make "-C" "test" "test"] +#] dev-repo: "git+https:///github.com/mgree/libdash" From 6094e5ca62d279fa0b657c1a6b9699096dab0906 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 20 Feb 2020 11:47:36 -0800 Subject: [PATCH 288/401] staged builds working with all tests passing --- README.md | 18 ++---------------- libdash.opam | 6 ++---- ocaml/Makefile | 21 +++++++++++++++++++++ 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 92da6b2..671d2fe 100644 --- a/README.md +++ b/README.md @@ -14,27 +14,13 @@ You should be able to simply run `docker build -t libdash .` to get a runnable e ## How to build it locally -Broadly: crib from the `Dockerfile`. More concretely, in the root directory run: - -``` -./autogen.sh && ./configure && make && sudo make install -``` - -This should construct an executable `src/dash` and a static library `src/libdash.a`. 
They will need to be installed globally for things to work well. - -Then run: - -``` -cd ocaml; make && make install -``` - -This will build the OCaml library and install it in your OPAM repository. There are tests in another directory; they will only build when libdash is actually installed. +Install the OPAM file: `opam pin add .` or `opam install .`. This will build the OCaml library and install it in your OPAM repository. There are tests in another directory; they will only build when libdash is actually installed. ``` cd test; make test ``` -The tests use `ocaml/round_trip.sh` to ensure that every tester file in `ocaml/tests` round-trips correctly through parsing and pretty printing. +The tests use `test/round_trip.sh` to ensure that every tester file in `test/tests` round-trips correctly through parsing and pretty printing. The OPAM package can be installed with the `-t` flag to run the tests internally; see `ocaml/Makefile`'s testing targets. # How to use the parser diff --git a/libdash.opam b/libdash.opam index 5710f75..08098f3 100644 --- a/libdash.opam +++ b/libdash.opam @@ -27,9 +27,7 @@ build: [ [make] [make "install"] # into _build [make "-C" "ocaml" "all"] - ["./mk_dot_install.sh"] # PICK UP HERE: don't leave it to OPAM to install these :( + ["./mk_dot_install.sh"] + [make "-C" "ocaml" "test"] {with-test} ] -#run-test: [ -# [make "-C" "test" "test"] -#] dev-repo: "git+https:///github.com/mgree/libdash" diff --git a/ocaml/Makefile b/ocaml/Makefile index 7e0d7e3..93aac09 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -12,5 +12,26 @@ META : mk_meta.sh dash.cmxa : dash.mli dash.ml ast.mli ast.ml ocamlfind ocamlmklib -g -package str,ctypes,ctypes.foreign -dllpath `opam var lib`/libdash $^ -o dash +test : test.native test.byte $(wildcard ../test/tests/*) + @echo "TESTING test.native" + @for f in ../test/tests/*; do \ + ../test/round_trip.sh ./test.native $$f 2>test.err; \ + done + @echo "TESTING test.byte" + @for f in ../test/tests/*; do \ + 
../test/round_trip.sh ./test.byte $$f 2>test.err; \ + done + +BUILD=$(abspath ../_build/lib) + +test.native : test.ml dash.cmxa + ocamlfind ocamlopt -g -package str,ctypes,ctypes.foreign -linkpkg -ccopt -L$(BUILD) -ccopt -Wl,-rpath=$(BUILD) dash.cmxa test.ml -o $@ + +test.byte : test.ml dash.cmxa + ocamlfind ocamlc -g -package str,ctypes,ctypes.foreign -linkpkg -I $(BUILD) -ccopt -L$(BUILD) -dllpath $(BUILD) dash.cma test.ml -o $@ + +test.ml : ../test/test.ml + cp $< $@ + clean : rm -f *.o *.cmo *.cmi *.cmx dash.a dash.cma dash.cmxa From 229d425c99c853a71dc80958d8edef8614691380 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 20 Feb 2020 11:49:54 -0800 Subject: [PATCH 289/401] add URL to OPAM file --- libdash.opam | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libdash.opam b/libdash.opam index 08098f3..6f27aa8 100644 --- a/libdash.opam +++ b/libdash.opam @@ -31,3 +31,7 @@ build: [ [make "-C" "ocaml" "test"] {with-test} ] dev-repo: "git+https:///github.com/mgree/libdash" +url { + src: "https://github.com/mgree/libdash/archive/0.2.2.tar.gz" + checksum: "md5=ef65c591a6ed23108795f093172fbc8a" +} From 5e14938cfa158ff64dd1a7a025ff819fc27b1314 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 20 Feb 2020 12:05:03 -0800 Subject: [PATCH 290/401] fix up makefiles for OPAM CI --- libdash.opam | 10 +++++----- ocaml/Makefile | 2 +- test/Makefile | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/libdash.opam b/libdash.opam index 6f27aa8..3423ef8 100644 --- a/libdash.opam +++ b/libdash.opam @@ -1,6 +1,6 @@ opam-version: "2.0" name: "libdash" -version: "0.2.2" +version: "0.2.3" synopsis: "Bindings to the dash shell's parser" maintainer: "Michael Greenberg " authors: "Michael Greenberg " @@ -31,7 +31,7 @@ build: [ [make "-C" "ocaml" "test"] {with-test} ] dev-repo: "git+https:///github.com/mgree/libdash" -url { - src: "https://github.com/mgree/libdash/archive/0.2.2.tar.gz" - checksum: "md5=ef65c591a6ed23108795f093172fbc8a" -} +#url { +# 
src: "https://github.com/mgree/libdash/archive/0.2.2.tar.gz" +# checksum: "md5=ef65c591a6ed23108795f093172fbc8a" +#} diff --git a/ocaml/Makefile b/ocaml/Makefile index 93aac09..780add1 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -25,7 +25,7 @@ test : test.native test.byte $(wildcard ../test/tests/*) BUILD=$(abspath ../_build/lib) test.native : test.ml dash.cmxa - ocamlfind ocamlopt -g -package str,ctypes,ctypes.foreign -linkpkg -ccopt -L$(BUILD) -ccopt -Wl,-rpath=$(BUILD) dash.cmxa test.ml -o $@ + ocamlfind ocamlopt -g -package str,ctypes,ctypes.foreign -linkpkg -ccopt -L$(BUILD) -ccopt -Wl,-rpath -ccopt -Wl,$(BUILD) dash.cmxa test.ml -o $@ test.byte : test.ml dash.cmxa ocamlfind ocamlc -g -package str,ctypes,ctypes.foreign -linkpkg -I $(BUILD) -ccopt -L$(BUILD) -dllpath $(BUILD) dash.cma test.ml -o $@ diff --git a/test/Makefile b/test/Makefile index 1408b96..c532d98 100644 --- a/test/Makefile +++ b/test/Makefile @@ -17,7 +17,7 @@ test.native : test.ml ocamlfind ocamlopt -g -package str,libdash -linkpkg $^ -o test.native test.byte : test.ml - ocamlfind ocamlcp -p a -package str,libdash -linkpkg $^ -o test.byte + ocamlfind ocamlc -g -package str,libdash -linkpkg $^ -o test.byte clean : rm -f *.o *.cmo *.cmi *.cmx test.native test.byte test.err From 0826edc66d7e172fb635aaf92dc03689a238fd03 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 20 Feb 2020 12:21:03 -0800 Subject: [PATCH 291/401] really forcing that library path home --- libdash.opam | 9 +++++---- ocaml/Makefile | 12 ++++++------ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/libdash.opam b/libdash.opam index 3423ef8..316e0b0 100644 --- a/libdash.opam +++ b/libdash.opam @@ -1,6 +1,6 @@ opam-version: "2.0" name: "libdash" -version: "0.2.3" +version: "0.2.4" synopsis: "Bindings to the dash shell's parser" maintainer: "Michael Greenberg " authors: "Michael Greenberg " @@ -31,7 +31,8 @@ build: [ [make "-C" "ocaml" "test"] {with-test} ] dev-repo: 
"git+https:///github.com/mgree/libdash" -#url { -# src: "https://github.com/mgree/libdash/archive/0.2.2.tar.gz" +url { + src: "https://github.com/mgree/libdash/archive/0.2.4.tar.gz" # checksum: "md5=ef65c591a6ed23108795f093172fbc8a" -#} +} + diff --git a/ocaml/Makefile b/ocaml/Makefile index 780add1..af58d56 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -1,3 +1,5 @@ +BUILD=$(abspath ../_build/lib) + .PHONY : all install clean all : dash.cmxa dash.cma META @@ -15,20 +17,18 @@ dash.cmxa : dash.mli dash.ml ast.mli ast.ml test : test.native test.byte $(wildcard ../test/tests/*) @echo "TESTING test.native" @for f in ../test/tests/*; do \ - ../test/round_trip.sh ./test.native $$f 2>test.err; \ + LD_LIBRARY_PATH=$(BUILD) ../test/round_trip.sh ./test.native $$f 2>test.err; \ done @echo "TESTING test.byte" @for f in ../test/tests/*; do \ - ../test/round_trip.sh ./test.byte $$f 2>test.err; \ + LD_LIBRARY_PATH=$(BUILD) ../test/round_trip.sh ./test.byte $$f 2>test.err; \ done -BUILD=$(abspath ../_build/lib) - test.native : test.ml dash.cmxa - ocamlfind ocamlopt -g -package str,ctypes,ctypes.foreign -linkpkg -ccopt -L$(BUILD) -ccopt -Wl,-rpath -ccopt -Wl,$(BUILD) dash.cmxa test.ml -o $@ + LD_LIBRARY_PATH=$(BUILD) ocamlfind ocamlopt -g -package str,ctypes,ctypes.foreign -linkpkg -ccopt -L$(BUILD) -ccopt -Wl,-rpath -ccopt -Wl,$(BUILD) dash.cmxa test.ml -o $@ test.byte : test.ml dash.cmxa - ocamlfind ocamlc -g -package str,ctypes,ctypes.foreign -linkpkg -I $(BUILD) -ccopt -L$(BUILD) -dllpath $(BUILD) dash.cma test.ml -o $@ + LD_LIBRARY_PATH=$(BUILD) ocamlfind ocamlc -g -package str,ctypes,ctypes.foreign -linkpkg -I $(BUILD) -ccopt -L$(BUILD) -dllpath $(BUILD) dash.cma test.ml -o $@ test.ml : ../test/test.ml cp $< $@ From 7af35f7326a893d3ae3af1d309082602b6fd6f4f Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 20 Feb 2020 13:23:23 -0800 Subject: [PATCH 292/401] trying to manually do ldconfig's work --- ldconfig.sh | 15 +++++++++++++++ libdash.opam | 5 +++-- 2 files 
changed, 18 insertions(+), 2 deletions(-) create mode 100755 ldconfig.sh diff --git a/ldconfig.sh b/ldconfig.sh new file mode 100755 index 0000000..0f31370 --- /dev/null +++ b/ldconfig.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +set -e + +cd _build/lib + +trylink() { + [ -f "$2" ] || ln -sf $1 $2 +} + +trylink dlldash.so.0.0.0 dlldash.so +trylink dlldash.so.0.0.0 dlldash.so.0 + +trylink libdash.so.0.0.0 libdash.so +trylink libdash.so.0.0.0 libdash.so.0 diff --git a/libdash.opam b/libdash.opam index 316e0b0..ffbb531 100644 --- a/libdash.opam +++ b/libdash.opam @@ -1,6 +1,6 @@ opam-version: "2.0" name: "libdash" -version: "0.2.4" +version: "0.2.5" synopsis: "Bindings to the dash shell's parser" maintainer: "Michael Greenberg " authors: "Michael Greenberg " @@ -28,11 +28,12 @@ build: [ [make "install"] # into _build [make "-C" "ocaml" "all"] ["./mk_dot_install.sh"] + ["./ldconfig.sh"] # fix up .so files if ldconfig didn't do it [make "-C" "ocaml" "test"] {with-test} ] dev-repo: "git+https:///github.com/mgree/libdash" url { - src: "https://github.com/mgree/libdash/archive/0.2.4.tar.gz" + src: "https://github.com/mgree/libdash/archive/0.2.5.tar.gz" # checksum: "md5=ef65c591a6ed23108795f093172fbc8a" } From d7d233d48a4909c2edda2b5c73c0cce211870a48 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 20 Feb 2020 14:04:22 -0800 Subject: [PATCH 293/401] debugging, setting checksum --- ldconfig.sh | 2 ++ libdash.opam | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ldconfig.sh b/ldconfig.sh index 0f31370..35e03e6 100755 --- a/ldconfig.sh +++ b/ldconfig.sh @@ -13,3 +13,5 @@ trylink dlldash.so.0.0.0 dlldash.so.0 trylink libdash.so.0.0.0 libdash.so trylink libdash.so.0.0.0 libdash.so.0 + +ls -l diff --git a/libdash.opam b/libdash.opam index ffbb531..f41f794 100644 --- a/libdash.opam +++ b/libdash.opam @@ -34,6 +34,6 @@ build: [ dev-repo: "git+https:///github.com/mgree/libdash" url { src: "https://github.com/mgree/libdash/archive/0.2.5.tar.gz" -# checksum: 
"md5=ef65c591a6ed23108795f093172fbc8a" + checksum: "md5=c59dfed6f5c5fe70a59313aa1589889e" } From 73e8c76ebe0b71e09ab2486e3abecea51f964dd2 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 20 Feb 2020 14:28:44 -0800 Subject: [PATCH 294/401] debugging --- ocaml/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ocaml/Makefile b/ocaml/Makefile index af58d56..ff8c124 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -16,10 +16,12 @@ dash.cmxa : dash.mli dash.ml ast.mli ast.ml test : test.native test.byte $(wildcard ../test/tests/*) @echo "TESTING test.native" + @echo test.native can run | ./test.native @for f in ../test/tests/*; do \ LD_LIBRARY_PATH=$(BUILD) ../test/round_trip.sh ./test.native $$f 2>test.err; \ done @echo "TESTING test.byte" + @echo test.byte can run | ./test.native @for f in ../test/tests/*; do \ LD_LIBRARY_PATH=$(BUILD) ../test/round_trip.sh ./test.byte $$f 2>test.err; \ done From c9316cc3e22ade9edc7fd0ead7903f096ea6dde5 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 20 Feb 2020 14:47:16 -0800 Subject: [PATCH 295/401] i am in linking hell --- ocaml/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ocaml/Makefile b/ocaml/Makefile index ff8c124..b35de0d 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -21,16 +21,16 @@ test : test.native test.byte $(wildcard ../test/tests/*) LD_LIBRARY_PATH=$(BUILD) ../test/round_trip.sh ./test.native $$f 2>test.err; \ done @echo "TESTING test.byte" - @echo test.byte can run | ./test.native + @echo test.byte can run | ./test.byte @for f in ../test/tests/*; do \ LD_LIBRARY_PATH=$(BUILD) ../test/round_trip.sh ./test.byte $$f 2>test.err; \ done test.native : test.ml dash.cmxa - LD_LIBRARY_PATH=$(BUILD) ocamlfind ocamlopt -g -package str,ctypes,ctypes.foreign -linkpkg -ccopt -L$(BUILD) -ccopt -Wl,-rpath -ccopt -Wl,$(BUILD) dash.cmxa test.ml -o $@ + LD_LIBRARY_PATH=$(BUILD) ocamlfind ocamlopt -g -package str,ctypes,ctypes.foreign -linkpkg -ccopt -L$(BUILD) 
-ccopt -Wl,-rpath -ccopt -Wl,$(BUILD) -cclib -ldash dash.cmxa test.ml -o $@ test.byte : test.ml dash.cmxa - LD_LIBRARY_PATH=$(BUILD) ocamlfind ocamlc -g -package str,ctypes,ctypes.foreign -linkpkg -I $(BUILD) -ccopt -L$(BUILD) -dllpath $(BUILD) dash.cma test.ml -o $@ + LD_LIBRARY_PATH=$(BUILD) ocamlfind ocamlc -g -package str,ctypes,ctypes.foreign -linkpkg -I $(BUILD) -ccopt -L$(BUILD) -dllpath $(BUILD) -cclib -ldash dash.cma test.ml -o $@ test.ml : ../test/test.ml cp $< $@ From edbfb81c3270399f25933286f6947e2e5cb9a463 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 20 Feb 2020 15:03:22 -0800 Subject: [PATCH 296/401] revert to 0.1 tag, remove debugging --- ldconfig.sh | 1 - libdash.opam | 6 +++--- ocaml/Makefile | 3 --- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/ldconfig.sh b/ldconfig.sh index 35e03e6..334ac7b 100755 --- a/ldconfig.sh +++ b/ldconfig.sh @@ -14,4 +14,3 @@ trylink dlldash.so.0.0.0 dlldash.so.0 trylink libdash.so.0.0.0 libdash.so trylink libdash.so.0.0.0 libdash.so.0 -ls -l diff --git a/libdash.opam b/libdash.opam index f41f794..6b8ba2a 100644 --- a/libdash.opam +++ b/libdash.opam @@ -1,6 +1,6 @@ opam-version: "2.0" name: "libdash" -version: "0.2.5" +version: "0.1" synopsis: "Bindings to the dash shell's parser" maintainer: "Michael Greenberg " authors: "Michael Greenberg " @@ -33,7 +33,7 @@ build: [ ] dev-repo: "git+https:///github.com/mgree/libdash" url { - src: "https://github.com/mgree/libdash/archive/0.2.5.tar.gz" - checksum: "md5=c59dfed6f5c5fe70a59313aa1589889e" + src: "https://github.com/mgree/libdash/archive/0.1.tar.gz" +# checksum: "md5=c59dfed6f5c5fe70a59313aa1589889e" } diff --git a/ocaml/Makefile b/ocaml/Makefile index b35de0d..d60b165 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -5,7 +5,6 @@ BUILD=$(abspath ../_build/lib) all : dash.cmxa dash.cma META install : all -# if ocamlfind query libdash; then ocamlfind remove libdash; fi ocamlfind install libdash META dash.cmxa dash.cma dash.a dash.mli 
dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx META : mk_meta.sh @@ -16,12 +15,10 @@ dash.cmxa : dash.mli dash.ml ast.mli ast.ml test : test.native test.byte $(wildcard ../test/tests/*) @echo "TESTING test.native" - @echo test.native can run | ./test.native @for f in ../test/tests/*; do \ LD_LIBRARY_PATH=$(BUILD) ../test/round_trip.sh ./test.native $$f 2>test.err; \ done @echo "TESTING test.byte" - @echo test.byte can run | ./test.byte @for f in ../test/tests/*; do \ LD_LIBRARY_PATH=$(BUILD) ../test/round_trip.sh ./test.byte $$f 2>test.err; \ done From 775967b4dd7e83bd5f1b77e6ac2b3c89363fa3e2 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 20 Feb 2020 15:05:18 -0800 Subject: [PATCH 297/401] update link, add checksum --- libdash.opam | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libdash.opam b/libdash.opam index 6b8ba2a..b7871b9 100644 --- a/libdash.opam +++ b/libdash.opam @@ -33,7 +33,7 @@ build: [ ] dev-repo: "git+https:///github.com/mgree/libdash" url { - src: "https://github.com/mgree/libdash/archive/0.1.tar.gz" -# checksum: "md5=c59dfed6f5c5fe70a59313aa1589889e" + src: "https://github.com/mgree/libdash/archive/v0.1.tar.gz" + checksum: "md5=0454e642ed1130612f75e8d73a8c2f51" } From ba2b39766c325ba78440e82bf06c4070b0a7cd14 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 20 Feb 2020 15:20:50 -0800 Subject: [PATCH 298/401] fix checksum --- libdash.opam | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdash.opam b/libdash.opam index b7871b9..4907ae7 100644 --- a/libdash.opam +++ b/libdash.opam @@ -34,6 +34,6 @@ build: [ dev-repo: "git+https:///github.com/mgree/libdash" url { src: "https://github.com/mgree/libdash/archive/v0.1.tar.gz" - checksum: "md5=0454e642ed1130612f75e8d73a8c2f51" + checksum: "md5=14a0fb3bc658c21919695a47b58487b7" } From e4cafe644548b8d6dca21c0f414fe5853966404c Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 20 Feb 2020 15:29:44 -0800 Subject: [PATCH 
299/401] drop checksum from repo (self-reference issues) --- libdash.opam | 1 - 1 file changed, 1 deletion(-) diff --git a/libdash.opam b/libdash.opam index 4907ae7..512bcdd 100644 --- a/libdash.opam +++ b/libdash.opam @@ -34,6 +34,5 @@ build: [ dev-repo: "git+https:///github.com/mgree/libdash" url { src: "https://github.com/mgree/libdash/archive/v0.1.tar.gz" - checksum: "md5=14a0fb3bc658c21919695a47b58487b7" } From f9661bd0af0eba8cfae43fac2f4a0b1ad15ef707 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 25 Feb 2020 08:52:44 -0800 Subject: [PATCH 300/401] update OPAM script --- libdash.opam | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libdash.opam b/libdash.opam index 512bcdd..6dc271a 100644 --- a/libdash.opam +++ b/libdash.opam @@ -31,6 +31,10 @@ build: [ ["./ldconfig.sh"] # fix up .so files if ldconfig didn't do it [make "-C" "ocaml" "test"] {with-test} ] +install: [ + ["opam-installer" "--prefix=%{prefix}%" "libdash.install"] + [make "-C" "test" "test"] {with-test} +] dev-repo: "git+https:///github.com/mgree/libdash" url { src: "https://github.com/mgree/libdash/archive/v0.1.tar.gz" From 712db3999a52d3f9155b2a5a20be98a44b197e72 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 25 Feb 2020 13:38:22 -0800 Subject: [PATCH 301/401] revised attempt at integrated tests --- libdash.opam | 3 +-- mk_dot_install.sh => libdash_install.sh | 13 +++++-------- 2 files changed, 6 insertions(+), 10 deletions(-) rename mk_dot_install.sh => libdash_install.sh (61%) diff --git a/libdash.opam b/libdash.opam index 6dc271a..3ce41b4 100644 --- a/libdash.opam +++ b/libdash.opam @@ -27,12 +27,11 @@ build: [ [make] [make "install"] # into _build [make "-C" "ocaml" "all"] - ["./mk_dot_install.sh"] ["./ldconfig.sh"] # fix up .so files if ldconfig didn't do it [make "-C" "ocaml" "test"] {with-test} ] install: [ - ["opam-installer" "--prefix=%{prefix}%" "libdash.install"] + ["./libdash_install.sh"] # autotools borks if we call it install.sh, lol [make "-C" "test" 
"test"] {with-test} ] dev-repo: "git+https:///github.com/mgree/libdash" diff --git a/mk_dot_install.sh b/libdash_install.sh similarity index 61% rename from mk_dot_install.sh rename to libdash_install.sh index 0ac9473..7b1a955 100755 --- a/mk_dot_install.sh +++ b/libdash_install.sh @@ -1,6 +1,6 @@ -#!/bin/sh +#!/bin/bash -set -e +set -ex libdash_files=$(ls _build/lib) bindings_files="META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx" @@ -8,15 +8,12 @@ bindings_files="META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.c files= for f in ${libdash_files} do - files="${files} \"_build/lib/${f}\"" + files="${files} _build/lib/${f}" done for f in ${bindings_files} do - files="${files} \"ocaml/${f}\"" + files="${files} ocaml/${f}" done -cat >libdash.install < Date: Tue, 25 Feb 2020 14:17:44 -0800 Subject: [PATCH 302/401] go back to libdash.install approach --- libdash.opam | 4 +++- libdash_install.sh => mk_dot_install.sh | 13 ++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) rename libdash_install.sh => mk_dot_install.sh (61%) diff --git a/libdash.opam b/libdash.opam index 3ce41b4..f6f0d98 100644 --- a/libdash.opam +++ b/libdash.opam @@ -12,6 +12,7 @@ depends: [ "ocamlfind" {>= "1.8.0"} "ctypes" {>= "0.11.5"} "ctypes-foreign" {>= "0.4.0"} + "opam-installer" {>= "2.0.0"} "conf-autoconf" {build} # "conf-libtool" {build} ] @@ -27,11 +28,12 @@ build: [ [make] [make "install"] # into _build [make "-C" "ocaml" "all"] + ["./mk_dot_install.sh"] ["./ldconfig.sh"] # fix up .so files if ldconfig didn't do it [make "-C" "ocaml" "test"] {with-test} ] install: [ - ["./libdash_install.sh"] # autotools borks if we call it install.sh, lol + ["opam-installer" "--prefix=%{prefix}%" "libdash.install"] [make "-C" "test" "test"] {with-test} ] dev-repo: "git+https:///github.com/mgree/libdash" diff --git a/libdash_install.sh b/mk_dot_install.sh similarity index 61% rename from libdash_install.sh rename to 
mk_dot_install.sh index 7b1a955..0ac9473 100755 --- a/libdash_install.sh +++ b/mk_dot_install.sh @@ -1,6 +1,6 @@ -#!/bin/bash +#!/bin/sh -set -ex +set -e libdash_files=$(ls _build/lib) bindings_files="META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx" @@ -8,12 +8,15 @@ bindings_files="META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.c files= for f in ${libdash_files} do - files="${files} _build/lib/${f}" + files="${files} \"_build/lib/${f}\"" done for f in ${bindings_files} do - files="${files} ocaml/${f}" + files="${files} \"ocaml/${f}\"" done -ocamlfind install libdash -nodll $files +cat >libdash.install < Date: Tue, 12 May 2020 13:59:43 -0700 Subject: [PATCH 303/401] v0.1.1, porting a bugfix @tucak from mgree/smoosh#18 --- libdash.opam | 4 ++-- ocaml/.gitignore | 1 + ocaml/ast.ml | 3 +++ ocaml/ast.mli | 2 ++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/libdash.opam b/libdash.opam index f6f0d98..428b0a1 100644 --- a/libdash.opam +++ b/libdash.opam @@ -1,6 +1,6 @@ opam-version: "2.0" name: "libdash" -version: "0.1" +version: "0.1.1" synopsis: "Bindings to the dash shell's parser" maintainer: "Michael Greenberg " authors: "Michael Greenberg " @@ -38,6 +38,6 @@ install: [ ] dev-repo: "git+https:///github.com/mgree/libdash" url { - src: "https://github.com/mgree/libdash/archive/v0.1.tar.gz" + src: "https://github.com/mgree/libdash/archive/v0.1.1.tar.gz" } diff --git a/ocaml/.gitignore b/ocaml/.gitignore index 5e14717..21d4ac7 100644 --- a/ocaml/.gitignore +++ b/ocaml/.gitignore @@ -8,3 +8,4 @@ test *.cmi *.cmo *.cma +META diff --git a/ocaml/ast.ml b/ocaml/ast.ml index adb9d91..3105601 100644 --- a/ocaml/ast.ml +++ b/ocaml/ast.ml @@ -1,5 +1,7 @@ type linno = int +exception ParseException of string + type t = | Command of linno * assign list * args * redirection list (* assign, args, redir *) | Pipe of bool * t list (* background?, commands *) @@ -254,6 +256,7 @@ and parse_arg (s : char 
list) (bqlist : nodelist structure ptr) stack = in arg_char v s bqlist stack (* CTLENDVAR *) + | '\130'::_, _ -> raise (ParseException "bad substitution (missing variable name in ${}?") | '\131'::s,`CTLVar::stack' -> [],s,bqlist,stack' | '\131'::_,`CTLAri::_ -> failwith "Saw CTLENDVAR before CTLENDARI" | '\131'::_,`CTLQuo::_ -> failwith "Saw CTLENDVAR before CTLQUOTEMARK" diff --git a/ocaml/ast.mli b/ocaml/ast.mli index bbb765b..73725cb 100644 --- a/ocaml/ast.mli +++ b/ocaml/ast.mli @@ -1,5 +1,7 @@ type linno = int +exception ParseException of string + type t = Command of linno * assign list * args * redirection list | Pipe of bool * t list From 658fa24f594db7e275e9fe518ad82af13f483887 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 12 May 2020 14:50:53 -0700 Subject: [PATCH 304/401] better output from failed tests, to debug bad build in CI --- test/round_trip.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/round_trip.sh b/test/round_trip.sh index 57a7e86..e1c3336 100755 --- a/test/round_trip.sh +++ b/test/round_trip.sh @@ -8,14 +8,14 @@ fi p=$1 tgt=$2 -orig=$(${p} ${tgt}) +orig=$(${p} ${tgt} 2>&1) if [ "$?" -ne 0 ]; -then echo ${tgt} FAILED, couldn\'t run; exit 2 +then echo "${tgt} FAILED, couldn't run (output: ${orig})"; exit 2 fi -rt=$(${p} ${tgt} | ${p}) +rt=$(${p} ${tgt} | ${p} 2>&1) if [ "$?" 
-ne 0 ]; -then echo ${tgt} FAILED round trip, couldn\'t run; exit 3 +then echo "${tgt} FAILED round trip, couldn't run (output: $rt)"; exit 3 fi if [ "${orig}" = "${rt}" ]; From 011a6a5d96023eab80c07f5287f42f199e9311a4 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 12 May 2020 14:56:14 -0700 Subject: [PATCH 305/401] add conf-libtool dep, now that OPAM repo is up to date --- libdash.opam | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdash.opam b/libdash.opam index 428b0a1..cb51226 100644 --- a/libdash.opam +++ b/libdash.opam @@ -14,7 +14,7 @@ depends: [ "ctypes-foreign" {>= "0.4.0"} "opam-installer" {>= "2.0.0"} "conf-autoconf" {build} -# "conf-libtool" {build} + "conf-libtool" {build} ] build: [ ["libtoolize"] {os != "macos"} From d6d373569bd6f17e0d8b042b7c9c16cb8d45549a Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 12 May 2020 15:02:16 -0700 Subject: [PATCH 306/401] set missing variables in .travis.yml --- .travis.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 0ee90e7..e4d7a2e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,8 @@ -sudo: required +os: + - linux + - osx + +dist: xenial language: generic From c3942cbf6f6a43fc9c41fe10d8f388e841ce3fbb Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 12 May 2020 15:05:00 -0700 Subject: [PATCH 307/401] lol no osx for u --- .travis.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index e4d7a2e..018a872 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,4 @@ -os: - - linux - - osx - +os: linux dist: xenial language: generic From 18371568858ffe8060a9fd5c5d1fe9e6a8bed0d6 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 12 May 2020 15:23:53 -0700 Subject: [PATCH 308/401] local debugging of the opam-repo issues, using direct VM and no docker in CI --- .travis-ocaml.sh | 306 +++++++++++++++++++++++++++++++++++++++++++++++ .travis.yml | 18 
+-- 2 files changed, 317 insertions(+), 7 deletions(-) create mode 100644 .travis-ocaml.sh diff --git a/.travis-ocaml.sh b/.travis-ocaml.sh new file mode 100644 index 0000000..3df8d0e --- /dev/null +++ b/.travis-ocaml.sh @@ -0,0 +1,306 @@ +## basic OCaml and opam installation + +full_apt_version () { + package=$1 + version=$2 + case "${version}" in + latest) echo -n "${package}" ;; + *) echo -n "${package}=" + apt-cache show "$package" \ + | sed -n "s/^Version: \(${version}\)/\1/p" \ + | head -1 + esac +} + +set -uex + +if [ "$TRAVIS_OS_NAME" = freebsd -a "${OPAM_VERSION+x}" = x ]; then + echo OPAM_VERSION not permitted for FreeBSD targets + exit 1 +fi + +OCAML_VERSION=${OCAML_VERSION:-latest} +SYS_OCAML_VERSION=4.05 +# Default opam is the latest release of opam 2 +OPAM_VERSION=${OPAM_VERSION:-2} +OPAM_INIT=${OPAM_INIT:-true} +OCAML_BETA=${OCAML_BETA:-disable} + +OPAM_LATEST_RELEASE=2.0.6 + +case $OPAM_VERSION in + 2|2.0) OPAM_VERSION=$OPAM_LATEST_RELEASE;; + 1.*) echo "Opam version '$OPAM_VERSION' is not supported"; exit 1;; +esac + +if [ "$TRAVIS_OS_NAME" = "osx" ] ; then + brew update &> /dev/null + BREW_OPAM_VERSION=$(brew info opam --json=v1 | sed -e 's/.*"versions":{[^}]*"stable":"//' -e 's/".*//') + if [ "$OPAM_VERSION" != "$BREW_OPAM_VERSION" ] ; then + set +x + echo -e "[\e[0;31mWARNING\e[0m] Ignored OPAM_VERSION=$OPAM_VERSION; interpreted as \"$BREW_OPAM_VERSION\"" >&2 + echo -e "[\e[0;31mWARNING\e[0m] opam 2 is installed via Homebrew" >&2 + set -x + fi + OPAM_VERSION="$BREW_OPAM_VERSION" +fi + +if [ "$OPAM_VERSION" != "$OPAM_LATEST_RELEASE" ] ; then + set +x + echo -e "[\e[0;31mWARNING\e[0m] Out-of-date opam $OPAM_VERSION requested" >&2 + echo -e "[\e[0;31mWARNING\e[0m] Latest release is $OPAM_LATEST_RELEASE" >&2 + set -x +fi + +if [ "${INSTALL_LOCAL+x}" = x ] ; then + if [ "$TRAVIS_OS_NAME" = osx -o "$TRAVIS_OS_NAME" = freebsd ] ; then + echo INSTALL_LOCAL not permitted for macOS and FreeBSD targets + exit 1 + fi + + if [ "${OPAM_SWITCH:=ocaml-system}" 
!= ocaml-system ] ; then + echo "INSTALL_LOCAL requires OPAM_SWITCH=ocaml-system (or unset/null)" + exit 1 + fi +fi + +# the base opam repository to use for bootstrapping and catch-all namespace +BASE_REMOTE=${BASE_REMOTE:-git://github.com/ocaml/opam-repository} + +# whether we need a new gcc and binutils +UPDATE_GCC_BINUTILS=${UPDATE_GCC_BINUTILS:-"0"} + +# Install Xenial remotes +UBUNTU_XENIAL=${UBUNTU_XENIAL:-"0"} + +# Install XQuartz on OSX +INSTALL_XQUARTZ=${INSTALL_XQUARTZ:-"false"} + +APT_UPDATED=0 + +add_ppa () { + if [ "$TRAVIS_OS_NAME" = "linux" ] ; then + APT_UPDATED=0 + sudo add-apt-repository --yes ppa:$1 + fi +} + +apt_install () { + if [ "$TRAVIS_OS_NAME" = "linux" ] ; then + if [ "$APT_UPDATED" -eq 0 ] ; then + APT_UPDATED=1 + sudo apt-get update -qq + fi + sudo apt-get install --no-install-recommends -y "$@" + fi +} + +install_ocaml () { + apt_install \ + ocaml ocaml-base ocaml-native-compilers ocaml-compiler-libs \ + ocaml-interp ocaml-base-nox ocaml-nox +} + +install_opam2 () { + case $TRAVIS_OS_NAME in + freebsd) + # Opam does not have any ready to use binaries for FreeBSD + sudo pkg install -qy ocaml-opam ;; + linux) + case $TRAVIS_DIST in + precise|trusty|xenial) + add_ppa ansible/bubblewrap ;; + esac + if [ "${INSTALL_LOCAL:=0}" = 0 ] ; then + install_ocaml + fi + apt_install bubblewrap + sudo wget https://github.com/ocaml/opam/releases/download/$OPAM_VERSION/opam-$OPAM_VERSION-x86_64-linux -O /usr/local/bin/opam + sudo chmod +x /usr/local/bin/opam ;; + osx) + if [ "${INSTALL_LOCAL:=0}" = 0 ] ; then + brew install ocaml + fi + sudo curl -fsSL https://github.com/ocaml/opam/releases/download/$OPAM_VERSION/opam-$OPAM_VERSION-x86_64-macos -o /usr/local/bin/opam + sudo chmod +x /usr/local/bin/opam ;; + esac +} + +install_ppa () { + add_ppa $1 + if [ "${INSTALL_LOCAL:=0}" = 0 ] ; then + sudo apt-get -qq update + APT_UPDATED=1 + apt_install \ + "$(full_apt_version ocaml $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-base $SYS_OCAML_VERSION)" \ + 
"$(full_apt_version ocaml-native-compilers $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-compiler-libs $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-interp $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-base-nox $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-nox $SYS_OCAML_VERSION)" + fi + apt_install opam +} + +install_on_freebsd () { + case "$OCAML_VERSION" in + 3.12) OCAML_FULL_VERSION=3.12.1; install_opam2 ;; + 4.00) OCAML_FULL_VERSION=4.00.1; install_opam2 ;; + 4.01) OCAML_FULL_VERSION=4.01.0; install_opam2 ;; + 4.02) OCAML_FULL_VERSION=4.02.3; install_opam2 ;; + 4.03) OCAML_FULL_VERSION=4.03.0; install_opam2 ;; + 4.04) OCAML_FULL_VERSION=4.04.2; install_opam2 ;; + 4.05) OCAML_FULL_VERSION=4.05.0; install_opam2 ;; + 4.06) OCAML_FULL_VERSION=4.06.1; install_opam2 ;; + 4.07) OCAML_FULL_VERSION=4.07.1; install_opam2 ;; + 4.08) OCAML_FULL_VERSION=4.08.1; install_opam2 ;; + 4.09) OCAML_FULL_VERSION=4.09.1; install_opam2 ;; + 4.10) OCAML_FULL_VERSION=4.10.0; install_opam2 ;; + *) + if [ "$OCAML_BETA" != "enable" ]; then + echo "Unknown OCAML_VERSION=$OCAML_VERSION" + echo "(An unset OCAML_VERSION used to default to \"latest\", but you must now specify it." 
+ echo "Try something like \"OCAML_VERSION=3.12\", \"OCAML_VERSION=4.10\", or see README-travis.md at https://github.com/ocaml/ocaml-ci-scripts )" + exit 1 + fi + OCAML_FULL_VERSION="${OCAML_VERSION}" + install_opam2 ;; + esac +} + +install_on_linux () { + case "$OCAML_VERSION" in + 3.12) OCAML_FULL_VERSION=3.12.1; install_opam2 ;; + 4.00) OCAML_FULL_VERSION=4.00.1; install_opam2 ;; + 4.01) OCAML_FULL_VERSION=4.01.0; install_opam2 ;; + 4.02) OCAML_FULL_VERSION=4.02.3; install_opam2 ;; + 4.03) OCAML_FULL_VERSION=4.03.0; install_opam2 ;; + 4.04) OCAML_FULL_VERSION=4.04.2; install_opam2 ;; + 4.05) OCAML_FULL_VERSION=4.05.0; install_opam2 ;; + 4.06) OCAML_FULL_VERSION=4.06.1; install_opam2 ;; + 4.07) OCAML_FULL_VERSION=4.07.1; install_opam2 ;; + 4.08) OCAML_FULL_VERSION=4.08.1; install_opam2 ;; + 4.09) OCAML_FULL_VERSION=4.09.1; install_opam2 ;; + 4.10) OCAML_FULL_VERSION=4.10.0; install_opam2 ;; + *) + if [ "$OCAML_BETA" != "enable" ]; then + echo "Unknown OCAML_VERSION=$OCAML_VERSION" + echo "(An unset OCAML_VERSION used to default to \"latest\", but you must now specify it." 
+ echo "Try something like \"OCAML_VERSION=3.12\", \"OCAML_VERSION=4.10\", or see README-travis.md at https://github.com/ocaml/ocaml-ci-scripts )" + exit 1 + fi + OCAML_FULL_VERSION="${OCAML_VERSION}" + install_opam2 ;; + esac + + XENIAL="deb mirror://mirrors.ubuntu.com/mirrors.txt xenial main restricted universe" + + if [ "$UPDATE_GCC_BINUTILS" != "0" ] ; then + echo "installing a recent gcc and binutils (mainly to get mirage-entropy-xen working!)" + sudo add-apt-repository "${XENIAL}" + sudo add-apt-repository --yes ppa:ubuntu-toolchain-r/test + sudo apt-get -qq update + sudo apt-get install -y gcc-5 + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 90 + sudo add-apt-repository -r "${XENIAL}" + fi + + if [ "$UBUNTU_XENIAL" != "0" ] ; then + echo "Adding Ubuntu Xenial mirrors" + sudo add-apt-repository "${XENIAL}" + sudo apt-get -qq update + APT_UPDATED=1 + fi + + if [ "${INSTALL_LOCAL:=0}" != 0 ] ; then + echo -en "travis_fold:start:build.ocaml\r" + echo "Building a local OCaml; this may take a few minutes..." + wget "http://caml.inria.fr/pub/distrib/ocaml-${OCAML_FULL_VERSION%.*}/ocaml-$OCAML_FULL_VERSION.tar.gz" + tar -xzf "ocaml-$OCAML_FULL_VERSION.tar.gz" + cd "ocaml-$OCAML_FULL_VERSION" + ./configure -prefix /usr/local ${OCAML_CONFIGURE_ARGS:=--with-debug-runtime} + make world.opt + sudo make install + cd .. 
+ echo -en "travis_fold:end:build.ocaml\r" + fi +} + +install_on_osx () { + case $INSTALL_XQUARTZ in + true) + curl -OL "http://xquartz.macosforge.org/downloads/SL/XQuartz-2.7.6.dmg" + sudo hdiutil attach XQuartz-2.7.6.dmg + sudo installer -verbose -pkg /Volumes/XQuartz-2.7.6/XQuartz.pkg -target / + ;; + esac + case "$OCAML_VERSION" in + 3.12) OCAML_FULL_VERSION=3.12.1; install_opam2 ;; + 4.00) OCAML_FULL_VERSION=4.00.1; install_opam2 ;; + 4.01) OCAML_FULL_VERSION=4.01.0; install_opam2 ;; + 4.02) OCAML_FULL_VERSION=4.02.3; install_opam2 ;; + 4.03) OCAML_FULL_VERSION=4.03.0; install_opam2 ;; + 4.04) OCAML_FULL_VERSION=4.04.2; install_opam2 ;; + 4.05) OCAML_FULL_VERSION=4.05.0; install_opam2 ;; + 4.06) OCAML_FULL_VERSION=4.06.1; install_opam2 ;; + 4.07) OCAML_FULL_VERSION=4.07.1; install_opam2 ;; + 4.08) OCAML_FULL_VERSION=4.08.1; install_opam2 ;; + 4.09) OCAML_FULL_VERSION=4.09.0; + OPAM_SWITCH=${OPAM_SWITCH:-ocaml-system}; + brew install ocaml; + install_opam2 ;; + 4.10) OCAML_FULL_VERSION=4.10.0; install_opam2 ;; + *) + if [ "$OCAML_BETA" != "enable" ]; then + echo "Unknown OCAML_VERSION=$OCAML_VERSION" + exit 1 + fi + OCAML_FULL_VERSION="${OCAML_VERSION}" + install_opam2 ;; + esac +} + +case $TRAVIS_OS_NAME in + freebsd) install_on_freebsd ;; + osx) install_on_osx ;; + linux) install_on_linux ;; +esac + +ocaml_package=ocaml-base-compiler +if [ "$OCAML_BETA" = "enable" ]; then + ocaml_package=ocaml-variants +fi + +OPAM_SWITCH=${OPAM_SWITCH:-$ocaml_package.$OCAML_FULL_VERSION} + +export OPAMYES=1 + +case $OPAM_INIT in + true) + opam init -a --bare "$BASE_REMOTE" + opam_repo_selection= + if [ "$OCAML_BETA" = "enable" ]; then + opam repo add --dont-select beta git://github.com/ocaml/ocaml-beta-repository.git + opam_repo_selection="--repo=default,beta" + fi + opam switch "$OPAM_SWITCH" || opam switch create $opam_repo_selection "$OPAM_SWITCH" + eval $(opam config env) + ;; +esac + +echo OCAML_VERSION=$OCAML_VERSION > .travis-ocaml.env +echo OPAM_SWITCH=$OPAM_SWITCH >> 
.travis-ocaml.env + +# Temporary fix an issue with opam-depext < 1.1.3 on FreeBSD. +# See https://github.com/ocaml/opam-depext/pull/123 +echo export ASSUME_ALWAYS_YES=YES >> .travis-ocaml.env + +if [ -x "$(command -v ocaml)" ]; then + ocaml -version +else + echo "OCaml is not yet installed" +fi + +opam --version +opam --git-version diff --git a/.travis.yml b/.travis.yml index 018a872..d23f143 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,13 +3,17 @@ dist: xenial language: generic -services: - - docker - -before_install: - - docker pull ocaml/opam2:debian-9 - - docker build -t libdash . +env: + - OCAML_VERSION=4.07 + - OCAML_VERSION=4.09 + - OCAML_VERSION=4.09 INSTALL_LOCAL=1 + - OCAML_VERSION=4.11.0+trunk OCAML_BETA=enable + +install: + - sudo apt-get install -y autoconf autotools-dev libtool pkg-config libffi-dev + - test -e .travis-opam.sh || wget https://raw.githubusercontent.com/ocaml/ocaml-ci-scripts/master/.travis-opam.sh script: - - docker run libdash make -C libdash/test test + - opam pin -t add . + - (cd test; opam exec -- make test) From 80ee363855ae2c23346337f45d05bb2dfab874d1 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 12 May 2020 15:27:10 -0700 Subject: [PATCH 309/401] you have to use the right script name --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d23f143..2d6f389 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,8 @@ env: install: - sudo apt-get install -y autoconf autotools-dev libtool pkg-config libffi-dev - - test -e .travis-opam.sh || wget https://raw.githubusercontent.com/ocaml/ocaml-ci-scripts/master/.travis-opam.sh + - test -e .travis-ocaml.sh || wget https://raw.githubusercontent.com/ocaml/ocaml-ci-scripts/master/.travis-ocaml.sh + - bash -ex .travis-ocaml.sh script: - opam pin -t add . 
From 6af30bc9826bd4210b4c341887f254e5d70e99d9 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 12 May 2020 15:48:31 -0700 Subject: [PATCH 310/401] affirmative --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 2d6f389..125df6f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,6 @@ install: - bash -ex .travis-ocaml.sh script: - - opam pin -t add . + - opam pin -y -t add . - (cd test; opam exec -- make test) From 48b6f1718e4fd2e5ac2c2224aa517ee5d5f936f0 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 12 May 2020 16:02:26 -0700 Subject: [PATCH 311/401] verbose build, turn off post-install tests (which now happen directly in CI) --- .travis.yml | 2 +- libdash.opam | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 125df6f..94a0256 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,6 @@ install: - bash -ex .travis-ocaml.sh script: - - opam pin -y -t add . + - opam pin -y -v -t add . - (cd test; opam exec -- make test) diff --git a/libdash.opam b/libdash.opam index cb51226..9d7a075 100644 --- a/libdash.opam +++ b/libdash.opam @@ -34,7 +34,6 @@ build: [ ] install: [ ["opam-installer" "--prefix=%{prefix}%" "libdash.install"] - [make "-C" "test" "test"] {with-test} ] dev-repo: "git+https:///github.com/mgree/libdash" url { From 2b62f4e1e655cb86b2cf5032c76310758189a82e Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 12 May 2020 16:12:47 -0700 Subject: [PATCH 312/401] seeing if explicit LD_LIBRARY_PATH does it... --- .travis.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 94a0256..bd73a05 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,10 @@ dist: xenial language: generic +cache: + directories: + - $HOME/.opam + env: - OCAML_VERSION=4.07 - OCAML_VERSION=4.09 @@ -16,5 +20,5 @@ install: script: - opam pin -y -v -t add . 
- - (cd test; opam exec -- make test) + - (cd test; LD_LIBRARY_PATH=$(opam var libdash:lib) opam exec -- make test) From b564ff911edeab5258305d6918412c21baa47b08 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 12 May 2020 16:15:04 -0700 Subject: [PATCH 313/401] fix caching --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index bd73a05..e5045ae 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,4 +21,5 @@ install: script: - opam pin -y -v -t add . - (cd test; LD_LIBRARY_PATH=$(opam var libdash:lib) opam exec -- make test) + - opam uninstall libdash # clear the cache From a8898d17aeacc91372cd36c4917917cfd9cc5044 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 12 May 2020 16:29:59 -0700 Subject: [PATCH 314/401] getting some ldd feedback... --- .travis.yml | 2 +- test/Makefile | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index e5045ae..d854162 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,6 +20,6 @@ install: script: - opam pin -y -v -t add . 
- - (cd test; LD_LIBRARY_PATH=$(opam var libdash:lib) opam exec -- make test) + - LD_LIBRARY_PATH=$(opam var libdash:lib) opam exec -- make -C test test - opam uninstall libdash # clear the cache diff --git a/test/Makefile b/test/Makefile index c532d98..d1fdbce 100644 --- a/test/Makefile +++ b/test/Makefile @@ -15,6 +15,7 @@ test : test.native test.byte $(wildcard tests/*) test.native : test.ml ocamlfind ocamlopt -g -package str,libdash -linkpkg $^ -o test.native + ldd $@ || otool -L $@ test.byte : test.ml ocamlfind ocamlc -g -package str,libdash -linkpkg $^ -o test.byte From 79bd3197599890ea05afe9709fde564072909e5f Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 12 May 2020 16:38:18 -0700 Subject: [PATCH 315/401] more debugging, le sigh --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index d854162..e79f3e2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,6 +20,7 @@ install: script: - opam pin -y -v -t add . + - ls -l $(opam var libdash:lib) - LD_LIBRARY_PATH=$(opam var libdash:lib) opam exec -- make -C test test - opam uninstall libdash # clear the cache From 90536721a29103a5ff3a30661bef05f0ec89edd3 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 12 May 2020 16:44:16 -0700 Subject: [PATCH 316/401] trying to build in more... 
--- ocaml/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/Makefile b/ocaml/Makefile index d60b165..4e60f31 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -11,7 +11,7 @@ META : mk_meta.sh ./mk_meta.sh dash.cmxa : dash.mli dash.ml ast.mli ast.ml - ocamlfind ocamlmklib -g -package str,ctypes,ctypes.foreign -dllpath `opam var lib`/libdash $^ -o dash + ocamlfind ocamlmklib -g -package str,ctypes,ctypes.foreign -L$(BUILD) -ldash $^ -o dash test : test.native test.byte $(wildcard ../test/tests/*) @echo "TESTING test.native" From 77c11dbab932c19c844fe56634c994d1196de660 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 12 May 2020 16:49:05 -0700 Subject: [PATCH 317/401] trying fancier linkopts... --- ocaml/mk_meta.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/mk_meta.sh b/ocaml/mk_meta.sh index 5569c64..4874368 100755 --- a/ocaml/mk_meta.sh +++ b/ocaml/mk_meta.sh @@ -8,6 +8,7 @@ requires = "ctypes,ctypes.foreign" version = "0.1" archive(native) = "dash.cmxa" archive(byte) = "dash.cma" +linkopts(native) = "-cclib -ldash" linkopts(byte) = "-dllpath $(opam var lib)/libdash" EOF From 804f5db565dd629bd636576352bb51e253765ed6 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 12 May 2020 16:52:11 -0700 Subject: [PATCH 318/401] just put more options in there --- ocaml/mk_meta.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/mk_meta.sh b/ocaml/mk_meta.sh index 4874368..9140d95 100755 --- a/ocaml/mk_meta.sh +++ b/ocaml/mk_meta.sh @@ -8,7 +8,7 @@ requires = "ctypes,ctypes.foreign" version = "0.1" archive(native) = "dash.cmxa" archive(byte) = "dash.cma" -linkopts(native) = "-cclib -ldash" +linkopts(native) = "-linkpkg -ccopt -L$(opam var lib) -ccopt -Wl,-rpath -ccopt -Wl,$(opam var lib) -cclib -ldash" linkopts(byte) = "-dllpath $(opam var lib)/libdash" EOF From 0001c6089c1ca8023c39df093f52ec41c4eba1fb Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 12 May 2020 16:55:38 -0700 
Subject: [PATCH 319/401] no linkpkg. we're just shooting in the dark at this point, ugh --- ocaml/mk_meta.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/mk_meta.sh b/ocaml/mk_meta.sh index 9140d95..31e87ba 100755 --- a/ocaml/mk_meta.sh +++ b/ocaml/mk_meta.sh @@ -8,7 +8,7 @@ requires = "ctypes,ctypes.foreign" version = "0.1" archive(native) = "dash.cmxa" archive(byte) = "dash.cma" -linkopts(native) = "-linkpkg -ccopt -L$(opam var lib) -ccopt -Wl,-rpath -ccopt -Wl,$(opam var lib) -cclib -ldash" +linkopts(native) = "-ccopt -L$(opam var lib) -ccopt -Wl,-rpath -ccopt -Wl,$(opam var lib) -cclib -ldash" linkopts(byte) = "-dllpath $(opam var lib)/libdash" EOF From 209579b4c94298418e8a9b88969eeb1894a00aed Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 13 May 2020 08:19:27 -0700 Subject: [PATCH 320/401] copy over matrix from opam-repository, for verisimilitude --- .travis.yml | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index e79f3e2..e331f3f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,3 @@ -os: linux dist: xenial language: generic @@ -7,12 +6,21 @@ cache: directories: - $HOME/.opam -env: - - OCAML_VERSION=4.07 - - OCAML_VERSION=4.09 - - OCAML_VERSION=4.09 INSTALL_LOCAL=1 - - OCAML_VERSION=4.11.0+trunk OCAML_BETA=enable - +matrix: + include: + - os: osx + osx_image: xcode11.3 + env: OCAML_VERSION=4.09 + - os: freebsd + env: OCAML_VERSION=4.09 + - os: linux + env: OCAML_VERSION=4.09 INSTALL_LOCAL=1 + - os: linux + env: OCAML_VERSION=4.11.0+trunk OCAML_BETA=enable + allow_failures: + - os: freebsd + env: OCAML_VERSION=4.09 + install: - sudo apt-get install -y autoconf autotools-dev libtool pkg-config libffi-dev - test -e .travis-ocaml.sh || wget https://raw.githubusercontent.com/ocaml/ocaml-ci-scripts/master/.travis-ocaml.sh From f884378e7ceb264434ae5256f1e100c886062e0e Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 13 May 2020 08:29:02 -0700 
Subject: [PATCH 321/401] fix macos deps --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index e331f3f..05e2344 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,7 +22,8 @@ matrix: env: OCAML_VERSION=4.09 install: - - sudo apt-get install -y autoconf autotools-dev libtool pkg-config libffi-dev + - which apt-get && sudo apt-get install -y autoconf autotools libtool pkg-config libffi-dev + - which brew && brew install -y autoconf automake libtool pkg-config libffi - test -e .travis-ocaml.sh || wget https://raw.githubusercontent.com/ocaml/ocaml-ci-scripts/master/.travis-ocaml.sh - bash -ex .travis-ocaml.sh From 3cdfdafa3c81a0a2eb67016994b4a6038cfbd87a Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 13 May 2020 08:31:44 -0700 Subject: [PATCH 322/401] missing -dev on autotools, blargh --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 05e2344..705195a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,7 +22,7 @@ matrix: env: OCAML_VERSION=4.09 install: - - which apt-get && sudo apt-get install -y autoconf autotools libtool pkg-config libffi-dev + - which apt-get && sudo apt-get install -y autoconf autotools-dev libtool pkg-config libffi-dev - which brew && brew install -y autoconf automake libtool pkg-config libffi - test -e .travis-ocaml.sh || wget https://raw.githubusercontent.com/ocaml/ocaml-ci-scripts/master/.travis-ocaml.sh - bash -ex .travis-ocaml.sh From 973a86705ab28fe908975c6a019770045e640c99 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 13 May 2020 08:34:02 -0700 Subject: [PATCH 323/401] use addons, this is getting ridiculous --- .travis.yml | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 705195a..2401977 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,10 +20,24 @@ matrix: allow_failures: - os: freebsd env: OCAML_VERSION=4.09 - + +addons: + 
apt: + packages: + - autoconf + - autotools-dev + - libtool + - pkg-config + - libffi-dev + homebrew: + packages: + - autoconf + - automake + - libtool + - pkg-config + - libffi + install: - - which apt-get && sudo apt-get install -y autoconf autotools-dev libtool pkg-config libffi-dev - - which brew && brew install -y autoconf automake libtool pkg-config libffi - test -e .travis-ocaml.sh || wget https://raw.githubusercontent.com/ocaml/ocaml-ci-scripts/master/.travis-ocaml.sh - bash -ex .travis-ocaml.sh From 4b0b669ba04e338ddc52e3b9c859ea47358a5f28 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 13 May 2020 09:44:25 -0700 Subject: [PATCH 324/401] drop LD_LIBRARY_PATH, probably breaking everything :( --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 2401977..eab7aaf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -44,6 +44,6 @@ install: script: - opam pin -y -v -t add . - ls -l $(opam var libdash:lib) - - LD_LIBRARY_PATH=$(opam var libdash:lib) opam exec -- make -C test test + - opam exec -- make -C test test - opam uninstall libdash # clear the cache From f82dead0c2a13819a05a3013ebc849625f6d1d87 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 13 May 2020 10:36:28 -0700 Subject: [PATCH 325/401] adding conf-aclocal (fix alpine... maybe freebsd, too?) 
--- libdash.opam | 1 + 1 file changed, 1 insertion(+) diff --git a/libdash.opam b/libdash.opam index 9d7a075..b612f20 100644 --- a/libdash.opam +++ b/libdash.opam @@ -14,6 +14,7 @@ depends: [ "ctypes-foreign" {>= "0.4.0"} "opam-installer" {>= "2.0.0"} "conf-autoconf" {build} + "conf-aclocal" {build} "conf-libtool" {build} ] build: [ From dc9cce3cac025670dc414740ea8e285242d99804 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 13 May 2020 14:15:21 -0700 Subject: [PATCH 326/401] fix splat bang in tests --- test/round_trip.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/round_trip.sh b/test/round_trip.sh index e1c3336..1aa1648 100755 --- a/test/round_trip.sh +++ b/test/round_trip.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh if [ $# -ne 2 ]; then echo "Usage: ${0##*/} program target" From 2d6755856e9af049ec3c60f6d0bab349b119a5a9 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 13 May 2020 18:11:02 -0700 Subject: [PATCH 327/401] add str dependency --- ocaml/mk_meta.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/mk_meta.sh b/ocaml/mk_meta.sh index 31e87ba..d45ed16 100755 --- a/ocaml/mk_meta.sh +++ b/ocaml/mk_meta.sh @@ -4,7 +4,7 @@ set -e cat >META < Date: Wed, 13 May 2020 20:19:42 -0700 Subject: [PATCH 328/401] debug info on META, trying to clarify rpath... huh. --- .travis.yml | 1 + ocaml/mk_meta.sh | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index eab7aaf..2e49652 100644 --- a/.travis.yml +++ b/.travis.yml @@ -44,6 +44,7 @@ install: script: - opam pin -y -v -t add . 
- ls -l $(opam var libdash:lib) + - cat $(opam var libdash:lib)/META - opam exec -- make -C test test - opam uninstall libdash # clear the cache diff --git a/ocaml/mk_meta.sh b/ocaml/mk_meta.sh index d45ed16..8e1b5ff 100755 --- a/ocaml/mk_meta.sh +++ b/ocaml/mk_meta.sh @@ -8,7 +8,7 @@ requires = "ctypes,ctypes.foreign,str" version = "0.1" archive(native) = "dash.cmxa" archive(byte) = "dash.cma" -linkopts(native) = "-ccopt -L$(opam var lib) -ccopt -Wl,-rpath -ccopt -Wl,$(opam var lib) -cclib -ldash" -linkopts(byte) = "-dllpath $(opam var lib)/libdash" +linkopts(native) = "-ccopt -L$(opam var libdash:lib) -ccopt -Wl,-rpath -ccopt -Wl,$(opam var libdash:lib) -cclib -ldash" +linkopts(byte) = "-dllpath $(opam var libdash:lib)/libdash" EOF From 8fd128be7c50bcc930fccf2e042272d01de544ff Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 13 May 2020 20:28:32 -0700 Subject: [PATCH 329/401] try to fix mk_meta --- libdash.opam | 1 + ocaml/mk_meta.sh | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/libdash.opam b/libdash.opam index b612f20..abd25b0 100644 --- a/libdash.opam +++ b/libdash.opam @@ -28,6 +28,7 @@ build: [ ["./configure" "--prefix=%{build}%/_build"] [make] [make "install"] # into _build + ["ocaml/mk_meta.sh" "%{_:lib}%"] # pass along the lib directory for the rpath in the META [make "-C" "ocaml" "all"] ["./mk_dot_install.sh"] ["./ldconfig.sh"] # fix up .so files if ldconfig didn't do it diff --git a/ocaml/mk_meta.sh b/ocaml/mk_meta.sh index 8e1b5ff..381016e 100755 --- a/ocaml/mk_meta.sh +++ b/ocaml/mk_meta.sh @@ -2,13 +2,16 @@ set -e +LIB="$1" +: ${LIB:=$(opam var libdash:lib)} + cat >META < Date: Wed, 13 May 2020 20:33:47 -0700 Subject: [PATCH 330/401] fix mk_meta --- ocaml/mk_meta.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/mk_meta.sh b/ocaml/mk_meta.sh index 381016e..22d1d18 100755 --- a/ocaml/mk_meta.sh +++ b/ocaml/mk_meta.sh @@ -3,7 +3,7 @@ set -e LIB="$1" -: ${LIB:=$(opam var libdash:lib)} +: 
${LIB:=$(opam var lib)/libdash} cat >META < Date: Fri, 25 Sep 2020 12:56:06 -0700 Subject: [PATCH 331/401] update ci --- .travis-ocaml.sh | 43 ++++++++++++++++++++++++++++++++----------- .travis.yml | 30 +++++++++++++++--------------- 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/.travis-ocaml.sh b/.travis-ocaml.sh index 3df8d0e..6730871 100644 --- a/.travis-ocaml.sh +++ b/.travis-ocaml.sh @@ -26,7 +26,13 @@ OPAM_VERSION=${OPAM_VERSION:-2} OPAM_INIT=${OPAM_INIT:-true} OCAML_BETA=${OCAML_BETA:-disable} -OPAM_LATEST_RELEASE=2.0.6 +OPAM_LATEST_RELEASE=2.0.7 + +case ${TRAVIS_CPU_ARCH:-amd64} in + amd64|notset) OPAM_ARCH=x86_64;; + arm64) OPAM_ARCH=arm64;; + *) echo "'$TRAVIS_CPU_ARCH' architecture not currently supported"; exit 1;; +esac case $OPAM_VERSION in 2|2.0) OPAM_VERSION=$OPAM_LATEST_RELEASE;; @@ -109,19 +115,20 @@ install_opam2 () { linux) case $TRAVIS_DIST in precise|trusty|xenial) - add_ppa ansible/bubblewrap ;; + # Required for bubblewrap (supports arm64 & amd64) + add_ppa avsm/ppa ;; esac if [ "${INSTALL_LOCAL:=0}" = 0 ] ; then install_ocaml fi apt_install bubblewrap - sudo wget https://github.com/ocaml/opam/releases/download/$OPAM_VERSION/opam-$OPAM_VERSION-x86_64-linux -O /usr/local/bin/opam + sudo wget https://github.com/ocaml/opam/releases/download/$OPAM_VERSION/opam-$OPAM_VERSION-$OPAM_ARCH-linux -O /usr/local/bin/opam sudo chmod +x /usr/local/bin/opam ;; osx) if [ "${INSTALL_LOCAL:=0}" = 0 ] ; then brew install ocaml fi - sudo curl -fsSL https://github.com/ocaml/opam/releases/download/$OPAM_VERSION/opam-$OPAM_VERSION-x86_64-macos -o /usr/local/bin/opam + sudo curl -fsSL https://github.com/ocaml/opam/releases/download/$OPAM_VERSION/opam-$OPAM_VERSION-$OPAM_ARCH-macos -o /usr/local/bin/opam sudo chmod +x /usr/local/bin/opam ;; esac } @@ -156,7 +163,9 @@ install_on_freebsd () { 4.07) OCAML_FULL_VERSION=4.07.1; install_opam2 ;; 4.08) OCAML_FULL_VERSION=4.08.1; install_opam2 ;; 4.09) OCAML_FULL_VERSION=4.09.1; install_opam2 ;; - 4.10) 
OCAML_FULL_VERSION=4.10.0; install_opam2 ;; + 4.10) OCAML_FULL_VERSION=4.10.1; install_opam2 ;; + 4.11) OCAML_FULL_VERSION=4.11.0; install_opam2 ;; + 4.12) OCAML_FULL_VERSION=4.12.0+trunk; OCAML_BETA=enable; install_opam2 ;; *) if [ "$OCAML_BETA" != "enable" ]; then echo "Unknown OCAML_VERSION=$OCAML_VERSION" @@ -182,7 +191,9 @@ install_on_linux () { 4.07) OCAML_FULL_VERSION=4.07.1; install_opam2 ;; 4.08) OCAML_FULL_VERSION=4.08.1; install_opam2 ;; 4.09) OCAML_FULL_VERSION=4.09.1; install_opam2 ;; - 4.10) OCAML_FULL_VERSION=4.10.0; install_opam2 ;; + 4.10) OCAML_FULL_VERSION=4.10.1; install_opam2 ;; + 4.11) OCAML_FULL_VERSION=4.11.0; install_opam2 ;; + 4.12) OCAML_FULL_VERSION=4.12.0+trunk; OCAML_BETA=enable; install_opam2 ;; *) if [ "$OCAML_BETA" != "enable" ]; then echo "Unknown OCAML_VERSION=$OCAML_VERSION" @@ -214,7 +225,7 @@ install_on_linux () { fi if [ "${INSTALL_LOCAL:=0}" != 0 ] ; then - echo -en "travis_fold:start:build.ocaml\r" + ( set +x; echo -en "travis_fold:start:build.ocaml\r" ) 2>/dev/null echo "Building a local OCaml; this may take a few minutes..." wget "http://caml.inria.fr/pub/distrib/ocaml-${OCAML_FULL_VERSION%.*}/ocaml-$OCAML_FULL_VERSION.tar.gz" tar -xzf "ocaml-$OCAML_FULL_VERSION.tar.gz" @@ -223,7 +234,8 @@ install_on_linux () { make world.opt sudo make install cd .. 
- echo -en "travis_fold:end:build.ocaml\r" + rm -rf "ocaml-$OCAML_FULL_VERSION" + ( set +x; echo -en "travis_fold:end:build.ocaml\r" ) 2>/dev/null fi } @@ -250,7 +262,9 @@ install_on_osx () { OPAM_SWITCH=${OPAM_SWITCH:-ocaml-system}; brew install ocaml; install_opam2 ;; - 4.10) OCAML_FULL_VERSION=4.10.0; install_opam2 ;; + 4.10) OCAML_FULL_VERSION=4.10.1; install_opam2 ;; + 4.11) OCAML_FULL_VERSION=4.11.0; install_opam2 ;; + 4.12) OCAML_FULL_VERSION=4.12.0+trunk; OCAML_BETA=enable; install_opam2 ;; *) if [ "$OCAML_BETA" != "enable" ]; then echo "Unknown OCAML_VERSION=$OCAML_VERSION" @@ -274,6 +288,12 @@ fi OPAM_SWITCH=${OPAM_SWITCH:-$ocaml_package.$OCAML_FULL_VERSION} +PACKAGES="$OPAM_SWITCH" +case "$OCAML_VERSION" in + 3.12|4.00|4.01|4.02|4.03|4.04|4.05|4.06) + PACKAGES="$PACKAGES,ocaml-secondary-compiler";; +esac + export OPAMYES=1 case $OPAM_INIT in @@ -284,7 +304,7 @@ case $OPAM_INIT in opam repo add --dont-select beta git://github.com/ocaml/ocaml-beta-repository.git opam_repo_selection="--repo=default,beta" fi - opam switch "$OPAM_SWITCH" || opam switch create $opam_repo_selection "$OPAM_SWITCH" + opam switch "$OPAM_SWITCH" || opam switch create $opam_repo_selection "$OPAM_SWITCH" --packages="$PACKAGES" eval $(opam config env) ;; esac @@ -297,7 +317,8 @@ echo OPAM_SWITCH=$OPAM_SWITCH >> .travis-ocaml.env echo export ASSUME_ALWAYS_YES=YES >> .travis-ocaml.env if [ -x "$(command -v ocaml)" ]; then - ocaml -version + # "|| true" is a temp fix for OCaml 4.12: https://github.com/ocaml/ocaml/pull/9798 + ocaml -version || true else echo "OCaml is not yet installed" fi diff --git a/.travis.yml b/.travis.yml index 2e49652..b24e3d6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,24 +2,24 @@ dist: xenial language: generic -cache: - directories: - - $HOME/.opam +env: + - OCAML_VERSION=4.09 + - OCAML_VERSION=4.10 + - OCAML_VERSION=4.11 -matrix: +os: + - freebsd + - linux + - osx + +jobs: include: - - os: osx - osx_image: xcode11.3 - env: OCAML_VERSION=4.09 - - os: freebsd 
- env: OCAML_VERSION=4.09 - - os: linux - env: OCAML_VERSION=4.09 INSTALL_LOCAL=1 - os: linux - env: OCAML_VERSION=4.11.0+trunk OCAML_BETA=enable - allow_failures: - - os: freebsd - env: OCAML_VERSION=4.09 + env: OCAML_VERSION=4.11 INSTALL_LOCAL=1 + +cache: + directories: + - $HOME/.opam addons: apt: From d95665ac77d23f64a40b435ec8eb7b919e70c3ca Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 23 Dec 2020 10:13:29 -0800 Subject: [PATCH 332/401] added AST guide from pash convo --- GUIDE.md | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 GUIDE.md diff --git a/GUIDE.md b/GUIDE.md new file mode 100644 index 0000000..6723ef8 --- /dev/null +++ b/GUIDE.md @@ -0,0 +1,72 @@ +# The dash AST + +The dash AST itself is defined in `src/nodes.h` and `src/parser.h` + +- `union node` in `src/nodes.h` on line 146 for commands/statements + node.type type tag + + node.nbinary (AND, OR, SEMI) + +- special characters and codes in `parser.h` on lines 40-64 + + CTL* for control codes in words + breaks multibyte characters/UTF-8 :( + + VS* for variable format metadata + +The parser in `src/parser.c` is not easy to read, but is a good place +to see dash ASTs being constructed. + +Input sources come in a stack (to support, e.g., the `source`/`.` +command). Dash has some subtle invariants around its own string +allocation stack... it took quite some time to get it right in Smoosh, +so don't try to "optimize" things! (See +https://github.com/mgree/smoosh/pull/18 for test cases.) + +To get a gist for how they're used, look at `evaltree` at line 200 in +`src/eval.c`. To see how the special characters and codes are used, +see `argstr` at line 23 in `src/expand.c`. + +# OCaml bindings + +The core OCaml bindings are in `ocaml/dash.ml`. The bindings are +dynamically loaded by ctypes. (It's a longstanding TODO to make these +bindings static, as it would significantly simplify the build process.) 
+ +Lines 69 through 233 are just copying the definitions from +`src/nodes.h`. + +The primary API entry point is `parse_next`, which returns one of a +few results: + + - `Done` when EOF (dash returns the special node `neof`, not `NULL`!) has been + reached for the current input. + + - `Error` when parsing failed (dash returns the special node `nerr`, + not `NULL`!). + + - `Null` when there was no command, e.g., a blank line (dash returns + `NULL` here). + + - `Parsed n` for some `node`, `n`. Note that `n` is a dash AST, + i.e., a ctypes structure. + +These nodes are dash AST nodes, not yet a usable OCaml structure. + +# AST translation + +See `ocaml/ast.ml` (or Smoosh's `src/shim.ml` for a more +battle-hardened, nearly but not quite identical version of the same +code) for the `of_node` entry point that converts dash AST nodes to +OCaml structures. + +`parse_arg` is a funny stack machine, best thought of as a for loop +with an explicit stack. There are some tricky extra bits of +information to track (i.e., when tildes are possible, whether we're in +an assignment). + +# General approach + +Call `Dash.initialize`, then `Dash.parse` with the string you +have. Call `Ast.of_node` on the resulting dash AST to get a nice OCaml +structure. 
From 9b725c03cd63307d1ced1bc5c39e95b3a7fd87d6 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 12 May 2021 20:33:58 -0700 Subject: [PATCH 333/401] support thread business in ctypes-foreign --- ocaml/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ocaml/Makefile b/ocaml/Makefile index 4e60f31..9814745 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -11,7 +11,7 @@ META : mk_meta.sh ./mk_meta.sh dash.cmxa : dash.mli dash.ml ast.mli ast.ml - ocamlfind ocamlmklib -g -package str,ctypes,ctypes.foreign -L$(BUILD) -ldash $^ -o dash + ocamlfind ocamlmklib -g -thread -package threads,str,ctypes,ctypes.foreign -L$(BUILD) -ldash $^ -o dash test : test.native test.byte $(wildcard ../test/tests/*) @echo "TESTING test.native" @@ -24,10 +24,10 @@ test : test.native test.byte $(wildcard ../test/tests/*) done test.native : test.ml dash.cmxa - LD_LIBRARY_PATH=$(BUILD) ocamlfind ocamlopt -g -package str,ctypes,ctypes.foreign -linkpkg -ccopt -L$(BUILD) -ccopt -Wl,-rpath -ccopt -Wl,$(BUILD) -cclib -ldash dash.cmxa test.ml -o $@ + LD_LIBRARY_PATH=$(BUILD) ocamlfind ocamlopt -g -thread -package threads,str,ctypes,ctypes.foreign -linkpkg -ccopt -L$(BUILD) -ccopt -Wl,-rpath -ccopt -Wl,$(BUILD) -cclib -ldash dash.cmxa test.ml -o $@ test.byte : test.ml dash.cmxa - LD_LIBRARY_PATH=$(BUILD) ocamlfind ocamlc -g -package str,ctypes,ctypes.foreign -linkpkg -I $(BUILD) -ccopt -L$(BUILD) -dllpath $(BUILD) -cclib -ldash dash.cma test.ml -o $@ + LD_LIBRARY_PATH=$(BUILD) ocamlfind ocamlc -g -thread -package threads,str,ctypes,ctypes.foreign -linkpkg -I $(BUILD) -ccopt -L$(BUILD) -dllpath $(BUILD) -cclib -ldash dash.cma test.ml -o $@ test.ml : ../test/test.ml cp $< $@ From d39e13102bf876a05884f1250b9dd0d21a1052f1 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 12 May 2021 20:42:01 -0700 Subject: [PATCH 334/401] setup gh workflow --- .github/workflows/build.yml | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create 
mode 100644 .github/workflows/build.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..71588b0 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,37 @@ +name: Main workflow + +on: + - pull_request + - push + +jobs: + build: + strategy: + fail-fast: false + matrix: + os: + - macos-latest + - ubuntu-latest + - windows-latest + ocaml-version: + - 4.11.0 + + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Use OCaml ${{ matrix.ocaml-version }} + uses: avsm/setup-ocaml@v1 + with: + ocaml-version: ${{ matrix.ocaml-version }} + + - run: opam pin add libdash . --no-action + + - run: opam depext mgt --yes --with-test + + - run: opam install . --with-test + + - run: opam exec -- make -C test test + From 50f22449eb3bca1f2f0937a9b72ee9bfe80ebcd7 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 12 May 2021 20:46:03 -0700 Subject: [PATCH 335/401] s/mgt/libdash/ copypasta lol whoops --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 71588b0..c8c3037 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,7 +29,7 @@ jobs: - run: opam pin add libdash . --no-action - - run: opam depext mgt --yes --with-test + - run: opam depext libdash --yes --with-test - run: opam install . 
--with-test From a0fd78028c9e519e9326f2890ebb5eb54be1eef0 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 12 May 2021 20:52:22 -0700 Subject: [PATCH 336/401] drop windows build---hopeless --- .github/workflows/build.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c8c3037..1c44535 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,7 +12,6 @@ jobs: os: - macos-latest - ubuntu-latest - - windows-latest ocaml-version: - 4.11.0 From ff100c65f133d18c915c9a80c11dbe26cebfe497 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 12 May 2021 21:00:12 -0700 Subject: [PATCH 337/401] drop -thread for ocamlmklib --- ocaml/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/Makefile b/ocaml/Makefile index 9814745..f33dbea 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -11,7 +11,7 @@ META : mk_meta.sh ./mk_meta.sh dash.cmxa : dash.mli dash.ml ast.mli ast.ml - ocamlfind ocamlmklib -g -thread -package threads,str,ctypes,ctypes.foreign -L$(BUILD) -ldash $^ -o dash + ocamlfind ocamlmklib -g -package threads,str,ctypes,ctypes.foreign -L$(BUILD) -ldash $^ -o dash test : test.native test.byte $(wildcard ../test/tests/*) @echo "TESTING test.native" From 38d99640e6c4c928dacac6687c9c3091426238b9 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 12 May 2021 21:07:50 -0700 Subject: [PATCH 338/401] thread support in tests --- test/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Makefile b/test/Makefile index d1fdbce..39f02dc 100644 --- a/test/Makefile +++ b/test/Makefile @@ -14,11 +14,11 @@ test : test.native test.byte $(wildcard tests/*) test.native : test.ml - ocamlfind ocamlopt -g -package str,libdash -linkpkg $^ -o test.native + ocamlfind ocamlopt -g -thread -package threads,str,libdash -linkpkg $^ -o test.native ldd $@ || otool -L $@ test.byte : test.ml - ocamlfind ocamlc -g -package 
str,libdash -linkpkg $^ -o test.byte + ocamlfind ocamlc -g -thread -package threads,str,libdash -linkpkg $^ -o test.byte clean : rm -f *.o *.cmo *.cmi *.cmx test.native test.byte test.err From 0d8f4d7346f701cfbb9bad1cc36aa18fbab35ade Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 12 May 2021 21:19:25 -0700 Subject: [PATCH 339/401] move to gh actions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 671d2fe..33303a4 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![Build Status](https://travis-ci.com/mgree/libdash.svg?branch=master)](https://travis-ci.com/mgree/libdash) +[![Main workflow](https://github.com/mgree/libdash/actions/workflows/build.yml/badge.svg)](https://github.com/mgree/libdash/actions/workflows/build.yml) *libdash* is a fork of the Linux Kernel's `dash` shell that builds a linkable library with extra exposed interfaces. The primary use of libdash is to parse shell scripts, but it could be used for more. From be5163439edbdd5d17b7f15ee2ee4f83c3c24916 Mon Sep 17 00:00:00 2001 From: Sora Morimoto Date: Fri, 25 Feb 2022 23:19:20 +0900 Subject: [PATCH 340/401] Use setup-ocaml v2 (#14) Upgrades the OCaml install script in CI. 
Signed-off-by: Sora Morimoto --- .github/workflows/build.yml | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1c44535..9855fe6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,8 +12,10 @@ jobs: os: - macos-latest - ubuntu-latest - ocaml-version: - - 4.11.0 + ocaml-compiler: + - 4.11.x + - 4.12.x + - 4.13.x runs-on: ${{ matrix.os }} @@ -21,16 +23,11 @@ jobs: - name: Checkout code uses: actions/checkout@v2 - - name: Use OCaml ${{ matrix.ocaml-version }} - uses: avsm/setup-ocaml@v1 + - name: Use OCaml ${{ matrix.ocaml-compiler }} + uses: ocaml/setup-ocaml@v2 with: - ocaml-version: ${{ matrix.ocaml-version }} - - - run: opam pin add libdash . --no-action - - - run: opam depext libdash --yes --with-test + ocaml-compiler: ${{ matrix.ocaml-compiler }} - run: opam install . --with-test - + - run: opam exec -- make -C test test - From 2d580423c02497363f263e35ef2c90a3979402a8 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Sun, 10 Jul 2022 19:31:21 -0400 Subject: [PATCH 341/401] Python bindings (#17) Migrates in the Python bindings from [pash's vendored fork](https://github.com/angelhof/libdash/tree/ef6302502b904e33dd4cc686d71142fb1a87bbbd). Resolves issues from #9 and #13. Significantly expanded tests, with conformance tests to ensure that the OCaml and Python bindings agree. 
--- .github/workflows/build.yml | 49 +- .gitignore | 2 + README.md | 18 +- TODO.md | 5 + libdash.opam | 1 + mk_dot_install.sh | 11 +- ocaml/.gitignore | 6 + ocaml/Makefile | 46 +- ocaml/ast.ml | 214 ++-- ocaml/ast.mli | 36 +- ocaml/ast_atd.atd | 78 ++ ocaml/dash.ml | 2 +- ocaml/json_to_shell.ml | 38 + ocaml/rt.sh | 47 + test/test.ml => ocaml/shell_to_json.ml | 8 +- python/.gitignore | 4 + python/Makefile | 9 + python/ast.py | 558 +++++++++ python/ast2shell.py | 562 +++++++++ python/dash.py | 335 ++++++ python/parse_to_ast.py | 86 ++ python/rt.py | 19 + test/.gitignore | 1 + test/Makefile | 29 +- test/README.md | 7 + test/failing/.travis-ocaml.sh | 327 ++++++ test/failing/1.tomp3.sh | 12 + test/failing/3.resiz.sh | 13 + test/failing/append_nl_if_not.sh | 20 + test/failing/array.sh | 1 + test/failing/async.sh | 16 + test/failing/auto-split.sh | 33 + test/failing/bio.sh | 33 + test/failing/bio2.sh | 73 ++ test/failing/bio3.sh | 10 + test/failing/buggy_comm_script.sh | 21 + test/failing/build_lib.sh | 75 ++ test/failing/ci-perf.sh | 163 +++ test/failing/ci.sh | 107 ++ test/failing/dgsh_tee.sh | 21 + test/failing/driver.sh | 86 ++ test/failing/execute_baseline_sort.sh | 95 ++ .../execute_compile_evaluation_script.sh | 107 ++ test/failing/execute_eurosys_one_liners.sh | 135 +++ test/failing/execute_gnu_parallel_script.sh | 52 + .../execute_max_temp_dish_evaluation.sh | 89 ++ .../execute_web_index_dish_evaluation.sh | 57 + test/failing/exit_error.sh | 10 + test/failing/genome-diff.sh | 42 + test/failing/heredoc2.sh | 6 + test/failing/incomplete-arith.sh | 10 + test/failing/invalidate0.sh | 7 + test/failing/invalidate1.sh | 7 + test/failing/maximal.sh | 33 + test/failing/mk_meta.sh | 17 + test/failing/multiply.sh | 31 + test/failing/pash_init_setup.sh | 238 ++++ test/failing/pash_runtime.sh | 297 +++++ test/failing/pash_runtime_quick_abort.sh | 283 +++++ test/failing/pash_set_from_to.sh | 13 + test/failing/pash_source_declare_vars.sh | 26 + test/failing/pash_wrap_vars.sh | 
49 + test/failing/pay_respects.sh | 13 + test/failing/process-aliases.sh | 20 + test/failing/quickcheck.sh | 15 + test/failing/remote_exec_graph.sh | 10 + test/failing/run-experiment.sh | 36 + test/failing/run.par.sh | 601 ++++++++++ test/failing/run.seq.sh | 498 ++++++++ test/failing/run.sh | 79 ++ test/failing/run_alias.sh | 25 + test/failing/run_all.sh | 109 ++ test/failing/run_all_benchmarks_ci.sh | 88 ++ test/failing/run_parser_on_scripts.sh | 10 + test/failing/safe6.sh | 1 + test/failing/setup-pash.sh | 152 +++ test/failing/sieve.sh | 20 + test/failing/split-unix50.sh | 17 + test/failing/split_pipe.sh | 11 + test/failing/sq.sh | 36 + test/failing/statistics.sh | 27 + test/failing/superoptimize.sh | 4 + test/failing/test-bsd.sh | 53 + test/failing/test-exclam.sh | 3 + test/failing/test-linux.sh | 57 + test/failing/test-shlex-aux.sh | 9 + test/failing/test_JSON_to_shell2.sh | 62 + test/failing/test_ast2shell_py.sh | 66 ++ test/failing/test_evaluation_scripts.sh | 233 ++++ test/failing/test_parse_to_JSON2.sh | 75 ++ test/failing/test_rt.sh | 71 ++ test/failing/test_rt_py.sh | 65 ++ test/failing/timing-JSON.sh | 27 + test/failing/timing.sh | 31 + test/failing/unzip-1.sh | 10 + test/failing/up.sh | 38 + test/failing/utils.sh | 123 ++ test/failing/wc.2.sh | 18 + test/failing/web-log-stats.sh | 147 +++ test/failing/with-ec2.sh | 38 + test/pash_tests/1.sh | 6 + test/pash_tests/10.sh | 6 + test/pash_tests/11.sh | 6 + test/pash_tests/12.sh | 6 + test/pash_tests/13.sh | 6 + test/pash_tests/14.sh | 6 + test/pash_tests/15.sh | 6 + test/pash_tests/16.sh | 6 + test/pash_tests/17.sh | 6 + test/pash_tests/18.sh | 6 + test/pash_tests/19.sh | 6 + test/pash_tests/1_1.sh | 15 + test/pash_tests/2.sh | 6 + test/pash_tests/2.unrtf.sh | 7 + test/pash_tests/20.sh | 6 + test/pash_tests/21.sh | 6 + test/pash_tests/22.sh | 6 + test/pash_tests/23.sh | 6 + test/pash_tests/24.sh | 6 + test/pash_tests/25.sh | 6 + test/pash_tests/26.sh | 6 + test/pash_tests/27.sh | 6 + test/pash_tests/28.sh | 
6 + test/pash_tests/29.sh | 6 + test/pash_tests/2_1.sh | 17 + test/pash_tests/2_2.sh | 16 + test/pash_tests/3.sh | 6 + test/pash_tests/30.sh | 6 + test/pash_tests/31.sh | 6 + test/pash_tests/32.sh | 6 + test/pash_tests/33.sh | 6 + test/pash_tests/34.sh | 6 + test/pash_tests/35.sh | 6 + test/pash_tests/36.sh | 6 + test/pash_tests/3_1.sh | 16 + test/pash_tests/3_2.sh | 16 + test/pash_tests/3_3.sh | 16 + test/pash_tests/4.gitkernel.sh | 11 + test/pash_tests/4.sh | 6 + test/pash_tests/4_3.sh | 19 + test/pash_tests/4_3b.sh | 24 + test/pash_tests/5.apachelog.sh | 9 + test/pash_tests/5.sh | 6 + test/pash_tests/6.msg.sh | 10 + test/pash_tests/6.sh | 6 + test/pash_tests/6_1.sh | 27 + test/pash_tests/6_1_1.sh | 16 + test/pash_tests/6_1_2.sh | 16 + test/pash_tests/6_2.sh | 18 + test/pash_tests/6_3.sh | 16 + test/pash_tests/6_4.sh | 16 + test/pash_tests/6_5.sh | 16 + test/pash_tests/6_7.sh | 19 + test/pash_tests/7.nginx.sh | 6 + test/pash_tests/7.sh | 6 + test/pash_tests/7_1.sh | 16 + test/pash_tests/7_2.sh | 16 + test/pash_tests/8.2_1.sh | 16 + test/pash_tests/8.2_2.sh | 26 + test/pash_tests/8.3_2.sh | 24 + test/pash_tests/8.3_3.sh | 24 + test/pash_tests/8.sh | 6 + test/pash_tests/8.varlog.sh | 5 + test/pash_tests/8_1.sh | 22 + test/pash_tests/9.sh | 6 + test/pash_tests/add.sh | 2 + test/pash_tests/alt_bigrams.sh | 10 + test/pash_tests/alt_bigrams_env_test.sh | 1 + test/pash_tests/alt_bigrams_funs.sh | 27 + test/pash_tests/ann-agg-2.sh | 16 + test/pash_tests/ann-agg.sh | 15 + test/pash_tests/archive.sh | 6 + test/pash_tests/args_with_spaces.sh | 2 + test/pash_tests/autogen.sh | 7 + test/pash_tests/bam_to_sam.sh | 4 + test/pash_tests/bell_grep.sh | 54 + test/pash_tests/bi-gram.aux.sh | 96 ++ test/pash_tests/bi-grams.sh | 15 + test/pash_tests/bigrams.sh | 85 ++ test/pash_tests/bigrams_aux_map.sh | 76 ++ test/pash_tests/bigrams_env_test.sh | 1 + test/pash_tests/bio4.sh | 30 + test/pash_tests/braces_amp.sh | 5 + test/pash_tests/buggy_non_newline_input.sh | 18 + 
.../call_distrib_planner_example.sh | 1 + test/pash_tests/cat-redir-fail.sh | 1 + test/pash_tests/cat_output_files.sh | 5 + test/pash_tests/circus.sh | 15 + test/pash_tests/clone_compress_repo.sh | 16 + test/pash_tests/cmd_sbst.sh | 6 + test/pash_tests/cmd_sbst_subscript.sh | 2 + test/pash_tests/comm-par-test.sh | 10 + test/pash_tests/comm-par-test2.sh | 10 + test/pash_tests/comm-par-test2_env_test.sh | 2 + test/pash_tests/comm-par-test_env_test.sh | 2 + test/pash_tests/compile.sh | 16 + test/pash_tests/compress_files.sh | 6 + test/pash_tests/concat.sh | 2 + test/pash_tests/convert_to_fast.sh | 6 + test/pash_tests/count.sh | 2 + test/pash_tests/count_packets.sh | 3 + test/pash_tests/deadlock_test.sh | 1 + test/pash_tests/deadlock_test_env_test.sh | 1 + test/pash_tests/demo-spell.sh | 12 + test/pash_tests/dfs_split_reader.sh | 1 + test/pash_tests/dgsh-raw-sort.sh | 49 + test/pash_tests/dgsh-sort.sh | 52 + test/pash_tests/dgsh-wc.sh | 45 + test/pash_tests/diff.sh | 21 + test/pash_tests/diff_env_test.sh | 2 + test/pash_tests/distributed.sh | 49 + test/pash_tests/distro-deps.sh | 101 ++ test/pash_tests/distrotest.sh | 56 + test/pash_tests/distrotest_env.sh | 2 + test/pash_tests/distrotest_funs.sh | 40 + test/pash_tests/diverge.sh | 3 + test/pash_tests/double_sort.sh | 2 + test/pash_tests/double_sort_env_test.sh | 2 + test/pash_tests/drain_stream.sh | 7 + test/pash_tests/eager-no-task-par.sh | 11 + test/pash_tests/eager.sh | 21 + test/pash_tests/eager_test.sh | 16 + test/pash_tests/echo_args.sh | 2 + test/pash_tests/encrypt_files.sh | 5 + test/pash_tests/escape-madness.sh | 3 + test/pash_tests/exec-redirections.sh | 3 + test/pash_tests/execute_unix_benchmarks.sh | 65 ++ test/pash_tests/exit_code.sh | 4 + test/pash_tests/expand-u-positional.sh | 2 + test/pash_tests/expand-u.sh | 3 + test/pash_tests/export_var_script.sh | 2 + test/pash_tests/for-echo.sh | 6 + test/pash_tests/for-loop.sh | 8 + test/pash_tests/for_loop_simple.sh | 3 + 
test/pash_tests/for_loop_simple_env_test.sh | 2 + test/pash_tests/for_spaces.sh | 8 + test/pash_tests/fun-def.sh | 15 + test/pash_tests/gen_data.sh | 126 +++ test/pash_tests/gen_pl.sh | 4 + test/pash_tests/generate_single_chrom.sh | 19 + test/pash_tests/genomics.sh | 42 + test/pash_tests/genquality.sh | 75 ++ test/pash_tests/get-summary.sh | 14 + test/pash_tests/get_hash.sh | 2 + test/pash_tests/get_results.sh | 38 + test/pash_tests/get_type_count.sh | 4 + test/pash_tests/grab_submissions.sh | 15 + test/pash_tests/grade.sh | 60 + test/pash_tests/grep-test.sh | 8 + test/pash_tests/grep.sh | 2 + test/pash_tests/grep_env_test.sh | 1 + test/pash_tests/grep_f_script.sh | 28 + test/pash_tests/head.sh | 4 + test/pash_tests/head_deadlock.sh | 9 + test/pash_tests/head_deadlock_fixed.sh | 9 + test/pash_tests/head_deadlock_fixed3.sh | 39 + test/pash_tests/head_deadlock_fixed_2.sh | 9 + test/pash_tests/hello-world.sh | 8 + test/pash_tests/heredoc1.sh | 3 + test/pash_tests/identity.sh | 36 + test/pash_tests/img_convert.sh | 12 + test/pash_tests/incr.sh | 5 + test/pash_tests/innefficient_auto_split.sh | 15 + test/pash_tests/install-deps.sh | 6 + test/pash_tests/ldconfig.sh | 16 + test/pash_tests/longest-man.sh | 5 + test/pash_tests/loop1.sh | 3 + test/pash_tests/make-ec2.sh | 21 + test/pash_tests/max-temp-preprocess.sh | 12 + test/pash_tests/max-temp-process.sh | 8 + test/pash_tests/max-temp.sh | 23 + test/pash_tests/merge-uniq.sh | 9 + test/pash_tests/merge-wc.sh | 11 + test/pash_tests/micro_10.sh | 11 + test/pash_tests/micro_1000.sh | 1002 +++++++++++++++++ test/pash_tests/micro_1000_env_test.sh | 1 + test/pash_tests/micro_10_env_test.sh | 1 + test/pash_tests/minimal_grep.sh | 54 + test/pash_tests/minimal_grep_env_test.sh | 1 + test/pash_tests/minimal_grep_stdin.sh | 1 + test/pash_tests/minimal_sort.sh | 2 + test/pash_tests/minimal_sort_env_test.sh | 1 + test/pash_tests/mk_dot_install.sh | 22 + test/pash_tests/newline_in_var.sh | 9 + test/pash_tests/nfa-regex.sh | 6 + 
test/pash_tests/nginx.sh | 22 + test/pash_tests/no_in_script.sh | 2 + test/pash_tests/p1.sh | 16 + test/pash_tests/p2.sh | 8 + test/pash_tests/pa.sh | 90 ++ test/pash_tests/pacaur.sh | 40 + test/pash_tests/page-count.sh | 20 + test/pash_tests/page-per-line.sh | 14 + test/pash_tests/parse.sh | 17 + test/pash_tests/pash_declare_vars.sh | 10 + test/pash_tests/pash_ptempfile_name.sh | 5 + .../pash_runtime_complete_execution.sh | 43 + test/pash_tests/pash_runtime_shell_to_pash.sh | 29 + test/pash_tests/pcap.sh | 25 + test/pash_tests/pcap_bench.sh | 8 + test/pash_tests/pkg.sh | 43 + test/pash_tests/pretty_print_json.sh | 7 + test/pash_tests/proginf.sh | 18 + test/pash_tests/r-bell_grep.sh | 50 + test/pash_tests/r-minimal_grep.sh | 46 + test/pash_tests/r-shortest-scripts.sh | 110 ++ test/pash_tests/r-sort.sh | 53 + test/pash_tests/r-wc.sh | 44 + test/pash_tests/raw-r-sort.sh | 49 + test/pash_tests/readonly.sh | 8 + test/pash_tests/redir-var-test.sh | 10 + test/pash_tests/redirect.sh | 2 + test/pash_tests/redirect_stdin_to.sh | 4 + test/pash_tests/redirect_wrapper.sh | 1 + test/pash_tests/remote_read.sh | 1 + test/pash_tests/remote_write.sh | 1 + test/pash_tests/remove_adapter.sh | 3 + test/pash_tests/round_trip.sh | 29 + test/pash_tests/run_all_benchmarks.sh | 70 ++ test/pash_tests/run_evaluation.sh | 243 ++++ test/pash_tests/run_grader.sh | 26 + test/pash_tests/run_lda.sh | 57 + test/pash_tests/run_tests.sh | 25 + test/pash_tests/safe0.sh | 1 + test/pash_tests/safe1.sh | 1 + test/pash_tests/safe2.sh | 1 + test/pash_tests/safe3.sh | 1 + test/pash_tests/safe4.sh | 1 + test/pash_tests/safe5.sh | 1 + test/pash_tests/safe7.sh | 1 + test/pash_tests/search.sh | 9 + test/pash_tests/sed-test.sh | 11 + test/pash_tests/send_emails.sh | 17 + test/pash_tests/set-dash-v-x.sh | 4 + test/pash_tests/set-diff.sh | 20 + test/pash_tests/set-diff_env_test.sh | 2 + test/pash_tests/set-e-2.sh | 4 + test/pash_tests/set-e-3.sh | 21 + test/pash_tests/set-e.sh | 18 + test/pash_tests/set-v.sh | 2 + 
test/pash_tests/set.sh | 7 + test/pash_tests/set_bug.sh | 11 + test/pash_tests/setup-dspash.sh | 64 ++ test/pash_tests/setup.sh | 10 + test/pash_tests/shortest-scripts.sh | 221 ++++ test/pash_tests/shortest_scripts.sh | 7 + test/pash_tests/shortest_scripts_env_test.sh | 1 + test/pash_tests/sine.sh | 4 + test/pash_tests/sort-opt.sh | 3 + test/pash_tests/sort-opt_env.sh | 1 + test/pash_tests/sort-sort.sh | 6 + test/pash_tests/sort.sh | 7 + test/pash_tests/sort_env.sh | 1 + test/pash_tests/sort_env_small.sh | 1 + test/pash_tests/sort_env_test.sh | 1 + test/pash_tests/spell-grep.sh | 17 + test/pash_tests/spell.sh | 16 + test/pash_tests/spell_env_test.sh | 3 + test/pash_tests/split_pcap.sh | 40 + test/pash_tests/star-escape.sh | 1 + test/pash_tests/suggest-ec2.sh | 19 + test/pash_tests/symtab-sha.sh | 20 + test/pash_tests/tail.sh | 4 + test/pash_tests/tailprogs.sh | 46 + test/pash_tests/tee_web_index_bug.sh | 25 + test/pash_tests/temp-analytics.sh | 39 + test/pash_tests/temp_test.sh | 8 + test/pash_tests/test-common.sh | 19 + test/pash_tests/test.sh | 5 + test/pash_tests/test1.sh | 48 + test/pash_tests/tilde.sh | 4 + test/pash_tests/to_mp3.sh | 21 + test/pash_tests/top-n.sh | 8 + test/pash_tests/topn.sh | 2 + test/pash_tests/topn_env_test.sh | 3 + test/pash_tests/tr-test.sh | 25 + test/pash_tests/tr_cs_wc_test.sh | 17 + test/pash_tests/trap.sh | 7 + test/pash_tests/trigrams.sh | 28 + test/pash_tests/trim_primers.sh | 6 + test/pash_tests/uniq-c.2.sh | 18 + test/pash_tests/uniq.sh | 3 + test/pash_tests/unix50.sh | 151 +++ test/pash_tests/unparsing-special-chars.sh | 10 + test/pash_tests/unsafe0.sh | 1 + test/pash_tests/unsafe1.sh | 1 + test/pash_tests/unsafe2.sh | 1 + test/pash_tests/update-img.sh | 31 + test/pash_tests/var_assgn.sh | 3 + .../wait_for_output_and_sigpipe_rest.sh | 43 + test/pash_tests/wc.sh | 3 + test/pash_tests/web-index-aux.sh | 141 +++ test/pash_tests/web-index.sh | 148 +++ test/pash_tests/wf.sh | 6 + test/pash_tests/wf_env_test.sh | 1 + 
test/pash_tests/worker.sh | 33 + test/pash_tests/wrap_cat.sh | 27 + test/round_trip.sh | 50 +- test/test_ocaml_python.sh | 63 ++ test/{failing => tests}/aaaa_single | 0 test/{failing => tests}/backslash | 0 test/tests/weird_tilde.sh | 11 + 402 files changed, 14104 insertions(+), 168 deletions(-) create mode 100644 TODO.md create mode 100644 ocaml/ast_atd.atd create mode 100644 ocaml/json_to_shell.ml create mode 100755 ocaml/rt.sh rename test/test.ml => ocaml/shell_to_json.ml (84%) create mode 100644 python/.gitignore create mode 100644 python/Makefile create mode 100644 python/ast.py create mode 100644 python/ast2shell.py create mode 100644 python/dash.py create mode 100644 python/parse_to_ast.py create mode 100755 python/rt.py create mode 100644 test/README.md create mode 100644 test/failing/.travis-ocaml.sh create mode 100755 test/failing/1.tomp3.sh create mode 100755 test/failing/3.resiz.sh create mode 100755 test/failing/append_nl_if_not.sh create mode 100644 test/failing/array.sh create mode 100755 test/failing/async.sh create mode 100755 test/failing/auto-split.sh create mode 100755 test/failing/bio.sh create mode 100644 test/failing/bio2.sh create mode 100644 test/failing/bio3.sh create mode 100755 test/failing/buggy_comm_script.sh create mode 100644 test/failing/build_lib.sh create mode 100755 test/failing/ci-perf.sh create mode 100755 test/failing/ci.sh create mode 100755 test/failing/dgsh_tee.sh create mode 100755 test/failing/driver.sh create mode 100755 test/failing/execute_baseline_sort.sh create mode 100755 test/failing/execute_compile_evaluation_script.sh create mode 100755 test/failing/execute_eurosys_one_liners.sh create mode 100755 test/failing/execute_gnu_parallel_script.sh create mode 100755 test/failing/execute_max_temp_dish_evaluation.sh create mode 100755 test/failing/execute_web_index_dish_evaluation.sh create mode 100644 test/failing/exit_error.sh create mode 100755 test/failing/genome-diff.sh create mode 100644 test/failing/heredoc2.sh 
create mode 100644 test/failing/incomplete-arith.sh create mode 100644 test/failing/invalidate0.sh create mode 100644 test/failing/invalidate1.sh create mode 100755 test/failing/maximal.sh create mode 100755 test/failing/mk_meta.sh create mode 100755 test/failing/multiply.sh create mode 100644 test/failing/pash_init_setup.sh create mode 100755 test/failing/pash_runtime.sh create mode 100644 test/failing/pash_runtime_quick_abort.sh create mode 100644 test/failing/pash_set_from_to.sh create mode 100755 test/failing/pash_source_declare_vars.sh create mode 100755 test/failing/pash_wrap_vars.sh create mode 100644 test/failing/pay_respects.sh create mode 100644 test/failing/process-aliases.sh create mode 100755 test/failing/quickcheck.sh create mode 100755 test/failing/remote_exec_graph.sh create mode 100755 test/failing/run-experiment.sh create mode 100644 test/failing/run.par.sh create mode 100755 test/failing/run.seq.sh create mode 100755 test/failing/run.sh create mode 100644 test/failing/run_alias.sh create mode 100755 test/failing/run_all.sh create mode 100755 test/failing/run_all_benchmarks_ci.sh create mode 100755 test/failing/run_parser_on_scripts.sh create mode 100644 test/failing/safe6.sh create mode 100755 test/failing/setup-pash.sh create mode 100755 test/failing/sieve.sh create mode 100755 test/failing/split-unix50.sh create mode 100644 test/failing/split_pipe.sh create mode 100755 test/failing/sq.sh create mode 100755 test/failing/statistics.sh create mode 100755 test/failing/superoptimize.sh create mode 100755 test/failing/test-bsd.sh create mode 100755 test/failing/test-exclam.sh create mode 100755 test/failing/test-linux.sh create mode 100644 test/failing/test-shlex-aux.sh create mode 100644 test/failing/test_JSON_to_shell2.sh create mode 100644 test/failing/test_ast2shell_py.sh create mode 100755 test/failing/test_evaluation_scripts.sh create mode 100644 test/failing/test_parse_to_JSON2.sh create mode 100644 test/failing/test_rt.sh create mode 100644 
test/failing/test_rt_py.sh create mode 100644 test/failing/timing-JSON.sh create mode 100644 test/failing/timing.sh create mode 100644 test/failing/unzip-1.sh create mode 100755 test/failing/up.sh create mode 100755 test/failing/utils.sh create mode 100755 test/failing/wc.2.sh create mode 100755 test/failing/web-log-stats.sh create mode 100755 test/failing/with-ec2.sh create mode 100755 test/pash_tests/1.sh create mode 100755 test/pash_tests/10.sh create mode 100755 test/pash_tests/11.sh create mode 100755 test/pash_tests/12.sh create mode 100755 test/pash_tests/13.sh create mode 100755 test/pash_tests/14.sh create mode 100755 test/pash_tests/15.sh create mode 100755 test/pash_tests/16.sh create mode 100755 test/pash_tests/17.sh create mode 100755 test/pash_tests/18.sh create mode 100755 test/pash_tests/19.sh create mode 100755 test/pash_tests/1_1.sh create mode 100755 test/pash_tests/2.sh create mode 100755 test/pash_tests/2.unrtf.sh create mode 100755 test/pash_tests/20.sh create mode 100755 test/pash_tests/21.sh create mode 100755 test/pash_tests/22.sh create mode 100755 test/pash_tests/23.sh create mode 100755 test/pash_tests/24.sh create mode 100755 test/pash_tests/25.sh create mode 100755 test/pash_tests/26.sh create mode 100755 test/pash_tests/27.sh create mode 100755 test/pash_tests/28.sh create mode 100755 test/pash_tests/29.sh create mode 100755 test/pash_tests/2_1.sh create mode 100755 test/pash_tests/2_2.sh create mode 100755 test/pash_tests/3.sh create mode 100755 test/pash_tests/30.sh create mode 100755 test/pash_tests/31.sh create mode 100755 test/pash_tests/32.sh create mode 100755 test/pash_tests/33.sh create mode 100755 test/pash_tests/34.sh create mode 100755 test/pash_tests/35.sh create mode 100755 test/pash_tests/36.sh create mode 100755 test/pash_tests/3_1.sh create mode 100755 test/pash_tests/3_2.sh create mode 100755 test/pash_tests/3_3.sh create mode 100755 test/pash_tests/4.gitkernel.sh create mode 100755 test/pash_tests/4.sh create mode 
100755 test/pash_tests/4_3.sh create mode 100755 test/pash_tests/4_3b.sh create mode 100755 test/pash_tests/5.apachelog.sh create mode 100755 test/pash_tests/5.sh create mode 100755 test/pash_tests/6.msg.sh create mode 100755 test/pash_tests/6.sh create mode 100755 test/pash_tests/6_1.sh create mode 100755 test/pash_tests/6_1_1.sh create mode 100755 test/pash_tests/6_1_2.sh create mode 100755 test/pash_tests/6_2.sh create mode 100755 test/pash_tests/6_3.sh create mode 100755 test/pash_tests/6_4.sh create mode 100755 test/pash_tests/6_5.sh create mode 100755 test/pash_tests/6_7.sh create mode 100755 test/pash_tests/7.nginx.sh create mode 100755 test/pash_tests/7.sh create mode 100755 test/pash_tests/7_1.sh create mode 100755 test/pash_tests/7_2.sh create mode 100755 test/pash_tests/8.2_1.sh create mode 100755 test/pash_tests/8.2_2.sh create mode 100755 test/pash_tests/8.3_2.sh create mode 100755 test/pash_tests/8.3_3.sh create mode 100755 test/pash_tests/8.sh create mode 100755 test/pash_tests/8.varlog.sh create mode 100755 test/pash_tests/8_1.sh create mode 100755 test/pash_tests/9.sh create mode 100755 test/pash_tests/add.sh create mode 100644 test/pash_tests/alt_bigrams.sh create mode 100644 test/pash_tests/alt_bigrams_env_test.sh create mode 100644 test/pash_tests/alt_bigrams_funs.sh create mode 100755 test/pash_tests/ann-agg-2.sh create mode 100644 test/pash_tests/ann-agg.sh create mode 100644 test/pash_tests/archive.sh create mode 100644 test/pash_tests/args_with_spaces.sh create mode 100755 test/pash_tests/autogen.sh create mode 100644 test/pash_tests/bam_to_sam.sh create mode 100755 test/pash_tests/bell_grep.sh create mode 100755 test/pash_tests/bi-gram.aux.sh create mode 100755 test/pash_tests/bi-grams.sh create mode 100755 test/pash_tests/bigrams.sh create mode 100755 test/pash_tests/bigrams_aux_map.sh create mode 100755 test/pash_tests/bigrams_env_test.sh create mode 100755 test/pash_tests/bio4.sh create mode 100644 test/pash_tests/braces_amp.sh create 
mode 100755 test/pash_tests/buggy_non_newline_input.sh create mode 100644 test/pash_tests/call_distrib_planner_example.sh create mode 100644 test/pash_tests/cat-redir-fail.sh create mode 100755 test/pash_tests/cat_output_files.sh create mode 100755 test/pash_tests/circus.sh create mode 100755 test/pash_tests/clone_compress_repo.sh create mode 100644 test/pash_tests/cmd_sbst.sh create mode 100644 test/pash_tests/cmd_sbst_subscript.sh create mode 100644 test/pash_tests/comm-par-test.sh create mode 100644 test/pash_tests/comm-par-test2.sh create mode 100644 test/pash_tests/comm-par-test2_env_test.sh create mode 100644 test/pash_tests/comm-par-test_env_test.sh create mode 100755 test/pash_tests/compile.sh create mode 100755 test/pash_tests/compress_files.sh create mode 100755 test/pash_tests/concat.sh create mode 100644 test/pash_tests/convert_to_fast.sh create mode 100755 test/pash_tests/count.sh create mode 100644 test/pash_tests/count_packets.sh create mode 100644 test/pash_tests/deadlock_test.sh create mode 100644 test/pash_tests/deadlock_test_env_test.sh create mode 100755 test/pash_tests/demo-spell.sh create mode 100755 test/pash_tests/dfs_split_reader.sh create mode 100755 test/pash_tests/dgsh-raw-sort.sh create mode 100755 test/pash_tests/dgsh-sort.sh create mode 100755 test/pash_tests/dgsh-wc.sh create mode 100644 test/pash_tests/diff.sh create mode 100644 test/pash_tests/diff_env_test.sh create mode 100755 test/pash_tests/distributed.sh create mode 100755 test/pash_tests/distro-deps.sh create mode 100644 test/pash_tests/distrotest.sh create mode 100644 test/pash_tests/distrotest_env.sh create mode 100644 test/pash_tests/distrotest_funs.sh create mode 100755 test/pash_tests/diverge.sh create mode 100644 test/pash_tests/double_sort.sh create mode 100644 test/pash_tests/double_sort_env_test.sh create mode 100755 test/pash_tests/drain_stream.sh create mode 100755 test/pash_tests/eager-no-task-par.sh create mode 100755 test/pash_tests/eager.sh create mode 100755 
test/pash_tests/eager_test.sh create mode 100644 test/pash_tests/echo_args.sh create mode 100755 test/pash_tests/encrypt_files.sh create mode 100644 test/pash_tests/escape-madness.sh create mode 100644 test/pash_tests/exec-redirections.sh create mode 100755 test/pash_tests/execute_unix_benchmarks.sh create mode 100644 test/pash_tests/exit_code.sh create mode 100644 test/pash_tests/expand-u-positional.sh create mode 100644 test/pash_tests/expand-u.sh create mode 100755 test/pash_tests/export_var_script.sh create mode 100644 test/pash_tests/for-echo.sh create mode 100644 test/pash_tests/for-loop.sh create mode 100644 test/pash_tests/for_loop_simple.sh create mode 100644 test/pash_tests/for_loop_simple_env_test.sh create mode 100755 test/pash_tests/for_spaces.sh create mode 100644 test/pash_tests/fun-def.sh create mode 100644 test/pash_tests/gen_data.sh create mode 100644 test/pash_tests/gen_pl.sh create mode 100644 test/pash_tests/generate_single_chrom.sh create mode 100755 test/pash_tests/genomics.sh create mode 100755 test/pash_tests/genquality.sh create mode 100755 test/pash_tests/get-summary.sh create mode 100644 test/pash_tests/get_hash.sh create mode 100755 test/pash_tests/get_results.sh create mode 100644 test/pash_tests/get_type_count.sh create mode 100755 test/pash_tests/grab_submissions.sh create mode 100755 test/pash_tests/grade.sh create mode 100644 test/pash_tests/grep-test.sh create mode 100755 test/pash_tests/grep.sh create mode 100755 test/pash_tests/grep_env_test.sh create mode 100755 test/pash_tests/grep_f_script.sh create mode 100755 test/pash_tests/head.sh create mode 100755 test/pash_tests/head_deadlock.sh create mode 100755 test/pash_tests/head_deadlock_fixed.sh create mode 100755 test/pash_tests/head_deadlock_fixed3.sh create mode 100755 test/pash_tests/head_deadlock_fixed_2.sh create mode 100755 test/pash_tests/hello-world.sh create mode 100644 test/pash_tests/heredoc1.sh create mode 100755 test/pash_tests/identity.sh create mode 100755 
test/pash_tests/img_convert.sh create mode 100755 test/pash_tests/incr.sh create mode 100755 test/pash_tests/innefficient_auto_split.sh create mode 100755 test/pash_tests/install-deps.sh create mode 100755 test/pash_tests/ldconfig.sh create mode 100755 test/pash_tests/longest-man.sh create mode 100644 test/pash_tests/loop1.sh create mode 100755 test/pash_tests/make-ec2.sh create mode 100755 test/pash_tests/max-temp-preprocess.sh create mode 100755 test/pash_tests/max-temp-process.sh create mode 100755 test/pash_tests/max-temp.sh create mode 100755 test/pash_tests/merge-uniq.sh create mode 100755 test/pash_tests/merge-wc.sh create mode 100644 test/pash_tests/micro_10.sh create mode 100644 test/pash_tests/micro_1000.sh create mode 100644 test/pash_tests/micro_1000_env_test.sh create mode 100644 test/pash_tests/micro_10_env_test.sh create mode 100644 test/pash_tests/minimal_grep.sh create mode 100755 test/pash_tests/minimal_grep_env_test.sh create mode 100644 test/pash_tests/minimal_grep_stdin.sh create mode 100644 test/pash_tests/minimal_sort.sh create mode 100755 test/pash_tests/minimal_sort_env_test.sh create mode 100755 test/pash_tests/mk_dot_install.sh create mode 100644 test/pash_tests/newline_in_var.sh create mode 100755 test/pash_tests/nfa-regex.sh create mode 100755 test/pash_tests/nginx.sh create mode 100755 test/pash_tests/no_in_script.sh create mode 100644 test/pash_tests/p1.sh create mode 100644 test/pash_tests/p2.sh create mode 100755 test/pash_tests/pa.sh create mode 100755 test/pash_tests/pacaur.sh create mode 100755 test/pash_tests/page-count.sh create mode 100755 test/pash_tests/page-per-line.sh create mode 100755 test/pash_tests/parse.sh create mode 100644 test/pash_tests/pash_declare_vars.sh create mode 100755 test/pash_tests/pash_ptempfile_name.sh create mode 100644 test/pash_tests/pash_runtime_complete_execution.sh create mode 100644 test/pash_tests/pash_runtime_shell_to_pash.sh create mode 100755 test/pash_tests/pcap.sh create mode 100755 
test/pash_tests/pcap_bench.sh create mode 100755 test/pash_tests/pkg.sh create mode 100755 test/pash_tests/pretty_print_json.sh create mode 100755 test/pash_tests/proginf.sh create mode 100755 test/pash_tests/r-bell_grep.sh create mode 100755 test/pash_tests/r-minimal_grep.sh create mode 100644 test/pash_tests/r-shortest-scripts.sh create mode 100755 test/pash_tests/r-sort.sh create mode 100755 test/pash_tests/r-wc.sh create mode 100755 test/pash_tests/raw-r-sort.sh create mode 100644 test/pash_tests/readonly.sh create mode 100644 test/pash_tests/redir-var-test.sh create mode 100755 test/pash_tests/redirect.sh create mode 100755 test/pash_tests/redirect_stdin_to.sh create mode 100644 test/pash_tests/redirect_wrapper.sh create mode 100755 test/pash_tests/remote_read.sh create mode 100755 test/pash_tests/remote_write.sh create mode 100644 test/pash_tests/remove_adapter.sh create mode 100755 test/pash_tests/round_trip.sh create mode 100755 test/pash_tests/run_all_benchmarks.sh create mode 100644 test/pash_tests/run_evaluation.sh create mode 100755 test/pash_tests/run_grader.sh create mode 100755 test/pash_tests/run_lda.sh create mode 100755 test/pash_tests/run_tests.sh create mode 100644 test/pash_tests/safe0.sh create mode 100644 test/pash_tests/safe1.sh create mode 100644 test/pash_tests/safe2.sh create mode 100644 test/pash_tests/safe3.sh create mode 100644 test/pash_tests/safe4.sh create mode 100644 test/pash_tests/safe5.sh create mode 100644 test/pash_tests/safe7.sh create mode 100755 test/pash_tests/search.sh create mode 100644 test/pash_tests/sed-test.sh create mode 100755 test/pash_tests/send_emails.sh create mode 100644 test/pash_tests/set-dash-v-x.sh create mode 100755 test/pash_tests/set-diff.sh create mode 100644 test/pash_tests/set-diff_env_test.sh create mode 100644 test/pash_tests/set-e-2.sh create mode 100644 test/pash_tests/set-e-3.sh create mode 100644 test/pash_tests/set-e.sh create mode 100644 test/pash_tests/set-v.sh create mode 100644 
test/pash_tests/set.sh create mode 100755 test/pash_tests/set_bug.sh create mode 100755 test/pash_tests/setup-dspash.sh create mode 100755 test/pash_tests/setup.sh create mode 100755 test/pash_tests/shortest-scripts.sh create mode 100644 test/pash_tests/shortest_scripts.sh create mode 100644 test/pash_tests/shortest_scripts_env_test.sh create mode 100755 test/pash_tests/sine.sh create mode 100755 test/pash_tests/sort-opt.sh create mode 100755 test/pash_tests/sort-opt_env.sh create mode 100755 test/pash_tests/sort-sort.sh create mode 100755 test/pash_tests/sort.sh create mode 100644 test/pash_tests/sort_env.sh create mode 100644 test/pash_tests/sort_env_small.sh create mode 100755 test/pash_tests/sort_env_test.sh create mode 100755 test/pash_tests/spell-grep.sh create mode 100644 test/pash_tests/spell.sh create mode 100755 test/pash_tests/spell_env_test.sh create mode 100644 test/pash_tests/split_pcap.sh create mode 100644 test/pash_tests/star-escape.sh create mode 100755 test/pash_tests/suggest-ec2.sh create mode 100755 test/pash_tests/symtab-sha.sh create mode 100755 test/pash_tests/tail.sh create mode 100755 test/pash_tests/tailprogs.sh create mode 100644 test/pash_tests/tee_web_index_bug.sh create mode 100755 test/pash_tests/temp-analytics.sh create mode 100755 test/pash_tests/temp_test.sh create mode 100755 test/pash_tests/test-common.sh create mode 100755 test/pash_tests/test.sh create mode 100755 test/pash_tests/test1.sh create mode 100644 test/pash_tests/tilde.sh create mode 100755 test/pash_tests/to_mp3.sh create mode 100755 test/pash_tests/top-n.sh create mode 100755 test/pash_tests/topn.sh create mode 100644 test/pash_tests/topn_env_test.sh create mode 100644 test/pash_tests/tr-test.sh create mode 100755 test/pash_tests/tr_cs_wc_test.sh create mode 100644 test/pash_tests/trap.sh create mode 100755 test/pash_tests/trigrams.sh create mode 100644 test/pash_tests/trim_primers.sh create mode 100755 test/pash_tests/uniq-c.2.sh create mode 100755 
test/pash_tests/uniq.sh create mode 100755 test/pash_tests/unix50.sh create mode 100644 test/pash_tests/unparsing-special-chars.sh create mode 100644 test/pash_tests/unsafe0.sh create mode 100644 test/pash_tests/unsafe1.sh create mode 100644 test/pash_tests/unsafe2.sh create mode 100755 test/pash_tests/update-img.sh create mode 100644 test/pash_tests/var_assgn.sh create mode 100755 test/pash_tests/wait_for_output_and_sigpipe_rest.sh create mode 100755 test/pash_tests/wc.sh create mode 100644 test/pash_tests/web-index-aux.sh create mode 100755 test/pash_tests/web-index.sh create mode 100755 test/pash_tests/wf.sh create mode 100644 test/pash_tests/wf_env_test.sh create mode 100644 test/pash_tests/worker.sh create mode 100755 test/pash_tests/wrap_cat.sh create mode 100755 test/test_ocaml_python.sh rename test/{failing => tests}/aaaa_single (100%) rename test/{failing => tests}/backslash (100%) create mode 100644 test/tests/weird_tilde.sh diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9855fe6..34f46d2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -4,8 +4,8 @@ on: - pull_request - push -jobs: - build: +jobs: + build-and-test: strategy: fail-fast: false matrix: @@ -13,21 +13,46 @@ jobs: - macos-latest - ubuntu-latest ocaml-compiler: - - 4.11.x - 4.12.x - 4.13.x + - 4.14.x runs-on: ${{ matrix.os }} steps: - - name: Checkout code - uses: actions/checkout@v2 + - name: Checkout code + uses: actions/checkout@v2 - - name: Use OCaml ${{ matrix.ocaml-compiler }} - uses: ocaml/setup-ocaml@v2 - with: - ocaml-compiler: ${{ matrix.ocaml-compiler }} + - name: Use OCaml ${{ matrix.ocaml-compiler }} + uses: avsm/setup-ocaml@v2 + with: + ocaml-compiler: ${{ matrix.ocaml-compiler }} + + - name: Install system dependencies (via OPAM) + run: | + opam pin add libdash . 
--no-action + opam depext libdash --yes --with-test + + - name: Build dash + run: | + if [ $(uname) = "Darwin" ]; then glibtoolize; else libtoolize; fi + aclocal + autoheader + automake --add-missing + autoconf + ./configure + make + cp src/.libs/dlldash.so python/libdash.so + if [ $(uname) = "Darwin" ]; then cp src/.libs/libdash.dylib python/; fi + + - name: Test Python bindings + run: make -C python test + + - name: Install OCaml bindings + run: opam install . + + - name: Test OCaml bindings + run: opam exec -- make -C ocaml test - - run: opam install . --with-test - - - run: opam exec -- make -C test test + - name: Compare OCaml and Python bindings + run: opam exec -- make -C test test diff --git a/.gitignore b/.gitignore index a510e90..4376351 100644 --- a/.gitignore +++ b/.gitignore @@ -57,7 +57,9 @@ ar-lib config.* src/libdash.a *.lo +*.dylib m4 libtool ltmain.sh ocamlprof.dump +__pycache__ diff --git a/README.md b/README.md index 33303a4..93a9eac 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ *libdash* is a fork of the Linux Kernel's `dash` shell that builds a linkable library with extra exposed interfaces. The primary use of libdash is to parse shell scripts, but it could be used for more. +The OCaml bindings---packaged as the [`libdash` OPAM package](https://opam.ocaml.org/packages/libdash/)---include two executables, `shell_to_json` and `json_to_shell`. + # What are the dependencies? The C code for dash should build on a wide variety of systems. The library may not build on platforms with esoteric linkers; it's been tested on OS X. @@ -16,12 +18,26 @@ You should be able to simply run `docker build -t libdash .` to get a runnable e Install the OPAM file: `opam pin add .` or `opam install .`. This will build the OCaml library and install it in your OPAM repository. There are tests in another directory; they will only build when libdash is actually installed. 
+You can test the OCaml bindings by running: + +``` +cd ocaml; make test +``` + +You can test the Python bindings by running: + ``` -cd test; make test +cd python; make test ``` The tests use `test/round_trip.sh` to ensure that every tester file in `test/tests` round-trips correctly through parsing and pretty printing. The OPAM package can be installed with the `-t` flag to run the tests internally; see `ocaml/Makefile`'s testing targets. +Additionally, you can run tests that compare the OCaml and Python implementations: + +``` +cd test; make +``` + # How to use the parser The ideal interface to use is `parsecmd_safe` in `parser.c`. Parsing the POSIX shell is a complicated affair: beyond the usual locale issues, aliases affect the lexer, so one must use `setalias` and `unalias` to manage any aliases that ought to exist. diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..03e504b --- /dev/null +++ b/TODO.md @@ -0,0 +1,5 @@ +- [x] clear out old C stuff +- [ ] get roundtrips correct (fix OCaml pretty printing) +- [ ] correct libdash.so installation (locally) +- [ ] pip setup +- [ ] pash pull request diff --git a/libdash.opam b/libdash.opam index abd25b0..88c0759 100644 --- a/libdash.opam +++ b/libdash.opam @@ -12,6 +12,7 @@ depends: [ "ocamlfind" {>= "1.8.0"} "ctypes" {>= "0.11.5"} "ctypes-foreign" {>= "0.4.0"} + "atdgen" {>= "2.2.1"} "opam-installer" {>= "2.0.0"} "conf-autoconf" {build} "conf-aclocal" {build} diff --git a/mk_dot_install.sh b/mk_dot_install.sh index 0ac9473..d76851d 100755 --- a/mk_dot_install.sh +++ b/mk_dot_install.sh @@ -5,18 +5,21 @@ set -e libdash_files=$(ls _build/lib) bindings_files="META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx" -files= +lib_files= for f in ${libdash_files} do - files="${files} \"_build/lib/${f}\"" + lib_files="${lib_files} \"_build/lib/${f}\"" done for f in ${bindings_files} do - files="${files} \"ocaml/${f}\"" + lib_files="${lib_files} \"ocaml/${f}\"" done 
+bin_files="\"ocaml/shell_to_json\" \"ocaml/json_to_shell\"" + cat >libdash.install <test.err; \ - done - @echo "TESTING test.byte" - @for f in ../test/tests/*; do \ - LD_LIBRARY_PATH=$(BUILD) ../test/round_trip.sh ./test.byte $$f 2>test.err; \ - done - -test.native : test.ml dash.cmxa - LD_LIBRARY_PATH=$(BUILD) ocamlfind ocamlopt -g -thread -package threads,str,ctypes,ctypes.foreign -linkpkg -ccopt -L$(BUILD) -ccopt -Wl,-rpath -ccopt -Wl,$(BUILD) -cclib -ldash dash.cmxa test.ml -o $@ - -test.byte : test.ml dash.cmxa - LD_LIBRARY_PATH=$(BUILD) ocamlfind ocamlc -g -thread -package threads,str,ctypes,ctypes.foreign -linkpkg -I $(BUILD) -ccopt -L$(BUILD) -dllpath $(BUILD) -cclib -ldash dash.cma test.ml -o $@ - -test.ml : ../test/test.ml - cp $< $@ + ocamlfind ocamlmklib -g -package str,ctypes,ctypes.foreign -L$(BUILD) -ldash $^ -o dash + +test : $(wildcard ../test/*) + @type json_to_shell >/dev/null 2>&1 && type shell_to_json >/dev/null 2>&1 || $(MAKE) json_to_shell shell_to_json + @find ../test/tests ../test/pash_tests -type f | while read f; do ../test/round_trip.sh ./rt.sh "$$f"; done | tee ocaml.log + @cat ocaml.log | egrep '^[A-Z0-9_]+:' | cut -d ':' -f 1 | sort | uniq -c + @grep ':' ocaml.log && echo "FAILED" && exit 1 || exit 0 clean : rm -f *.o *.cmo *.cmi *.cmx dash.a dash.cma dash.cmxa diff --git a/ocaml/ast.ml b/ocaml/ast.ml index 3105601..b93735c 100644 --- a/ocaml/ast.ml +++ b/ocaml/ast.ml @@ -3,25 +3,25 @@ type linno = int exception ParseException of string type t = - | Command of linno * assign list * args * redirection list (* assign, args, redir *) - | Pipe of bool * t list (* background?, commands *) - | Redir of linno * t * redirection list - | Background of linno * t * redirection list - | Subshell of linno * t * redirection list - | And of t * t - | Or of t * t - | Not of t - | Semi of t * t - | If of t * t * t (* cond, then, else *) - | While of t * t (* test, body *) (* until encoded as a While . 
Not *) - | For of linno * arg * t * string (* args, body, var *) - | Case of linno * arg * case list - | Defun of linno * string * t (* name, body *) + | Command of (linno * assign list * args * redirection list) (* assign, args, redir *) + | Pipe of (bool * t list) (* background?, commands *) + | Redir of (linno * t * redirection list) + | Background of (linno * t * redirection list) + | Subshell of (linno * t * redirection list) + | And of (t * t) + | Or of (t * t) + | Not of (t) + | Semi of (t * t) + | If of (t * t * t) (* cond, then, else *) + | While of (t * t) (* test, body *) (* until encoded as a While . Not *) + | For of (linno * arg list * t * string) (* args, body, var *) + | Case of (linno * arg * case list) + | Defun of (linno * string * t) (* name, body *) and assign = string * arg and redirection = - | File of redir_type * int * arg - | Dup of dup_type * int * arg - | Heredoc of heredoc_type * int * arg + | File of (redir_type * int * arg) + | Dup of (dup_type * int * arg) + | Heredoc of (heredoc_type * int * arg) and redir_type = To | Clobber | From | FromTo | Append and dup_type = ToFD | FromFD and heredoc_type = Here | XHere (* for when in a quote... not sure when this comes up *) @@ -32,7 +32,7 @@ type t = | E of char (* escape... necessary for expansion *) | T of string option (* tilde *) | A of arg (* arith *) - | V of var_type * bool (* VSNUL? *) * string * arg + | V of (var_type * bool (* VSNUL? 
*) * string * arg) | Q of arg (* quoted *) | B of t (* backquote *) and var_type = @@ -89,6 +89,11 @@ open Ctypes open Foreign open Dash +let rec last = function + | [] -> None + | [x] -> Some x + | x::xs -> last xs + let skip = Command (-1,[],[],[]) let special_chars : char list = explode "|&;<>()$`\\\"'" @@ -138,7 +143,7 @@ let rec of_node (n : node union ptr) : t = | 11 -> let n = n @-> node_nfor in For (getf n nfor_linno, - to_arg (getf n nfor_args @-> node_narg), + to_args (getf n nfor_args), of_node (getf n nfor_body), getf n nfor_var) (* NCASE *) @@ -215,21 +220,21 @@ and of_binary (n : node union ptr) = (of_node (getf n nbinary_ch1), of_node (getf n nbinary_ch2)) and to_arg (n : narg structure) : arg = - let a,s,bqlist,stack = parse_arg (explode (getf n narg_text)) (getf n narg_backquote) [] in + let a,s,bqlist,stack = parse_arg ~tilde_ok:true ~assign:false (explode (getf n narg_text)) (getf n narg_backquote) [] in (* we should have used up the string and have no backquotes left in our list *) assert (s = []); assert (nullptr bqlist); assert (stack = []); a -and parse_arg (s : char list) (bqlist : nodelist structure ptr) stack = +and parse_arg ?tilde_ok:(tilde_ok=false) ~assign:(assign:bool) (s : char list) (bqlist : nodelist structure ptr) stack = match s,stack with | [],[] -> [],[],bqlist,[] | [],`CTLVar::_ -> failwith "End of string before CTLENDVAR" | [],`CTLAri::_ -> failwith "End of string before CTLENDARI" | [],`CTLQuo::_ -> failwith "End of string before CTLQUOTEMARK" (* CTLESC *) - | '\129'::c::s,_ -> arg_char (E c) s bqlist stack + | '\129'::c::s,_ -> arg_char assign (E c) s bqlist stack (* CTLVAR *) | '\130'::t::s,_ -> let var_name,s = split_at (fun c -> c = '=') s in @@ -249,14 +254,20 @@ and parse_arg (s : char list) (bqlist : nodelist structure ptr) stack = failwith ("Missing CTLENDVAR for VSNORMAL/VSLENGTH, found " ^ Char.escaped c) (* every other VSTYPE takes mods before CTLENDVAR *) | vstype,'='::s -> - let a,s,bqlist,stack' = parse_arg s 
bqlist (`CTLVar::stack) in + let a,s,bqlist,stack' = parse_arg ~tilde_ok:true ~assign s bqlist (`CTLVar::stack) in V (var_type vstype,t land 0x10 = 0x10,implode var_name,a), s, bqlist, stack' | _,c::_ -> failwith ("Expected '=' terminating variable name, found " ^ Char.escaped c) | _,[] -> failwith "Expected '=' terminating variable name, found EOF" in - arg_char v s bqlist stack + arg_char assign v s bqlist stack + | '\130'::s, _ -> + (* original behavior *) + (* raise (ParseException "bad substitution (missing variable name in ${}?") *) + (* ignoring malformed stuff (e.g., from arrays) to behave the same as pash's python bindings *) + let a,s,bqlist,stack = parse_arg ~assign s bqlist stack in + (C '\194'::C '\130'::a,s,bqlist,stack) + (* CTLENDVAR *) - | '\130'::_, _ -> raise (ParseException "bad substitution (missing variable name in ${}?") | '\131'::s,`CTLVar::stack' -> [],s,bqlist,stack' | '\131'::_,`CTLAri::_ -> failwith "Saw CTLENDVAR before CTLENDARI" | '\131'::_,`CTLQuo::_ -> failwith "Saw CTLENDVAR before CTLQUOTEMARK" @@ -265,12 +276,12 @@ and parse_arg (s : char list) (bqlist : nodelist structure ptr) stack = | '\132'::s,_ -> if nullptr bqlist then failwith "Saw CTLBACKQ but bqlist was null" - else arg_char (B (of_node (bqlist @-> nodelist_n))) s (bqlist @-> nodelist_next) stack + else arg_char assign (B (of_node (bqlist @-> nodelist_n))) s (bqlist @-> nodelist_next) stack (* CTLARI *) | '\134'::s,_ -> - let a,s,bqlist,stack' = parse_arg s bqlist (`CTLAri::stack) in + let a,s,bqlist,stack' = parse_arg ~assign s bqlist (`CTLAri::stack) in assert (stack = stack'); - arg_char (A a) s bqlist stack' + arg_char assign (A a) s bqlist stack' (* CTLENDARI *) | '\135'::s,`CTLAri::stack' -> [],s,bqlist,stack' | '\135'::_,`CTLVar::_' -> failwith "Saw CTLENDARI before CTLENDVAR" @@ -279,48 +290,73 @@ and parse_arg (s : char list) (bqlist : nodelist structure ptr) stack = (* CTLQUOTEMARK *) | '\136'::s,`CTLQuo::stack' -> [],s,bqlist,stack' | '\136'::s,_ -> - let 
a,s,bqlist,stack' = parse_arg s bqlist (`CTLQuo::stack) in + let a,s,bqlist,stack' = parse_arg ~assign s bqlist (`CTLQuo::stack) in assert (stack' = stack); - arg_char (Q a) s bqlist stack' + arg_char assign (Q a) s bqlist stack' (* tildes *) | '~'::s,stack -> if List.exists (fun m -> m = `CTLQuo || m = `CTLAri) stack then (* we're in arithmetic or double quotes, so tilde is ignored *) - arg_char (C '~') s bqlist stack + arg_char assign (C '~') s bqlist stack else let uname,s' = parse_tilde [] s in - arg_char (T uname) s' bqlist stack + arg_char assign (T uname) s' bqlist stack (* ordinary character *) | c::s,_ -> - arg_char (C c) s bqlist stack + arg_char assign (C c) s bqlist stack -and parse_tilde acc = - let ret = if acc = [] then None else Some (implode acc) in - function - | [] -> (ret , []) - (* CTLESC *) - | '\129'::_ as s -> None, s - (* CTLQUOTEMARK *) - | '\136'::_ as s -> None, s - (* terminal: CTLENDVAR, /, : *) - | '\131'::_ as s -> ret, s - | ':'::_ as s -> ret, s - | '/'::_ as s -> ret, s +and parse_tilde acc s = + match s with + (* CTLESC, CTLVAR, CTLQUOTEMARK, CTLBACKQ, CTLARI: no tilde prefix *) + | '\129'::_ | '\130'::_ | '\132'::_ | '\134'::_ | '\136'::_ -> None, s + (* CTLENDVAR, CTLENDARI, /, :, EOF: terminate tilde prefix *) + | '\131'::_ | '\135'::_ + | ':'::_ | '/'::_ | [] -> + if acc = [] then (None, s) else (Some (implode acc), s) (* ordinary char *) (* TODO 2019-01-03 only characters from the portable character set *) | c::s' -> parse_tilde (acc @ [c]) s' -and arg_char c s bqlist stack = - let a,s,bqlist,stack = parse_arg s bqlist stack in +and arg_char assign c s bqlist stack = + let tilde_ok = + match c with + | C c -> assign && (match last s with + | Some ':' -> true + | _ -> false) + | _ -> false + in + let a,s,bqlist,stack = parse_arg ~tilde_ok ~assign s bqlist stack in (c::a,s,bqlist,stack) -and to_assign v = function - | [] -> failwith ("Never found an '=' sign in assignment, got " ^ implode v) - | C '=' :: a -> (implode v,a) - | 
C c :: a -> to_assign (v @ [c]) a - | _ -> failwith "Unexpected special character in assignment" +and extract_assign v = function + | [] -> failwith ("Never found an '=' sign in assignment, got " ^ implode (List.rev v)) + | '=' :: a -> (implode (List.rev v),a) + | '\129'::_ -> failwith "Unexpected CTLESC in variable name" + | '\130'::_ -> failwith "Unexpected CTLVAR in variable name" + | '\131'::_ -> failwith "Unexpected CTLENDVAR in variable name" + | '\132'::_ -> failwith "Unexpected CTLBACKQ in variable name" + | '\133'::_ -> failwith "Unexpected CTL??? in variable name" + | '\134'::_ -> failwith "Unexpected CTLARI in variable name" + | '\135'::_ -> failwith "Unexpected CTLENDARI in variable name" + | '\136'::_ -> failwith "Unexpected CTLQUOTEMARK in variable name" + | c :: a -> + extract_assign (c::v) a + +and to_assign (n : narg structure) : (string * arg) = + let (v,t) = extract_assign [] (explode (getf n narg_text)) in + let a,s,bqlist,stack = parse_arg ~tilde_ok:true ~assign:true t (getf n narg_backquote) [] in + (* we should have used up the string and have no backquotes left in our list *) + assert (s = []); + assert (nullptr bqlist); + assert (stack = []); + (v,a) -and to_assigns n = List.map (to_assign []) (to_args n) +and to_assigns n = + if nullptr n + then [] + else (assert (n @-> node_type = 15); + let n = n @-> node_narg in + to_assign n::to_assigns (getf n narg_next)) and to_args (n : node union ptr) : args = if nullptr n @@ -337,7 +373,24 @@ let show_unless expected actual = else string_of_int actual let background s = "{ " ^ s ^ " & }" - + +let lines = Str.split (Str.regexp "[\n]+") + +let fresh_marker heredoc = + let eofs_in_line line = + if String.length line > 2 && String.get line 0 = 'E' && String.get line 1 == 'O' + then + try String.rindex line 'F' - 1 + with Not_found -> 0 + else 0 + in + let rec find_eofs lines max_fs = + match lines with + | [] -> max_fs + | line::lines -> find_eofs lines (max max_fs (eofs_in_line line)) + in + "EOF" ^ 
String.make (find_eofs heredoc 0) 'F' + let rec to_string = function | Command (_,assigns,cmds,redirs) -> separated string_of_assign assigns ^ @@ -364,17 +417,17 @@ let rec to_string = function background (to_string a ^ string_of_redirs redirs) | Subshell (_,a,redirs) -> parens (to_string a ^ string_of_redirs redirs) - | And (a1,a2) -> to_string a1 ^ " && " ^ to_string a2 - | Or (a1,a2) -> to_string a1 ^ " || " ^ to_string a2 + | And (a1,a2) -> braces (to_string a1) ^ " && " ^ braces (to_string a2) + | Or (a1,a2) -> braces (to_string a1) ^ " || " ^ braces (to_string a2) | Not a -> "! " ^ braces (to_string a) - | Semi (a1,a2) -> to_string a1 ^ " ; " ^ to_string a2 + | Semi (a1,a2) -> braces (to_string a1) ^ " \n " ^ braces (to_string a2) | If (c,t,e) -> string_of_if c t e | While (Not t,b) -> "until " ^ to_string t ^ "; do " ^ to_string b ^ "; done " | While (t,b) -> "while " ^ to_string t ^ "; do " ^ to_string b ^ "; done " | For (_,a,body,var) -> - "for " ^ var ^ " in " ^ string_of_arg a ^ "; do " ^ + "for " ^ var ^ " in " ^ separated string_of_arg a ^ "; do " ^ to_string body ^ "; done" | Case (_,a,cs) -> "case " ^ string_of_arg a ^ " in " ^ @@ -389,33 +442,38 @@ and string_of_if c t e = | If (c,t,e) -> "; el" ^ string_of_if c t e | _ -> "; else " ^ to_string e ^ "; fi") -and string_of_arg_char = function - | E '\'' -> "\\'" - | E '\"' -> "\\\"" - | E '(' -> "\\(" - | E ')' -> "\\)" - | E '{' -> "\\{" - | E '}' -> "\\}" - | E '$' -> "\\$" - | E '!' -> "\\!" 
- | E '&' -> "\\&" - | E '|' -> "\\|" - | E ';' -> "\\;" +and string_of_arg_char ?quoted:(quoted=false) = function + | E c -> + let chars_to_escape = "'\"`(){}$!&|;" in + let chars_to_escape_when_no_quotes = "*?[]#<>~ " in + if String.contains chars_to_escape c + then "\\" ^ String.make 1 c + else if String.contains chars_to_escape_when_no_quotes c && not quoted + then "\\" ^ String.make 1 c + else Char.escaped c + | C '"' when quoted -> "\\\"" | C c -> String.make 1 c - | E c -> Char.escaped c | T None -> "~" | T (Some u) -> "~" ^ u - | A a -> "$((" ^ string_of_arg a ^ "))" + | A a -> "$((" ^ string_of_arg ~quoted a ^ "))" | V (Length,_,name,_) -> "${#" ^ name ^ "}" | V (vt,nul,name,a) -> - "${" ^ name ^ (if nul then ":" else "") ^ string_of_var_type vt ^ string_of_arg a ^ "}" - | Q a -> "\"" ^ string_of_arg a ^ "\"" + "${" ^ name ^ (if nul then ":" else "") ^ string_of_var_type vt ^ string_of_arg ~quoted a ^ "}" + | Q a -> "\"" ^ string_of_arg ~quoted:true a ^ "\"" | B t -> "$(" ^ to_string t ^ ")" -and string_of_arg = function +and string_of_arg ?quoted:(quoted=false) = function | [] -> "" - | c :: a -> string_of_arg_char c ^ string_of_arg a + | c :: a -> + let char = string_of_arg_char ~quoted c in + if char = "$" && next_is_escaped a + then "\\$" ^ string_of_arg ~quoted a + else char ^ string_of_arg ~quoted a +and next_is_escaped = function + | E _ :: _ -> true + | _ -> false + and string_of_assign (v,a) = v ^ "=" ^ string_of_arg a and string_of_case c = @@ -431,8 +489,8 @@ and string_of_redir = function | Dup (ToFD,fd,tgt) -> show_unless 1 fd ^ ">&" ^ string_of_arg tgt | Dup (FromFD,fd,tgt) -> show_unless 0 fd ^ "<&" ^ string_of_arg tgt | Heredoc (t,fd,a) -> - let heredoc = string_of_arg a in - let marker = fresh_marker (lines heredoc) "EOF" in + let heredoc = string_of_arg ~quoted:true a in + let marker = fresh_marker (lines heredoc) in show_unless 0 fd ^ "<<" ^ (if t = XHere then marker else "'" ^ marker ^ "'") ^ "\n" ^ heredoc ^ marker ^ "\n" diff --git 
a/ocaml/ast.mli b/ocaml/ast.mli index 73725cb..bac58ff 100644 --- a/ocaml/ast.mli +++ b/ocaml/ast.mli @@ -3,25 +3,25 @@ type linno = int exception ParseException of string type t = - Command of linno * assign list * args * redirection list - | Pipe of bool * t list - | Redir of linno * t * redirection list - | Background of linno * t * redirection list - | Subshell of linno * t * redirection list - | And of t * t - | Or of t * t - | Not of t - | Semi of t * t - | If of t * t * t - | While of t * t - | For of linno * arg * t * string - | Case of linno * arg * case list - | Defun of linno * string * t + Command of (linno * assign list * args * redirection list) + | Pipe of (bool * t list) + | Redir of (linno * t * redirection list) + | Background of (linno * t * redirection list) + | Subshell of (linno * t * redirection list) + | And of (t * t) + | Or of (t * t) + | Not of (t) + | Semi of (t * t) + | If of (t * t * t) + | While of (t * t) + | For of (linno * arg list * t * string) + | Case of (linno * arg * case list) + | Defun of (linno * string * t) and assign = string * arg and redirection = - File of redir_type * int * arg - | Dup of dup_type * int * arg - | Heredoc of heredoc_type * int * arg + File of (redir_type * int * arg) + | Dup of (dup_type * int * arg) + | Heredoc of (heredoc_type * int * arg) and redir_type = To | Clobber | From | FromTo | Append and dup_type = ToFD | FromFD and heredoc_type = Here | XHere @@ -32,7 +32,7 @@ and arg_char = | E of char | T of string option | A of arg - | V of var_type * bool * string * arg + | V of (var_type * bool * string * arg) | Q of arg | B of t and var_type = diff --git a/ocaml/ast_atd.atd b/ocaml/ast_atd.atd new file mode 100644 index 0000000..647a1de --- /dev/null +++ b/ocaml/ast_atd.atd @@ -0,0 +1,78 @@ +type char = int + +type linno = int + +type t = [ + Command of (linno * assign list * args * redirection list) (* assign, args, redir *) + | Pipe of (bool * t list) (* background?, commands *) + | Redir of (linno 
* t * redirection list) + | Background of (linno * t * redirection list) + | Subshell of (linno * t * redirection list) + | And of (t * t) + | Or of (t * t) + | Not of t + | Semi of (t * t) + | If of (t * t * t) (* cond, then, else *) + | While of (t * t) (* test, body *) (* until encoded as a While . Not *) + | For of (linno * arg list * t * string) (* args, body, var *) + | Case of (linno * arg * case list) + | Defun of (linno * string * t) (* name, body *) +] + +type assign = (string * arg) + +type redirection = [ + File of (redir_type * int * arg) + | Dup of (dup_type * int * arg) + | Heredoc of (heredoc_type * int * arg) +] + +type redir_type = [ + To + | Clobber + | From + | FromTo + | Append +] + +type dup_type = [ + ToFD + | FromFD +] + +type heredoc_type = [ + Here + | XHere (* for when in a quote... not sure when this comes up *) +] + +type args = arg list + +type arg = arg_char list + +type arg_char = [ + C of char + | E of char (* escape... necessary for expansion *) + | T of string option (* tilde *) + | A of arg (* arith *) + | V of (var_type * bool (* VSNUL? 
*) * string * arg) + | Q of arg (* quoted *) + | B of t (* backquote *) +] + +type var_type = [ + Normal + | Minus + | Plus + | Question + | Assign + | TrimR + | TrimRMax + | TrimL + | TrimLMax + | Length +] + +type case = { + cpattern : arg list; + cbody : t +} \ No newline at end of file diff --git a/ocaml/dash.ml b/ocaml/dash.ml index 1283651..ec0f33f 100644 --- a/ocaml/dash.ml +++ b/ocaml/dash.ml @@ -401,7 +401,7 @@ let rec show (n : node union ptr) : string = (* NFOR *) | 11 -> let n = n @-> node_nfor in - "for " ^ (getf n nfor_var) ^ " in " ^ sharg (getf n nfor_args @-> node_narg) ^ "; do " ^ show (getf n nfor_body) ^ "; done" + "for " ^ (getf n nfor_var) ^ " in " ^ intercalate " " (List.map sharg (arglist (getf n nfor_args @-> node_narg))) ^ "; do " ^ show (getf n nfor_body) ^ "; done" (* NCASE *) | 12 -> let n = n @-> node_ncase in diff --git a/ocaml/json_to_shell.ml b/ocaml/json_to_shell.ml new file mode 100644 index 0000000..7f41033 --- /dev/null +++ b/ocaml/json_to_shell.ml @@ -0,0 +1,38 @@ +(* This is straight-up copied from the libdash tests *) + +let verbose = ref false +let input_src : string option ref = ref None + +let parse_args () = + Arg.parse + [("-v",Arg.Set verbose,"verbose mode")] + (function | "-" -> input_src := None | f -> input_src := Some f) + "Final argument should be either a filename or empty (for STDIN); only the last such argument is used" + +let read_channel chan = +let lines = ref [] in +try + while true; do + lines := input_line chan :: !lines + done; !lines +with End_of_file -> + close_in chan; + List.rev !lines + +let read_lines () = + match !input_src with + | None -> read_channel stdin + | Some filename -> read_channel (open_in filename) + +let parse_lines () : Ast.t list = + let lines = read_lines () in + List.map (fun line -> Ast_json.t_of_string line) lines + + +let main () = + parse_args (); + let cs = parse_lines () in + List.map (fun c -> print_endline (Ast.to_string c)) cs +;; + +main () diff --git a/ocaml/rt.sh 
b/ocaml/rt.sh new file mode 100755 index 0000000..7104ffa --- /dev/null +++ b/ocaml/rt.sh @@ -0,0 +1,47 @@ +#!/bin/sh + +: ${SHELL_TO_JSON=shell_to_json} +if ! type shell_to_json >/dev/null 2>&1 +then + SHELL_TO_JSON=$(dirname $0)/$SHELL_TO_JSON +fi + +: ${JSON_TO_SHELL=json_to_shell} +if ! type json_to_shell >/dev/null 2>&1 +then + JSON_TO_SHELL=$(dirname $0)/json_to_shell +fi + +if [ $# -ne 1 ] +then + echo "Usage: $0 testFile" >&2 + exit 1 +fi + +testFile="$1" + +if [ ! -f "$testFile" ] +then + echo "Error: cannot read '$testFile'!" >&2 + exit 1 +fi + +json=$(mktemp) + +"$SHELL_TO_JSON" "$testFile" >"$json" +if [ $? -ne 0 ] +then + echo "OCAML_PARSE_ABORT: '$testFile'" >&2 + exit 1 +fi + +rt=$(mktemp) + +"$JSON_TO_SHELL" "$json" >"$rt" +if [ $? -ne 0 ] +then + echo "OCAML_UNPARSE_ABORT: '$testFile' -> '$json'" >&2 + exit 1 +fi + +cat "$rt" diff --git a/test/test.ml b/ocaml/shell_to_json.ml similarity index 84% rename from test/test.ml rename to ocaml/shell_to_json.ml index 518a72e..d170e5c 100644 --- a/test/test.ml +++ b/ocaml/shell_to_json.ml @@ -1,3 +1,5 @@ +(* This is straight-up copied from the libdash tests *) + let verbose = ref false let input_src : string option ref = ref None @@ -8,7 +10,7 @@ let set_input_src () = let parse_args () = Arg.parse - ["-v",Arg.Set verbose,"verbose mode"] + [("-v",Arg.Set verbose,"verbose mode")] (function | "-" -> input_src := None | f -> input_src := Some f) "Final argument should be either a filename or - (for STDIN); only the last such argument is used" @@ -28,12 +30,14 @@ let rec parse_all () : Ast.t list = (* keep calm and carry on *) c::parse_all () +let print_ast c = print_endline (Ast_json.string_of_t c) + let main () = Dash.initialize (); parse_args (); set_input_src (); let cs = parse_all () in - List.map (fun c -> print_endline (Ast.to_string c)) cs + List.map print_ast cs ;; main () diff --git a/python/.gitignore b/python/.gitignore new file mode 100644 index 0000000..e5f78f1 --- /dev/null +++ 
b/python/.gitignore @@ -0,0 +1,4 @@ +*.o +*.so +*.dylib +python.log diff --git a/python/Makefile b/python/Makefile new file mode 100644 index 0000000..3ecb9f3 --- /dev/null +++ b/python/Makefile @@ -0,0 +1,9 @@ +.PHONY: test clean + +test: rt.py ast.py ast2shell.py dash.py parse_to_ast.py + @find ../test/tests ../test/pash_tests -type f | while read f; do ../test/round_trip.sh ./rt.py "$$f"; done | tee python.log + @cat python.log | egrep '^[A-Z0-9_]+:' | cut -d ':' -f 1 | sort | uniq -c + @grep ':' python.log && echo "FAILED" && exit 1 || exit 0 + +clean: + rm *.o *.so *.log diff --git a/python/ast.py b/python/ast.py new file mode 100644 index 0000000..8b517a7 --- /dev/null +++ b/python/ast.py @@ -0,0 +1,558 @@ +import os +import sys +from dash import * + +# parser.h +CTLESC = 129 +CTLVAR = 130 +CTLENDVAR = 131 +CTLBACKQ = 132 +CTLARI = 134 +CTLENDARI = 135 +CTLQUOTEMARK = 136 + +# Internal use only +STACK_CTLVAR = 100 +STACK_CTLARI = 101 +STACK_CTLQUO = 102 + + +VAR_TYPES \ + = [ + "Normal", # 0x0 + "UNUSED", + "Minus", # 0x2 + "Plus", # 0x3 + "Question", # 0x4 + "Assign", # 0x5 + "TrimR", # 0x6 + "TrimRMax", # 0x7 + "TrimL", # 0x8 + "TrimLMax", # 0x9 + "Length" # 0xa + ] + + +SKIP_COMMAND = ["Command", [-1, [], [], []]] + +ORD_TILDE = ord ('~') +ORD_EQUALS = ord ('=') +ORD_MINUS = ord ('-') +ORD_COLON = ord (':') +ORD_SLASH = ord ('/') + + +def var_type (i): + return VAR_TYPES [i] + + +# Inline 'list (map (of_node, nodelist (nl)))' +def map_ofnode_nodelist (nl): + snek = [] + + # ctypes has different semantics for POINTER vs. 
c_void_p + # See https://groups.google.com/g/nzpug/c/5CJxaWjuQro + while (nl): + snek.append (of_node (nl.contents.n)) + nl = nl.contents.next + + return snek + + +def of_node (n_ptr): + if (not n_ptr): + return SKIP_COMMAND + else: + n = n_ptr.contents + +# print ("") +# print ("###" + str (n.type)) +# print ("") + + # 4412 0 NCMD + # 2442 7 NSEMI + # 517 8 NIF + # 255 12 NCASE + # 252 5 NAND + # 152 6 NOR + # 126 11 NFOR + # 119 14 NDEFUN + # 107 1 NPIPE + # 16 4 NSUBSHELL + # 14 9 NWHILE + # 4 2 NREDIR + # 2 10 NUNTIL + + if (n.type == NCMD): + return (["Command", + [n.ncmd.linno, + to_assigns (n.ncmd.assign), + to_args (n.ncmd.args), + redirs (n.ncmd.redirect)]]) + elif (n.type == NSEMI): + return ["Semi", of_binary (n)] + elif (n.type == NIF): + return (["If", + [of_node (n.nif.test), + of_node (n.nif.ifpart), + of_node (n.nif.elsepart)]]) + elif (n.type == NCASE): + cases_hashes = [] # Poetic + + for case in caselist (n.ncase.cases): + (pattern, body) = case + + current_case \ + = {'cpattern' : to_args (pattern), + 'cbody' : of_node (body)} + + cases_hashes.append (current_case) + + return (["Case", + [n.ncase.linno, + to_arg (n.ncase.expr.contents.narg), + cases_hashes]]) + elif (n.type == NAND): + return ["And", of_binary (n)] + elif (n.type == NOR): + return ["Or", of_binary (n)] + elif (n.type == NFOR): + return ["For", + [n.nfor.linno, + to_args (n.nfor.args), + of_node (n.nfor.body), + n.nfor.var.decode ("charmap")]] + elif (n.type == NDEFUN): + return ["Defun", + [n.ndefun.linno, + n.ndefun.text.decode ("charmap"), + of_node (n.ndefun.body)]] + elif (n.type == NPIPE): + return (["Pipe", + [n.npipe.backgnd != 0, + map_ofnode_nodelist (n.npipe.cmdlist)]]) + # list (map (of_node, nodelist (n.npipe.cmdlist)))]]) + elif (n.type == NSUBSHELL): + return ["Subshell", of_nredir (n)] + elif (n.type == NWHILE): + return ["While", of_binary (n)] + elif (n.type == NREDIR): + return ["Redir", of_nredir (n)] + elif (n.type == NUNTIL): + (t, b) = of_binary (n) + 
return ["While", [["Not", t], b]] + + elif (n.type == NBACKGND): + return ["Background", of_nredir (n)] + elif (n.type == NNOT): + return ["Not", of_node (n.nnot.com)] + else: + print ("Unexpected type") + sys.stdout.flush () + os.abort () + + +def of_nredir (n): + return ([n.nredir.linno, of_node (n.nredir.n), redirs (n.nredir.redirect)]) + + +def mk_file (ty, n): + arg = to_arg (n.nfile.fname.contents.narg) + + return ["File", [ty, n.nfile.fd, arg]] + + +def mk_dup (ty, n): + ndup = n.ndup + vname = ndup.vname + + tgt = [] + + if (not vname): + dupfd = ndup.dupfd + if (dupfd == -1): + tgt.append (["C", ORD_MINUS]) + else: + dupfd_str = str (dupfd) + + for i in range (len (dupfd_str)): + tgt.append (["C", ord (dupfd_str [i])]) + else: + tgt = to_arg (vname.contents.narg) + + return (["Dup", [ty, ndup.fd, tgt]]) + + +def mk_here (ty, n): + return ["Heredoc", [ty, n.nhere.fd, to_arg (n.nhere.doc.contents.narg)]] + + +def redirs (n_ptr): + rlist = [] + + while (n_ptr): + h = [] + + n = n_ptr.contents + + if (n.type == NTO): + h = mk_file ("To", n) + elif (n.type == NCLOBBER): + h = mk_file ("Clobber", n) + elif (n.type == NFROM): + h = mk_file ("From", n) + elif (n.type == NFROMTO): + h = mk_file ("FromTo", n) + elif (n.type == NAPPEND): + h = mk_file ("Append", n) + elif (n.type == NTOFD): + h = mk_dup ("ToFD", n) + elif (n.type == NFROMFD): + h = mk_dup ("FromFD", n) + elif (n.type == NHERE): + h = mk_here ("Here", n) + elif (n.type == NXHERE): + h = mk_here ("XHere", n) + else: + print ("unexpected node_type in redirlist") + os.abort () + + rlist.append (h) + + n_ptr = n.nfile.next + + return rlist + + +def of_binary (n): + return [of_node (n.nbinary.ch1), of_node (n.nbinary.ch2)] + + +def to_arg (narg): + s = explode_rev (narg.text) + bqlist = narg.backquote + stack = [] + + a = parse_arg (s, bqlist, stack) + + assert (len (s) == 0) + # assert (nullptr bqlist) +# if (bqlist): +# print ("bqlist is not null") +# print (bqlist) +# os.abort () + assert (len (stack) 
== 0) + + return (a) + + +def parse_arg (s, bqlist, stack): + acc = [] + + while (True): + s_len = len (s) + # stack_len = len (stack) + + # | [],[] -> [],[],bqlist,[] + if ((s_len == 0) and (len (stack) == 0)): + return (acc) + # | [],`CTLVar::_ -> failwith "End of string before CTLENDVAR" + + elif (s_len == 0): # We know that len (stack) > 0! + if (stack [-1] == STACK_CTLVAR): + print ("End of string before CTLENDVAR") + os.abort () + # | [],`CTLAri::_ -> failwith "End of string before CTLENDARI" + elif (stack [-1] == STACK_CTLARI): + print (s) + print (stack) + + print ("End of string before CTLENDARI") + os.abort () + # | [],`CTLQuo::_ -> failwith "End of string before CTLQUOTEMARK" + elif (stack [-1] == STACK_CTLQUO): + print (s) + print (stack) + + print ("End of string before CTLENDQUOTEMARK") + os.abort () + else: + print ("Invalid stack") + os.abort () + + else: # We know that len (s) > 0 + # (* CTLESC *) + # | '\129'::c::s,_ -> arg_char (E c) s bqlist stack + if ((s_len >= 2) and (s [-1] == CTLESC)): + s.pop () + c = s.pop () + + acc.append (["E", c]) + + # (* CTLVAR *) + # | '\130'::t::s,_ -> + elif ((s_len >= 2) and (s [-1] == CTLVAR)): + s.pop () + t = s.pop () + + # let var_name,s = split_at (fun c -> c = '=') s in + var_name = "" + while ((len (s) > 0) and (s [-1] != ORD_EQUALS)): + c = s.pop () + var_name = var_name + chr (c) + + v = [] + + if (((t & 0xf) == 0x1) and (len (s) >= 1) and (s [-1] == ORD_EQUALS)): + s.pop () + + v = ["V", ["Normal", False, var_name, []]] + elif (((t & 0xf) == 0xa) and (len (s) >= 2) and (s [-1] == ORD_EQUALS) and (s [-2] == 131)): + s.pop () + s.pop () + + v = ["V", ["Length", False, var_name, []]] + elif ((((t & 0xf) == 0x1) or ((t & 0xf) == 0xa)) and (len (s) >= 1)): + print ("Missing CTLENDVAR for VSNORMAL/VSLENGTH") + os.abort () + elif ((len (s) >= 1) and (s [-1] == ORD_EQUALS)): + s.pop () + + vstype = t & 0xf + + stack.append (STACK_CTLVAR) + + a = parse_arg (s, bqlist, stack) + + v = ["V", [var_type (vstype), (t 
& 0x10 == 0x10), var_name, a]] + elif (len (s) >= 1): + print (s) + print (stack) + + print ("Expected '=' terminating variable name") + os.abort () + elif (len (s) == 0): + print ("Expected '=' terminating variable name, found EOF") + os.abort () + else: + print ("This shouldn't be reachable") + os.abort () + + acc.append (v) + + # | '\130'::_, _ -> raise (ParseException "bad substitution (missing variable name in ${}?") + elif (False and (s [-1] == CTLVAR)): # Disable to match PaSH's version of libdash + print (s) + print (stack) + + print ("bad substitution (missing variable name in ${}?") + os.abort () + + # (* CTLENDVAR *) + # | '\131'::s,`CTLVar::stack' -> [],s,bqlist,stack' + elif (s [-1] == CTLENDVAR): + if (len (stack) >= 1): + if (stack [-1] == STACK_CTLVAR): + s.pop () + stack.pop () + + return (acc) + # | '\131'::_,`CTLAri::_ -> failwith "Saw CTLENDVAR before CTLENDARI" + elif (stack [-1] == STACK_CTLARI): + print ("Saw CTLENDVAR before CTLENDARI") + os.abort () + # | '\131'::_,`CTLQuo::_ -> failwith "Saw CTLENDVAR before CTLQUOTEMARK" + elif (stack [-1] == STACK_CTLQUO): + print ("Saw CTLENDVAR before CTLQUOTEMARK") + os.abort () + # | '\131'::_,[] -> failwith "Saw CTLENDVAR outside of CTLVAR" + else: + print ("Saw CTLENDVAR outside of CTLVAR") + os.abort () + + # (* CTLBACKQ *) + # | '\132'::s,_ -> + elif (s [-1] == CTLBACKQ): + s.pop () + + if (not bqlist): + print (bqlist) + print ("Saw CTLBACKQ but bqlist was null") + os.abort () + else: + acc.append (["B", of_node (bqlist.contents.n)]) + + bqlist = bqlist.contents.next + + # (* CTLARI *) + # | '\134'::s,_ -> + elif (s [-1] == CTLARI): + s.pop () + + stack.append (STACK_CTLARI) + + a = parse_arg (s, bqlist, stack) + + # TODO: assert (stack = stack') + + acc.append (["A", a]) + + # (* CTLENDARI *) + # | '\135'::s,`CTLAri::stack' -> [],s,bqlist,stack' + elif (s [-1] == CTLENDARI): + if (len (stack) >= 1): + if (stack [-1] == STACK_CTLARI): + s.pop () + stack.pop () + + return (acc) + # | 
'\135'::_,`CTLVar::_' -> failwith "Saw CTLENDARI before CTLENDVAR" + elif (stack [-1] == STACK_CTLVAR): + print ("Saw CTLENDARI before CTLENDVAR") + os.abort () + # | '\135'::_,`CTLQuo::_' -> failwith "Saw CTLENDARI before CTLQUOTEMARK" + elif (stack [-1] == STACK_CTLQUO): + print ("Saw CTLENDARI before CTLQUOTEMARK") + os.abort () + # | '\135'::_,[] -> failwith "Saw CTLENDARI outside of CTLARI" + else: + print ("Saw CTLENDARI outside of CTLARI") + os.abort () + + # (* CTLQUOTEMARK *) + # | '\136'::s,`CTLQuo::stack' -> [],s,bqlist,stack' + elif (s [-1] == CTLQUOTEMARK): + if ((len (stack) >= 1) and (stack [-1] == STACK_CTLQUO)): + s.pop () + stack.pop () + + return (acc) + # | '\136'::s,_ -> + else: + s.pop () + stack.append (STACK_CTLQUO) + + a = parse_arg (s, bqlist, stack) + + acc.append (["Q", a]) + + # (* tildes *) + # | '~'::s,stack -> + elif (s [-1] == ORD_TILDE): + s.pop () + + if ((STACK_CTLQUO in stack) or (STACK_CTLARI in stack)): + acc.append (["C", ORD_TILDE]) + else: + uname = parse_tilde (s) + + acc.append (["T", uname]) + + # (* ordinary character *) + # | c::s,_ -> arg_char (C c) s bqlist stack + else: + c = s.pop () + + acc.append (["C", c]) + + +def stringOrNull (acc_str): + if (acc_str == ""): + return "None" + else: + return ["Some", acc_str] + + +def parse_tilde (s): + acc_str = "" + + while (True): + if (s == []): + return stringOrNull (acc_str) + else: + s_last = s [-1] + + if (s_last in [CTLESC, CTLVAR, CTLQUOTEMARK, CTLBACKQ, CTLARI]): + return ("None") + elif (s_last in [CTLENDVAR, CTLENDARI, ORD_COLON, ORD_SLASH]): + return (stringOrNull (acc_str)) + else: + c = s.pop () + acc_str = acc_str + chr (c) + + +def to_assign (a_rev): + v_str = "" + + while (len (a_rev) > 0): + if (a_rev [-1][0] != 'C'): + print ("Unexpected special character in assignment") + sys.stdout.flush () + os.abort () + + if (a_rev [-1][1] == ORD_EQUALS): + a_rev.pop () + + a_rev.reverse () + return (v_str, a_rev) + + # return (v_str, reversed (a_rev)) + else: + c = 
a_rev [-1][1] + a_rev.pop () + + v_str = v_str + chr (c) + + print ("Never found an '=' sign in assignment") + os.abort () + + +# Inlined to_args +# to_assigns n = List.map (to_assign []) (to_args n) +def to_assigns (n): + assigns = [] + + while (n): + if (n.contents.type != NARG): + print ("Unexpected type: " + n.contents.type) + sys.stdout.flush () + os.abort () + + arg = to_arg (n.contents.narg) + + arg.reverse () + assigns.append (to_assign (arg)) + + n = n.contents.narg.next + + return (assigns) + + +# to_assigns n = List.map (to_assign []) (to_args n) +def to_assigns_classic (n): + assigns = [] + + for a in (to_args (n)): + a.reverse () + assigns.append (to_assign (a)) + + return (assigns) + + +def to_args (n): + snek = [] + + # ctypes has different semantics for POINTER vs. c_void_p + # See https://groups.google.com/g/nzpug/c/5CJxaWjuQro + while (n): + if (n.contents.type != NARG): + print ("Unexpected type: " + n.contents.type) + sys.stdout.flush () + os.abort () + + arg = to_arg (n.contents.narg) + snek.append (arg) + + n = n.contents.narg.next + + return snek diff --git a/python/ast2shell.py b/python/ast2shell.py new file mode 100644 index 0000000..998d978 --- /dev/null +++ b/python/ast2shell.py @@ -0,0 +1,562 @@ +#!/usr/bin/python3 + + +import os; +# from os import abort; + + +STRING_OF_VAR_TYPE_DICT = { + "Normal" : "", + "Minus" : "-", + "Plus" : "+", + "Question" : "?", + "Assign" : "=", + "TrimR" : "%", + "TrimRMax" : "%%", + "TrimL" : "#", + "TrimLMax" : "##", + "Length" : "#" +}; + + +# dash.ml +# +# let rec intercalate p ss = +# match ss with +# | [] -> "" +# | [s] -> s +# | s::ss -> s ^ p ^ intercalate p ss +def intercalate (p, ss): + str = p.join (ss); + +# str = ""; +# +# i = 0; +# for s in ss: +# if (i > 0): +# str = str + p; +# +# str = str + s; +# +# i = i + 1; + + return (str); + + +# dash.ml +# +# let braces s = "{ " ^ s ^ " ; }" +def braces (s): + return "{ " + s + " ; }"; + + +# dash.ml +# +# let parens s = "( " ^ s ^ " )" +def parens 
(s): + return "( " + s + " )"; + + +# let string_of_var_type = function +# | Normal -> "" +# | Minus -> "-" +# | Plus -> "+" +# | Question -> "?" +# | Assign -> "=" +# | TrimR -> "%" +# | TrimRMax -> "%%" +# | TrimL -> "#" +# | TrimLMax -> "##" +# | Length -> "#" +def string_of_var_type (var_type): + if (var_type in STRING_OF_VAR_TYPE_DICT): + return (STRING_OF_VAR_TYPE_DICT [var_type]); + + exit (1); + + +# let separated f l = intercalate " " (List.map f l) +def separated (f, l): + return " ".join (map (f, l)); + + +# let show_unless expected actual = +# if expected = actual +# then "" +# else string_of_int actual +def show_unless (expected, actual): + if (expected == actual): + return ""; + else: + return (str (actual)); + + +# let background s = "{ " ^ s ^ " & }" +def background (s): + return ("{ " + s + " & }"); + + +# let rec to_string = function +# | Command (_,assigns,cmds,redirs) -> +# separated string_of_assign assigns ^ +# (if List.length assigns = 0 || List.length cmds = 0 then "" else " ") ^ +# separated string_of_arg cmds ^ string_of_redirs redirs +# | Pipe (bg,ps) -> +# let p = intercalate " | " (List.map to_string ps) in +# if bg then background p else p +# | Redir (_,a,redirs) -> +# to_string a ^ string_of_redirs redirs +# | Background (_,a,redirs) -> +# (* we translate +# cmds... & +# to +# { cmds & } +# this avoids issues with parsing; in particular, +# cmd1 & ; cmd2 & ; cmd3 +# doesn't parse; it must be: +# cmd1 & cmd2 & cmd3 +# it's a little too annoying to track "was the last thing +# backgrounded?" so the braces resolve the issue. testing +# indicates that they're semantically equivalent. +# *) +# background (to_string a ^ string_of_redirs redirs) +# | Subshell (_,a,redirs) -> +# parens (to_string a ^ string_of_redirs redirs) +# | And (a1,a2) -> to_string a1 ^ " && " ^ to_string a2 +# | Or (a1,a2) -> to_string a1 ^ " || " ^ to_string a2 +# | Not a -> "! 
" ^ braces (to_string a) +# | Semi (a1,a2) -> to_string a1 ^ " ; " ^ to_string a2 +# | If (c,t,e) -> string_of_if c t e +# | While (Not t,b) -> +# "until " ^ to_string t ^ "; do " ^ to_string b ^ "; done " +# | While (t,b) -> +# "while " ^ to_string t ^ "; do " ^ to_string b ^ "; done " +# | For (_,a,body,var) -> +# "for " ^ var ^ " in " ^ string_of_arg a ^ "; do " ^ +# to_string body ^ "; done" +# | Case (_,a,cs) -> +# "case " ^ string_of_arg a ^ " in " ^ +# separated string_of_case cs ^ " esac" +# | Defun (_,name,body) -> name ^ "() {\n" ^ to_string body ^ "\n}" +def to_string (ast): + # print (ast); + + if (len (ast) == 0): + pass; + else: + (type, params) = ast; + + if (type == "Command"): + (_, assigns, cmds, redirs) = params; + str = separated (string_of_assign, assigns); + if ((len (assigns) == 0) or (len (cmds) == 0)): + pass; + else: + str += " "; + str += separated (string_of_arg, cmds) + string_of_redirs (redirs); + + return (str); + elif (type == "Pipe"): + (bg, ps) = params; + p = intercalate (" | ", (map (to_string, ps))); + + if (bg): + return (background (p)); + else: + return (p); + elif (type == "Redir"): + (_, a, redirs) = params; + + return to_string (a) + string_of_redirs (redirs); + elif (type == "Background"): + (_, a, redirs) = params; + + return background (to_string (a) + string_of_redirs (redirs)); + elif (type == "Subshell"): + (_, a, redirs) = params; + + return parens (to_string (a) + string_of_redirs (redirs)); + elif (type == "And"): + (a1, a2) = params + + return braces(to_string(a1)) + " && " + braces(to_string(a2)) + elif (type == "Or"): + (a1, a2) = params + + return braces(to_string(a1)) + " || " + braces(to_string(a2)) + elif (type == "Not"): + (a) = params + + return "! 
" + braces(to_string(a)) + elif (type == "Semi"): + (a1, a2) = params + + return braces(to_string(a1)) + " \n " + braces(to_string(a2)) + elif (type == "If"): + (c, t, e) = params; + return string_of_if (c, t, e); + elif (type == "While"): + (first, b) = params; + + if (first [0] == "Not"): + (_, t) = first; + + return "until " + to_string (t) + "; do " + to_string (b) + "; done "; + else: + t = first; + + return "while " + to_string (t) + "; do " + to_string (b) + "; done "; + elif (type == "For"): + (_, a, body, var) = params; + + return "for " + var + " in " + separated (string_of_arg, a) + "; do " + \ + to_string (body) + "; done"; + elif (type == "Case"): + (_, a, cs) = params; + + return "case " + string_of_arg (a) + " in " + \ + separated (string_of_case, cs) + " esac"; + abort (); + elif (type == "Defun"): + (_, name, body) = params; + + return name + "() {\n" + to_string (body) + "\n}"; + else: + print ("Invalid type: %s" % type); + abort (); + + +# and string_of_if c t e = +# "if " ^ to_string c ^ +# "; then " ^ to_string t ^ +# (match e with +# | Command (-1,[],[],[]) -> "; fi" (* one-armed if *) +# | If (c,t,e) -> "; el" ^ string_of_if c t e +# | _ -> "; else " ^ to_string e ^ "; fi") +def string_of_if (c, t, e): + str1 = "if " + to_string (c) + \ + "; then " + to_string (t); + + # ['Command', [-1, [], [], []]] + if ( (len (e) == 2) \ + and (e [0] == "Command") \ + and (len (e [1]) == 4) \ + and (e [1][0] == -1)) \ + and (len (e [1][1]) == 0) \ + and (len (e [1][2]) == 0) \ + and (len (e [1][3]) == 0): + str1 = str1 + "; fi"; + elif ( e [0] == "If" \ + and (len (e [1]) == 3)): + (c2, t2, e2) = e [1]; + + str1 += "; el" + string_of_if (c2, t2, e2); + else: + str1 += "; else " + to_string (e) + "; fi"; + + return (str1); + + +# https://github.com/ocaml/ocaml/blob/trunk/stdlib/char.ml +# let escaped = function +# | '\'' -> "\\'" +# | '\\' -> "\\\\" +# | '\n' -> "\\n" +# | '\t' -> "\\t" +# | '\r' -> "\\r" +# | '\b' -> "\\b" +# | ' ' .. 
'~' as c -> +# let s = bytes_create 1 in +# bytes_unsafe_set s 0 c; +# unsafe_to_string s +# | c -> +# let n = code c in +# let s = bytes_create 4 in +# bytes_unsafe_set s 0 '\\'; +# bytes_unsafe_set s 1 (unsafe_chr (48 + n / 100)); +# bytes_unsafe_set s 2 (unsafe_chr (48 + (n / 10) mod 10)); +# bytes_unsafe_set s 3 (unsafe_chr (48 + n mod 10)); +# unsafe_to_string s +def escaped (param): + char = chr (param) + + if (char == "'"): + return "\\'"; + elif (char == "\\"): + return "\\\\"; + elif (char == "\n"): + return "\\n"; + elif (char == "\t"): + return "\\t"; + elif (char == "\r"): + return "\\r"; + elif (char == "\b"): + return "\\b"; + elif ((param >= ord (' ')) and (param <= ord ('~'))): + return char; + else: +# str1 = "\\" \ +# + chr (48 + int (param / 100)) \ +# + chr (48 + ((int (param / 10)) % 10)) \ +# + chr (48 + (param % 10)); + return ("\\" + str (param)); + + +# and string_of_arg_char = function +# | E '\'' -> "\\'" +# | E '\"' -> "\\\"" +# | E '(' -> "\\(" +# | E ')' -> "\\)" +# | E '{' -> "\\{" +# | E '}' -> "\\}" +# | E '$' -> "\\$" +# | E '!' -> "\\!" 
+# | E '&' -> "\\&" +# | E '|' -> "\\|" +# | E ';' -> "\\;" +# | C c -> String.make 1 c +# | E c -> Char.escaped c +# | T None -> "~" +# | T (Some u) -> "~" ^ u +# | A a -> "$((" ^ string_of_arg a ^ "))" +# | V (Length,_,name,_) -> "${#" ^ name ^ "}" +# | V (vt,nul,name,a) -> +# "${" ^ name ^ (if nul then ":" else "") ^ string_of_var_type vt ^ string_of_arg a ^ "}" +# | Q a -> "\"" ^ string_of_arg a ^ "\"" +# | B t -> "$(" ^ to_string t ^ ")" +def string_of_arg_char (c, is_quoted=False): + (type, param) = c; + + if (type == "E"): + char = chr (param); + + ## MMG 2021-09-20 It might be safe to move everything except for " in the second list, but no need to do it if the tests pass + ## Chars to escape unconditionally + chars_to_escape = ["'", '"', '`', '(', ')', '{', '}', '$', '!', '&', '|', ';'] + ## Chars to escape only when not quoted + chars_to_escape_when_no_quotes = ['*', '?', '[', ']', '#', '<', '>', '~', ' '] + if char in chars_to_escape: + return '\\' + char + elif char in chars_to_escape_when_no_quotes and not is_quoted: + return '\\' + char + else: + return escaped (param) + elif (type == "C"): + if chr(param) == '"': + return '\\"' + else: + return chr (param); + elif (type == "T"): + if (param == "None"): + return "~"; + elif (len (param) == 2): + if (param [0] == "Some"): + (_, u) = param; + + return "~" + u; + else: + abort (); + else: + print ("Unexpected param for T: %s" % param); + abort (); + elif (type == "A"): + return "$((" + string_of_arg (param, is_quoted) + "))"; + elif (type == "V"): + assert (len (param) == 4); + if (param [0] == "Length"): + (_, _, name, _) = param; + return "${#" + name + "}"; + else: + (vt, nul, name, a) = param; + + stri = "${" + name; + + # Depending on who generated the JSON, nul may be + # a string or a boolean! In Python, non-empty strings + # to True. 
+ if (str (nul).lower () == "true"): + stri += ":"; + elif (str (nul).lower () == "false"): + pass; + else: + os.abort (); # For my own sanity + + stri += string_of_var_type (vt) + string_of_arg (a, is_quoted) + "}"; + + return stri; + elif (type == "Q"): + return "\"" + string_of_arg (param, is_quoted=True) + "\""; + elif (type == "B"): + return "$(" + to_string (param) + ")"; + else: + abort (); + + +# and string_of_arg = function +# | [] -> "" +# | c :: a -> string_of_arg_char c ^ string_of_arg a +def string_of_arg (args, is_quoted=False): + # print (args); + + i = 0 + text = [] + while i < len(args): + c = string_of_arg_char(args[i], is_quoted) + + # dash will parse '$?' as + # [(C, '$'), (E, '?')] + # but we don't normally want to escape ? + # + # so we check up after the fact: if the character after $ is escaped, + # we'll escape the $, too + if c == "$" and (i+1 < len(args)) and args[i+1][0] == "E": + c = "\\$" + text.append(c) + + i = i+1 + + text = "".join(text) + + return (text); + + +# and string_of_assign (v,a) = v ^ "=" ^ string_of_arg a +def string_of_assign (both): + (v, a) = both; + return v + "=" + string_of_arg (a); + + +# and string_of_case c = +# let pats = List.map string_of_arg c.cpattern in +# intercalate "|" pats ^ ") " ^ to_string c.cbody ^ ";;" +def string_of_case (c): + pats = map (string_of_arg, c ['cpattern']); + + return intercalate ("|", pats) + ") " + to_string (c ['cbody']) + ";;"; + + + +# let rec fresh_marker ls s = +# if List.mem s ls +# then fresh_marker ls (s ^ (String.sub s (String.length s - 1) 1)) +# else s +# +# OCaml implementation above is O(n^1.5). Algorithm below is linear. 
+def fresh_marker (heredoc): + respectsFound = set(); + + for line in heredoc.split ('\n'): + respects = 0; + + if ((len (line) > 2) and (line [0] == 'E') and (line [1] == 'O')): + for i in range (2, len (line)): + if (line [i] == 'F'): + respects = i - 2; + + respectsFound.add(respects); + + i = 0; + while (True): + if (not (i in respectsFound)): + return "EOF" + ("F" * i); + + i = i + 1; + + +# This version may give an unnecessarily long EOFFFF... (and therefore won't +# match the OCaml output but it is still correct w.r.t. giving a fresh +# marker, and uses less memory than fresh_marker above). +def fresh_marker0 (heredoc): + maxRespects = 0; + + for line in heredoc.split ('\n'): + respects = 0; + + if ((len (line) > 2) and (line [0] == 'E') and (line [1] == 'O')): + for i in range (2, len (line)): + if (line [i] == 'F'): + respects = i - 1; + + maxRespects = max (maxRespects, respects); + + return "EOF" + ("F" * maxRespects); + + +# and string_of_redir = function +# | File (To,fd,a) -> show_unless 1 fd ^ ">" ^ string_of_arg a +# | File (Clobber,fd,a) -> show_unless 1 fd ^ ">|" ^ string_of_arg a +# | File (From,fd,a) -> show_unless 0 fd ^ "<" ^ string_of_arg a +# | File (FromTo,fd,a) -> show_unless 0 fd ^ "<>" ^ string_of_arg a +# | File (Append,fd,a) -> show_unless 1 fd ^ ">>" ^ string_of_arg a + +# | Dup (ToFD,fd,tgt) -> show_unless 1 fd ^ ">&" ^ string_of_arg tgt +# | Dup (FromFD,fd,tgt) -> show_unless 0 fd ^ "<&" ^ string_of_arg tgt +# | Heredoc (t,fd,a) -> +# let heredoc = string_of_arg a in +# let marker = fresh_marker (lines heredoc) "EOF" in +# show_unless 0 fd ^ "<<" ^ +# (if t = XHere then marker else "'" ^ marker ^ "'") ^ "\n" ^ heredoc ^ marker ^ "\n" +def string_of_redir (redir): + assert (len (redir) == 2); + + (type, params) = redir; + if (type == "File"): + (subtype, fd, a) = params; + if (subtype == "To"): + return (show_unless (1, fd) + ">" + string_of_arg (a)); + elif (subtype == "Clobber"): + return (show_unless (1, fd) + ">|" + string_of_arg 
(a)); + elif (subtype == "From"): + return (show_unless (0, fd) + "<" + string_of_arg (a)); + elif (subtype == "FromTo"): + return (show_unless (0, fd) + "<>" + string_of_arg (a)); + elif (subtype == "Append"): + return (show_unless (1, fd) + ">>" + string_of_arg (a)); + else: + abort (); + elif (type == "Dup"): + (subtype, fd, tgt) = params; + + if (subtype == "ToFD"): + return (show_unless (1, fd) + ">&" + string_of_arg (tgt)); + elif (subtype == "FromFD"): + return (show_unless (0, fd) + "<&" + string_of_arg (tgt)); + else: + abort (); + elif (type == "Heredoc"): + (t, fd, a) = params; + + heredoc = string_of_arg (a, is_quoted = True); + marker = fresh_marker0 (heredoc); + + stri = show_unless (0, fd) + "<<"; + if (t == "XHere"): + stri += marker; + else: + stri += "'" + marker + "'"; + + stri += "\n" + heredoc + marker + "\n"; + + return (stri); + else: + print ("Invalid type: %s" % type); + abort (); + + +# and string_of_redirs rs = +# let ss = List.map string_of_redir rs in +# (if List.length ss > 0 then " " else "") ^ intercalate " " ss +def string_of_redirs (rs): +# if (rs == []): +# return ""; +# +# ss = map (string_of_redir, rs); +# +# return intercalate (" ", ss); + + str = ""; + + for redir in rs: + str = str + " " + string_of_redir (redir); + + return (str); diff --git a/python/dash.py b/python/dash.py new file mode 100644 index 0000000..bb15190 --- /dev/null +++ b/python/dash.py @@ -0,0 +1,335 @@ +from ctypes import * + + +# nodes.h +NCMD = 0; +NPIPE = 1; +NREDIR = 2; +NBACKGND = 3; +NSUBSHELL = 4; +NAND = 5; +NOR = 6; +NSEMI = 7; +NIF = 8; +NWHILE = 9; +NUNTIL = 10; +NFOR = 11; +NCASE = 12; +NCLIST = 13; +NDEFUN = 14; +NARG = 15; +NTO = 16; +NCLOBBER = 17; +NFROM = 18; +NFROMTO = 19; +NAPPEND = 20; +NTOFD = 21; +NFROMFD = 22; +NHERE = 23; +NXHERE = 24; +NNOT = 25; + + +# struct stackmark { +# struct stack_block *stackp; +# char *stacknxt; +# size_t stacknleft; +# }; +# +# We only care about getting the struct size correct, not the contents. 
+class stackmark (Structure): # stand-in for dash's struct stackmark; only sizeof () of it is used below
+    _fields_ = [("stackp", c_void_p),
+                ("nxt", c_void_p),
+                ("size", c_size_t)];
+
+def init_stack (libdash): # hand a stackmark-sized buffer to setstackmark and return that buffer
+    stack = create_string_buffer (sizeof (stackmark));
+
+    libdash.setstackmark.argtypes = [c_void_p]; # Pretend we don't know the contents
+    libdash.setstackmark.restype = None; # ctypes only consults `restype` (singular); `restypes` had no effect
+    libdash.setstackmark (stack);
+
+    return (stack);
+
+def pop_stack (libdash, smark): # smark is the buffer previously returned by init_stack
+    # Inefficient, we should only initialize once
+
+    libdash.popstackmark.argtypes = [c_void_p]; # Again, hide the contents
+    libdash.popstackmark.restype = None;
+
+    return (libdash.popstackmark (smark));
+
+
+def dash_init (libdash): # call libdash's init () (no arguments, no result)
+    libdash.init.argtypes = [];
+    libdash.init.restype = None;
+
+    libdash.init ();
+
+
+def initialize_dash_errno (libdash): # call libdash's initialize_dash_errno () (no arguments, no result)
+    libdash.initialize_dash_errno.argtypes = [];
+    libdash.initialize_dash_errno.restype = None;
+
+    libdash.initialize_dash_errno ();
+
+
+def initialize (libdash): # errno setup first, then dash's own init
+    initialize_dash_errno (libdash);
+    dash_init (libdash);
+
+
+def setinputtostdin (libdash): # setinputfd (0, 0): read from file descriptor 0
+    libdash.setinputfd.argtypes = [c_int, c_int];
+    libdash.setinputfd.restype = None;
+
+    libdash.setinputfd (0, 0);
+
+
+# TODO: allow push parameter
+def setinputfile (libdash, filename): # filename is a str; it is passed UTF-8 encoded with the second argument fixed to 0
+    libdash.setinputfile.argtypes = [c_char_p, c_int];
+    libdash.setinputfile.restype = c_int; # was misspelled `restypes`, which ctypes never reads
+    libdash.setinputfile (filename.encode ('utf-8'), 0);
+
+
+def parsecmd_safe (libdash, interactive): # returns the raw pointer value (c_void_p) produced by libdash
+    libdash.parsecmd_safe.argtypes = [c_int];
+    libdash.parsecmd_safe.restype = c_void_p;
+
+    return (libdash.parsecmd_safe (int (interactive)));
+
+
+# Forward declarations to break recursive dependencies
+class union_node (Union):
+    pass;
+
+class nodelist (Structure):
+    pass;
+
+
+class ncmd (Structure):
+    _fields_ = [("type", c_int),
+                ("linno", c_int),
+                ("assign", POINTER (union_node)),
+                ("args", POINTER (union_node)),
+                ("redirect", POINTER (union_node))];
+
+class npipe (Structure):
+    _fields_ = [("type", c_int),
+                ("backgnd", c_int),
+                ("cmdlist", POINTER (nodelist))];
+
+class nredir (Structure):
+    _fields_ = [("type", c_int),
+                ("linno", c_int),
+                ("n", POINTER (union_node)),
+                ("redirect", POINTER (union_node))];
+
+class nbinary (Structure):
+    _fields_ = [("type", c_int),
+                ("ch1", POINTER (union_node)),
+                ("ch2", POINTER (union_node))];
+
+class nif (Structure):
+    _fields_ = [("type", c_int),
+                ("test", POINTER (union_node)),
+                ("ifpart", POINTER (union_node)),
+                ("elsepart", POINTER (union_node))];
+
+class nfor (Structure):
+    _fields_ = [("type", c_int),
+                ("linno", c_int),
+                ("args", POINTER (union_node)),
+                ("body", POINTER (union_node)),
+                ("var", c_char_p)];
+
+class ncase (Structure):
+    _fields_ = [("type", c_int),
+                ("linno", c_int),
+                ("expr", POINTER (union_node)),
+                ("cases", POINTER (union_node))];
+
+class nclist (Structure):
+    _fields_ = [("type", c_int),
+                ("next", POINTER (union_node)),
+                ("pattern", POINTER (union_node)),
+                ("body", POINTER (union_node))];
+
+class ndefun (Structure):
+    _fields_ = [("type", c_int),
+                ("linno", c_int),
+                ("text", c_char_p),
+                ("body", POINTER (union_node))];
+
+class narg (Structure):
+    _fields_ = [("type", c_int),
+                ("next", POINTER (union_node)),
+                ("text", c_char_p),
+                ("backquote", POINTER (nodelist))];
+
+class nfile (Structure):
+    _fields_ = [("type", c_int),
+                ("next", POINTER (union_node)),
+                ("fd", c_int),
+                ("fname", POINTER (union_node)),
+                ("expfname", c_char_p)]
+
+class ndup (Structure):
+    _fields_ = [("type", c_int),
+                ("next", POINTER (union_node)),
+                ("fd", c_int),
+                ("dupfd", c_int),
+                ("vname", POINTER (union_node))];
+
+class nhere (Structure):
+    _fields_ = [("type", c_int),
+                ("next", POINTER (union_node)),
+                ("fd", c_int),
+                ("doc", POINTER (union_node))];
+
+class nnot (Structure):
+    _fields_ = [("type", c_int),
+                ("com", POINTER (union_node))];
+
+
+nodelist._fields_ = [("next", POINTER (nodelist)),
+                     ("n", POINTER (union_node))];
+
+union_node._fields_ = [("type", c_int),
+                       ("ncmd", ncmd),
+                       ("npipe", npipe),
+                       ("nredir", nredir),
+                       ("nbinary", nbinary),
+                       ("nif", nif),
+                       ("nfor", nfor),
+                       ("ncase", ncase),
+                       ("nclist", nclist),
+                       ("ndefun", ndefun),
+                       ("narg", narg),
+                       ("nfile", nfile),
+                       ("ndup", ndup),
+                       ("nhere", nhere),
+                       ("nnot", nnot)];
+
+
+class strpush (Structure):
+    pass;
+
+# struct strpush {
+#     struct strpush *prev; /* preceding string on stack */
+#     char *prevstring;
+#     int prevnleft;
+#     struct alias *ap; /* if push was associated with an alias */
+#     char *string; /* remember the string since it may change */
+#
+#     /* Remember last two characters for pungetc. */
+#     int lastc[2];
+#
+#     /* Number of outstanding calls to pungetc. */
+#     int unget;
+# };
+strpush._fields_ = [("prev", POINTER (strpush)),
+                    ("prevstring", c_char_p),
+                    ("prevnleft", c_int),
+                    ("ap", c_void_p),
+                    ("string", c_char_p),
+                    ("lastc", 2 * c_int),
+                    ("unget", c_int)];
+
+class parsefile (Structure):
+    pass;
+
+# struct parsefile {
+#     struct parsefile *prev; /* preceding file on stack */
+#     int linno; /* current line */
+#     int fd; /* file descriptor (or -1 if string) */
+#     int nleft; /* number of chars left in this line */
+#     int lleft; /* number of chars left in this buffer */
+#     char *nextc; /* next char in buffer */
+#     char *buf; /* input buffer */
+#     struct strpush *strpush; /* for pushing strings at this level */
+#     struct strpush basestrpush; /* so pushing one is fast */
+#
+#     /* Remember last two characters for pungetc. */
+#     int lastc[2];
+#
+#     /* Number of outstanding calls to pungetc. */
+#     int unget;
+# };
+parsefile._fields_ = [("prev", POINTER (parsefile)),
+                      ("linno", c_int),
+                      ("fd", c_int),
+                      ("nleft", c_int),
+                      ("lleft", c_int),
+                      ("nextc", POINTER (c_char)), # NOT c_char_p!
+                      ("buf", c_char_p),
+                      ("strpush", POINTER (strpush)),
+                      ("basestrpush", strpush),
+                      ("lastc", 2 * c_int),
+                      ("unget", c_int)];
+
+
+# dash.ast
+# let rec nodelist (n : nodelist structure ptr) : (node union ptr) list =
+#   if nullptr n
+#   then []
+#   else (n @-> nodelist_n)::nodelist (n @-> nodelist_next)
+def nodelist (nl): # NOTE: rebinds the module-level name `nodelist` (the Structure above) to this function
+    snek = [];
+
+    # ctypes has different semantics for POINTER vs. c_void_p
+    # See https://groups.google.com/g/nzpug/c/5CJxaWjuQro
+    while (nl): # a NULL POINTER instance is falsy, which ends the walk
+        snek.append (nl.contents.n);
+        nl = nl.contents.next;
+
+    return snek;
+
+
+def caselist (n): # walk the nclist chain and return a list of (pattern, body) pointer pairs
+    cases = [];
+
+    while (n):
+        nclist = n.contents.nclist;
+
+        assert (nclist.type == NCLIST); # NCLIST is 13 in the node-type table at the top of this module
+
+        cases.append ((nclist.pattern, nclist.body));
+
+        n = nclist.next;
+
+    return (cases);
+
+
+def explode_rev (bytes): # same as explode, with the resulting list reversed
+    charlist = explode (bytes);
+    charlist.reverse ();
+
+    return (charlist);
+
+
+def explode (bytes): # bytes object -> list of integer code points, decoded with the "charmap" codec
+    s = bytes.decode ("charmap");
+
+    charlist = [];
+
+    for i in range (len (s)):
+        charlist.append (ord (s [i]));
+
+    return (charlist);
+
+
+def implode_rev (l): # implode applied to the reversed sequence
+    s = implode (reversed (l));
+
+    return (s);
+
+
+def implode (l): # iterable of integer code points -> str, via chr () on each element
+    s = "";
+
+    for c in l:
+        s = s + chr (c);
+
+    return (s);
diff --git a/python/parse_to_ast.py b/python/parse_to_ast.py
new file mode 100644
index 0000000..0604673
--- /dev/null
+++ b/python/parse_to_ast.py
@@ -0,0 +1,86 @@
+import os
+import subprocess
+from ctypes import *
+from ast import of_node
+from dash import *
+
+FILE_PATH = os.path.dirname(os.path.realpath(os.path.abspath(__file__)))
+LIBDASH_LIBRARY_PATH = os.path.join(FILE_PATH, "libdash.so")
+
+EOF_NLEFT = -99; # libdash/src/input.c
+
+class ParsingException(Exception):
+    def __init__(self, message='ParseError'):
+        # Call the base class constructor with the parameters it needs
+        super(ParsingException, self).__init__(message)
+
+# This is a mix of dash.ml:parse_next and parse_to_json.ml.
+def parse_to_ast (inputPath, init=True): + lines = [] + + libdash = CDLL (LIBDASH_LIBRARY_PATH) + + if (init): + initialize (libdash) + + if (inputPath == "-"): + setinputtostdin (libdash) + else: + setinputfile (libdash, inputPath) + + fp = open (inputPath, 'r') + for line in fp: + lines.append (line) + fp.close() + + # struct parsefile *parsefile = &basepf; /* current input file */ + # Get the value of parsefile (not &parsefile)! + parsefile_ptr_ptr = addressof (parsefile.in_dll (libdash, "parsefile")) + parsefile_ptr = cast (parsefile_ptr_ptr, POINTER (POINTER (parsefile))) + parsefile_var = parsefile_ptr.contents + + smark = init_stack (libdash) + + NEOF = addressof (c_int.in_dll (libdash, "tokpushback")) + NERR = addressof (c_int.in_dll (libdash, "lasttoken")) + + while (True): + linno_before = parsefile_var.contents.linno - 1; # libdash is 1-indexed + + n_ptr_C = parsecmd_safe (libdash, False) + + linno_after = parsefile_var.contents.linno - 1; # libdash is 1-indexed + nleft_after = parsefile_var.contents.nleft + + if (n_ptr_C == None): # Dash.Null + pass + elif (n_ptr_C == NEOF): # Dash.Done + break + elif (n_ptr_C == NERR): # Dash.Error + raise ParsingException() + else: + if (nleft_after == EOF_NLEFT): + linno_after = linno_after + 1; # The last line wasn't counted + + if (inputPath != "-"): + ## Both of these assertions check "our" assumption with respect to the final parser state + ## and are therefore not necessary if they become an issue. 
+ assert((linno_after == len (lines)) or (linno_after == len (lines) + 1)) + + # Last line did not have a newline + assert(len (lines [-1]) > 0 and (lines [-1][-1] != '\n')) + else: + assert (nleft_after == 0); # Read whole lines + + n_ptr = cast (n_ptr_C, POINTER (union_node)) + new_ast = of_node (n_ptr) + + if (inputPath != "-"): + parsedLines = "".join(lines[linno_before:linno_after]) + else: + ## When parsing from stdin there is no way to save the lines + parsedLines = None + + yield (new_ast, parsedLines, linno_before, linno_after) + + pop_stack (libdash, smark) diff --git a/python/rt.py b/python/rt.py new file mode 100755 index 0000000..61b235b --- /dev/null +++ b/python/rt.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 + +import sys + +from parse_to_ast import parse_to_ast +from ast2shell import to_string + +sys.setrecursionlimit (9001) + +def print_asts(new_asts): + for (ast, lines, linno_before, linno_after) in new_asts: + print(to_string(ast)) + +if (len(sys.argv) == 1): + new_asts = parse_to_ast("-", True) +else: + new_asts = parse_to_ast(sys.argv[1], True) + +print_asts(new_asts) diff --git a/test/.gitignore b/test/.gitignore index ed32e6c..a352601 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -9,3 +9,4 @@ test test.err test.byte test.cmo +ocaml_python.log diff --git a/test/Makefile b/test/Makefile index 39f02dc..358e409 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,24 +1,17 @@ -.PHONY : all test clean +SCRIPTS_DIR=$(PASH_TOP) -all : test.native test.byte +PYTHON_FILES=../python/rt.py ../python/ast.py ../python/ast2shell.py ../python/dash.py ../python/parse_to_ast.py +OCAML_FILES=../ocaml/rt.sh -test : test.native test.byte $(wildcard tests/*) - @echo "TESTING test.native" - @for f in tests/*; do \ - ./round_trip.sh ./test.native $$f 2>test.err; \ - done - @echo "TESTING test.byte" - @for f in tests/*; do \ - ./round_trip.sh ./test.byte $$f 2>test.err; \ - done +.PHONY : test clean +test: test_ocaml_python.sh $(PYTHON_FILES) $(OCAML_FILES) + 
@echo "LOCAL TESTS" + @find tests -type f | while read f; do ./test_ocaml_python.sh "$$f"; done | tee ocaml_python.log -test.native : test.ml - ocamlfind ocamlopt -g -thread -package threads,str,libdash -linkpkg $^ -o test.native - ldd $@ || otool -L $@ - -test.byte : test.ml - ocamlfind ocamlc -g -thread -package threads,str,libdash -linkpkg $^ -o test.byte + @echo "PASH TESTS" + @find pash_tests -type f | while read f; do ./test_ocaml_python.sh "$$f"; done | tee -a ocaml_python.log + @cat ocaml_python.log | egrep '^[A-Z0-9_]+:' | cut -d ':' -f 1 | sort | uniq -c clean : - rm -f *.o *.cmo *.cmi *.cmx test.native test.byte test.err + rm -f ocaml_python.log diff --git a/test/README.md b/test/README.md new file mode 100644 index 0000000..c3b7f08 --- /dev/null +++ b/test/README.md @@ -0,0 +1,7 @@ +There are three directories of tests: + + - `tests` are the original libdash tests, mostly handwritten + - `pash_tests` are shell scripts taken from [`pash`](https://github.com/binpash/pash) + - `failing` are shell scripts that aren't working right now (which is probably a bug) + +Both OCaml and Python bindings use the `round_trip.sh` to test round tripping. The `test_ocaml_python.sh` script compares the output from Python and OCaml. 
diff --git a/test/failing/.travis-ocaml.sh b/test/failing/.travis-ocaml.sh new file mode 100644 index 0000000..6730871 --- /dev/null +++ b/test/failing/.travis-ocaml.sh @@ -0,0 +1,327 @@ +## basic OCaml and opam installation + +full_apt_version () { + package=$1 + version=$2 + case "${version}" in + latest) echo -n "${package}" ;; + *) echo -n "${package}=" + apt-cache show "$package" \ + | sed -n "s/^Version: \(${version}\)/\1/p" \ + | head -1 + esac +} + +set -uex + +if [ "$TRAVIS_OS_NAME" = freebsd -a "${OPAM_VERSION+x}" = x ]; then + echo OPAM_VERSION not permitted for FreeBSD targets + exit 1 +fi + +OCAML_VERSION=${OCAML_VERSION:-latest} +SYS_OCAML_VERSION=4.05 +# Default opam is the latest release of opam 2 +OPAM_VERSION=${OPAM_VERSION:-2} +OPAM_INIT=${OPAM_INIT:-true} +OCAML_BETA=${OCAML_BETA:-disable} + +OPAM_LATEST_RELEASE=2.0.7 + +case ${TRAVIS_CPU_ARCH:-amd64} in + amd64|notset) OPAM_ARCH=x86_64;; + arm64) OPAM_ARCH=arm64;; + *) echo "'$TRAVIS_CPU_ARCH' architecture not currently supported"; exit 1;; +esac + +case $OPAM_VERSION in + 2|2.0) OPAM_VERSION=$OPAM_LATEST_RELEASE;; + 1.*) echo "Opam version '$OPAM_VERSION' is not supported"; exit 1;; +esac + +if [ "$TRAVIS_OS_NAME" = "osx" ] ; then + brew update &> /dev/null + BREW_OPAM_VERSION=$(brew info opam --json=v1 | sed -e 's/.*"versions":{[^}]*"stable":"//' -e 's/".*//') + if [ "$OPAM_VERSION" != "$BREW_OPAM_VERSION" ] ; then + set +x + echo -e "[\e[0;31mWARNING\e[0m] Ignored OPAM_VERSION=$OPAM_VERSION; interpreted as \"$BREW_OPAM_VERSION\"" >&2 + echo -e "[\e[0;31mWARNING\e[0m] opam 2 is installed via Homebrew" >&2 + set -x + fi + OPAM_VERSION="$BREW_OPAM_VERSION" +fi + +if [ "$OPAM_VERSION" != "$OPAM_LATEST_RELEASE" ] ; then + set +x + echo -e "[\e[0;31mWARNING\e[0m] Out-of-date opam $OPAM_VERSION requested" >&2 + echo -e "[\e[0;31mWARNING\e[0m] Latest release is $OPAM_LATEST_RELEASE" >&2 + set -x +fi + +if [ "${INSTALL_LOCAL+x}" = x ] ; then + if [ "$TRAVIS_OS_NAME" = osx -o "$TRAVIS_OS_NAME" = 
freebsd ] ; then + echo INSTALL_LOCAL not permitted for macOS and FreeBSD targets + exit 1 + fi + + if [ "${OPAM_SWITCH:=ocaml-system}" != ocaml-system ] ; then + echo "INSTALL_LOCAL requires OPAM_SWITCH=ocaml-system (or unset/null)" + exit 1 + fi +fi + +# the base opam repository to use for bootstrapping and catch-all namespace +BASE_REMOTE=${BASE_REMOTE:-git://github.com/ocaml/opam-repository} + +# whether we need a new gcc and binutils +UPDATE_GCC_BINUTILS=${UPDATE_GCC_BINUTILS:-"0"} + +# Install Xenial remotes +UBUNTU_XENIAL=${UBUNTU_XENIAL:-"0"} + +# Install XQuartz on OSX +INSTALL_XQUARTZ=${INSTALL_XQUARTZ:-"false"} + +APT_UPDATED=0 + +add_ppa () { + if [ "$TRAVIS_OS_NAME" = "linux" ] ; then + APT_UPDATED=0 + sudo add-apt-repository --yes ppa:$1 + fi +} + +apt_install () { + if [ "$TRAVIS_OS_NAME" = "linux" ] ; then + if [ "$APT_UPDATED" -eq 0 ] ; then + APT_UPDATED=1 + sudo apt-get update -qq + fi + sudo apt-get install --no-install-recommends -y "$@" + fi +} + +install_ocaml () { + apt_install \ + ocaml ocaml-base ocaml-native-compilers ocaml-compiler-libs \ + ocaml-interp ocaml-base-nox ocaml-nox +} + +install_opam2 () { + case $TRAVIS_OS_NAME in + freebsd) + # Opam does not have any ready to use binaries for FreeBSD + sudo pkg install -qy ocaml-opam ;; + linux) + case $TRAVIS_DIST in + precise|trusty|xenial) + # Required for bubblewrap (supports arm64 & amd64) + add_ppa avsm/ppa ;; + esac + if [ "${INSTALL_LOCAL:=0}" = 0 ] ; then + install_ocaml + fi + apt_install bubblewrap + sudo wget https://github.com/ocaml/opam/releases/download/$OPAM_VERSION/opam-$OPAM_VERSION-$OPAM_ARCH-linux -O /usr/local/bin/opam + sudo chmod +x /usr/local/bin/opam ;; + osx) + if [ "${INSTALL_LOCAL:=0}" = 0 ] ; then + brew install ocaml + fi + sudo curl -fsSL https://github.com/ocaml/opam/releases/download/$OPAM_VERSION/opam-$OPAM_VERSION-$OPAM_ARCH-macos -o /usr/local/bin/opam + sudo chmod +x /usr/local/bin/opam ;; + esac +} + +install_ppa () { + add_ppa $1 + if [ 
"${INSTALL_LOCAL:=0}" = 0 ] ; then + sudo apt-get -qq update + APT_UPDATED=1 + apt_install \ + "$(full_apt_version ocaml $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-base $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-native-compilers $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-compiler-libs $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-interp $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-base-nox $SYS_OCAML_VERSION)" \ + "$(full_apt_version ocaml-nox $SYS_OCAML_VERSION)" + fi + apt_install opam +} + +install_on_freebsd () { + case "$OCAML_VERSION" in + 3.12) OCAML_FULL_VERSION=3.12.1; install_opam2 ;; + 4.00) OCAML_FULL_VERSION=4.00.1; install_opam2 ;; + 4.01) OCAML_FULL_VERSION=4.01.0; install_opam2 ;; + 4.02) OCAML_FULL_VERSION=4.02.3; install_opam2 ;; + 4.03) OCAML_FULL_VERSION=4.03.0; install_opam2 ;; + 4.04) OCAML_FULL_VERSION=4.04.2; install_opam2 ;; + 4.05) OCAML_FULL_VERSION=4.05.0; install_opam2 ;; + 4.06) OCAML_FULL_VERSION=4.06.1; install_opam2 ;; + 4.07) OCAML_FULL_VERSION=4.07.1; install_opam2 ;; + 4.08) OCAML_FULL_VERSION=4.08.1; install_opam2 ;; + 4.09) OCAML_FULL_VERSION=4.09.1; install_opam2 ;; + 4.10) OCAML_FULL_VERSION=4.10.1; install_opam2 ;; + 4.11) OCAML_FULL_VERSION=4.11.0; install_opam2 ;; + 4.12) OCAML_FULL_VERSION=4.12.0+trunk; OCAML_BETA=enable; install_opam2 ;; + *) + if [ "$OCAML_BETA" != "enable" ]; then + echo "Unknown OCAML_VERSION=$OCAML_VERSION" + echo "(An unset OCAML_VERSION used to default to \"latest\", but you must now specify it." 
+ echo "Try something like \"OCAML_VERSION=3.12\", \"OCAML_VERSION=4.10\", or see README-travis.md at https://github.com/ocaml/ocaml-ci-scripts )" + exit 1 + fi + OCAML_FULL_VERSION="${OCAML_VERSION}" + install_opam2 ;; + esac +} + +install_on_linux () { + case "$OCAML_VERSION" in + 3.12) OCAML_FULL_VERSION=3.12.1; install_opam2 ;; + 4.00) OCAML_FULL_VERSION=4.00.1; install_opam2 ;; + 4.01) OCAML_FULL_VERSION=4.01.0; install_opam2 ;; + 4.02) OCAML_FULL_VERSION=4.02.3; install_opam2 ;; + 4.03) OCAML_FULL_VERSION=4.03.0; install_opam2 ;; + 4.04) OCAML_FULL_VERSION=4.04.2; install_opam2 ;; + 4.05) OCAML_FULL_VERSION=4.05.0; install_opam2 ;; + 4.06) OCAML_FULL_VERSION=4.06.1; install_opam2 ;; + 4.07) OCAML_FULL_VERSION=4.07.1; install_opam2 ;; + 4.08) OCAML_FULL_VERSION=4.08.1; install_opam2 ;; + 4.09) OCAML_FULL_VERSION=4.09.1; install_opam2 ;; + 4.10) OCAML_FULL_VERSION=4.10.1; install_opam2 ;; + 4.11) OCAML_FULL_VERSION=4.11.0; install_opam2 ;; + 4.12) OCAML_FULL_VERSION=4.12.0+trunk; OCAML_BETA=enable; install_opam2 ;; + *) + if [ "$OCAML_BETA" != "enable" ]; then + echo "Unknown OCAML_VERSION=$OCAML_VERSION" + echo "(An unset OCAML_VERSION used to default to \"latest\", but you must now specify it." 
+ echo "Try something like \"OCAML_VERSION=3.12\", \"OCAML_VERSION=4.10\", or see README-travis.md at https://github.com/ocaml/ocaml-ci-scripts )" + exit 1 + fi + OCAML_FULL_VERSION="${OCAML_VERSION}" + install_opam2 ;; + esac + + XENIAL="deb mirror://mirrors.ubuntu.com/mirrors.txt xenial main restricted universe" + + if [ "$UPDATE_GCC_BINUTILS" != "0" ] ; then + echo "installing a recent gcc and binutils (mainly to get mirage-entropy-xen working!)" + sudo add-apt-repository "${XENIAL}" + sudo add-apt-repository --yes ppa:ubuntu-toolchain-r/test + sudo apt-get -qq update + sudo apt-get install -y gcc-5 + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 90 + sudo add-apt-repository -r "${XENIAL}" + fi + + if [ "$UBUNTU_XENIAL" != "0" ] ; then + echo "Adding Ubuntu Xenial mirrors" + sudo add-apt-repository "${XENIAL}" + sudo apt-get -qq update + APT_UPDATED=1 + fi + + if [ "${INSTALL_LOCAL:=0}" != 0 ] ; then + ( set +x; echo -en "travis_fold:start:build.ocaml\r" ) 2>/dev/null + echo "Building a local OCaml; this may take a few minutes..." + wget "http://caml.inria.fr/pub/distrib/ocaml-${OCAML_FULL_VERSION%.*}/ocaml-$OCAML_FULL_VERSION.tar.gz" + tar -xzf "ocaml-$OCAML_FULL_VERSION.tar.gz" + cd "ocaml-$OCAML_FULL_VERSION" + ./configure -prefix /usr/local ${OCAML_CONFIGURE_ARGS:=--with-debug-runtime} + make world.opt + sudo make install + cd .. 
+ rm -rf "ocaml-$OCAML_FULL_VERSION" + ( set +x; echo -en "travis_fold:end:build.ocaml\r" ) 2>/dev/null + fi +} + +install_on_osx () { + case $INSTALL_XQUARTZ in + true) + curl -OL "http://xquartz.macosforge.org/downloads/SL/XQuartz-2.7.6.dmg" + sudo hdiutil attach XQuartz-2.7.6.dmg + sudo installer -verbose -pkg /Volumes/XQuartz-2.7.6/XQuartz.pkg -target / + ;; + esac + case "$OCAML_VERSION" in + 3.12) OCAML_FULL_VERSION=3.12.1; install_opam2 ;; + 4.00) OCAML_FULL_VERSION=4.00.1; install_opam2 ;; + 4.01) OCAML_FULL_VERSION=4.01.0; install_opam2 ;; + 4.02) OCAML_FULL_VERSION=4.02.3; install_opam2 ;; + 4.03) OCAML_FULL_VERSION=4.03.0; install_opam2 ;; + 4.04) OCAML_FULL_VERSION=4.04.2; install_opam2 ;; + 4.05) OCAML_FULL_VERSION=4.05.0; install_opam2 ;; + 4.06) OCAML_FULL_VERSION=4.06.1; install_opam2 ;; + 4.07) OCAML_FULL_VERSION=4.07.1; install_opam2 ;; + 4.08) OCAML_FULL_VERSION=4.08.1; install_opam2 ;; + 4.09) OCAML_FULL_VERSION=4.09.0; + OPAM_SWITCH=${OPAM_SWITCH:-ocaml-system}; + brew install ocaml; + install_opam2 ;; + 4.10) OCAML_FULL_VERSION=4.10.1; install_opam2 ;; + 4.11) OCAML_FULL_VERSION=4.11.0; install_opam2 ;; + 4.12) OCAML_FULL_VERSION=4.12.0+trunk; OCAML_BETA=enable; install_opam2 ;; + *) + if [ "$OCAML_BETA" != "enable" ]; then + echo "Unknown OCAML_VERSION=$OCAML_VERSION" + exit 1 + fi + OCAML_FULL_VERSION="${OCAML_VERSION}" + install_opam2 ;; + esac +} + +case $TRAVIS_OS_NAME in + freebsd) install_on_freebsd ;; + osx) install_on_osx ;; + linux) install_on_linux ;; +esac + +ocaml_package=ocaml-base-compiler +if [ "$OCAML_BETA" = "enable" ]; then + ocaml_package=ocaml-variants +fi + +OPAM_SWITCH=${OPAM_SWITCH:-$ocaml_package.$OCAML_FULL_VERSION} + +PACKAGES="$OPAM_SWITCH" +case "$OCAML_VERSION" in + 3.12|4.00|4.01|4.02|4.03|4.04|4.05|4.06) + PACKAGES="$PACKAGES,ocaml-secondary-compiler";; +esac + +export OPAMYES=1 + +case $OPAM_INIT in + true) + opam init -a --bare "$BASE_REMOTE" + opam_repo_selection= + if [ "$OCAML_BETA" = "enable" ]; then + 
opam repo add --dont-select beta git://github.com/ocaml/ocaml-beta-repository.git + opam_repo_selection="--repo=default,beta" + fi + opam switch "$OPAM_SWITCH" || opam switch create $opam_repo_selection "$OPAM_SWITCH" --packages="$PACKAGES" + eval $(opam config env) + ;; +esac + +echo OCAML_VERSION=$OCAML_VERSION > .travis-ocaml.env +echo OPAM_SWITCH=$OPAM_SWITCH >> .travis-ocaml.env + +# Temporary fix an issue with opam-depext < 1.1.3 on FreeBSD. +# See https://github.com/ocaml/opam-depext/pull/123 +echo export ASSUME_ALWAYS_YES=YES >> .travis-ocaml.env + +if [ -x "$(command -v ocaml)" ]; then + # "|| true" is a temp fix for OCaml 4.12: https://github.com/ocaml/ocaml/pull/9798 + ocaml -version || true +else + echo "OCaml is not yet installed" +fi + +opam --version +opam --git-version diff --git a/test/failing/1.tomp3.sh b/test/failing/1.tomp3.sh new file mode 100755 index 0000000..9752159 --- /dev/null +++ b/test/failing/1.tomp3.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# tag: wav-to-mp3 +set -e + +IN=${WAV:-$PASH_TOP/evaluation/benchmarks/aliases/input/wav} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/aliases/input/out} + +find $IN -name '*.wav' | + xargs -n1 basename | + sed "s;\(.*\);-i $IN/\1 -ab 192000 $OUT/\1.mp3;" | + xargs -L1 ffmpeg -y -loglevel quiet -hide_banner diff --git a/test/failing/3.resiz.sh b/test/failing/3.resiz.sh new file mode 100755 index 0000000..42c0354 --- /dev/null +++ b/test/failing/3.resiz.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# tag: resize image +set -e + + +IN=${JPG:-$PASH_TOP/evaluation/benchmarks/aliases/input/jpg} +OUT=${OUT:-PASH_TOP/evaluation/benchmarks/aliases/input/out} + +find $IN -name "*.jpg" | + xargs -n1 basename | + sed "s;\(.*\);-resize 70% $IN/\1 $OUT/\1.70;" | + xargs -L1 convert diff --git a/test/failing/append_nl_if_not.sh b/test/failing/append_nl_if_not.sh new file mode 100755 index 0000000..f8142c0 --- /dev/null +++ b/test/failing/append_nl_if_not.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +## Adds a newline at the end of a file 
if it doesn't already end in a newline. +## Used to prepare inputs for PaSh. + +if [ -z "$1" ]; then + echo "No file argument given!" + exit 1 +else + if [ ! -f "$1" ]; then + echo "File $1 doesn't exist!" + exit 1 + else + tail -c 1 "$1" | od -ta | grep -q nl + if [ $? -eq 1 ] + then + echo >> "$1" + fi + fi +fi diff --git a/test/failing/array.sh b/test/failing/array.sh new file mode 100644 index 0000000..06647a1 --- /dev/null +++ b/test/failing/array.sh @@ -0,0 +1 @@ +p=${cmd_array[$i]} diff --git a/test/failing/async.sh b/test/failing/async.sh new file mode 100755 index 0000000..0e1d484 --- /dev/null +++ b/test/failing/async.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# A script that showcases truly async pipes (via fs) +# Note to self: remember | { lambda } + +fz () { sleep $0; echo "1-"$0; } + +export -f fz + +: > f1 + +tail -f ./f1 | xargs -n 1 bash -c 'fz "$@"' & + +# {seq 5; echo 'yay!' >&2 ; } > ./f1 +seq 5 > ./f1 +echo 'yay!' diff --git a/test/failing/auto-split.sh b/test/failing/auto-split.sh new file mode 100755 index 0000000..d6b5b53 --- /dev/null +++ b/test/failing/auto-split.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +input="$1" +shift +outputs=("$@") +n_outputs="$#" + +# Set a default DISH_TOP in this directory if it doesn't exist +PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel)} +# generate a temp file +temp="$(mktemp -u /tmp/pash_XXXXXXXXXX)" + +cat "$input" > "$temp" +total_lines=$(wc -l "$temp" | cut -f 1 -d ' ') +batch_size=$((total_lines / n_outputs)) +# echo "Input: $input" +# echo "Ouputs: $outputs" +# echo "Number of outputs: $n_outputs" +# echo "Total Lines: $total_lines" +# echo "Batch Size: $batch_size" + +cleanup() +{ + kill -SIGPIPE "$split_pid" > /dev/null 2>&1 +} +trap cleanup EXIT + + +# echo "$PASH_TOP/evaluation/tools/split $input $batch_size $outputs" +"$PASH_TOP"/runtime/split "$temp" "$batch_size" "${outputs[@]}" & +split_pid=$! 
+wait "$split_pid" +rm -f "$temp" diff --git a/test/failing/bio.sh b/test/failing/bio.sh new file mode 100755 index 0000000..7c615bc --- /dev/null +++ b/test/failing/bio.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# https://www.biostars.org/p/43677/ +# https://github.com/h3abionet/h3agatk +# https://docs.google.com/document/d/1siCZrequI4plggz3ho351NnX57CoyCJl9GWp3azlxfU/edit# +bwa mem -M -p -t [num_threads] \ + -R "@RG\tID:1\tPL:ILLUMINA\tPU:pu\tLB:group1\tSM:SAMPLEID" \ + [reference_fasta] \ + [input_fastq] > [output] + +bwa mem genome.fa reads.fastq | samtools sort -o output.bam - + +# https://www.biostars.org/p/43677/ +bwa aln -t 4 ./hg19.fasta ./s1_1.fastq > ./s1_1.sai +bwa aln -t 4 ./hg19.fasta ./s1_2.fastq > ./s1_2.sai +bwa sampe ./hg19.fasta ./s1_1.sai ./s1_2.sai ./s1_1.fastq ./s1_2.fastq | + samtools view -Shu - | + samtools sort - - | + samtools rmdup -s - - | + tee s1_sorted_nodup.bam | + bamToBed > s1_sorted_nodup.bed + +# 4 cores, -M is for Picard compatibility +bwa mem -M -t 4 ./hg19.fasta ./s1_1.fastq ./s1_2.fastq > s1.sam + +samtools merge - *.bam | +# tee merged.bam | + samtools rmdup - - | +# tee rmdup.bam | + samtools mpileup - uf ./hg19.fasta - | + bcftools view -bvcg - | gzip > var.raw.bcf.gz + +bwa sampe ./hg19.fasta <(bwa aln -t 4 ./hg19.fasta ./s1_1.fastq) <(bwa aln -t 4 ./hg19.fasta ./s1_2.fastq) ./s1_1.fastq ./s1_2.fastq | samtools view -Shb /dev/stdin > s1.bam diff --git a/test/failing/bio2.sh b/test/failing/bio2.sh new file mode 100644 index 0000000..fb00a1b --- /dev/null +++ b/test/failing/bio2.sh @@ -0,0 +1,73 @@ +#### Ported #### +# https://dfzljdn9uc3pi.cloudfront.net/2013/203/1/Supplement_S2.pdf +set -e +cd $PASH_TOP/evaluation/benchmarks/bio/bio1/input/ +ls *.R1.fq > namelist +sed -i 's/.R1.fq//g' namelist +NAMES=( `cat "namelist" `) +mkdir -p assembly +# Trims raw files two different ways. +# First way removes any reads with substantial amounts of adapter, but does no +# quality trimming. 
These reads are used for assembly and must be uniform lengths +# Second way removes adapters and does quality trimming. These reads will be +# used for mapping. +for i in "${NAMES[@]}" +do + echo $i + Trim/trim_galore --paired -q 0 --length 90 -a GATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATATCGTATGCCGTCTTCTGCTTG -a2 GATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCG --stringency 20 ${i}.R1.fq ${i}.R2.fq --output_dir ./assembly + Trim/trim_galore --paired -q 20 --length 20 -a GATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATATCGTATGCCGTCTTCTGCTTG -a2 GATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCG --stringency 10 $i.R1.fq $i.R2.fq +done + +# Renaming trimmed files to simpler names +for i in "${NAMES[@]}" +do + mv $i.R1_val_1.fq $i.1.fq + mv $i.R2_val_2.fq $i.2.fq +done + +### Assembly ### +# These parameters could be further optimized for particular taxa +# First step concatenates reads into one forward and one reverse fastq file +cat ./assembly/*.R1_val_1.fq > forward +cat ./assembly/*.R2_val_2.fq > reverse +# Rainbow now clusters and assembles +rainbow/rainbow cluster -1 forward -2 reverse > cat.rbcluster.out 2> log +# we can add -f $1 but im not good with maths +rainbow/rainbow div -i cat.rbcluster.out -o cat.rbdiv.out +rainbow/rainbow merge -a -i cat.rbdiv.out -o cat.rbasm.out -N 1000 +perl rainbow/select_best_rbcontig.pl cat.rbasm.out > rainbowf +# Renames contigs to sequential numbers for simplicity +fastx_renamer -n COUNT -i rainbowf -o reference +## Mapping +# Use BWA to index reference +bwa-0.7.17/bwa index -a bwtsw reference +# Use BWA to map reads to reference. 
+### These parameters could be further optimized for particular taxa +for i in "${NAMES[@]}" +do + bwa-0.7.17/bwa mem reference $i.1.fq $i.2.fq -t 32 -a -T 10 > $i.sam +done +#Convert Sam to Bam and remove low quality, ambiguous mapping +for i in "${NAMES[@]}" +do + samtools view -bS -q15 $i.sam > $i.bam + samtools sort $i.bam -o $i +done +# Index reference for SAMtools +samtools faidx reference +# sort the Sample1.bam cause it sucks. The file needs to be sorted in that +# way before index is called +samtools sort -m 2G -@ 4 Sample1.bam -o lala +mv lala Sample1.bam +# index the bamfile +samtools index Sample1.bam +samtools mpileup -D -f reference *.bam >mpileup +# VarScan calls all sites with at least 5X coverage, a variant frequency above +# 10%, and 95% probability of being a SNP. Need varscan 2.3.5 version +java -jar VarScan.jar mpileup2snp mpileup --output-vcf --min-coverage 5 --strand-filter 0 --min-var-freq 0.1 --p-value 0.05 >SNPS.vcf +# VCFtools to filter raw SNPs and create a filtered vcf file (Final.recode.vcf) +# with SNPs that are present in every individual and that are not INDels +# can also work with --geno 0.99 flag but it needs vcftools 0.1.10 version +vcftools --vcf SNPS.vcf --out Final --recode --non-ref-af 0.001 --remove-indels +# VCFtools again to filter for SNPs that are present at an average of 10X coverage +vcftools --vcf Final.recode.vcf --out Final10X --recode --min-meanDP 10 diff --git a/test/failing/bio3.sh b/test/failing/bio3.sh new file mode 100644 index 0000000..3f2c5a1 --- /dev/null +++ b/test/failing/bio3.sh @@ -0,0 +1,10 @@ +# **Create the bowtie2 alignment database for the Arabidopsis genome** +# https://bioinformaticsworkbook.org/Appendix/GNUparallel/GNU_parallel_examples.html#gsc.tab=0 +cd $PASH_TOP/evaluation/bio/input/bio3 +bowtie2-build TAIR10_chr_all.fas tair +#theirs +time parallel -j2 "bowtie2 --threads 4 -x tair -k1 -q -1 {1} -2 {2} -S {1/.}.sam >& {1/.}.log" ::: fastqfiles/*_1.fastq.gz :::+ fastqfiles/*_2.fastq.gz +#ours 
+paste <(find . -name "*_1.fastq.gz") <(find . -name "*_2.fastq.gz") | xargs -n \ +2 sh -c 'bowtie2 --threads 4 -x tair -k1 -q -1 "$1" -2 "$2" -S fifth_R1.sam' argv0 + diff --git a/test/failing/buggy_comm_script.sh b/test/failing/buggy_comm_script.sh new file mode 100755 index 0000000..3cf15c9 --- /dev/null +++ b/test/failing/buggy_comm_script.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +mkfifo s1 s2 s3 s4 s5 + +PREV_IN=../evaluation/scripts/input/1M.txt +IN=/tmp/1M.txt + +cat $PREV_IN > $IN +echo "end" >> $IN + +cat $IN | grep "king" | tee s4 >s3 & +comm -23 $IN s3 > s1 & +comm -23 $IN s4 > s2 & +{ ../runtime/eager s2 s5 "/tmp/eager_intermediate_#file1" & } +cat s1 s5 > /tmp/buggy.out + +comm -23 <(cat $IN $IN) <(cat $IN | grep "king") > /tmp/seq.out + +rm s1 s2 s3 s4 s5 + +diff /tmp/buggy.out /tmp/seq.out diff --git a/test/failing/build_lib.sh b/test/failing/build_lib.sh new file mode 100644 index 0000000..136fede --- /dev/null +++ b/test/failing/build_lib.sh @@ -0,0 +1,75 @@ +## +## A library of shell functions that can be used to +## easily create a building/dependency installing/input +## downloading scripts. +## + + + +## +## This function checks if all the files in the arguments exist +## It returns 0 if all files exist, or 1 otherwise +## +files_exist_done_check() +{ + for file in "$@"; do + if [ ! -f "$file" ]; then + return 1 + fi + done + return 0 +} + +## +## This function checks if number of files in a sequence of directories +## is correct. 
+## Returns 0 if number is correct, or 1 otherwise +## +number_of_files_in_dir() +{ + local expected_number=$1 + local actual_number=$(ls "${@:2}" | wc -l) + if [ $expected_number -eq $actual_number ]; then + return 0 + else + return 1 + fi +} + +## +## This function executes a single idempotent step only if its check fails +## +## Requirements: +## - The step needs to be idempotent +## - The check needs to also check file sizes if there is concern of non-idempotence or failed download +## +execute_step() +{ + local step_fun=$1 + local step_done_check_fun=$2 + local step_desc=${3:-"Execution step"} + + # shellcheck disable=SC2086 + if ! eval $step_done_check_fun; then + echo "$step_desc is not done, executing..." + # shellcheck disable=SC2086 + eval $step_fun + # shellcheck disable=SC2086 + eval $step_done_check_fun || { echo "ERROR: $step_desc failed!"; exit 1; } + fi + echo "$step_desc completed." +} + +## Issues: +## +## - An overarching problem is that these take time in general, +## and therefore testing them out is not really feasible. +## - Another problem is that by doing that manually, +## we cannot get completely fine-grained. For example, we could +## only copy the missing file _a la_ Rattle, instead of running +## the whole step. +## - Another problem is that idempotence checking is hard to do manually. +## - Another issue is that generating the checks is cumbersome and error-prone. +## Users need to think whether they need file_exists/number_of_files/size checks, +## and if they are downloading, they need to first download and then determine the check. +## \ No newline at end of file diff --git a/test/failing/ci-perf.sh b/test/failing/ci-perf.sh new file mode 100755 index 0000000..e613263 --- /dev/null +++ b/test/failing/ci-perf.sh @@ -0,0 +1,163 @@ +#! 
/usr/bin/env bash + +# Run performance tests + +main() { + set -Eex; + + local pash_d="$(get_pash_dir)"; + + cd "$pash_d"; + git fetch; + local initial_revision="$(get_revision HEAD)"; + local latest_main_revision="$(get_revision main)"; + local revision="${1:-$latest_main_revision}"; + + local output_dir="${2:-/tmp/results}"; + local output_revision_directory="${output_dir}/$revision"; + echo "Will write to $output_revision_directory"; + + # For reproducibility. + trap "git checkout '$initial_revision'" EXIT + + # Use subshell for new working directory and + # visual distinction in `set -e` + echo "Running performance tests for $revision" + (git checkout "$revision" && \ + build_pash_runtime && \ + run_performance_test_suites); + + mkdir -p "$output_revision_directory"; + cp -r "$pash_d/evaluation/results/." "$output_revision_directory/" + + # The code to build the summary file might not be in the commit + # used to run the tests. + git checkout "$latest_main_revision"; + + echo "Summarizing results"; + local eurosys_tests='bigrams,diff,minimal_grep,minimal_sort,set-diff,spell,topn,wf' + summarize_perf_suite "EuroSys One-liners" \ + "$revision" \ + "${output_revision_directory}/eurosys_small" \ + "$eurosys_tests" \ + "2" \ + "distr_auto_split" \ + "${output_dir}/summary_eurosys_small" + + # Generate index page so others can review available summaries + # through web server. 
+ cd "${output_dir}" + ls summary_* > index; + cd - +} + + +build_pash_runtime() { + make -C "$(get_pash_dir)/runtime"; +} + +get_pash_dir() { + local here="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"; + git -C "$here" rev-parse --show-toplevel; +} + +get_revision() { + git rev-parse --short "${1:-HEAD}"; +} + +run_performance_test_suites() { + local pash_d=$(get_pash_dir); + cd "$pash_d/evaluation/eurosys"; + ./execute_eurosys_one_liners.sh -s + # ./execute_unix_benchmarks.sh -l + # ./execute_baseline_sort.sh + # ./execute_max_temp_dish_evaluation.sh + # ./execute_web_index_dish_evaluation.sh +} + +summarize_perf_suite() { + local heading="$1"; + local revision="$2"; + local input_dir="$3"; + local tests="$4"; + local width="$5"; + local variant="$6"; + local summary_file="$7"; + local cell_fmt='%-20s'; + + IFS=',' read -ra test_array <<< "$tests"; + + # When starting a summary file, include a header. + if [[ ! -f "$summary_file" ]]; then + ( + printf "$heading (width=$width variant=$variant)\n"; + printf "$cell_fmt" 'revision'; + for t in "${test_array[@]}"; do + printf "$cell_fmt" "$t"; + done; + printf '\n'; + ) > "$summary_file"; + fi + + # Add a row of test data. 
+ printf "$cell_fmt" "$revision" >> "$summary_file"; + for t in "${test_array[@]}"; do + local perf_file="${input_dir}/${t}_${width}_${variant}.time"; + echo "Summarizing $perf_file"; + printf "$cell_fmt" $(summarize_perf_file "$perf_file") >> "$summary_file"; + done + printf '\n' >> "$summary_file"; +} + +print_pash_execution_time() { + LC_NUMERIC='C' \ + cat "$1" | \ + grep 'Execution time: ' | \ + sed 's/[^0-9\.]//g' | \ + awk '{s+=sprintf("%f", $1)}END{printf "%.4f",s}'; +} + +print_user_time() { + local time_string="$(egrep 'user[^m]+m[0-9\.]+s' "$1" | sed 's/^[^0-9]+//g')"; + local seconds="$(echo "$time_string" | sed -nr 's/.*m([^s]+)s/\1/p')" + local minutes="$(echo "$time_string" | sed -nr 's/^[^0-9]+([0-9\.]+)m.*/\1/p')"; + echo "scale=4; ($minutes * 60) + $seconds" | bc; +} + +summarize_perf_file() { + local perf_file="$1"; + read -a data < <(split_perf_file_name "$perf_file"); + + local test="${data[0]}"; + local width="${data[1]}"; + local variant="${data[2]}"; + + if [[ "$variant" == 'seq' ]]; then + printf "%ss" "$(print_user_time "$1")"; + elif [[ -f "$(make_perf_file_name "$test" "$width" "seq")" ]]; then + local ptime="$(print_pash_execution_time "$1")"; + local utime="$(print_user_time "$1")"; + printf "%ss,x%s" "$ptime" "$(echo "scale=4; $utime / $ptime" | bc)"; + else + print_pash_execution_time "$1"; + fi +} + +split_perf_file_name() { + if [[ "$(basename $1)" =~ (.*)_([0-9]+)_(.*).time$ ]]; then + echo "${BASH_REMATCH[@]:1}"; + return 0 + else + return 1 + fi +} + +make_perf_file_name() { + local name="$1"; + local width="$2"; + local variant="$3"; + echo "${name}_${width}_${variant}.time"; +} + + +(return 0 2>/dev/null) || main "$@" diff --git a/test/failing/ci.sh b/test/failing/ci.sh new file mode 100755 index 0000000..6454fd4 --- /dev/null +++ b/test/failing/ci.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +## +# This runs the majority of the core CI job, including packaging the repo +# running tests. 
Do not add environment installation in this script, this should +# be done manually (both for security and convenience). No two process of this +# script can execute in parallel, nor can this process be safely interleaved +# with any process running `git` (such as pkg). The program webhook.js serves as +# a synchronization point; do not start this script if webhook.js is running on +# the same computer and accepting requests, but rather use webhook.js (as a +# daemon) to launch this script. Otherwise you run the risk of running into +# concurrency issues. See additional notes on webhook.js. +## + +set -ex + +# Placeholder for CI +REPORT_DIR=../../reports +C=5 +cd .. +PASH_TOP="$PWD" +cd - + +SMOOSH_RESULTS="" + +trim() { + tr -d '\n' | awk 'length > 40{$0 = substr($0, 1, 37) "..."} {print $0}' +} + +build_runtime() { + cd ../runtime + make + cd $PASH_TOP/scripts +} + +pash_tests() { + cd ../compiler + ./test_evaluation_scripts.sh | tee >(grep '^Summary' | cut -d ' ' -f2 > pash_tests.sum) + PASH_RESULTS=$(cat pash_tests.sum) + cd $PASH_TOP/scripts +} + +smoosh_tests() { + cd ../../smoosh + TEST_SHELL="$PASH_TOP/pa.sh --width 2 --log_file /tmp/log_file" make -C tests veryclean + TEST_SHELL="$PASH_TOP/pa.sh --width 2 --log_file /tmp/log_file" make -C tests | tee >(grep 'tests passed' | cut -d ' ' -f2 > smoosh_tests.sum) + SMOOSH_RESULTS=$(cat smoosh_tests.sum) + cd $PASH_TOP/scripts +} + +git pull + +# Vars used in report summary +REV=$(git rev-parse --short HEAD) +MSG="$(git log -1 --pretty=%B | trim | head -n 1)" +RES="fail" +TIME="0s" + +# Two report files +RF=$REPORT_DIR/$REV +SF=$REPORT_DIR/summary +ISF=$REPORT_DIR/summary.inv + +err_report() { + echo "Error on line $1" + FORMAT="%s %s %-40s %s %s\n" + SUM="$(printf "$FORMAT" "$(date '+%F;%T')" "$REV" "$MSG" "$RES" "$TIME")" + echo "$SUM" >> $SF +} + +stage() { + echo $(date '+%F %T') $REV $1 >> $RF +} + +cleanup() { + git clean -f +} + +trap 'err_report $LINENO' ERR +trap 'cleanup' EXIT + +# To respect 
invariants of stages +mkdir -p $REPORT_DIR ../../get + +echo $(date '+%F %T') $REV "Starting" > $RF +START_TIME=$(date +%s); +stage "Packaging PaSh" +./pkg.sh +stage "Building Runtime" +build_runtime >> $RF +stage "Running PaSh Tests" +pash_tests >> $RF +stage "Running Smoosh Tests" +smoosh_tests >> $RF +stage "Completing CI" +END_TIME=$(date +%s); + +RES="$(echo $PASH_RESULTS '|' $SMOOSH_RESULTS)" +TIME=$(echo $((END_TIME-START_TIME)) | awk '{print int($1/60)":"int($1%60)}') + +FORMAT="%s %s %-40s %s %ss\n" +SUM="$(printf "$FORMAT" "$(date '+%F;%T')" "$REV" "$MSG" "$RES" "$TIME")" +cat $SF | awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }' > $ISF +echo "$SUM" >> $ISF +cat $ISF | awk '{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j--] }' > $SF + + diff --git a/test/failing/dgsh_tee.sh b/test/failing/dgsh_tee.sh new file mode 100755 index 0000000..7fc992a --- /dev/null +++ b/test/failing/dgsh_tee.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +input=${1?"ERROR: dgsh-tee: No input file given"} +output=${2?"ERROR: dgsh-tee: No output file given"} +args=("${@:3}") + +# Set a default DISH_TOP in this directory if it doesn't exist +PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel)} + +# TODO: Doable check if this is still needed. Turned off for distributed exection. +# PR https://github.com/binpash/pash/pull/495 might've resolved it. +# cleanup() +# { +# kill -SIGTERM $dgsh_tee_pid > /dev/null 2>&1 +# } +# trap cleanup EXIT + +# $PASH_TOP/runtime/dgsh-tee -i "$input" -o "$output" $args & +# dgsh_tee_pid=$! 
+# wait $dgsh_tee_pid +"$PASH_TOP"/runtime/dgsh-tee -i "$input" -o "$output" "${args[@]}" diff --git a/test/failing/driver.sh b/test/failing/driver.sh new file mode 100755 index 0000000..7f65a81 --- /dev/null +++ b/test/failing/driver.sh @@ -0,0 +1,86 @@ +#!/bin/bash +# Generates an executable script for running all parallel experiments +# ./driver.sh [parallelism_factor] + +set -e + +if [[ $(hostname) =~ '.*star$' ]]; +then + # {death,live,mem}star servers: + DIFF_STAT="diffstat" + PARTIAL_DIR=/dev/shm + # TODO: Try Disk vs. Memory-mapped FS + # PARTIAL_DIR="." + IN=${IN:-../scripts/input/100M.txt} +else + DIFF_STAT="wc -l" + PARTIAL_DIR="." + IN=${IN:-../scripts/input/10M.txt} +fi + +PRE="dish" +CREATE="touch" # or mkfifo +CPUs=${1:-$(nproc)} +OUT1=${OUT:-./out1.txt} +OUT2=${OUT:-./out2.txt} +SEQ=${2:-"./seq-grep"} # grep has 3 levels + +# divide by number of chunks in AWK +echo dividing input to $CPUs chunks +total_size=$(wc -l $IN | awk -F " " '{print $1}') +chunk_size=$((total_size / CPUs)) +split -l $chunk_size $IN $PRE-chunk- + +find . 
-maxdepth 1 -type p -delete + +echo '#!/bin/bash' > $PRE-execute.sh +chmod +x ./$PRE-execute.sh +echo "# This script is auto-generated by driver.sh" >> $PRE-execute.sh +echo "#seq script: time (cat $IN | $SEQ > $OUT)" >> $PRE-execute.sh + +# echo "set -x" >> $PRE-execute.sh + +echo creating $CPUs channels +counter=0 +for chunk in $PRE-chunk-*; do + # echo "mkfifo $PRE-channel-$((counter++))" >> $PRE-execute.sh + if [[ $CREATE == 'mkfifo' ]]; then + $CREATE $PARTIAL_DIR/$PRE-channel-$((counter++)) + fi +done + +counter=0 +for chunk in $PRE-chunk-*; do + if [[ $CREATE == 'touch' ]]; then + # echo 'Channel is persistent file, using `>` to create it' + echo "cat $chunk | $SEQ > $PARTIAL_DIR/$PRE-channel-$((counter++)) &" >> $PRE-execute.sh + else + # echo 'Channel is FIFO, using `>>` to append to it' + echo "cat $chunk | $SEQ >> $PARTIAL_DIR/$PRE-channel-$((counter++)) &" >> $PRE-execute.sh + fi +done + +# #FIXME: bash doesn't expand `*` in _numberic_ order (1, 10, 2..) affecting cat +# echo cat '$PARTIAL_DIR/$PRE-channel-* >>' $OUT2 >> $PRE-execute.sh +# # echo 'wait' >> $PRE-execute.sh + +echo 'wait' >> $PRE-execute.sh + +counter=0 +args="" +for chunk in $PRE-chunk-*; do + args="$args $PARTIAL_DIR/$PRE-channel-$((counter++))" +done +echo cat $args '>' $OUT2 >> $PRE-execute.sh + +echo Sequential Timing: +time (cat $IN | $SEQ > $OUT1) + +echo Parallel Timing: +time ./$PRE-execute.sh + +echo Result Diff: +diff $OUT1 $OUT2 | $DIFF_STAT + +find . -maxdepth 1 -type p -delete +rm $PARTIAL_DIR/$PRE-channel-* diff --git a/test/failing/execute_baseline_sort.sh b/test/failing/execute_baseline_sort.sh new file mode 100755 index 0000000..afa6275 --- /dev/null +++ b/test/failing/execute_baseline_sort.sh @@ -0,0 +1,95 @@ +#!/bin/bash + +## Necessary to set PASH_TOP +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +## This sets up to what extent we run the evaluation. +## There are 2 levels: +## 1. Small input | --width 2, 16 +## 2. 
Big input | -- width 2, 4, 8, 16, 32, 64 +evaluation_level=1 + +while getopts 'slh' opt; do + case $opt in + s) evaluation_level=1 ;; + l) evaluation_level=2 ;; + h) echo "There are two possible execution levels:" + echo "option -s: Small input | --width 2, 16" + echo "option -l: Big input | -- width 2, 4, 8, 16, 32, 64" + exit 0 ;; + *) echo 'Error in command line parsing' >&2 + exit 1 + esac +done +shift "$(( OPTIND - 1 ))" + +if [ "$evaluation_level" -eq 1 ]; then + echo "Executing small baseline sort evaluation..." + n_inputs=( + 2 + 16 + ) + env_suffix="small" + intermediary_prefix="small_" +elif [ "$evaluation_level" -eq 2 ]; then + echo "Executing large baseline sort evaluation..." + n_inputs=( + 2 + 4 + 8 + 16 + 32 + 64 + ) + env_suffix="" + intermediary_prefix="" +else + echo "Unrecognizable execution level: $evaluation_level" + exit 1 +fi + +eval_directory="$PASH_TOP/evaluation/" +intermediary_dir="$PASH_TOP/evaluation/intermediary/" +script_dir="${eval_directory}scripts/" +microbenchmarks_dir="$PASH_TOP/evaluation/microbenchmarks/" +results="${eval_directory}results/baseline_sort/" + +mkdir -p $results + +for n_in in "${n_inputs[@]}"; do + experiment="baseline_sort_${n_in}" + sort_parallel_script="${intermediary_dir}sort_${n_in}_seq.sh" + env_file="${intermediary_dir}sort_${n_in}_env.sh" + + echo "Generating input and intermediary scripts... be patient..." + python3 "$PASH_TOP/evaluation/generate_microbenchmark_intermediary_scripts.py" \ + $script_dir "sort" $n_in $intermediary_dir $env_suffix + + . $env_file + export $(cut -d= -f1 $env_file) + + p_n_in="$(( $n_in * 2 ))" + experiment="baseline_sort_${intermediary_prefix}${p_n_in}" + echo "Executing sort with parallel flag for parallelism: ${p_n_in}" + { time /bin/bash $sort_parallel_script "${p_n_in}" > /tmp/seq_output ; } 2> >(tee "${results}${experiment}_parallel.time" >&2) + + echo "Generating input and intermediary scripts... be patient..." 
+ python3 "$PASH_TOP/evaluation/generate_microbenchmark_intermediary_scripts.py" \ + $microbenchmarks_dir "sort" $n_in $intermediary_dir $env_suffix + + exec_script="${intermediary_dir}sort_${n_in}_seq.sh" + experiment="baseline_sort_${intermediary_prefix}${n_in}" + + if [ "$n_in" -eq 2 ]; then + echo "Executing sort with bash" + { time /bin/bash $exec_script ; } 1> /tmp/bash_output 2> >(tee "${results}${experiment}_seq.time" >&2) + fi + + echo "Executing pash (no eager) on sort with --width ${n_in}" + { time $PASH_TOP/pa.sh -w "${n_in}" --log_file /tmp/pash_log --output_time --no_eager $exec_script ; } 1> /tmp/pash_output 2> >(tee "${results}${experiment}_pash_no_eager.time" >&2) + diff -s /tmp/seq_output /tmp/pash_output | head + + echo "Executing pash on sort with --width ${n_in}" + { time $PASH_TOP/pa.sh -w "${n_in}" --log_file /tmp/pash_log --output_time $exec_script ; } 1> /tmp/pash_output 2> >(tee "${results}${experiment}_pash.time" >&2) + diff -s /tmp/seq_output /tmp/pash_output | head +done diff --git a/test/failing/execute_compile_evaluation_script.sh b/test/failing/execute_compile_evaluation_script.sh new file mode 100755 index 0000000..f1e18d7 --- /dev/null +++ b/test/failing/execute_compile_evaluation_script.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +## Necessary to set PASH_TOP +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +execute_seq_flag=0 +eager_flag=0 +no_task_par_eager_flag=0 +auto_split_flag=0 +assert_compiler_success="" + +while getopts 'senpac' opt; do + case $opt in + s) execute_seq_flag=1 ;; + e) eager_flag=1 ;; + n) no_task_par_eager_flag=1 ;; + a) auto_split_flag=1 ;; + c) assert_compiler_success="--assert_compiler_success" ;; + *) echo 'Error in command line parsing' >&2 + exit 1 + esac +done +shift "$(( OPTIND - 1 ))" + +## We assume that each evaluation script has a sequential, a +## distributed, and an environment +microbenchmark=$1 +n_in=$2 +results_subdir=$3 +intermediary_prefix=$4 + 
+experiment="${microbenchmark}_${n_in}" + +eval_directory="$PASH_TOP/evaluation/" +directory="${eval_directory}/${4}intermediary/" +results="${eval_directory}results/${results_subdir}/" +prefix="${directory}${experiment}" +env_file="${prefix}_env.sh" +funs_file="${prefix}_funs.sh" +seq_script="${prefix}_seq.sh" +input_file="${prefix}.in" + +seq_output="${directory}/${microbenchmark}_seq_output" +pash_output="${directory}/${microbenchmark}_pash_output" + +echo "Environment:" +# cat $env_file +. $env_file +vars_to_export=$(cut -d= -f1 $env_file) +if [ ! -z "$vars_to_export" ]; then + export $vars_to_export +fi + +## Export necessary functions +if [ -f "$funs_file" ]; then + source $funs_file +fi + +## Redirect the input if there is an input file +stdin_redir="/dev/null" +if [ -f "$input_file" ]; then + stdin_redir="$(cat "$input_file")" + echo "Has input file: $stdin_redir" +fi + +## TODO: Extend this script to give input to some arguments from stdin. + +if [ "$execute_seq_flag" -eq 1 ]; then + echo "Sequential:" + cat $seq_script + cat $stdin_redir | { time /bin/bash $seq_script > $seq_output ; } 2> >(tee "${results}${experiment}_seq.time" >&2) +else + echo "Not executing sequential..." 
+fi + +## Save the configuration to restore it afterwards +auto_split_opt="--width 1" +config_path_opt="" + +if [ "$auto_split_flag" -eq 1 ]; then + echo "Distributed with auto-split:" + eager_opt="" + auto_split_opt="--width ${n_in}" + distr_result_filename="${results}${experiment}_distr_auto_split.time" +elif [ "$eager_flag" -eq 1 ]; then + echo "Distributed:" + eager_opt="" + distr_result_filename="${results}${experiment}_distr.time" +elif [ "$no_task_par_eager_flag" -eq 1 ]; then + echo "Distributed with naive (no-task-par) eager:" + eager_opt="" + distr_result_filename="${results}${experiment}_distr_no_task_par_eager.time" + + ## Change the configuration + config_path="/tmp/new-config.yaml" + config_path_opt="--config_path ${config_path}" + cat "$PASH_TOP/compiler/config.yaml" > ${config_path} + sed -i 's/runtime\/eager.sh/runtime\/eager-no-task-par.sh/g' "${config_path}" +else + echo "Distributed without eager:" + eager_opt="--no_eager" + distr_result_filename="${results}${experiment}_distr_no_eager.time" +fi + +cat $stdin_redir | { time python3 $PASH_TOP/compiler/pash.py -d 1 --speculation no_spec $assert_compiler_success $eager_opt $auto_split_opt $config_path_opt --output_time $seq_script ; } 1> $pash_output 2> >(tee "${distr_result_filename}" >&2) && +echo "Checking for equivalence..." && +diff -s $seq_output $pash_output | head | tee -a "${distr_result_filename}" >&2 diff --git a/test/failing/execute_eurosys_one_liners.sh b/test/failing/execute_eurosys_one_liners.sh new file mode 100755 index 0000000..4df30f8 --- /dev/null +++ b/test/failing/execute_eurosys_one_liners.sh @@ -0,0 +1,135 @@ +#!/bin/bash + +## Necessary to set PASH_TOP +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +## This sets up to what extent we run the evaluation. +## There are 3 levels: +## 1. Small inputs | --width 2, 16 | Only full PaSh config +## 2. Small inputs | --width 2, 16 | All PaSh configs +## 3. 
Big inputs | -- width 2, 4, 8, 16, 32, 64 | All PaSh configs +## +## Note that for the small inputs there could be some variance with the results +## (especially with higher widths). +evaluation_level=1 + +while getopts 'smlh' opt; do + case $opt in + s) evaluation_level=1 ;; + m) evaluation_level=2 ;; + l) evaluation_level=3 ;; + h) echo "There are three possible execution levels:" + echo "option -s: Small inputs | --width 2, 16 | Only full PaSh config" + echo "option -m: Small inputs | --width 2, 16 | All PaSh configs" + echo "option -l: Big inputs | -- width 2, 4, 8, 16, 32, 64 | All PaSh configs" + exit 0 ;; + *) echo 'Error in command line parsing' >&2 + exit 1 + esac +done +shift "$(( OPTIND - 1 ))" + +## TODO: Add a script that runs the parallel sort evaluation + +if [ "$evaluation_level" -eq 1 ]; then + echo "Executing small evaluation..." + n_inputs=( + 2 + 16 + ) + result_subdir="eurosys_small" + env_suffix="small" + intermediary_prefix="small_" + microbenchmarks=( + 'minimal_grep;-a' # EuroSys: nfa-regex + 'minimal_sort;-a' # EuroSys: sort + 'topn;-a' # EuroSys: top-n + 'wf;-a' # EuroSys: wf + 'spell;-a' # EuroSys: spell + 'diff;-a' # EuroSys: difference + 'bigrams;-a' # EuroSys: bi-grams + 'set-diff;-a' # EuroSys: set-difference + 'double_sort;-a' # EuroSys: sort-sort + 'shortest_scripts;-a' # EuroSys: shortest-scripts + ) +elif [ "$evaluation_level" -eq 2 ]; then + echo "Executing medium evaluation..." 
+ n_inputs=( + 2 + 16 + ) + result_subdir="eurosys_small" + env_suffix="small" + intermediary_prefix="small_" + microbenchmarks=( + 'minimal_grep;-n;-a' # EuroSys: nfa-regex + 'minimal_sort;;-n;-a' # EuroSys: sort + 'topn;;-n;-a' # EuroSys: top-n + 'wf;;-n;-a' # EuroSys: wf + 'spell;-e;-a' # EuroSys: spell + 'diff;;-n;-a' # EuroSys: difference + 'bigrams;-e;-a' # EuroSys: bi-grams + 'set-diff;;-n;-a' # EuroSys: set-difference + 'double_sort;;-n;-e;-a' # EuroSys: sort-sort + 'shortest_scripts;;-n;-a' # EuroSys: shortest-scripts + ) +elif [ "$evaluation_level" -eq 3 ]; then + echo "Executing standard evaluation..." + n_inputs=( + 2 + 4 + 8 + 16 + 32 + 64 + ) + ## TODO: Maybe change the result_subdir for the full evaluation + result_subdir="eurosys_standard" + env_suffix="" + intermediary_prefix="" + microbenchmarks=( + 'minimal_grep;-n;-a' # EuroSys: nfa-regex + 'minimal_sort;;-n;-a' # EuroSys: sort + 'topn;;-n;-a' # EuroSys: top-n + 'wf;;-n;-a' # EuroSys: wf + 'spell;-e;-a' # EuroSys: spell + 'diff;;-n;-a' # EuroSys: difference + 'bigrams;-e;-a' # EuroSys: bi-grams + 'set-diff;;-n;-a' # EuroSys: set-difference + 'double_sort;;-n;-e;-a' # EuroSys: sort-sort + 'shortest_scripts;;-n;-a' # EuroSys: shortest-scripts + ) +else + echo "Unrecognizable execution level: $evaluation_level" + exit 1 +fi + +microbenchmarks_dir="$PASH_TOP/evaluation/microbenchmarks/" +intermediary_dir="$PASH_TOP/evaluation/${intermediary_prefix}intermediary/" +mkdir -p $intermediary_dir +mkdir -p "$PASH_TOP/evaluation/results/$result_subdir/" + +for microbenchmark_config in "${microbenchmarks[@]}"; do + IFS=";" read -r -a flags <<< "${microbenchmark_config}" + microbenchmark=${flags[0]} + echo "Executing: $microbenchmark" + # Execute the sequential script on the first run only + exec_seq="-s" + for n_in in "${n_inputs[@]}"; do + + ## Generate the intermediary script + echo "Generating input and intermediary scripts... be patient..." 
+ python3 "$PASH_TOP/evaluation/generate_microbenchmark_intermediary_scripts.py" \ + $microbenchmarks_dir $microbenchmark $n_in $intermediary_dir $env_suffix + + for flag in "${flags[@]:1}"; do + echo "Flag: ${flag}" + + ## Execute the intermediary script + "$PASH_TOP/evaluation/execute_compile_evaluation_script.sh" $exec_seq $flag "${microbenchmark}" "${n_in}" $result_subdir $intermediary_prefix > /dev/null 2>&1 + + ## Only run the sequential the first time around + exec_seq="" + done + done +done diff --git a/test/failing/execute_gnu_parallel_script.sh b/test/failing/execute_gnu_parallel_script.sh new file mode 100755 index 0000000..9a90a51 --- /dev/null +++ b/test/failing/execute_gnu_parallel_script.sh @@ -0,0 +1,52 @@ +#!/bin/bash + + +microbenchmark=$1 +n_in=$2 +results_subdir="gnu_parallel" + +experiment="${microbenchmark}_${n_in}" + +DISH_TOP=${DISH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +eval_directory="../evaluation/" +intermediary_directory="${eval_directory}/intermediary/" +results="${eval_directory}results/${results_subdir}/" +prefix="${intermediary_directory}${experiment}_gnu_parallel" + +mkdir -p results + +env_file="${prefix}_env.sh" +funs_file="${prefix}_funs.sh" +gnu_parallel_script="${prefix}.sh" + +gnu_parallel_scripts_dir="${eval_directory}/gnu_parallel_benchmarks/" +microbenchmarks_dir="${eval_directory}/microbenchmarks/" + +## Generate the intermediary gnu parallel scripts +python3 generate_gnu_parallel_intermediary_script.py "${gnu_parallel_scripts_dir}" "${microbenchmarks_dir}" \ + "${microbenchmark}" "${n_in}" "${intermediary_directory}" || +{ echo 'GNU parallel script generation failed' ; exit 1; } + +seq_output="${intermediary_directory}/${microbenchmark}_seq_output" +gnu_parallel_output="${intermediary_directory}/${microbenchmark}_gnu_parallel_output" + +echo "Environment:" +cat "$env_file" +. 
"$env_file" +export "$(cut -d= -f1 "$env_file")" + +## Export necessary functions +if [ -f "$funs_file" ]; then + source "$funs_file" +fi + +gnu_parallel_result_filename="${results}${experiment}_gnu_parallel.time" + +echo "GNU Parallel:" +cat "$gnu_parallel_script" +{ time /bin/bash "$gnu_parallel_script" > "$gnu_parallel_output" ; } 2> >(tee "$gnu_parallel_result_filename" >&2) + +echo "Checking for equivalence..." +diff -s "$seq_output" "$gnu_parallel_output" | tee -a "$gnu_parallel_result_filename" + diff --git a/test/failing/execute_max_temp_dish_evaluation.sh b/test/failing/execute_max_temp_dish_evaluation.sh new file mode 100755 index 0000000..0eefbc1 --- /dev/null +++ b/test/failing/execute_max_temp_dish_evaluation.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +## There are two possible execution levels: +## option -s: end_year=2000 +## option -l: end_year=2004 (The EuroSys evaluation) +## option -e: Run extended (separate preprocessing from processing) (The EuroSys evaluation) +start_year=2000 +end_year=2000 # For the small evaluation +execute_separate_flag=0 # Whether to execute processing and preprocessing separately + +while getopts 'sleh' opt; do + case $opt in + s) end_year=2000 ;; + l) end_year=2004 ;; + e) execute_separate_flag=1 ;; + h) echo "There are three possible execution levels:" + echo "option -s: end_year=2000" + echo "option -l: end_year=2004 (The EuroSys evaluation)" + echo "option -e: Run extended (separate preprocessing from processing) (The EuroSys evaluation)" + exit 0 ;; + *) echo 'Error in command line parsing' >&2 + exit 1 + esac +done +shift "$(( OPTIND - 1 ))" + + +eval_dir="$PASH_TOP/evaluation/" +results_dir="${eval_dir}/results/" + + +echo "Running max-temp evaluation for years: $start_year-$end_year" + +max_temp_complete_script="${eval_dir}/scripts/max-temp-complete.sh" + +temp_dir=max_temp_tmp_results +mkdir -p $temp_dir + 
+seq_output="${temp_dir}/max_temp_seq_output" +pash_width_16_output="${temp_dir}/max_temp_pash_16_output" +seq_time="$results_dir/max-temp-complete-$start_year-$end_year-seq.time" +pash_width_16_time="$results_dir/max-temp-complete-$start_year-$end_year-16-pash.time" + +echo "Executing the complete max-temp script with bash..." +seq "$start_year" "$end_year" | { time /bin/bash $max_temp_complete_script > $seq_output ; } 2> >(tee "${seq_time}" >&2) + +echo "Executing the complete max-temp script with pash -w 16 (log in: ${temp_dir}/max_temp_pash_16_log)" +seq "$start_year" "$end_year" | { time $PASH_TOP/pa.sh -w 16 --log_file "${temp_dir}/max_temp_pash_16_log" --output_time $max_temp_complete_script ; } 1> "$pash_width_16_output" 2> >(tee "$pash_width_16_time" >&2) +echo "Checking for output equivalence..." +diff -s $seq_output $pash_width_16_output | head + +if [ "$execute_separate_flag" -eq 1 ]; then + echo "Extended: Executing preprocessing and processing separately" + + max_temp_preprocess_script="${eval_dir}/scripts/max-temp-preprocess.sh" + max_temp_process_script="${eval_dir}/scripts/max-temp-process.sh" + + seq_preprocess_time="$results_dir/max-temp-preprocess-$start_year-$end_year-seq.time" + pash_width_16_preprocess_time="$results_dir/max-temp-preprocess-$start_year-$end_year-16-pash.time" + seq_process_time="$results_dir/max-temp-process-$start_year-$end_year-seq.time" + pash_width_16_process_time="$results_dir/max-temp-process-$start_year-$end_year-16-pash.time" + preprocess_output="${temp_dir}/max-temp-preprocess-output" + + echo "Executing the preprocessing max-temp script with bash..." 
+ seq "$start_year" "$end_year" | { time /bin/bash $max_temp_preprocess_script > $seq_output ; } 2> >(tee "${seq_preprocess_time}" >&2) + + echo "Executing the preprocessing max-temp script with pash -w 16 (log in: ${temp_dir}/pash_16_log)" + seq "$start_year" "$end_year" | { time $PASH_TOP/pa.sh -w 16 --log_file "${temp_dir}/pash_16_log" --output_time $max_temp_preprocess_script ; } 1> "$pash_width_16_output" 2> >(tee "${pash_width_16_preprocess_time}" >&2) + ## This equivalence takes a very long time to check (uncomment with caution) + # echo "Checking for output equivalence..." + # diff -s $seq_output $pash_width_16_output | head + + ## Copy the sequential preprocess output to another file so that it doesn't get overwritten + echo "Copying intermediate file..." + split -n l/16 -d "$seq_output" ${preprocess_output}_16_ + + ## Export the input variable for the process script + export IN="${preprocess_output}_16_*" + + echo "Executing the processing max-temp script with bash..." + { time /bin/bash $max_temp_process_script > $seq_output ; } 2> >(tee "${seq_process_time}" >&2) + + echo "Executing the processing max-temp script with pash -w 16 (log in: ${temp_dir}/pash_16_log)" + { time $PASH_TOP/pa.sh -w 16 --log_file "${temp_dir}/pash_16_log" --output_time $max_temp_process_script ; } 1> "$pash_width_16_output" 2> >(tee "${pash_width_16_process_time}" >&2) + echo "Checking for output equivalence..." 
+ diff -s $seq_output $pash_width_16_output | head +fi diff --git a/test/failing/execute_web_index_dish_evaluation.sh b/test/failing/execute_web_index_dish_evaluation.sh new file mode 100755 index 0000000..921fd8b --- /dev/null +++ b/test/failing/execute_web_index_dish_evaluation.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +## There are two possible execution levels: +## options -s: 1,000 urls (about 1.5 minutes in bash) +## options -l: 100,000 urls (a couple hours in bash) +input_number=1000 # About 1.5 minutes in bash +# input_number=100 # About 7 seconds in bash + +while getopts 'slh' opt; do + case $opt in + s) input_number=1000 ;; + l) input_number=100000 ;; + h) echo "There are two possible execution levels:" + echo "option -s: 1,000 urls (about 1.5 minutes in bash)" + echo "option -l: 100,000 urls (a couple hours in bash) (EuroSys evaluation)" + exit 0 ;; + *) echo 'Error in command line parsing' >&2 + exit 1 + esac +done +shift "$(( OPTIND - 1 ))" + +eval_dir="$PASH_TOP/evaluation/" +directory="${eval_dir}/scripts/web-index/" +results_dir="${eval_dir}/results/" +input_dir="${HOME}/wikipedia/" + +export IN="$input_dir/index_h_${input_number}.txt" +export WIKI="${input_dir}" +export WEB_INDEX_DIR="${directory}" + +web_index_script="${eval_dir}/scripts/web-index.sh" + +temp_dir=web_index_tmp_results +mkdir -p "$temp_dir" + +seq_output="${temp_dir}/seq_output" +pash_width_2_output="${temp_dir}/pash_2_output" +pash_width_16_output="${temp_dir}/pash_16_output" +seq_time="$results_dir/web-index-${input_number}-seq.time" +pash_width_2_time="$results_dir/web-index-${input_number}-2-pash.time" +pash_width_16_time="$results_dir/web-index-${input_number}-16-pash.time" + +echo "Executing the script with bash..." 
+{ time /bin/bash $web_index_script > $seq_output ; } 2> >(tee "${seq_time}" >&2) + +echo "Executing the script with pash -w 2 (log in: ${temp_dir}/pash_2_log)" +{ time $PASH_TOP/pa.sh -w 2 --log_file "${temp_dir}/pash_2_log" --output_time $web_index_script ; } 1> "$pash_width_2_output" 2> >(tee "${pash_width_2_time}" >&2) +echo "Checking for output equivalence..." +diff -s $seq_output $pash_width_2_output | head + +echo "Executing the script with pash -w 16 (log in: ${temp_dir}/pash_16_log)" +{ time $PASH_TOP/pa.sh -w 16 --log_file "${temp_dir}/pash_16_log" --output_time $web_index_script ; } 1> "$pash_width_16_output" 2> >(tee "${pash_width_16_time}" >&2) +echo "Checking for output equivalence..." +diff -s $seq_output $pash_width_16_output | head diff --git a/test/failing/exit_error.sh b/test/failing/exit_error.sh new file mode 100644 index 0000000..34c234c --- /dev/null +++ b/test/failing/exit_error.sh @@ -0,0 +1,10 @@ +( ( true ) 3>/dev/null/abc; echo $?; false); echo $? +({ true; } 3>/dev/null/abc; echo $?; false); echo $? +(for i in 1; do true; done 3>/dev/null/abc; echo $?; false); echo $? +(case x in (x) true ;; esac 3>/dev/null/abc; echo $?; false); echo $? +(if true; then true; fi 3>/dev/null/abc; echo $?; false); echo $? +(while false; do true; done 3>/dev/null/abc; echo $?; false); echo $? +(until true; do true; done 3>/dev/null/abc; echo $?; false); echo $? +(func() { true; } 3>/dev/null/abc && func; echo $?; false); echo $? +func() { true; }; (func 3>/dev/null/abc; echo $?; false); echo $? +(name_of_a_command_that_will_not_be_found; echo $?; false); echo $? 
\ No newline at end of file diff --git a/test/failing/genome-diff.sh b/test/failing/genome-diff.sh new file mode 100755 index 0000000..a269f9e --- /dev/null +++ b/test/failing/genome-diff.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# Find differences between two genome sequences---a a paired Illumina sequencing +# read (FASTQ files) and an assembled reference genome from GenBank (e.g., +# Pasteurella multocida). The reads are aligned to the reference, and sorted by +# coordinate. Instead of saving the BAM file, we pipe it directly to a series of +# BCF tool steps. Note the use of -l 0 and -Ou to keep the piped data in an +# uncompressed form, to avoid repeated compression/decompression steps. The +# --min-MQ 60 ensures only uniquely mapped reads are used. The final filter step +# removes low quality variant calls, heterozygous calls (this is haploid +# bacteria), and any regions with less than 10 supporting reads. + +# Requires: samtools, minimap2, bcftools +# Data: http://ndr.md/data/bio/R1.fastq.gz http://ndr.md/data/bio/R2.fastq.gz http://ndr.md/data/bio/ref.fa + +# https://github.com/samtools/samtools/releases/latest +# https://github.com/lh3/minimap2 +# http://thegenomefactory.blogspot.com/2018/10/a-unix-one-liner-to-call-bacterial.html + +CPUS=1 +REF=./input/ref.fa +R1=./input/R1.fastq.gz +R2=./input/R2.fastq.gz +OUT=/dev/shm/out.txt + +BIO_TOOLS=~/biotools + +# These should be added to every script +export PATH="$PATH:$BIO_TOOLS/bcftools-1.9" +export PATH="$PATH:$BIO_TOOLS/samtools-1.9" +export PATH="$PATH:$BIO_TOOLS/htslib-1.9" +export PATH="$PATH:$BIO_TOOLS/minimap2-2.17_x64-linux" + +minimap2 -a -x sr -t "$CPUS" "$REF" "$R1" "$R2" | # align reads to the reference + samtools sort -l 0 --threads "$CPUS" | # sort reads by coordinate + bcftools mpileup -Ou -B --min-MQ 60 -f "$REF" - | # multi-way pileup producing genotype likelihoods + bcftools call -Ou -v -m - | # SNP/indel calling + bcftools norm -Ou -f "$REF" -d all - | # left-align and normalize indels + bcftools 
filter -Ov -e 'QUAL<40 || DP<10 || GT!="1/1"' | # removes low-quality variant calls, etc + bcftools stats | # produce VCF/BCF stats + grep '^SN' | # look for a starting pattern + cut -f3- > $OUT # only write third column diff --git a/test/failing/heredoc2.sh b/test/failing/heredoc2.sh new file mode 100644 index 0000000..115f361 --- /dev/null +++ b/test/failing/heredoc2.sh @@ -0,0 +1,6 @@ +for i in '#' +do + cat << EOF + x=\`printf '%s' \\$i\`; printf '%s\\n' "\$x" +EOF +done diff --git a/test/failing/incomplete-arith.sh b/test/failing/incomplete-arith.sh new file mode 100644 index 0000000..04be620 --- /dev/null +++ b/test/failing/incomplete-arith.sh @@ -0,0 +1,10 @@ +cat=1 +EOH=2 +echo $((cat <(true) + +echo "start"; ls -l . | grep '.sh' | wc -l; echo "..scripts found here" > $OUT +{ echo "start"; + echo $(ls -l .) | grep '.sh' | wc -l; + echo "..scripts found here" +} > $OUT + +{ ls -R ../ | sort -rn | uniq | head; } > /dev/null 2>&1 & + +tee >(wc -l >&2) < $( echo $OUT ) | gzip > $OUT.gz + +# "optional" AND and OR composition operators +[ -f 'pizza.123' ] && ( echo 'exists' >$OUT ) || { echo 'does not' >$OUT; } + +wait + diff --git a/test/failing/mk_meta.sh b/test/failing/mk_meta.sh new file mode 100755 index 0000000..22d1d18 --- /dev/null +++ b/test/failing/mk_meta.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +set -e + +LIB="$1" +: ${LIB:=$(opam var lib)/libdash} + +cat >META <&2 + exit 1 + esac +done +shift "$(( OPTIND - 1 ))" + +# echo "Mult by: $multiply_factor" + +temp_file="$(mktemp -u)" + +cat > temp_file + +for (( i = 0; i < $multiply_factor; i++ )); do + cat temp_file +done + +rm temp_file diff --git a/test/failing/pash_init_setup.sh b/test/failing/pash_init_setup.sh new file mode 100644 index 0000000..a2d41c9 --- /dev/null +++ b/test/failing/pash_init_setup.sh @@ -0,0 +1,238 @@ +# source the local pash config +source ~/.pash_init +## File directory +export RUNTIME_DIR=$(dirname "${BASH_SOURCE[0]}") +## TODO: Is there a better way to do this? 
+export RUNTIME_LIBRARY_DIR="$RUNTIME_DIR/../runtime/" +export PASH_REDIR="&2" +export PASH_DEBUG_LEVEL=0 + +## Check flags +export pash_output_time_flag=1 +export pash_execute_flag=1 +export pash_speculation_flag=0 # By default there is no speculation +export pash_dry_run_compiler_flag=0 +export pash_assert_compiler_success_flag=0 +export pash_checking_speculation=0 +export pash_checking_log_file=0 +export pash_checking_debug_level=0 +export pash_avoid_pash_runtime_completion_flag=0 +export pash_profile_driven_flag=1 +export pash_daemon=1 +export pash_parallel_pipelines=0 +export pash_daemon_communicates_through_unix_pipes_flag=0 +export show_version=0 +export distributed_exec=0 + +for item in "$@" +do + if [ "$pash_checking_speculation" -eq 1 ]; then + export pash_checking_speculation=0 + if [ "no_spec" == "$item" ]; then + export pash_speculation_flag=0 + elif [ "quick_abort" == "$item" ]; then + ## TODO: Fix how speculation interacts with dry_run, assert_compiler_success + export pash_speculation_flag=1 + echo "$$: Error: Speculation quick-abort is currently unmaintained!" 1>&2 + echo "Exiting..." 
1>&2 + exit 1 + else + echo "$$: Unknown value for option --speculation" 1>&2 + exit 1 + fi + fi + + if [ "$pash_checking_log_file" -eq 1 ]; then + export pash_checking_log_file=0 + export PASH_REDIR="$item" + fi + + if [ "$pash_checking_debug_level" -eq 1 ]; then + export pash_checking_debug_level=0 + export PASH_DEBUG_LEVEL=$item + fi + + # We output time always + # if [ "--output_time" == "$item" ]; then + # pash_output_time_flag=1 + # fi + if [ "--version" == "$item" ]; then + export show_version=1 + fi + if [ "--dry_run_compiler" == "$item" ]; then + export pash_dry_run_compiler_flag=1 + fi + + if [ "--assert_compiler_success" == "$item" ]; then + export pash_assert_compiler_success_flag=1 + fi + + if [ "--speculation" == "$item" ]; then + pash_checking_speculation=1 + fi + + if [ "--log_file" == "$item" ]; then + pash_checking_log_file=1 + fi + + if [ "--avoid_pash_runtime_completion" == "$item" ]; then + export pash_avoid_pash_runtime_completion_flag=1 + fi + + if [ "--profile_driven" == "$item" ]; then + export pash_profile_driven_flag=1 + fi + + if [ "-d" == "$item" ] || [ "--debug" == "$item" ]; then + pash_checking_debug_level=1 + fi + + if [ "--no_daemon" == "$item" ]; then + export pash_daemon=0 + fi + + if [ "--parallel_pipelines" == "$item" ]; then + export pash_parallel_pipelines=1 + fi + + if [ "--daemon_communicates_through_unix_pipes" == "$item" ]; then + export pash_daemon_communicates_through_unix_pipes_flag=1 + fi + + if [ "--distributed_exec" == "$item" ]; then + export distributed_exec=1 + fi +done + +## `pash_redir_output` and `pash_redir_all_output` are strictly for logging. +## +## They do not execute their arguments if there is no debugging. 
+if [ "$PASH_DEBUG_LEVEL" -eq 0 ]; then + pash_redir_output() + { + : + } + + pash_redir_all_output() + { + : + } + + pash_redir_all_output_always_execute() + { + > /dev/null 2>&1 "$@" + } + +else + if [ "$PASH_REDIR" == '&2' ]; then + pash_redir_output() + { + >&2 "$@" + } + + pash_redir_all_output() + { + >&2 "$@" + } + + pash_redir_all_output_always_execute() + { + >&2 "$@" + } + else + pash_redir_output() + { + >>"$PASH_REDIR" "$@" + } + + pash_redir_all_output() + { + >>"$PASH_REDIR" 2>&1 "$@" + } + + pash_redir_all_output_always_execute() + { + >>"$PASH_REDIR" 2>&1 "$@" + } + fi +fi + +export -f pash_redir_output +export -f pash_redir_all_output +export -f pash_redir_all_output_always_execute + + +if [ "$pash_daemon_communicates_through_unix_pipes_flag" -eq 1 ]; then + pash_communicate_daemon() + { + local message=$1 + pash_redir_output echo "Sending msg to daemon: $message" + echo "$message" > "$RUNTIME_IN_FIFO" + daemon_response=$(cat "$RUNTIME_OUT_FIFO") + pash_redir_output echo "Got response from daemon: $daemon_response" + echo "$daemon_response" + } + + pash_communicate_daemon_just_send() + { + local message=$1 + pash_redir_output echo "Sending msg to daemon: $message" + echo "$message" > "$RUNTIME_IN_FIFO" + } + + pash_wait_until_daemon_listening() + { + : + } +else + pash_communicate_daemon() + { + local message=$1 + pash_redir_output echo "Sending msg to daemon: $message" + daemon_response=$(echo "$message" | nc -U "$DAEMON_SOCKET") + pash_redir_output echo "Got response from daemon: $daemon_response" + echo "$daemon_response" + } + + pash_communicate_daemon_just_send() + { + pash_communicate_daemon "$1" + } + + pash_wait_until_daemon_listening() + { + ## Only wait for a limited amount of time. + ## If the daemon cannot start listening in ~ 1 second, + ## then it must have crashed or so. 
+ i=0 + ## This is a magic number to make sure that we wait enough + maximum_retries=100 + ## For some reason, `nc -z` doesn't work on livestar (it always returns error) + ## and therefore we need to send something. + until echo "Daemon Start" 2> /dev/null | nc -U "$DAEMON_SOCKET" >/dev/null 2>&1 ; + do + ## TODO: Can we wait for the daemon in a better way? + sleep 0.01 + i=$((i+1)) + if [ $i -eq $maximum_retries ]; then + echo "Error: Maximum retries: $maximum_retries exceeded when waiting for daemon to bind to socket!" 1>&2 + echo "Exiting..." 1>&2 + exit 1 + fi + done + } +fi + +if [ "$distributed_exec" -eq 1 ]; then + pash_communicate_worker_manager() + { + local message=$1 + pash_redir_output echo "Sending msg to worker manager: $message" + manager_response=$(echo "$message" | nc -U "$DSPASH_SOCKET") + pash_redir_output echo "Got response from worker manager: $manager_response" + echo "$manager_response" + } +fi +export -f pash_communicate_daemon +export -f pash_communicate_daemon_just_send +export -f pash_wait_until_daemon_listening diff --git a/test/failing/pash_runtime.sh b/test/failing/pash_runtime.sh new file mode 100755 index 0000000..14ee3c1 --- /dev/null +++ b/test/failing/pash_runtime.sh @@ -0,0 +1,297 @@ +#!/bin/bash + +## +## High level design. +## +## (1) The `pash_runtime` should behave as a wrapper, saving all the necessary state: +## - previous exit code +## - previous set status +## - previous variables +## and then reverting to PaSh internal state +## +## (2) Then it should perform pash-internal work. +## +## (3) Then it should make sure to revert the exit code and `set` state to the saved values. +## +## (4) Then it should execute the inside script (either original or parallel) +## TODO: Figure out what could be different before (1), during (4), and after (7) +## +## (5) Then it save all necessary state and revert to pash-internal state. +## (At the moment this happens automatically because the script is ran in a subshell.) 
+## +## (6) Then it should do all left pash internal work. +## +## (7) Before exiting it should revert all exit state. +## +## Visually: +## +## -- bash -- | -- pash -- +## ... | +## \----(1)----\ +## | ... +## | (2) +## | ... +## /----(3)----/ +## ... | +## (4) | +## ... | +## +## (The rest of the steps happen only in debug mode) +## ... +## \----(5)----\ +## | ... +## | (6) +## | ... +## /----(7)----/ +## ... | + +## TODO: Make a list/properly define what needs to be saved at (1), (3), (5), (7) +## +## Necessary for pash: +## - PATH important for PaSh but might be changed in bash +## - IFS has to be kept default for PaSh to work +## +## Necessary for bash: +## - Last PID $! (TODO) +## - Last exit code $? +## - set state $- +## - File descriptors (TODO) +## - Loop state (?) Maybe `source` is adequate for this (TODO) +## - Traos (TODO) +## +## (maybe) TODO: After that, maybe we can create cleaner functions for (1), (3), (5), (7). +## E.g. we can have a correspondence between variable names and revert them using them + +## +## (1) +## + +## Store the previous exit status to propagate to the compiler +## export pash_previous_exit_status=$? +## The assignment now happens outside +export pash_previous_exit_status + +## Store the current `set` status to pash to the inside script +export pash_previous_set_status=$- + +pash_redir_output echo "$$: (1) Previous exit status: $pash_previous_exit_status" +pash_redir_output echo "$$: (1) Previous set state: $pash_previous_set_status" + +## Prepare a file with all shell variables +## +## This is only needed by PaSh to expand. +## +## TODO: Maybe we can get rid of it since PaSh has access to the environment anyway? +## TODO: Remove this call to pash_ptempfile_name.sh. Actually remove this file in general. 
+## PaSh should only generate temp files using $RANDOM$RANDOM$RANDOM +# pash_runtime_shell_variables_file="$($RUNTIME_DIR/pash_ptempfile_name.sh $distro)" +pash_runtime_shell_variables_file="${PASH_TMP_PREFIX}/pash_$RANDOM$RANDOM$RANDOM" +source "$RUNTIME_DIR/pash_declare_vars.sh" "$pash_runtime_shell_variables_file" +pash_redir_output echo "$$: (1) Bash variables saved in: $pash_runtime_shell_variables_file" + +## Abort script if variable is unset +pash_default_set_state="huB" + +## Revert the `set` state to not have spurious failures +pash_redir_output echo "$$: (1) Bash set state at start of execution: $pash_previous_set_status" +source "$RUNTIME_DIR/pash_set_from_to.sh" "$pash_previous_set_status" "$pash_default_set_state" +pash_redir_output echo "$$: (1) Set state reverted to PaSh-internal set state: $-" + +## +## (2) +## + +## The first argument contains the sequential script. Just running it should work for all tests. +pash_sequential_script_file=$1 + +## The second argument SHOULD be the file that contains the IR to be compiled +pash_input_ir_file=$2 + +## The parallel script will be saved in the following file if compilation is successful. +# pash_compiled_script_file="$($RUNTIME_DIR/pash_ptempfile_name.sh $distro)" +pash_compiled_script_file="${PASH_TMP_PREFIX}/pash_$RANDOM$RANDOM$RANDOM" + + +if [ "$pash_speculation_flag" -eq 1 ]; then + ## Count the execution time + pash_exec_time_start=$(date +"%s%N") + source "$RUNTIME_DIR/pash_runtime_quick_abort.sh" + pash_runtime_final_status=$? + ## For now this will fail!!! + exit 1 +else + + if [ "$pash_daemon" -eq 1 ]; then + ## TODO: Have a more proper communication protocol + ## TODO: Make a proper client for the daemon + pash_redir_output echo "$$: (2) Before asking the daemon for compilation..." 
+ ## Send and receive from daemon + msg="Compile:${pash_compiled_script_file}| Variable File:${pash_runtime_shell_variables_file}| Input IR File:${pash_input_ir_file}" + daemon_response=$(pash_communicate_daemon "$msg") # Blocking step, daemon will not send response until it's safe to continue + + if [[ "$daemon_response" == *"OK:"* ]]; then + pash_runtime_return_code=0 + elif [ -z "$daemon_response" ]; then + ## Trouble... Daemon crashed, rip + pash_redir_output echo "$$: ERROR: (2) Daemon crashed!" + exit 1 + else + pash_runtime_return_code=1 + fi + + # Get assigned process id + # We need to split the daemon response into elements of an array by + # shell's field splitting. + # shellcheck disable=SC2206 + response_args=($daemon_response) + process_id=${response_args[1]} + else + pash_redir_all_output_always_execute python3 -S "$RUNTIME_DIR//pash_runtime.py" --var_file "${pash_runtime_shell_variables_file}" "${pash_compiled_script_file}" "${pash_input_ir_file}" "$@" + pash_runtime_return_code=$? + fi + + pash_redir_output echo "$$: (2) Compiler exited with code: $pash_runtime_return_code" + if [ "$pash_runtime_return_code" -ne 0 ] && [ "$pash_assert_compiler_success_flag" -eq 1 ]; then + pash_redir_output echo "$$: ERROR: (2) Compiler failed with error code: $pash_runtime_return_code while assert_compiler_success was enabled! Exiting PaSh..." 
+ exit 1 + fi + + # store functions for distributed execution + if [ "$distributed_exec" -eq 1 ]; then + declared_functions="${PASH_TMP_PREFIX}/pash_$RANDOM$RANDOM$RANDOM" + declare -f > "$declared_functions" + export declared_functions + fi + + ## + ## (3) + ## + + ## Count the execution time + pash_exec_time_start=$(date +"%s%N") + + + ## If the compiler failed or if we dry_run the compiler, we have to run the sequential + if [ "$pash_runtime_return_code" -ne 0 ] || [ "$pash_dry_run_compiler_flag" -eq 1 ]; then + pash_script_to_execute="${pash_sequential_script_file}" + else + pash_script_to_execute="${pash_compiled_script_file}" + fi + + # ## + # ## (4) + # ## + + ## TODO: It might make sense to move these functions in pash_init_setup to avoid the cost of redefining them here. + function clean_up () { + if [ "$pash_daemon" -eq 1 ]; then + if [ "$parallel_script_time_start" == "None" ] || [ "$pash_profile_driven_flag" -eq 0 ]; then + exec_time="" + else + parallel_script_time_end=$(date +"%s%N") + parallel_script_time_ms=$(echo "scale = 3; ($parallel_script_time_end-$parallel_script_time_start)/1000000" | bc) + pash_redir_output echo " --- --- Execution time: $parallel_script_time_ms ms" + exec_time=$parallel_script_time_ms + fi + ## Send to daemon + msg="Exit:${process_id}|Time:$exec_time" + daemon_response=$(pash_communicate_daemon_just_send "$msg") + fi + } + + function run_parallel() { + trap clean_up SIGTERM SIGINT EXIT + if [ "$pash_profile_driven_flag" -eq 1 ]; then + parallel_script_time_start=$(date +"%s%N") + fi + source "$RUNTIME_DIR/pash_wrap_vars.sh" "$pash_script_to_execute" + internal_exec_status=$? 
+ final_steps + clean_up + (exit $internal_exec_status) + } + + ## We only want to execute (5) and (6) if we are in debug mode and it is not explicitly avoided + function final_steps() { + if [ "$PASH_DEBUG_LEVEL" -ne 0 ] && [ "$pash_avoid_pash_runtime_completion_flag" -ne 1 ]; then + ## + ## (5) + ## + + ## Prepare a file for the output shell variables to be saved in + pash_output_var_file=$("$RUNTIME_DIR/pash_ptempfile_name.sh" "$distro") + # pash_redir_output echo "$$: Output vars: $pash_output_var_file" + + ## Prepare a file for the `set` state of the inner shell to be output + pash_output_set_file=$("$RUNTIME_DIR/pash_ptempfile_name.sh" "$distro") + + source "$RUNTIME_DIR/pash_runtime_shell_to_pash.sh" "$pash_output_var_file" "$pash_output_set_file" + + ## + ## (6) + ## + source "$RUNTIME_DIR/pash_runtime_complete_execution.sh" + fi + } + + ## TODO: Add a check that `set -e` is not on + + ## Check if there are traps set, and if so do not execute in parallel + ## + ## TODO: This might be an overkill but is conservative + traps_set=$(trap) + pash_redir_output echo "$$: (2) Traps set: $traps_set" + # Don't fork if compilation failed. The script might have effects on the shell state. + if [ "$pash_runtime_return_code" -ne 0 ] || + ## If parallel pipelines is not enabled we shouldn't fork + [ "$pash_parallel_pipelines" -eq 0 ] || + ## If parallel pipelines is explicitly disabled (e.g., due to context), no forking + [ "$pash_disable_parallel_pipelines" -eq 1 ] || + ## If traps are set, no forking + [ ! -z "$traps_set" ] || + [ "$pash_daemon" -eq 0 ]; then + # Early clean up in case the script effects shell like "break" or "exec" + # This is safe because the script is run sequentially and the shell + # won't be able to move forward until this is finished + + ## Needed to clear up any past script time start execution times. + parallel_script_time_start=None + clean_up + source "$RUNTIME_DIR/pash_wrap_vars.sh" "$pash_script_to_execute" + pash_runtime_final_status=$? 
+ final_steps + else + # Should we redirect errors aswell? + # TODO: capturing the return state here isn't completely correct. + # Might need more complex design if this end up being a problem + run_parallel <&0 & + pash_runtime_final_status=$? + pash_redir_output echo "$$: (2) Running pipeline" + + ## Here we need to also revert the state back to bash state + ## since run_parallel will do that in a separate shell + ## + ## This happens right before we exit from pash_runtime! + + ## Recover the `set` state of the previous shell + # pash_redir_output echo "$$: (3) Previous BaSh set state: $pash_previous_set_status" + # pash_redir_output echo "$$: (3) PaSh-internal set state of current shell: $-" + pash_current_set_state=$- + source "$RUNTIME_DIR/pash_set_from_to.sh" "$pash_current_set_state" "$pash_previous_set_status" + pash_redir_output echo "$$: (5) Reverted to BaSh set state: $-" + + ## TODO: This might not be necessary + ## Recover the input arguments of the previous script + ## Note: We don't need to care about wrap_vars arguments because we have stored all of them already. + # + # This variable stores arguments as a space-separated stirng, so we + # need to unquote it and to split it into multiple strings by shell's + # field splitting. + # shellcheck disable=SC2086 + set -- $pash_input_args + pash_redir_output echo "$$: (5) Reverted to BaSh input arguments: $@" + + ## TODO: We probably need to exit with the exit code here or something! 
+ fi +fi + diff --git a/test/failing/pash_runtime_quick_abort.sh b/test/failing/pash_runtime_quick_abort.sh new file mode 100644 index 0000000..b3b541b --- /dev/null +++ b/test/failing/pash_runtime_quick_abort.sh @@ -0,0 +1,283 @@ +#!/bin/bash + +## File directory +RUNTIME_DIR=$(dirname "${BASH_SOURCE[0]}") + +still_alive() +{ + jobs -p | tr '\n' ' ' +} + +log() +{ + pash_redir_output echo "$$: (QAbort) " "$@" +} + +# Taken from: https://stackoverflow.com/a/20473191 +# list_include_item "10 11 12" "2" +list_include_item() { + local list="$1" + local item="$2" + if [[ $list =~ (^|[[:space:]])"$item"($|[[:space:]]) ]] ; then + # yes, list include item + result=0 + else + result=1 + fi + return $result +} + +## This spawns a buffer command to buffer inputs and outputs +## +## It writes the pid to stdout +spawn_eager() +{ + local name=$1 + local input=$2 + local output=$3 + local eager_file=$("$RUNTIME_DIR/pash_ptempfile_name.sh") + ## Note: Using eager actually leads to some deadlock issues. It must have to do with eagers behavior when + ## its input or output closes. + # "$RUNTIME_DIR/../runtime/eager" "$input" "$output" "$eager_file" /dev/null 2>/dev/null & + "$RUNTIME_DIR/../runtime/dgsh_tee.sh" "$input" "$output" -I -f /dev/null 2>/dev/null & + local eager_pid=$! + log "Spawned $name eager: $eager_pid with:" + log " -- IN: $input" + log " -- OUT: $output" + log " -- INTERM: $eager_file" + echo "$eager_pid" +} + +## Kills the process group that belongs to the given pgid +kill_pg() +{ + local pg_lead_pid=$1 + /bin/kill -15 "-${pg_lead_pid}" 2> /dev/null +} + +## TODO: Make sure that this waits for all processes in the process group to finish executing. 
+wait_pg() +{ + local pg_lead_pid=$1 + wait "$pg_lead_pid" 2> /dev/null +} + +kill_wait_pg() +{ + kill_pg "$1" + wait_pg "$1" +} + +## Solution Schematic: +## +## (A) (B) (C) (D) (E) +## stdin --- tee --- eager --- seq.sh --- eager --- OUT_SEQ +## \ (F) +## \--- eager --- PAR_IN +## +## (1) If compiler fails, or sequential is done executing: +## - cat OUT_SEQ > stdout +## +## (2) If compiler succeeds: +## - USR1 to reroute so that it redirects to /dev/null +## - PAR_IN redirect to par stdin. +## +## Simplifying assumptions: +## - Not worrying about stderr +## - Not worrying about other inputs at the moment (assuming they are files if compiler succeeds) +## - Not worrying about other outputs +## + assuming that the parallel implementation will overwrite them +## + Assuming that the DFG outputs are not appended +## +## TODO: A first TODO would be to check them in the compilation process +## +## TODO: An alternative TODO would be to let preprocessing give us information about them, allowing us to +## have a finer tuned execution plan depending on this information. For example, if we see that script +## has append to some file we can be carefull and buffer its output using eager. + +## NOTE: The intuition about why quick-abort works is that if the compilation succeeds, then the +## script is a DFG, meaning that we know exactly how it affects its environment after completing. +## Therefore, we can go back and stop the already running script without risking unsafe behavior. + +## TODO: We also want to avoid executing the compiled script if it doesn't contain any improvement. + +## TODO: Maybe the reroute needs to be put around (C) and not (D) + +## TODO: Improve the happy path (very fast sequential) execution time + +## TODO: Use reroute around dgsh_tees to make sure that they do not use storage unnecessarily +## (if their later command is done). 
+ +if [ "$pash_execute_flag" -eq 1 ]; then + # set -x + ## (A) Redirect stdin to `tee` + pash_tee_stdin=$("$RUNTIME_DIR/pash_ptempfile_name.sh") + mkfifo "$pash_tee_stdin" + ## The redirections below are necessary to ensure that the background `cat` reads from stdin. + { setsid cat > "$pash_tee_stdin" <&3 3<&- & } 3<&0 + pash_input_cat_pid=$! + log "Spawned input cat with pid: $pash_input_cat_pid" + + ## (B) A `tee` that duplicates input to both the sequential and parallel + pash_tee_stdout1=$("$RUNTIME_DIR/pash_ptempfile_name.sh") + pash_tee_stdout2=$("$RUNTIME_DIR/pash_ptempfile_name.sh") + mkfifo "$pash_tee_stdout1" "$pash_tee_stdout2" + tee "$pash_tee_stdout1" > "$pash_tee_stdout2" < "$pash_tee_stdin" & + + ## (C) The sequential input eager + pash_seq_eager_output=$("$RUNTIME_DIR/pash_ptempfile_name.sh") + mkfifo "$pash_seq_eager_output" + seq_input_eager_pid=$(spawn_eager "sequential input" "$pash_tee_stdout1" "$pash_seq_eager_output") + + ## (D) Sequential command + pash_seq_output=$("$RUNTIME_DIR/pash_ptempfile_name.sh") + mkfifo "$pash_seq_output" + setsid "$RUNTIME_DIR/pash_wrap_vars.sh" \ + "$pash_runtime_shell_variables_file" \ + "$pash_output_variables_file" \ + "$pash_output_set_file" \ + "$pash_sequential_script_file" \ + > "$pash_seq_output" < "$pash_seq_eager_output" & + pash_seq_pid=$! 
+ log "Sequential pid: $pash_seq_pid" + + ## (E) The sequential output eager + pash_seq_eager2_output=$("$RUNTIME_DIR/pash_ptempfile_name.sh") + mkfifo "$pash_seq_eager2_output" + seq_output_eager_pid=$(spawn_eager "sequential output" "$pash_seq_output" "$pash_seq_eager2_output") + + ## (F) Second eager + pash_par_eager_output=$("$RUNTIME_DIR/pash_ptempfile_name.sh") + mkfifo "$pash_par_eager_output" + par_eager_pid=$(spawn_eager "parallel input" "$pash_tee_stdout2" "$pash_par_eager_output") + + ## Run the compiler + setsid python3 "$RUNTIME_DIR/pash_runtime.py" "$pash_compiled_script_file" --var_file "$pash_runtime_shell_variables_file" "${@:2}" & + pash_compiler_pid=$! + log "Compiler pid: $pash_compiler_pid" + + ## Wait until one of the two (original script, or compiler) die + alive_pids=$(still_alive) + log "Still alive: $alive_pids" + while `list_include_item "$alive_pids" "$pash_seq_pid"` && `list_include_item "$alive_pids" "$pash_compiler_pid"` ; do + ## Wait for either of the two to complete + wait -n "$pash_seq_pid" "$pash_compiler_pid" + completed_pid_status=$? + log "Process exited with return code: $completed_pid_status" + alive_pids=$(still_alive) + log "Still alive: $alive_pids" + done + + ## If the sequential is still alive we want to see if the compiler succeeded + if `list_include_item "$alive_pids" "$pash_seq_pid"` ; then + pash_runtime_return_code=$completed_pid_status + log "Compilation was done first with return code: $pash_runtime_return_code" + + ## We only want to run the parallel if the compiler succeeded. + if [ "$pash_runtime_return_code" -eq 0 ]; then + + ## TODO: Is this necessary + ## Redirect the sequential output to /dev/null + cat "$pash_seq_eager2_output" > /dev/null & + seq_cat_pid=$! + log "seq to /dev/null cat pid: $seq_cat_pid" + + ## Kill the sequential process tree + log "Killing sequential pid: $pash_seq_pid..." + kill_pg "$pash_seq_pid" + kill_status=$? + wait_pg "$pash_seq_pid" + seq_exit_status=$? 
+ log "Sequential pid: $pash_seq_pid was killed successfully returning status $seq_exit_status." + log "Still alive: $(still_alive)" + + ## If kill failed it means it was already completed, + ## and therefore we do not need to run the parallel. + ## + ## TOOD: Enable this optimization + if true || [ "$kill_status" -eq 0 ]; then + ## (2) Run the parallel + log "Run parallel:" + log " -- Runtime vars: $pash_runtime_shell_variables_file" + log " -- Output vars: $pash_output_variables_file" + log " -- Output set: ${pash_output_set_file}" + log " -- Compiled script: ${pash_compiled_script_file}" + log " -- Input: $pash_par_eager_output" + + "$RUNTIME_DIR/pash_wrap_vars.sh" \ + "$pash_runtime_shell_variables_file" \ + "$pash_output_variables_file" \ + "$pash_output_set_file" \ + "$pash_compiled_script_file" \ + < "$pash_par_eager_output" & + ## Note: For some reason the above redirection used to create some issues, + ## but no more after we started using dgsh-tee + + pash_par_pid=$! + log "Parallel is running with pid: $pash_par_pid..." + # strace -p $pash_par_pid 2>> $PASH_REDIR + wait "$pash_par_pid" + pash_runtime_final_status=$? + log "Parallel is done with status: $pash_runtime_final_status" + else + ## TODO: Handle that case properly by enabling the optimization above. + log "ERROR: Shouldn't have reached that" + exit 1 + fi + else + ## If the compiler failed we just wait until the sequential is done. + + ## (1) Redirect the seq output to stdout + cat "$pash_seq_eager2_output" & + seq_output_cat_pid=$! + log "STDOUT cat pid: $seq_output_cat_pid" + + log "Waiting for sequential: $pash_seq_pid" + wait "$pash_seq_pid" + pash_runtime_final_status=$? + log "DONE Sequential: $pash_seq_pid exited with status: $pash_runtime_final_status" + + ## TODO: It is not clear if we also need to wait for the output cat to end. 
+ log "Waiting for sequential output cat: $seq_output_cat_pid" + wait "$seq_output_cat_pid" + log "DONE Waiting for sequential output cat: $seq_output_cat_pid" + + fi + else + pash_runtime_final_status=$completed_pid_status + log "Sequential was done first with return code: $pash_runtime_final_status" + + ## (1) Redirect the seq output to stdout + cat "$pash_seq_eager2_output" & + final_cat_pid=$! + log "STDOUT cat pid: $final_cat_pid" + + ## We need to kill the compiler to not get delayed log output + ## If this fails (meaning that compilation is done) we do not care + kill_wait_pg "$pash_compiler_pid" + + wait "$final_cat_pid" + fi + + ## TODO: Not clear if this is needed or if it doesn indeed kill all the + ## processes and cleans up everything properly + ## Kill the input process + log "Killing the input cat process: $pash_input_cat_pid" + kill_wait_pg "$pash_input_cat_pid" + # kill -9 $pash_input_cat_pid 2> /dev/null + # wait $pash_input_cat_pid 2> /dev/null + log "The input cat: $pash_input_cat_pid died!" + + + ## TODO: This (and the above) should not be needed actually, everything should be already done due to + ## sequential and parallel both having exited. 
+ ## Kill every spawned process + still_alive_pids="$(still_alive)" + log "Killing all the still alive: $still_alive_pids" + kill -15 "$still_alive_pids" 2> /dev/null + wait "$still_alive_pids" 2> /dev/null + log "All the alive pids died: $still_alive_pids" + + ## Return the exit code + (exit "$pash_runtime_final_status") +fi diff --git a/test/failing/pash_set_from_to.sh b/test/failing/pash_set_from_to.sh new file mode 100644 index 0000000..c27a884 --- /dev/null +++ b/test/failing/pash_set_from_to.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +from_set=${1?From set not given} +to_set=${2?To set not given} + +## Finds the difference of set variables (removing the c, s one since it cannot be actually set and unset) +pash_redir_output echo "From set: $from_set" +pash_redir_output echo "To set: $to_set" +IFS=',' read -r pash_set_to_remove pash_set_to_add <<<"$("$RUNTIME_LIBRARY_DIR/set-diff" "$from_set" "$to_set")" +pash_redir_output echo "To add: $pash_set_to_add" +pash_redir_output echo "To remove: $pash_set_to_remove" +pash_redir_all_output_always_execute set "-$pash_set_to_add" +pash_redir_all_output_always_execute set "+$pash_set_to_remove" diff --git a/test/failing/pash_source_declare_vars.sh b/test/failing/pash_source_declare_vars.sh new file mode 100755 index 0000000..06d654f --- /dev/null +++ b/test/failing/pash_source_declare_vars.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +## This sources variables that were produced from `declare -p` + +## TODO: Fix this to not source read only variables +## TODO: Does this work with arrays + +## TODO: Fix this to not source pash variables so as to not invalidate PaSh progress + +## TODO: Fix this filtering + +filter_vars_file() +{ + cat "$1" | grep -v "^declare -\([A-Za-z]\|-\)* \(pash\|BASH\|LINENO\|EUID\|GROUPS\)" +} + +## TODO: Error handling if the argument is empty? 
+if [ "$PASH_DEBUG_LEVEL" -eq 0 ]; then + > /dev/null 2>&1 "$@" +else + if [ "$PASH_REDIR" == '&2' ]; then + >&2 source <(filter_vars_file "$1") + else + >>"$PASH_REDIR" 2>&1 source <(filter_vars_file "$1") + fi +fi diff --git a/test/failing/pash_wrap_vars.sh b/test/failing/pash_wrap_vars.sh new file mode 100755 index 0000000..4b2139a --- /dev/null +++ b/test/failing/pash_wrap_vars.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +## File directory +RUNTIME_DIR=$(dirname "${BASH_SOURCE[0]}") + +script_source="$1" + +#ONLY WAY OUT IS TO TREAT EXEC in special way + +## Recover the `set` state of the previous shell +# pash_redir_output echo "$$: (3) Previous BaSh set state: $pash_previous_set_status" +# pash_redir_output echo "$$: (3) PaSh-internal set state of current shell: $-" +export pash_current_set_state=$- +source "$RUNTIME_DIR/pash_set_from_to.sh" "$pash_current_set_state" "$pash_previous_set_status" +pash_redir_output echo "$$: (3) Reverted to BaSh set state: $-" + +## Recover the input arguments of the previous script +## Note: We don't need to care about wrap_vars arguments because we have stored all of them already. +# +# This variable stores arguments as a space-separated string, so we need to +# unquote it and to split it into multiple strings by shell's field splitting. +# shellcheck disable=SC2086 +set -- $pash_input_args +pash_redir_output echo "$$: (3) Reverted to BaSh input arguments: $@" + +## Execute the script +pash_redir_output echo "$$: (4) Restoring previous exit code: ${pash_previous_exit_status}" +pash_redir_output echo "$$: (4) Will execute script in ${script_source}:" +pash_redir_output cat "${script_source}" + +## Note: We run the `exit` in a checked position so that we don't simply exit when we are in `set -e`. +if (exit "$pash_previous_exit_status") +then +{ + source "${script_source}" + internal_exec_status=$? 
+ ## Make sure that any input argument changes are propagated outside + export pash_input_args="$@" + (exit "$internal_exec_status") +} +else +{ + source "${script_source}" + internal_exec_status=$? + ## Make sure that any input argument changes are propagated outside + export pash_input_args="$@" + (exit "$internal_exec_status") +} +fi diff --git a/test/failing/pay_respects.sh b/test/failing/pay_respects.sh new file mode 100644 index 0000000..e566c8b --- /dev/null +++ b/test/failing/pay_respects.sh @@ -0,0 +1,13 @@ +#!/bin/sh + + +cat < results.db # 3.2GB +sqlite3 results.db <(echo $QUERY) | csvcut -c 4 pipelines.csv | awk '{$1=$1};1' | sort | uniq tee >( + # Schwartzian transform + awk -F'|' '{print NF,$0}' file | sort -nr | cut -d' ' -f2- > likely-longest-pipelines.txt + ) >( + tr '|' '\n' | awk '{$1=$1};1' | awk '{print $1}' | tr -cs 'A-Za-z' '\n' | sort | uniq -c | sort -rn > freq-commands.txt + ) diff --git a/test/failing/quickcheck.sh b/test/failing/quickcheck.sh new file mode 100755 index 0000000..4b17f27 --- /dev/null +++ b/test/failing/quickcheck.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +cd $PASH_TOP + +echo confirms the necessary components for running the artifact +echo +echo Git commit ID: $(git rev-parse --short HEAD) +echo \$PASH_TOP: $(echo $PASH_TOP) +echo pash executable: $PASH_TOP/pa.sh + +echo +$PASH_TOP/pa.sh --help + +echo "Testing graph generation" +$PASH_TOP/pa.sh -c 'echo Pash Installation is complete!' 
diff --git a/test/failing/remote_exec_graph.sh b/test/failing/remote_exec_graph.sh new file mode 100755 index 0000000..768aa8a --- /dev/null +++ b/test/failing/remote_exec_graph.sh @@ -0,0 +1,10 @@ +ir_file=$1 + +# pash_redir_output echo "Sending msg to worker manager: $message" +response=($(echo "Exec-Graph: $ir_file $declared_functions" | nc -U "$DSPASH_SOCKET")) +# pash_redir_output echo "Got response from worker manager: $response" + +status=${response[0]} #do something if false +script_to_execute=${response[1]} + +source "$script_to_execute" diff --git a/test/failing/run-experiment.sh b/test/failing/run-experiment.sh new file mode 100755 index 0000000..ca0a0c0 --- /dev/null +++ b/test/failing/run-experiment.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +eval_dir="$PASH_TOP/evaluation/buses/" +results_dir="${eval_dir}/results/" + +mkdir -p $results_dir + +for i in 1 2 3 4 +do + script="${eval_dir}/${i}.sh" + echo "Executing $script..." + + seq_output=/tmp/seq_output + pash_width_16_no_cat_split_output=/tmp/pash_16_no_cat_split_output + pash_width_16_output=/tmp/pash_16_output + + seq_time="${results_dir}/${i}_2_seq.time" + pash_width_16_no_cat_split_time="${results_dir}/${i}_16_distr_auto_split_fan_in_fan_out.time" + pash_width_16_time="${results_dir}/${i}_16_distr_auto_split.time" + + echo "Executing the script with bash..." + { time /bin/bash $script > $seq_output ; } 2> >(tee "${seq_time}" >&2) + + echo "Executing the script with pash -w 16 without the cat-split optimization (log in: /tmp/pash_16_log)" + { time $PASH_TOP/pa.sh -w 16 -d 1 --log_file /tmp/pash_16_no_cat_split_log --no_cat_split_vanish --output_time $script ; } 1> "$pash_width_16_no_cat_split_output" 2> >(tee "${pash_width_16_no_cat_split_time}" >&2) + echo "Checking for output equivalence..." 
+ diff -s $seq_output $pash_width_16_no_cat_split_output | head + + echo "Executing the script with pash -w 16 (log in: /tmp/pash_16_log)" + { time $PASH_TOP/pa.sh -w 16 -d 1 --log_file /tmp/pash_16_log --output_time $script ; } 1> "$pash_width_16_output" 2> >(tee "${pash_width_16_time}" >&2) + echo "Checking for output equivalence..." + diff -s $seq_output $pash_width_16_output | head + +done diff --git a/test/failing/run.par.sh b/test/failing/run.par.sh new file mode 100644 index 0000000..7be2127 --- /dev/null +++ b/test/failing/run.par.sh @@ -0,0 +1,601 @@ +#!/bin/bash + +# time: print real in seconds, to simplify parsing +TIMEFORMAT="%3R" # %3U %3S" + +if [[ -z "$PASH_TOP" ]]; then + echo "Need to provide PASH_TOP, possibly $(git rev-parse --show-toplevel)" 1>&2 + exit 1 +fi + +source "$PASH_TOP/scripts/utils.sh" + +oneliners_pash(){ + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "oneliners/$times_file" ]; then + echo "skipping oneliners/$times_file" + return 0 + fi + + cd oneliners/ + + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + scripts_inputs=( + "nfa-regex;100M.txt" + "sort;3G.txt" + "top-n;1G.txt" + "wf;3G.txt" + "spell;1G.txt" + "diff;3G.txt" + "bi-grams;1G.txt" + "set-diff;3G.txt" + "sort-sort;1G.txt" + "shortest-scripts;all_cmdsx100.txt" + ) + + touch "$times_file" + echo executing one-liners with pash $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + for script_input in ${scripts_inputs[@]} + do + IFS=";" read -r -a script_input_parsed <<< "${script_input}" + script="${script_input_parsed[0]}" + input="${script_input_parsed[1]}" + source_var $1 $input + printf -v pad %30s + padded_script="${script}.sh:${pad}" + padded_script=${padded_script:0:30} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + 
single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + + cd .. +} + +unix50_pash(){ + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "unix50/${times_file}" ]; then + echo "skipping unix50/${times_file}" + return 0 + fi + + cd unix50/ + + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + touch "$times_file" + echo executing Unix50 $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + source_var $1 + + for number in `seq 36` + do + script="${number}" + + printf -v pad %20s + padded_script="${script}.sh:${pad}" + padded_script=${padded_script:0:20} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + cd .. 
+} + +web-index_pash(){ + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "web-index/${times_file}" ]; then + echo "skipping web-index/${times_file}" + return 0 + fi + + cd web-index/ + + install_deps_source_setup $1 + + source_var $1 + + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + touch "$times_file" + echo executing web index with pash $(date) | tee -a "$times_file" + outputs_file="${outputs_dir}/web-index.${outputs_suffix}" + pash_log="${pash_logs_dir}/web-index.pash.log" + single_time_file="${outputs_dir}/web-index.${time_suffix}" + + ## FIXME: There is a bug when running with r_split at the moment. r_wrap cannot execute bash_functions + echo -n "web-index.sh:" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" web-index.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + cd .. +} + +max-temp_pash(){ + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "max-temp/${times_file}" ]; then + echo "skipping max-temp/${times_file}" + return 0 + fi + cd max-temp/ + + install_deps_source_setup + + source_var + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + touch "$times_file" + echo executing max temp with pash $(date) | tee -a "$times_file" + outputs_file="${outputs_dir}/temp-analytics.${outputs_suffix}" + pash_log="${pash_logs_dir}/temp-analytics.pash.log" + single_time_file="${outputs_dir}/temp-analytics.${time_suffix}" + + echo -n "temp-analytics.sh:" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" temp-analytics.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + cd .. 
+} + +analytics-mts_pash(){ + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "analytics-mts/${times_file}" ]; then + echo "skipping analytics-mts/${times_file}" + return 0 + fi + + cd analytics-mts/ + + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + touch "$times_file" + echo executing MTS analytics with pash $(date) | tee -a "$times_file" + echo '' >> "$times_file" + ## FIXME 5.sh is not working yet + for number in `seq 4` + do + script="${number}" + + printf -v pad %20s + padded_script="${script}.sh:${pad}" + padded_script=${padded_script:0:20} + source_var $1 + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + cd .. 
+} + +nlp_pash(){ + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "nlp/${times_file}" ]; then + echo "skipping nlp/${times_file}" + return 0 + fi + + cd nlp/ + + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + names_scripts=( + "1syllable_words;6_4" + "2syllable_words;6_5" + "4letter_words;6_2" + "bigrams_appear_twice;8.2_2" + "bigrams;4_3" + "compare_exodus_genesis;8.3_3" + "count_consonant_seq;7_2" + # "count_morphs;7_1" + "count_trigrams;4_3b" + "count_vowel_seq;2_2" + "count_words;1_1" + "find_anagrams;8.3_2" + "merge_upper;2_1" + "sort;3_1" + "sort_words_by_folding;3_2" + "sort_words_by_num_of_syllables;8_1" + "sort_words_by_rhyming;3_3" + # "trigram_rec;6_1" + "uppercase_by_token;6_1_1" + "uppercase_by_type;6_1_2" + "verses_2om_3om_2instances;6_7" + "vowel_sequencies_gr_1K;8.2_1" + "words_no_vowels;6_3" + ) + + touch "$times_file" + echo executing Unix-for-nlp with pash $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + source_var $1 + + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + cd .. 
+} + + + +# everything under this line is WIP + + +dgsh_pash(){ + + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "dgsh/${times_file}" ]; then + echo "skipping dgsh/${times_file}" + return 0 + fi + + cd dgsh/ + + cd input/ + ./setup.sh + cd .. + + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + names_scripts=( + "compressionbench;1" + "gitstats;2" + "cmetrics;3" + "dublicatefiles;4" + "highlightwords;5" + # "wordproperties;6" + # "weatherreport;7" + "textproperties;8" + "staticsymbols;9" + # "hierarchymap;10" + # "plotgit;11" + "parallelword;12" + # "venuauthor;13" + # "2dfourier;14" + # "nuclear;15" + # "fft;16" + "reordercol;17" + "dirlisting;18" + ) + + touch "$times_file" + echo executing DGSH with pash $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + export VOC=/usr/share/dict/words + export IN=$PASH_TOP/evaluation/benchmarks/dgsh/input/ + export FULL=$IN/dblp.xml + export MINI=$IN/mini.xml + export OUT=$PASH_TOP/evaluation/benchmarks/dgsh/input/ + export BIN=/usr/local/bin + + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + export IN="$PASH_TOP/evaluation/benchmarks/dgsh/input/genesis" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + cd .. 
+} + + +aliases_pash(){ + + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "aliases/${times_file}" ]; then + echo "skipping aliases/${times_file}" + return 0 + fi + + cd aliases/ + + cd input/ + ./setup.sh + cd .. + + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + names_scripts=( + "compressionbench;1" + "gitstats;2" + "cmetrics;3" + "dublicatefiles;4" + "highlightwords;5" + # "wordproperties;6" + # "weatherreport;7" + "textproperties;8" + "staticsymbols;9" + # "hierarchymap;10" + # "plotgit;11" + "parallelword;12" + # "venuauthor;13" + # "2dfourier;14" + # "nuclear;15" + # "fft;16" + "reordercol;17" + "dirlisting;18" + ) + + touch "$times_file" + echo executing aliases with pash $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + export IN="$PASH_TOP/evaluation/benchmarks/aliases/input/genesis" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + cd .. +} + + + +posh_pash(){ + + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "posh/${times_file}" ]; then + echo "skipping posh/${times_file}" + return 0 + fi + + cd posh/ + + cd input/ + ./setup.sh + cd .. 
+ + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + names_scripts=( + "discat;1" + "convert;2" + "raytracing;3" + # "zannotate;4" where is zannotate binary + ) + + touch "$times_file" + echo executing posh with pash $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + export OUT=$PASH_TOP/evaluation/benchmarks/posh/input/output + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + cd .. +} + +bio_pash(){ + + times_file="par.res" + outputs_suffix="par.out" + time_suffix="par.time" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "bio/${times_file}" ]; then + echo "skipping bio/${times_file}" + return 0 + fi + + cd bio/ + + cd input/ + ./setup.sh + cd .. 
+ + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + names_scripts=( + "bio4.sh;bio4" + ) + + touch "$times_file" + echo executing bio with pash $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + export OUT=$PASH_TOP/evaluation/benchmarks/bio/input/output + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + cd .. +} + +dependency_untangling_pash() { + times_file="par.res" + outputs_suffix="par.out" + outputs_dir="outputs" + pash_logs_dir="pash_logs" + width=16 + if [ -e "dependency_untangling/${times_file}" ]; then + echo "skipping dependency_untangling/${times_file}" + return 0 + fi + + cd dependency_untangling/ + + rm -rf input/output/ + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + mkdir -p "$pash_logs_dir" + + names_scripts=( + "MediaConv1;img_convert" + "MediaConv2;to_mp3" + "Program_Inference;proginf" + "LogAnalysis1;nginx" + "LogAnalysis2;pcap" + "Genomics_Computation;genomics" + "AurPkg;pacaur" + "FileEnc1;compress_files" + "FileEnc2;encrypt_files" + ) + + touch "$times_file" + echo executing dependency_untangling with pash $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + source_var + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + 
padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + pash_log="${pash_logs_dir}/${script}.pash.log" + single_time_file="${outputs_dir}/${script}.${time_suffix}" + + echo -n "${padded_script}" | tee -a "$times_file" + { time "$PASH_TOP/pa.sh" -w "${width}" $PASH_FLAGS --log_file "${pash_log}" ${script}.sh > "$outputs_file"; } 2> "${single_time_file}" + cat "${single_time_file}" | tee -a "$times_file" + done + cd .. +} diff --git a/test/failing/run.seq.sh b/test/failing/run.seq.sh new file mode 100755 index 0000000..a561abd --- /dev/null +++ b/test/failing/run.seq.sh @@ -0,0 +1,498 @@ +#!/bin/bash + +# FIXME: skip running if output file exists (using tee?) + +## FIX: We should not have a set -e in a script that is supposed to be sourced. +# set -e + +# time: print real in seconds, to simplify parsing +TIMEFORMAT="%3R" # %3U %3S" + +if [[ -z "$PASH_TOP" ]]; then + echo "Need to provide PASH_TOP, possibly $(git rev-parse --show-toplevel)" 1>&2 + exit 1 +fi + +source "$PASH_TOP/scripts/utils.sh" + +oneliners(){ + seq_times_file="seq.res" + seq_outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "oneliners/$seq_times_file" ]; then + echo "skipping oneliners/$seq_times_file" + return 0 + fi + + cd oneliners/ + # we need to download the whole dataset to generate the small input as well + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + + scripts_inputs=( + "nfa-regex;100M.txt" + "sort;3G.txt" + "top-n;1G.txt" + "wf;3G.txt" + "spell;1G.txt" + "diff;3G.txt" + "bi-grams;1G.txt" + "set-diff;3G.txt" + "sort-sort;1G.txt" + "shortest-scripts;all_cmdsx100.txt" + ) + + touch "$seq_times_file" + echo executing one-liners $(date) | tee -a "$seq_times_file" + echo '' >> "$seq_times_file" + + for script_input in ${scripts_inputs[@]} + do + IFS=";" read -r -a script_input_parsed <<< "${script_input}" + script="${script_input_parsed[0]}" + input="${script_input_parsed[1]}" + # source the 
required variables from setup.sh + source_var $1 $input + printf -v pad %30s + padded_script="${script}.sh:${pad}" + padded_script=${padded_script:0:30} + + seq_outputs_file="${outputs_dir}/${script}.${seq_outputs_suffix}" + + echo "${padded_script}" $({ time ./${script}.sh > "$seq_outputs_file"; } 2>&1) | tee -a "$seq_times_file" + done + + cd .. +} + +unix50(){ + times_file="seq.res" + outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "unix50/${times_file}" ]; then + echo "skipping unix50/${times_file}" + return 0 + fi + + cd unix50/ + + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + + touch "$times_file" + echo executing Unix50 $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + source_var $1 + + for number in `seq 36` + do + script="${number}" + + printf -v pad %20s + padded_script="${script}.sh:${pad}" + padded_script=${padded_script:0:20} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + + echo "${padded_script}" $({ time ./${script}.sh > "$outputs_file"; } 2>&1) | tee -a "$times_file" + done + cd .. +} + +web-index(){ + times_file="seq.res" + outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "web-index/${times_file}" ]; then + echo "skipping web-index/${times_file}" + return 0 + fi + + cd web-index/ + + install_deps_source_setup $1 + + source_var $1 + + mkdir -p "$outputs_dir" + + touch "$times_file" + echo executing web index $(date) | tee -a "$times_file" + outputs_file="${outputs_dir}/web-index.${outputs_suffix}" + echo web-index.sh: $({ time ./web-index.sh > "${outputs_file}"; } 2>&1) | tee -a "$times_file" + cd .. 
+} + +max-temp(){ + times_file="seq.res" + outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "max-temp/${times_file}" ]; then + echo "skipping max-temp/${times_file}" + return 0 + fi + cd max-temp/ + + install_deps_source_setup + + source_var + mkdir -p "$outputs_dir" + touch "$times_file" + echo executing max temp $(date) | tee -a "$times_file" + outputs_file="${outputs_dir}/temp-analytics.${outputs_suffix}" + echo max-temp.sh: $({ time ./temp-analytics.sh > "${outputs_file}"; } 2>&1) | tee -a "$times_file" + cd .. +} + +analytics-mts(){ + times_file="seq.res" + outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "analytics-mts/${times_file}" ]; then + echo "skipping analytics-mts/${times_file}" + return 0 + fi + + cd analytics-mts/ + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + + touch "$times_file" + echo executing MTS analytics $(date) | tee -a "$times_file" + echo '' >> "$times_file" + ## FIXME 5.sh is not working yet + for number in `seq 4` + do + script="${number}" + + printf -v pad %20s + padded_script="${script}.sh:${pad}" + padded_script=${padded_script:0:20} + # select the respective input + source_var $1 + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + + echo "${padded_script}" $({ time ./${script}.sh > "$outputs_file"; } 2>&1) | tee -a "$times_file" + done + cd .. 
+} + +nlp(){ + times_file="seq.res" + outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "nlp/${times_file}" ]; then + echo "skipping nlp/${times_file}" + return 0 + fi + + cd nlp/ + + install_deps_source_setup $1 + + mkdir -p "$outputs_dir" + + names_scripts=( + "1syllable_words;6_4" + "2syllable_words;6_5" + "4letter_words;6_2" + "bigrams_appear_twice;8.2_2" + "bigrams;4_3" + "compare_exodus_genesis;8.3_3" + "count_consonant_seq;7_2" + # "count_morphs;7_1" + "count_trigrams;4_3b" + "count_vowel_seq;2_2" + "count_words;1_1" + "find_anagrams;8.3_2" + "merge_upper;2_1" + "sort;3_1" + "sort_words_by_folding;3_2" + "sort_words_by_num_of_syllables;8_1" + "sort_words_by_rhyming;3_3" + # "trigram_rec;6_1" + "uppercase_by_token;6_1_1" + "uppercase_by_type;6_1_2" + "verses_2om_3om_2instances;6_7" + "vowel_sequencies_gr_1K;8.2_1" + "words_no_vowels;6_3" + ) + + touch "$times_file" + echo executing Unix-for-nlp $(date) | tee -a "$times_file" + echo '' >> "$times_file" + + source_var $1 + + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + + echo "${padded_script}" $({ time ./${script}.sh > "$outputs_file"; } 2>&1) | tee -a "$times_file" + done + cd .. +} + +aliases(){ + seq_times_file="seq.res" + outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "aliases/${seq_times_file}" ]; then + echo "skipping aliases/${seq_times_file}" + return 0 + fi + + cd aliases/ + + cd input/ + ./setup.sh + ./install-deps.sh + cd .. 
+ + mkdir -p "$outputs_dir" + + names_scripts=( + #"tomp3;1.tomp3" + #"unrtf;2.unrtf" + #"convertjpg;3.resiz" + # "gitkernel;4.gitkernel" # needs complex grep command + "apachelog;5.apachelog" + "msg;6.msg" + "nginx;7.nginx" + "varlog;8.varlog" + ) + + touch "$seq_times_file" + echo executing aliases $(date) | tee -a "$seq_times_file" + echo '' >> "$seq_times_file" + + export WAV=$PASH_TOP/evaluation/benchmarks/aliases/input/wav + export JPG=$PASH_TOP/evaluation/benchmarks/aliases/input/jpg + export RTF=$PASH_TOP/evaluation/benchmarks/aliases/input/rtf + export GIT=$PASH_TOP/evaluation/benchmarks/aliases/input/linux + export IN=$PASH_TOP/evaluation/benchmarks/aliases/input/ + export OUT=$PASH_TOP/evaluation/benchmarks/aliases/input/out + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + + echo "${padded_script}" $({ time ./${script}.sh > "$outputs_file"; } 2>&1) | tee -a "$seq_times_file" + done + cd .. +} + +bio() { + seq_times_file="seq.res" + outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "bio/${seq_times_file}" ]; then # skip only when bio's own results file already exists + echo "skipping bio/${seq_times_file}" + return 0 + fi + + cd bio/ + + cd input/ + ./setup.sh + cd .. 
+ + mkdir -p "$outputs_dir" + + names_scripts=( + "bio4.sh;bio4" + ) + + touch "$seq_times_file" + echo executing bio $(date) | tee -a "$seq_times_file" + echo '' >> "$seq_times_file" + + export IN=$PASH_TOP/evaluation/benchmarks/bio/ + # takes too many files to download + export IN_N=input_all.txt + export OUT=$PASH_TOP/evaluation/benchmarks/bio/output + + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + echo "${padded_script}" $({ time ./${script}.sh > "$outputs_file"; } 2>&1) | tee -a "$seq_times_file" + done + cd .. +} + +# everything under this line is WIP + +dgsh() { + seq_times_file="seq.res" + outputs_suffix="seq.out" # name must match the ${outputs_suffix} read when building outputs_file + outputs_dir="outputs" + if [ -e "dgsh/$seq_times_file" ]; then + echo "skipping dgsh/$seq_times_file" + return 0 + fi + + cd dgsh + + cd ./input/ + ./setup.sh -full + cd .. 
+ + mkdir -p "$outputs_dir" + + names_scripts=( + "compressionbench;1" + "gitstats;2" + "cmetrics;3" + "dublicatefiles;4" + "highlightwords;5" + # "wordproperties;6" + # "weatherreport;7" + "textproperties;8" + "staticsymbols;9" + # "hierarchymap;10" + # "plotgit;11" + "parallelword;12" + # "venuauthor;13" + # "2dfourier;14" + # "nuclear;15" + # "fft;16" + "reordercol;17" + "dirlisting;18" + ) + + + touch "$seq_times_file" + echo executing DGSH $(date) | tee -a "$seq_times_file" + echo '' >> "$seq_times_file" + + export VOC=/usr/share/dict/words + export IN=$PASH_TOP/evaluation/benchmarks/dgsh/input + export FULL=$IN/dblp.xml + export MINI=$IN/mini.xml + export OUT=$PASH_TOP/evaluation/benchmarks/dgsh/input + export BIN=/usr/local/bin + + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + echo "${padded_script}" $({ time ./${script}.sh > "$outputs_file"; } 2>&1) | tee -a "$seq_times_file" + done + cd .. +} + +posh() { + seq_times_file="seq.res" + outputs_suffix="seq.out" # name must match the ${outputs_suffix} read when building outputs_file + outputs_dir="outputs" + if [ -e "posh/$seq_times_file" ]; then + echo "skipping posh/$seq_times_file" + return 0 + fi + + cd posh + + cd ./input/ + ./setup.sh -full + cd .. 
+ + mkdir -p "$outputs_dir" + + names_scripts=( + "discat;1" + "convert;2" + "raytracing;3" + # "zannotate;4" where is zannotate binary + ) + + touch "$seq_times_file" + echo executing posh $(date) | tee -a "$seq_times_file" + echo '' >> "$seq_times_file" + + export OUT=$PASH_TOP/evaluation/benchmarks/posh/input/output + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + echo "${padded_script}" $({ time ./${script}.sh > "$outputs_file"; } 2>&1) | tee -a "$seq_times_file" + done + cd .. +} + +dependency_untangling() { + seq_times_file="seq.res" + outputs_suffix="seq.out" + outputs_dir="outputs" + if [ -e "dependency_untangling/${seq_times_file}" ]; then + echo "skipping dependency_untangling/${seq_times_file}" + return 0 + fi + + cd dependency_untangling/ + + rm -rf input/output + install_deps_source_setup $1 + mkdir -p "$outputs_dir" + + names_scripts=( + "MediaConv1;img_convert" + "MediaConv2;to_mp3" + "Program_Inference;proginf" + "LogAnalysis1;nginx" + "LogAnalysis2;pcap" + "Genomics_Computation;genomics" + "AurPkg;pacaur" + "FileEnc1;compress_files" + "FileEnc2;encrypt_files" + ) + + touch "$seq_times_file" + echo executing dependency_untangling $(date) | tee -a "$seq_times_file" + echo '' >> "$seq_times_file" + source_var + for name_script in ${names_scripts[@]} + do + IFS=";" read -r -a name_script_parsed <<< "${name_script}" + name="${name_script_parsed[0]}" + script="${name_script_parsed[1]}" + printf -v pad %30s + padded_script="${name}.sh:${pad}" + padded_script=${padded_script:0:30} + outputs_file="${outputs_dir}/${script}.${outputs_suffix}" + echo "${padded_script}" $({ time ./${script}.sh > "$outputs_file"; } 2>&1) | tee -a "$seq_times_file" + done + cd .. 
+} diff --git a/test/failing/run.sh b/test/failing/run.sh new file mode 100755 index 0000000..718a604 --- /dev/null +++ b/test/failing/run.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +# time: print real in seconds, to simplify parsing +TIMEFORMAT="%3R" # %3U %3S" + +if [[ -z "$PASH_TOP" ]]; then + echo "Need to provide PASH_TOP, possibly $(git rev-parse --show-toplevel)" 1>&2 + exit 1 +fi + +eval_dir="$PASH_TOP/evaluation/benchmarks/runtime-overhead/" + +bash_outputs_suffix="bash.out" +par_outputs_suffix="par.out" +outputs_dir="$eval_dir/outputs" +pash_logs_dir="$eval_dir/pash_logs" + +mkdir -p "$outputs_dir" +mkdir -p "$pash_logs_dir" + +times_file="$eval_dir/time.res" + +script_name="for-echo" +script="${script_name}.sh" + +# The number of loop iterations +export N=100 + +printf -v pad %40s + +## Bash +bash_outputs_file="${outputs_dir}/${script_name}.${bash_outputs_suffix}" +config="bash:${pad}" +config=${config:0:40} +echo "${config}" $({ time bash ${script} > "$bash_outputs_file" ; } 2>&1) | tee "$times_file" + +run_pash() +{ + local config="$1" + local PASH_FLAGS="$2" + config_padded="$config:${pad}" + config_padded=${config_padded:0:40} + par_outputs_file="${outputs_dir}/${script_name}.${config}.${par_outputs_suffix}" + pash_log="${pash_logs_dir}/${script_name}.${config}.pash.log" + + ## We don't want -d 1 since it adds overhead! 
+ echo "${config_padded}" $({ time "$PASH_TOP/pa.sh" $PASH_FLAGS --log_file "${pash_log}" ${script} > "$par_outputs_file"; } 2>&1) | tee -a "$times_file" + diff -q "$bash_outputs_file" "$par_outputs_file" +} + +config="PaSh_no_daemon" +PASH_FLAGS="--no_daemon" + +run_pash "$config" "$PASH_FLAGS" + +config="PaSh_daemon_bash_mirror" +PASH_FLAGS="--expand_using_bash_mirror" + +run_pash "$config" "$PASH_FLAGS" + +config="PaSh_daemon" +PASH_FLAGS="" + +run_pash "$config" "$PASH_FLAGS" + +config="PaSh_daemon_fifos" +PASH_FLAGS="--daemon_communicates_through_unix_pipes" + +run_pash "$config" "$PASH_FLAGS" + +config="PaSh_daemon_par_pipelines" +PASH_FLAGS="--parallel_pipelines" + +run_pash "$config" "$PASH_FLAGS" + +config="PaSh_daemon_par_pipelines_fifos" +PASH_FLAGS="--parallel_pipelines --daemon_communicates_through_unix_pipes" + +run_pash "$config" "$PASH_FLAGS" diff --git a/test/failing/run_alias.sh b/test/failing/run_alias.sh new file mode 100644 index 0000000..7e2b037 --- /dev/null +++ b/test/failing/run_alias.sh @@ -0,0 +1,25 @@ +# parses the generated.file, and creates a log of the commands that were executed +# successfully (succ.txt) and the failed ones (err.txt) + +cd $PASH_TOP/evaluation/scripts/input/ +# we could read the file iteratively with IFS, but the environment was affected +IFS=$'\r\n' GLOBIGNORE='*' command eval 'cmd_array=($(cat generated.file))' +lc=$(cat generated.file | wc -l) +for i in $(seq 0 $lc) +do + # get the entry from the array + p=${cmd_array[$i]} + # add a timeout to our script + timeout --signal=SIGINT 50s /bin/bash -e $p >> /dev/null 2>&1 #./cmd.sh #eval "bash ./cmd.sh" + ## get status ## + status=$? + if [ $status -eq 0 ]; then + echo $p >> $PASH_TOP/evaluation/scripts/input/succ.txt + else + echo $p >> $PASH_TOP/evaluation/scripts/input/err.txt + fi + if ! 
((i % 100)); then + echo $i + fi +done +echo "Done" diff --git a/test/failing/run_all.sh b/test/failing/run_all.sh new file mode 100755 index 0000000..d6d26f0 --- /dev/null +++ b/test/failing/run_all.sh @@ -0,0 +1,109 @@ +#!/bin/bash +RES_FOLDER=${PWD}/eval_results/run +# go to benchmark directory +cd ${PASH_TOP}/evaluation/benchmarks +# use the small input for the benchmarks +setup_flags='--small' +if [ "$1" = "--full" ]; then + setup_flags="--full" + echo "Using full input" +elif [ "$1" = "--small" ] || [ "$#" -eq "0" ]; then + echo "Using small input" +fi + +# run all the scripts using bash +run_bash() { + ## This script is necessary to ensure that sourcing happens with bash + source run.seq.sh + bench_len=$((${#PASH_BENCHMARK[@]} -1)) + array_len=$((${#PASH_ALL_FLAGS[@]} -1)) + for i in $(seq 0 $bench_len) + do + export IN= + export IN_PRE= + bench=${PASH_BENCHMARK[$i]} + echo 'Running bash:' ${bench} + bdir=${RES_FOLDER}/bash/${bench} + mkdir -p ${bdir} + # run the benchmark + ${bench} ${setup_flags} + # copy the time file + mv ${bench}/seq.res ${bdir}/ + done +} + +# run all the scripts using different configurations of PaSh JIT/PaSh AOT +run_bench() { + ## This script is necessary to ensure that sourcing happens with bash + source run.par.sh + bench_len=$((${#PASH_BENCHMARK[@]} -1)) + array_len=$((${#PASH_ALL_FLAGS[@]} -1)) + for i in $(seq 0 $bench_len) + do + bench=${PASH_BENCHMARK[$i]} + # remove all the time files + for j in $(seq 0 $array_len) + do + export IN= + export IN_PRE= + export mode=${PASH_MODE[$j]} + export PASH_FLAGS=${PASH_ALL_FLAGS[$j]} + pdir=${RES_FOLDER}/${mode}/${bench} + ${bench}_pash ${setup_flags} + mkdir -p ${pdir} + # move the folder to our dest + rm -rf ${bench}/outputs + # copy the time file + mv ${bench}/par.res ${pdir}/ + done + done +} + +run_all_benchmarks() { + # generate output folder for each run + export RES_FOLDER=$1 + # clean previous runs + rm -rf ${RES_FOLDER} + mkdir -p ${RES_FOLDER} + cd 
${PASH_TOP}/evaluation/benchmarks + # remove all res files from previous runs + find . -type d -name "outputs" 2> /dev/null | xargs rm -rf + # do not remove any input from the node_modules dataset + find . -type d -not -path "*/node_modules/*" -name "output" 2> /dev/null | xargs rm -rf + find . -type d -name "pash_logs" 2> /dev/null | xargs rm -rf + find . -type f -name "*.res" 2> /dev/null | xargs rm -f + # start preparing from execution + export PASH_ALL_FLAGS=(" " + "--r_split --dgsh_tee --r_split_batch_size 1000000 --parallel_pipelines --profile_driven") + export PASH_BENCHMARK=("oneliners" "unix50" "analytics-mts" "nlp" "max-temp" "web-index" "dependency_untangling") + export PASH_MODE=("pash_aot" + "pash_jit") + + echo 'Running all bash benchmarks' + time run_bash + echo 'Running PaSh JIT/PaSh AOT benchmarks' + time run_bench + + ##### Figure 6 + export PASH_ALL_FLAGS=("--r_split --dgsh_tee --r_split_batch_size 1000000" + "--r_split --dgsh_tee --r_split_batch_size 1000000 --parallel_pipelines" ) + export PASH_BENCHMARK=("nlp" "max-temp" "dependency_untangling") + export PASH_MODE=("pash_jit_no_prof_no_du" + "pash_jit_no_prof") + + time run_bench + + ##### Figure 7 + export PASH_ALL_FLAGS=( + #"--dgsh_tee # omitted until it's fixed + "--parallel_pipelines --profile_driven" ) + export PASH_BENCHMARK=("oneliners" "unix50" "analytics-mts" "max-temp" "web-index") + export PASH_MODE=("pash_jit_no_comm") + + time run_bench + + # kill the hanging processes + pkill -f cat +} +# run all the tests and store the results $RES_FOLDER +run_all_benchmarks ${RES_FOLDER} diff --git a/test/failing/run_all_benchmarks_ci.sh b/test/failing/run_all_benchmarks_ci.sh new file mode 100755 index 0000000..99d7f8f --- /dev/null +++ b/test/failing/run_all_benchmarks_ci.sh @@ -0,0 +1,88 @@ +#!/bin/bash +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} +## This script is necessary to ensure that sourcing happens with bash +source run.seq.sh 
+source run.par.sh +# total tests +total=0 +# number of tests that passed +passed=0 +compare_outputs(){ + dir=$1 + outputs=$(ls $dir | grep "seq" | sed 's/.seq.out$//') + for out in $outputs; + do + seq_output="${dir}/${out}.seq.out" + pash_output="${dir}/${out}.par.out" + res=$(diff -q "$seq_output" "$pash_output") + if [[ "${res}" -eq "" ]]; then + passed=$((passed + 1)) + fi + total=$((total + 1)) + done +} + +EXPERIMENTAL=1 +if [ "$EXPERIMENTAL" -eq 1 ]; then + configurations=( + # "" # Commenting this out since the tests take a lot of time to finish + "--r_split" + "--dgsh_tee" + "--r_split --dgsh_tee" + # "--speculation quick_abort" + ) +else + configurations=( + "" + ) +fi + + +n_inputs=( + 2 + 8 + 16 +) +# Array to store all the execution results +EXEC=() +# cleanup +rm -f $1/*.res +# run bash +$1 > /dev/null +# execute the bash script and fetch the script_name, time +b=$(cat $1/seq.res | awk '{if (NR>2) {print $1","$2}}' | sed 's\.sh:\\g') +labels="group,Bash" +for conf in "${configurations[@]}"; do + for n_in in "${n_inputs[@]}"; do + # cleanup all the files generated by pash + trash=$(find /tmp/ -group dkarnikis | grep sg | xargs -n1 rm -f 2> /dev/null) + # on each run, clean all the res files + rm -f $1/par.res + # re-export the new config + export PASH_FLAGS="${conf} -w ${n_in}" + # append the new labels for the plot + labels="${labels},${conf}_${n_in}" + # execute the pash with the new config + $1_pash > /dev/null + res=$(awk '{if (NR>2) {print $2}}' $1/par.res) + # store the results + EXEC+=("${res}") + done +done +# concat all the results and merge them to create the final data for plotting +labels=$(echo $labels | sed 's\--\\g' | sed -e 's/ /_/g') +res="$b" +for i in "${EXEC[@]}" +do + res=$(paste -d'@' <(echo "$res") <(echo "$i")) +done +# write the labels to the file +echo "$labels" > results.time +# write the data formatted +echo -e "$res" | sed 's\@\,\g' >> results.time +# compare the results +compare_outputs "$1/outputs" +# this is going to be 
written on the UI output log +cat results.time +# this is going to be written on the UI output log / CLI output +echo "Summary: ${passed}/${total} tests passed." diff --git a/test/failing/run_parser_on_scripts.sh b/test/failing/run_parser_on_scripts.sh new file mode 100755 index 0000000..42996ee --- /dev/null +++ b/test/failing/run_parser_on_scripts.sh @@ -0,0 +1,10 @@ +#! /bin/bash + +SCRIPTS_DIR="../scripts/" + +for script in "$SCRIPTS_DIR"*.sh +do + echo "Parsing $script..." + output=${script/"scripts"/"scripts/json"}.json + ./parse_to_json.native "$script" > "$output" +done diff --git a/test/failing/safe6.sh b/test/failing/safe6.sh new file mode 100644 index 0000000..6518f90 --- /dev/null +++ b/test/failing/safe6.sh @@ -0,0 +1 @@ +x=5 ; { x=6 ; echo $x; } | { x=7; echo $x; } diff --git a/test/failing/setup-pash.sh b/test/failing/setup-pash.sh new file mode 100755 index 0000000..7194507 --- /dev/null +++ b/test/failing/setup-pash.sh @@ -0,0 +1,152 @@ +#!/usr/bin/env bash + +set -e + +cd "$(dirname "$0")" +# check the git status of the project +if git rev-parse --git-dir > /dev/null 2>&1; then + # we have cloned from the git repo, so all the .git related files/metadata are available + git submodule init + git submodule update + # set PASH_TOP + PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel)} +else + # set PASH_TOP to the root folder of the project if it is not available + PASH_TOP=${PASH_TOP:-$PWD/..} + # remove previous installation if it exists + rm -rf $PASH_TOP/compiler/parser/libdash + # we are in package mode, no .git information is available + git clone https://github.com/angelhof/libdash/ $PASH_TOP/compiler/parser/libdash +fi +cd $PASH_TOP +. "$PASH_TOP/scripts/utils.sh" +read_cmd_args $@ + +LOG_DIR=$PASH_TOP/install_logs +mkdir -p $LOG_DIR +PYTHON_PKG_DIR=$PASH_TOP/python_pkgs +# remove the folder in case it exists +rm -rf $PYTHON_PKG_DIR +# create the new folder +mkdir -p $PYTHON_PKG_DIR + +echo "Building parser..." 
+cd compiler/parser + +if type lsb_release >/dev/null 2>&1 ; then + distro=$(lsb_release -i -s) +elif [ -e /etc/os-release ] ; then + distro=$(awk -F= '$1 == "ID" {print $2}' /etc/os-release) +fi + +echo "|-- making libdash..." +# convert to lowercase +distro=$(printf '%s\n' "$distro" | LC_ALL=C tr '[:upper:]' '[:lower:]') +# save distro in the init file +echo "export distro=$distro" > ~/.pash_init +# now do different things depending on distro +case "$distro" in + freebsd*) + gsed -i 's/ make/ gmake/g' Makefile + gmake libdash &> $LOG_DIR/make_libdash.log + echo "Building runtime..." + # Build runtime tools: eager, split + cd ../../runtime/ + gmake &> $LOG_DIR/make.log + ;; + *) + make libdash &> $LOG_DIR/make_libdash.log + echo "Building runtime..." + # Build runtime tools: eager, split + cd ../../runtime/ + make &> $LOG_DIR/make.log + if [ -f /.dockerenv ]; then + # issue with docker only + python3 -m pip install -U --force-reinstall pip + cp "$PASH_TOP"/pa.sh /usr/bin/ + fi + ;; +esac + +## This was the old parser installation that required opam. +# # Build the parser (requires libtool, m4, automake, opam) +# echo "Building parser..." +# eval $(opam config env) +# cd compiler/parser +# echo "|-- installing opam dependencies..." +# make opam-dependencies &> $LOG_DIR/make_opam_dependencies.log +# echo "|-- making libdash... (requires sudo)" +# ## TODO: How can we get rid of that `sudo make install` in here? +# make libdash &> $LOG_DIR/make_libdash.log +# make libdash-ocaml &>> $LOG_DIR/make_libdash.log +# echo "|-- making parser..." +# make &> $LOG_DIR/make.log +# cd ../../ + +cd ../ + +echo "Installing python dependencies..." 
+ +python3 -m pip install jsonpickle --root $PYTHON_PKG_DIR --ignore-installed #&> $LOG_DIR/pip_install_jsonpickle.log +python3 -m pip install pexpect --root $PYTHON_PKG_DIR --ignore-installed #&> $LOG_DIR/pip_install_pexpect.log +python3 -m pip install graphviz --root $PYTHON_PKG_DIR --ignore-installed #&> $LOG_DIR/pip_install_graphviz.log +python3 -m pip install numpy --root $PYTHON_PKG_DIR --ignore-installed #&> $LOG_DIR/pip_install_numpy.log +python3 -m pip install matplotlib --root $PYTHON_PKG_DIR --ignore-installed #&> $LOG_DIR/pip_install_matplotlib.log + +# clean the python packages +cd $PYTHON_PKG_DIR +# can we find a better alternative to that +pkg_path=$(find . \( -name "site-packages" -or -name "dist-packages" \) -type d) +mv ${pkg_path}/* ${PYTHON_PKG_DIR}/ + +echo "Generating input files..." +$PASH_TOP/evaluation/tests/input/setup.sh + +# export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/" +echo " * * * " +echo "Do not forget to export PASH_TOP before using pash: \`export PASH_TOP=$PASH_TOP\`" +echo '(optionally, you can update PATH to include it: `export PATH=$PATH:$PASH_TOP`)' +echo " * * * " +# in case we are running on docker or CI, installation is complete at this moment +if [[ -f /.dockerenv || -f /.githubenv ]]; then + exit 0 +fi +## append PASH Configuration paths to the respective rc files +rc_configs=(~/.shrc ~/.bashrc ~/.zshrc ~/.cshrc ~/.kshrc) # add more shell configs here +for config in "${rc_configs[@]}" +do + ## if the config exists + ## check if it contains an old entry of Pash + if [ -e "$config" ]; then + # get the shell name + shell_name=$(echo $(basename $config) | sed 's/rc//g' | sed 's/\.//g') + echo "Do you want to append \$PASH_TOP to $shell_name ($config) (y/n)?" 
+ read answer + if [ "$answer" != "${answer#[Yy]}" ] ;then + tmpfile=$(mktemp -u /tmp/tmp.XXXXXX) + # create a backup of the shell config + cp $config ${config}.backup + # remove all the entries pointing to PASH_TOP and PATH + grep -ve "export PASH_TOP" $config > $tmpfile + mv $tmpfile $config + path_ans=0 + # check if PATH contains PASH_TOP reference + # we need to store it in a variable otherwise is messes up with the + # existing environment + var=$(grep -e "export PATH" $config | grep -e '$PASH_TOP') || path_ans=$? + # if the return code is 0 -> there is a reference of $PASH_TOP in + # PATH, remove it + if [ "$path_ans" == 0 ]; then + # remove previous references to PASH_TOP from PATH + grep -v 'export PATH=$PATH:$PASH_TOP' $config > $tmpfile + mv $tmpfile $config + fi + ## there isn't a previous Pash installation, append the configuration + echo "export PASH_TOP="$PASH_TOP >> $config + echo 'export PATH=$PATH:$PASH_TOP' >> $config + fi + fi +done + +# running simple test that everything installed fine +$PASH_TOP/pa.sh -c 'echo PaSh installation complete!' 
diff --git a/test/failing/sieve.sh b/test/failing/sieve.sh new file mode 100755 index 0000000..5781bcf --- /dev/null +++ b/test/failing/sieve.sh @@ -0,0 +1,20 @@ +#!/bin/bash + + +# Doug McIlroy's implementation of Sieve of Eratosthenes + +# A combination of: +# https://swtch.com/~rsc/thread/ +# https://stackoverflow.com/questions/14927895/sieve-of-eratosthenes-unix-script + +OUT=./output/out.txt + +limit=10000 +sieve="$(seq 2 $limit | sort)" + +for n in 2 $(seq 3 2 $limit) +do + sieve="$(comm -23 <(echo "$sieve") <(seq $(($n * $n)) $n $limit|sort))" +done + +echo "$sieve" | sort -n > $OUT diff --git a/test/failing/split-unix50.sh b/test/failing/split-unix50.sh new file mode 100755 index 0000000..a0afe14 --- /dev/null +++ b/test/failing/split-unix50.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +awk -v RS= '{print > (NR ".txt")}' unix50.sh + +for file in *.txt; do + fname=$(basename -- "$file") + fscript="${fname%.*}".sh + echo $fscript + echo '#!/bin/bash' > $fscript + + echo 'export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input}' >> $fscript + input=$(grep -o 'IN..' 
$file) + grep "^$(echo $input | xargs)=" unix50.sh >> $fscript + cat $file >> $fscript + echo '' >> $fscript +done + diff --git a/test/failing/split_pipe.sh b/test/failing/split_pipe.sh new file mode 100644 index 0000000..aebef9d --- /dev/null +++ b/test/failing/split_pipe.sh @@ -0,0 +1,11 @@ +BATCH_SIZE=$1 +VIRTUAL_DIR=$2 +OUTPUT1=$3 +OUTPUT2=$4 + +tee >( + head -n "$BATCH_SIZE" > "${VIRTUAL_DIR}/${OUTPUT1}"; + "$PASH_TOP"/evaluation/tools/drain_stream.sh & + cat "${VIRTUAL_DIR}/${OUTPUT1}" > "${OUTPUT1}") | + ( tail -n $((BATCH_SIZE+1)) > "${OUTPUT2}"; + "$PASH_TOP"/evaluation/tools/drain_stream.sh) diff --git a/test/failing/sq.sh b/test/failing/sq.sh new file mode 100755 index 0000000..bce2a72 --- /dev/null +++ b/test/failing/sq.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Clever trick that uses the /dev/fd/xx pseudo-file system +# https://stackoverflow.com/questions/40244/how-to-make-a-pipe-loop-in-bash + +# MMG 2022-06-30 the `function` kw is a bash-ism; leaving it in to not disrupt what gets optimized in previous evaluations +function calc() { + # calculate sum of squares of numbers 0,..,10 + + sum=0 + for ((i=0; i<10; i++)); do + echo $i # "request" the square of i + + read ii # read the square of i + echo "got $ii" >&2 # debug message + + let sum=$sum+$ii + done + + echo "sum $sum" >&2 # output result to stderr +} + +function square() { + # square numbers + + read j # receive first "request" + while [ "$j" != "" ]; do + let jj=$j*$j + echo "square($j) = $jj" >&2 # debug message + + echo $jj # send square + + read j # receive next "request" + done +} + +read | { calc | square; } >/dev/fd/0 diff --git a/test/failing/statistics.sh b/test/failing/statistics.sh new file mode 100755 index 0000000..4bf6faf --- /dev/null +++ b/test/failing/statistics.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# This classification is wrt to their operation, not its input---i.e., whether +# input contains the use of fs identifiers (identifiers can be fs or not fs) + +# We need to think about how to 
translate DFS commands +# What is a distributed fs? Directories are simply keys? + +# everything else (i.e., side-effectful) just needs to be converted to location independent commands + +p="../c_stats/" +A=${1:-${p}posix.txt} +B=${2:-${p}coreutils.txt} + +# Take commands that are shared and use existing distributability descriptions +comm -12 <(cat $A | grep 'Mandatory' | cut -d ' ' -f 1 | sort ) <( cut -d ' ' -f 1 $B | sort) | + sed s/^/\^/ | + xargs -n 1 -I {} grep -w {} ./coreutils.txt | + sort -b -k2,2 -k1,1 # > posix_mandatory1.txt # commenting out this redirection will overwrite! + +# Analyze mandatory commands not in the second, and not built-ins +comm -23 <(cat $A | grep 'Mandatory' | cut -d ' ' -f 1 | sort ) <( cut -d ' ' -f 1 $B | sort) | + comm -23 - <(cat ../c_stats/builtins.txt | sed 's/ */ /g' | cut -d ' ' -f 1 | sort) | + sed s/^/\^/ | + xargs -n 1 -I {} grep -w {} $A | + sed s/Mandatory// | + sort -b -k2,2 -k1,1 # > posix_mandatory2.txt # commenting out this redirection will overwrite! diff --git a/test/failing/superoptimize.sh b/test/failing/superoptimize.sh new file mode 100755 index 0000000..dc31c91 --- /dev/null +++ b/test/failing/superoptimize.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +echo "Superotmizer run!" 
+ diff --git a/test/failing/test-bsd.sh b/test/failing/test-bsd.sh new file mode 100755 index 0000000..6ed04a9 --- /dev/null +++ b/test/failing/test-bsd.sh @@ -0,0 +1,53 @@ +./test-common.sh grep "'\.'" ../bin/grep +./test-common.sh grep "'[A-Z]'" ../bin/grep +./test-common.sh grep "'x'" ../bin/grep +./test-common.sh grep "'Bell'" ../bin/grep +./test-common.sh grep "-c '^[A-Z]'" ../bin/grep +./test-common.sh grep "-c '^....$'" ../bin/grep +./test-common.sh grep "gz" ../bin/grep +./test-common.sh grep "1969" ../bin/grep +./test-common.sh grep "-vi '[aeiou]'" ../bin/grep +./test-common.sh grep "-vc 'light.\*light.\*light'" ../bin/grep +./test-common.sh grep "-v '^0$'" ../bin/grep +./test-common.sh grep "-v '[KQRBN]'" ../bin/grep +./test-common.sh grep "-i '^[^aeiou]*[aeiou][^aeiou]*[aeiou][^aeiou]$'" ../bin/grep +./test-common.sh grep "-i '^[^aeiou]*[aeiou][^aeiou]*$'" ../bin/grep +./test-common.sh grep "-c 'light.\*light.\*light'" ../bin/grep +./test-common.sh grep "-c 'light.\*light'" ../bin/grep +./test-common.sh grep "'print'" ../bin/grep +./test-common.sh grep "'light.\*light'" ../bin/grep +./test-common.sh grep "'\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4'" ../bin/grep +./test-common.sh grep "'[KQRBN]'" ../bin/grep +./test-common.sh grep "'UNIX'" ../bin/grep +./test-common.sh grep "'AT&T'" ../bin/grep + +./test-common.sh tr "'[a-z]' '\n'" ../bin/tr +./test-common.sh tr "A-Z a-z" ../bin/tr +./test-common.sh tr "-cud A-Z" ../bin/tr +./test-common.sh tr "-c '[A-Z]' '\n'" ../bin/tr +./test-common.sh tr "-d '\n'" ../bin/tr + +./test-common.sh wc "" ../bin/wc +./test-common.sh wc "-l" ../bin/wc +./test-common.sh wc "-w" ../bin/wc +./test-common.sh wc "-c" ../bin/wc +./test-common.sh wc "-m" ../bin/wc +./test-common.sh wc "-L" ../bin/wc +./test-common.sh wc "-lcm" ../bin/wc +./test-common.sh wc "-mlw" ../bin/wc +./test-common.sh wc "-mLc" ../bin/wc +./test-common.sh wc "-L -mc -w" ../bin/wc + +./test-common.sh uniq "" ../bin/uniq +./test-common.sh uniq "-c" ../bin/uniq 
+./test-common.sh uniq "--count" ../bin/uniq + +# These tests are run during PASH_TOP/scripts/run_tests.sh +# Make sure to build the aggregators using PASH_TOP/scripts/setup-pash.sh first +# +# More tests can be added like this: +# ./test-common.sh cmd args agg +# where +# cmd - is a shell command like uniq +# args - are arguements like -c +# agg - is an aggregator like ./uniq-c diff --git a/test/failing/test-exclam.sh b/test/failing/test-exclam.sh new file mode 100755 index 0000000..8fb0eee --- /dev/null +++ b/test/failing/test-exclam.sh @@ -0,0 +1,3 @@ +#!/bin/sh +echo "!" + diff --git a/test/failing/test-linux.sh b/test/failing/test-linux.sh new file mode 100755 index 0000000..f7bd59b --- /dev/null +++ b/test/failing/test-linux.sh @@ -0,0 +1,57 @@ +./test-common.sh grep "'\.'" ../bin/grep +./test-common.sh grep "'[A-Z]'" ../bin/grep +./test-common.sh grep "'x'" ../bin/grep +./test-common.sh grep "'Bell'" ../bin/grep +./test-common.sh grep "-c '^[A-Z]'" ../bin/grep +./test-common.sh grep "-c '^....$'" ../bin/grep +./test-common.sh grep "gz" ../bin/grep +./test-common.sh grep "1969" ../bin/grep +./test-common.sh grep "-vi '[aeiou]'" ../bin/grep +./test-common.sh grep "-vc 'light.\*light.\*light'" ../bin/grep +./test-common.sh grep "-v '^0$'" ../bin/grep +./test-common.sh grep "-v '[KQRBN]'" ../bin/grep +./test-common.sh grep "-i '^[^aeiou]*[aeiou][^aeiou]*[aeiou][^aeiou]$'" ../bin/grep +./test-common.sh grep "-i '^[^aeiou]*[aeiou][^aeiou]*$'" ../bin/grep +./test-common.sh grep "-c 'light.\*light.\*light'" ../bin/grep +./test-common.sh grep "-c 'light.\*light'" ../bin/grep +./test-common.sh grep "'print'" ../bin/grep +./test-common.sh grep "'light.\*light'" ../bin/grep +./test-common.sh grep "'\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4'" ../bin/grep +./test-common.sh grep "'[KQRBN]'" ../bin/grep +./test-common.sh grep "'UNIX'" ../bin/grep +./test-common.sh grep "'AT&T'" ../bin/grep + +./test-common.sh tr "'[a-z]' '\n'" ../bin/tr +./test-common.sh tr "A-Z a-z" ../bin/tr 
+./test-common.sh tr "-c '[A-Z]' '\n'" ../bin/tr +./test-common.sh tr "--complement '[1-9]\n*' '[a-z][A-Z]" ../bin/tr +./test-common.sh tr "--complement -t '[1-9]\n*' '[a-z][A-Z]" ../bin/tr +./test-common.sh tr "-d '\n'" ../bin/tr +./test-common.sh tr "-tcd '[1-9][a-z][A-Z]\n'" ../bin/tr + +./test-common.sh wc "" ../bin/wc +./test-common.sh wc "-l" ../bin/wc +./test-common.sh wc "-w" ../bin/wc +./test-common.sh wc "-c" ../bin/wc +./test-common.sh wc "-m" ../bin/wc +./test-common.sh wc "-L" ../bin/wc +./test-common.sh wc "-lcm" ../bin/wc +./test-common.sh wc "-mlw" ../bin/wc +./test-common.sh wc "-mLc" ../bin/wc +./test-common.sh wc "-L -mc -w" ../bin/wc +./test-common.sh wc "--bytes -c --chars -L" ../bin/wc +./test-common.sh wc "-L --lines --words" ../bin/wc + +./test-common.sh uniq "" ../bin/uniq +./test-common.sh uniq "-c" ../bin/uniq +./test-common.sh uniq "--count" ../bin/uniq + +# These tests are run during PASH_TOP/scripts/run_tests.sh +# Make sure to build the aggregators using PASH_TOP/scripts/setup-pash.sh first +# +# More tests can be added like this: +# ./test-common.sh cmd args agg +# where +# cmd - is a shell command like uniq +# args - are arguements like -c +# agg - is an aggregator like ./uniq-c diff --git a/test/failing/test-shlex-aux.sh b/test/failing/test-shlex-aux.sh new file mode 100644 index 0000000..50857ed --- /dev/null +++ b/test/failing/test-shlex-aux.sh @@ -0,0 +1,9 @@ +comment_fun() +{ + cat > /dev/null #Consume data from pipe so writers don't get SIGPIPE +} + +bad_quote_fun() +{ + echo ${asf"asd} +} diff --git a/test/failing/test_JSON_to_shell2.sh b/test/failing/test_JSON_to_shell2.sh new file mode 100644 index 0000000..45c99f9 --- /dev/null +++ b/test/failing/test_JSON_to_shell2.sh @@ -0,0 +1,62 @@ +#!/bin/sh + + +SHELL_TO_JSON_OCAML=/pash/compiler/parser/parse_to_json.native +JSON_TO_SHELL_OCAML=/pash/compiler/parser/json_to_shell.native +JSON_TO_SHELL_C=./json_to_shell2 + + +if [ $# -ne 1 ] +then + echo "Usage: $0 testFile" + echo + 
exit 1 +fi + + +testFile="$1" + + +if [ ! -f "$testFile" ] +then + echo "Error: cannot read '$testFile'!" + echo + exit 1 +fi + + +"$SHELL_TO_JSON_OCAML" < "$testFile" > /tmp/json.$$ +if [ $? -ne 0 ] +then + echo "INVALID_INPUT_1: '$testFile' | Unable to run '$SHELL_TO_JSON_OCAML' on '$testFile'" + exit 1 +fi + +"$JSON_TO_SHELL_OCAML" < /tmp/json.$$ > /tmp/rt_ocaml.$$ +if [ $? -ne 0 ] +then + echo "INVALID_INPUT_2: '$testFile' | Unable to run '$JSON_TO_SHELL_OCAML' on '/tmp/json.$$'" + exit 1 +fi + +"$JSON_TO_SHELL_C" < /tmp/json.$$ > /tmp/rt_c.$$ +if [ $? -ne 0 ] +then + echo "ABORT: '$testFile' | Unable to run '$JSON_TO_SHELL_C' on '/tmp/json.$$'" + exit 1 +fi + +diff /tmp/rt_ocaml.$$ /tmp/rt_c.$$ +if [ $? -ne 0 ] +then + diff -w /tmp/rt_ocaml.$$ /tmp/rt_c.$$ + if [ $? -ne 0 ] + then + echo "FAIL: '$testFile' | /tmp/json.$$ /tmp/rt_ocaml.$$ /tmp/rt_c.$$" + else + echo "FAIL_WHITESPACE: '$testFile' | /tmp/json.$$ /tmp/rt_ocaml.$$ /tmp/rt_c.$$" + fi + exit 1 +fi + +echo "PASS: '$testFile' | /tmp/json.$$ /tmp/rt_ocaml.$$ /tmp/rt_c.$$" diff --git a/test/failing/test_ast2shell_py.sh b/test/failing/test_ast2shell_py.sh new file mode 100644 index 0000000..a765aec --- /dev/null +++ b/test/failing/test_ast2shell_py.sh @@ -0,0 +1,66 @@ +#!/bin/sh + + +SHELL_TO_JSON_OCAML=/pash/compiler/parser/parse_to_json.native +JSON_TO_SHELL_OCAML=/pash/compiler/parser/json_to_shell.native + +RT_PY="rt.py" + + +if [ $# -ne 1 ] +then + echo "Usage: $0 testFile" + echo + exit 1 +fi + + +testFile="$1" + + +if [ ! -f "$testFile" ] +then + echo "Error: cannot read '$testFile'!" + echo + exit 1 +fi + + +"$SHELL_TO_JSON_OCAML" < "$testFile" > /tmp/json_ocaml.$$ +if [ $? -ne 0 ] +then + echo "REF_ABORT_1: '$testFile'" + exit 1 +fi + +"$JSON_TO_SHELL_OCAML" < /tmp/json_ocaml.$$ > /tmp/rt_ocaml.$$ +if [ $? -ne 0 ] +then + echo "REF_ABORT_2: '$testFile' | /tmp/json_ocaml.$$" + exit 1 +fi + +# python3 "$RT_PY" < "$testFile" > /tmp/rt_py.$$ +python3 "$RT_PY" "$testFile" > /tmp/rt_py.$$ +if [ $? 
-ne 0 ] +then + echo "ABORT: '$testFile'" + exit 1 +fi + +diff /tmp/rt_ocaml.$$ /tmp/rt_py.$$ > /dev/null +if [ $? -ne 0 ] +then + diff -w /tmp/rt_ocaml.$$ /tmp/rt_py.$$ > /dev/null + if [ $? -ne 0 ] + then + diff -w /tmp/rt_ocaml.$$ /tmp/rt_py.$$ + echo "FAIL: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_py.$$" + else + diff /tmp/rt_ocaml.$$ /tmp/rt_py.$$ + echo "FAIL_WHITESPACE: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_py.$$" + fi + exit 1 +fi + +echo "PASS: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_py.$$" diff --git a/test/failing/test_evaluation_scripts.sh b/test/failing/test_evaluation_scripts.sh new file mode 100755 index 0000000..18fa408 --- /dev/null +++ b/test/failing/test_evaluation_scripts.sh @@ -0,0 +1,233 @@ +#!/bin/bash +# time: print real in seconds, to simplify parsing +## Necessary to set PASH_TOP +cd $(dirname $0) +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} +export DEBUG=0 +export PASH_LOG=1 +# export DEBUG=1 # Uncomment to print pash output +## Determines whether the experimental pash flags will be tested. +## By default they are not. +export EXPERIMENTAL=0 +for item in $@ +do + if [ "--debug" == "$item" ] || [ "-d" == "$item" ]; then + export DEBUG=1 + fi + if [ "--no-pash-log" == "$item" ]; then + export PASH_LOG=0 + fi + if [ "--experimental" == "$item" ]; then + export EXPERIMENTAL=1 + fi +done + +microbenchmarks_dir="${PASH_TOP}/evaluation/tests" +intermediary_dir="${PASH_TOP}/evaluation/tests/test_intermediary" +test_results_dir="${PASH_TOP}/evaluation/tests/results" +results_time="$test_results_dir/results.time" +results_time_bash=${results_time}_bash +results_time_pash=${results_time}_pash + +echo "Deleting eager intermediate files..." +rm -rf "$test_results_dir" +rm -rf "$intermediary_dir" +mkdir -p $intermediary_dir +mkdir -p "$test_results_dir" + +echo "Generating inputs..." 
+cd "$microbenchmarks_dir/input" +./setup.sh +cd - + +n_inputs=( + 2 + 8 +) + +if [ "$EXPERIMENTAL" -eq 1 ]; then + configurations=( + # "" # Commenting this out since the tests take a lot of time to finish + "--r_split" + "--dgsh_tee" + # "--r_split --dgsh_tee" + # "--speculation quick_abort" + "--parallel_pipelines" + ) +else + configurations=( + "--r_split --dgsh_tee --parallel_pipelines --profile_driven" + ) +fi + + +## Tests where the compiler will not always succeed (e.g. because they have mkfifo) +script_microbenchmarks=( + diff # (quick-abort) BUG: Might have to do with the named pipes, and the fact that they are reused for parallel and sequential script. + set-diff # TODO: Handle redirection after reduce + export_var_script # Tests whether exported variables in the scripts that are processed by PaSh runtime are visible to the rest of the script. + comm-par-test # Small comm test to ensure non-parallelizability + comm-par-test2 # Small comm test with input redirection and hyphen + tee_web_index_bug # Tests a tee bug from web index + fun-def # Tests whether PaSh can handle a simple function definition + bigrams # One-liner + spell-grep # Spell variant with `grep -f` instead of `comm` +) + +pipeline_microbenchmarks=( + grep # One-liner + minimal_sort # One-liner + minimal_grep # One-liner + topn # One-liner + wf # One-liner + spell # One-liner + shortest_scripts # One-liner + alt_bigrams # One-liner + deadlock_test # Test to check deadlock prevention using drain_stream + double_sort # Checks maximum peformance gains from split + no_in_script # Tests whether a script can be executed by our infrastructure without having its input in a file called $IN + for_loop_simple # Tests whether PaSh can handle a for loop where the body is parallelizable + minimal_grep_stdin # Tests whether PaSh can handle a script that reads from stdin + micro_10 # A small version of the pipeline above for debugging. 
+ sed-test # Tests all sed occurences in our evaluation to make sure that they work + tr-test # Tests all possible behaviors of tr that exist in our evaluation + grep-test # Tests some interesting grep invocations + ann-agg # Tests custom aggregators in annotations + # # # # micro_1000 # Not being run anymore, as it is very slow. Tests whether the compiler is fast enough. It is a huge pipeline without any computation. +) + + + +execute_pash_and_check_diff() { + TIMEFORMAT="%3R" # %3U %3S" + if [ "$DEBUG" -eq 1 ]; then + { time "$PASH_TOP/pa.sh" $@ ; } 1> "$pash_output" 2> >(tee -a "${pash_time}" >&2) && + diff -s "$seq_output" "$pash_output" | head | tee -a "${pash_time}" >&2 + else + + { time "$PASH_TOP/pa.sh" $@ ; } 1> "$pash_output" 2>> "${pash_time}" && + b=$(cat "$pash_time"); + test_diff_ec=$(cmp -s "$seq_output" "$pash_output" && echo 0 || echo 1) + # differ + script=$(basename $script_to_execute) + if [ $test_diff_ec -ne 0 ]; then + c=$(diff -s "$seq_output" "$pash_output" | head) + echo "$c$b" > "${pash_time}" + echo "$script are not identical" >> $test_results_dir/result_status + else + echo "Files $seq_output and $pash_output are identical" > "${pash_time}" + echo "$script are identical" >> $test_results_dir/result_status + fi + + fi +} + +execute_tests() { + assert_correctness="$1" + microbenchmarks=("${@:2}") + + microbenchmark_configs=( ) + for i in "${!microbenchmarks[@]}"; do + all_flags=${test_flags[@]} + microbenchmark_configs[$i]="${microbenchmarks[$i]};${all_flags// /;}" + done + + ## This is almost the same loop as the one in execute_evaluation_scripts + for microbenchmark_config in "${microbenchmark_configs[@]}"; do + IFS=";" read -r -a flags <<< "${microbenchmark_config}" + microbenchmark=${flags[0]} + echo "Executing test: $microbenchmark" + # Execute the sequential script on the first run only + + prefix="${microbenchmarks_dir}/${microbenchmark}" + + export seq_output="${intermediary_dir}/${microbenchmark}_seq_output" + 
seq_time="$test_results_dir/${microbenchmark}_seq.time" + + export script_to_execute="${prefix}.sh" + env_file="${prefix}_env_test.sh" + funs_file="${prefix}_funs.sh" + input_file="${prefix}_test.in" + + if [ -f "$env_file" ]; then + . $env_file + vars_to_export=$(cut -d= -f1 $env_file) + if [ ! -z "$vars_to_export" ]; then + export $vars_to_export + fi + else + echo "|-- Does not have env file" + fi + + ## Export necessary functions + if [ -f "$funs_file" ]; then + source $funs_file + fi + + ## Redirect the input if there is an input file + stdin_redir="/dev/null" + if [ -f "$input_file" ]; then + stdin_redir="$(cat "$input_file")" + echo "|-- Has input file: $stdin_redir" + fi + + TIMEFORMAT="${microbenchmark%%.*}:%3R" # %3U %3S" + echo -n "|-- Executing the script with bash..." + { time /bin/bash "$script_to_execute" > $seq_output ; } \ + < "$stdin_redir" 2>> "${seq_time}" + echo " exited with $?" + tail -n1 ${seq_time} >> ${results_time_bash} + for conf in "${configurations[@]}"; do + for n_in in "${n_inputs[@]}"; do + echo "|-- Executing with pash --width ${n_in} ${conf}..." + export pash_time="${test_results_dir}/${microbenchmark}_${n_in}_distr_$(echo ${conf} | tr -d ' ').time" + export pash_output="${intermediary_dir}/${microbenchmark}_${n_in}_pash_output" + export script_conf=${microbenchmark}_${n_in} + echo '' > "${pash_time}" + # do we need to write the PaSh output ? 
+ cat $stdin_redir | + execute_pash_and_check_diff -d $PASH_LOG $assert_correctness ${conf} --width "${n_in}" --output_time $script_to_execute + tail -n1 "${pash_time}" >> "${results_time_pash}_${n_in}" + done + done + done +} + +execute_tests "" "${script_microbenchmarks[@]}" +execute_tests "--assert_compiler_success" "${pipeline_microbenchmarks[@]}" + +#cat ${results_time} | sed 's/,/./' > /tmp/a +#cat /tmp/a | sed 's/@/,/' > ${results_time} + + +if type lsb_release >/dev/null 2>&1 ; then + distro=$(lsb_release -i -s) +elif [ -e /etc/os-release ] ; then + distro=$(awk -F= '$1 == "ID" {print $2}' /etc/os-release) +fi + +distro=$(printf '%s\n' "$distro" | LC_ALL=C tr '[:upper:]' '[:lower:]') +# now do different things depending on distro +case "$distro" in + freebsd*) + # change sed to gsed + sed () { + gsed $@ + } + ;; + *) + ;; +esac + +echo "group,Bash,Pash2,Pash8" > ${results_time} +paste -d'@' $test_results_dir/results.time_* | sed 's\,\.\g' | sed 's\:\,\g' | sed 's\@\,\g' >> ${results_time} + +#echo "Below follow the identical outputs:" +#grep "are identical" "$test_results_dir"/result_status | awk '{print $1}' + +echo "Below follow the non-identical outputs:" +grep "are not identical" "$test_results_dir"/result_status | awk '{print $1}' + +TOTAL_TESTS=$(cat "$test_results_dir"/result_status | wc -l) +PASSED_TESTS=$(grep -c "are identical" "$test_results_dir"/result_status) +echo "Summary: ${PASSED_TESTS}/${TOTAL_TESTS} tests passed." diff --git a/test/failing/test_parse_to_JSON2.sh b/test/failing/test_parse_to_JSON2.sh new file mode 100644 index 0000000..5edc87f --- /dev/null +++ b/test/failing/test_parse_to_JSON2.sh @@ -0,0 +1,75 @@ +#!/bin/sh + + +SHELL_TO_JSON_OCAML=../parse_to_json.native + +PRETTYPRINT_JSON=./prettyprint_json + +SHELL_TO_JSON_C=./parse_to_json2 + + +if [ $# -ne 1 ] +then + echo "Usage: $0 testFile" + echo + exit 1 +fi + + +testFile="$1" + + +if [ ! -f "$testFile" ] +then + echo "Error: cannot read '$testFile'!" 
+ echo + exit 1 +fi + + +json_ocaml="/tmp/json_ocaml.$$" +json_ocaml_pretty="/tmp/json_ocaml_pretty.$$" +json_c="/tmp/json_c.$$" + + +"$SHELL_TO_JSON_OCAML" < "$testFile" > "${json_ocaml}" +if [ $? -ne 0 ] +then + echo "INVALID_INPUT: '$testFile' | Unable to run '$SHELL_TO_JSON_OCAML' on '$testFile'" + exit 1 +fi + +"$SHELL_TO_JSON_C" < "$testFile" > "${json_c}" +if [ $? -ne 0 ] +then + echo "ABORT: '$testFile' | Unable to run '$SHELL_TO_JSON_C' on '$testFile'" + exit 1 +fi + + +diff "${json_ocaml}" "${json_c}" > /dev/null +if [ $? -ne 0 ] +then + for f in "${json_ocaml}" "${json_c}" + do + "$PRETTYPRINT_JSON" < "${f}" > "${f}.pretty" + if [ $? -ne 0 ] + then + echo "PRETTYPRINT_FAIL: '$testFile' | Unable to run '$PRETTYPRINT_JSON' on '${f}'" + exit 1 + fi + done + + diff -w "${json_ocaml}.pretty" "${json_c}.pretty" > /dev/null + if [ $? -ne 0 ] + then + diff -w "${json_ocaml}.pretty" "${json_c}.pretty" + echo "FAIL: '$testFile' | ${json_ocaml} ${json_c} ${json_ocaml}.pretty ${json_c}.pretty" + else + diff "${json_ocaml}" "${json_c}" + echo "FAIL_WHITESPACE: '$testFile' | ${json_ocaml} ${json_c} ${json_ocaml}.pretty ${json_c}.pretty" + fi + exit 1 +fi + +echo "PASS: '$testFile' | ${json_ocaml} ${json_c} ${json_ocaml}.pretty ${json_c}.pretty" diff --git a/test/failing/test_rt.sh b/test/failing/test_rt.sh new file mode 100644 index 0000000..81f67f3 --- /dev/null +++ b/test/failing/test_rt.sh @@ -0,0 +1,71 @@ +#!/bin/sh + + +SHELL_TO_JSON_OCAML=../parse_to_json.native +JSON_TO_SHELL_OCAML=../json_to_shell.native + +SHELL_TO_JSON_C=./parse_to_json2 +JSON_TO_SHELL_C=./json_to_shell2 + + +if [ $# -ne 1 ] +then + echo "Usage: $0 testFile" + echo + exit 1 +fi + + +testFile="$1" + + +if [ ! -f "$testFile" ] +then + echo "Error: cannot read '$testFile'!" + echo + exit 1 +fi + + +"$SHELL_TO_JSON_OCAML" < "$testFile" > /tmp/json_ocaml.$$ +if [ $? 
-ne 0 ] +then + echo "REF_ABORT_1: '$testFile'" + exit 1 +fi + +"$JSON_TO_SHELL_OCAML" < /tmp/json_ocaml.$$ > /tmp/rt_ocaml.$$ +if [ $? -ne 0 ] +then + echo "REF_ABORT_2: '$testFile' | /tmp/json_ocaml.$$" + exit 1 +fi + +"$SHELL_TO_JSON_C" < "$testFile" > /tmp/json_c.$$ +if [ $? -ne 0 ] +then + echo "ABORT_1: '$testFile'" + exit 1 +fi + +"$JSON_TO_SHELL_C" < /tmp/json_c.$$ > /tmp/rt_c.$$ +if [ $? -ne 0 ] +then + echo "ABORT_2: '$testFile' | /tmp/json_c.$$" + exit 1 +fi + +diff /tmp/rt_ocaml.$$ /tmp/rt_c.$$ +if [ $? -ne 0 ] +then + diff -w /tmp/rt_ocaml.$$ /tmp/rt_c.$$ + if [ $? -ne 0 ] + then + echo "FAIL: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_c.$$" + else + echo "FAIL_WHITESPACE: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_c.$$" + fi + exit 1 +fi + +echo "PASS: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_c.$$" diff --git a/test/failing/test_rt_py.sh b/test/failing/test_rt_py.sh new file mode 100644 index 0000000..a95e0b3 --- /dev/null +++ b/test/failing/test_rt_py.sh @@ -0,0 +1,65 @@ +#!/bin/sh + + +SHELL_TO_JSON_OCAML=../parse_to_json.native +JSON_TO_SHELL_OCAML=../json_to_shell.native + +RT_PYTHON=./ceda_rt.py + + +if [ $# -ne 1 ] +then + echo "Usage: $0 testFile" + echo + exit 1 +fi + + +testFile="$1" + + +if [ ! -f "$testFile" ] +then + echo "Error: cannot read '$testFile'!" + echo + exit 1 +fi + + +"$SHELL_TO_JSON_OCAML" < "$testFile" > /tmp/json_ocaml.$$ +if [ $? -ne 0 ] +then + echo "REF_ABORT_1: '$testFile'" + exit 1 +fi + +"$JSON_TO_SHELL_OCAML" < /tmp/json_ocaml.$$ > /tmp/rt_ocaml.$$ +if [ $? -ne 0 ] +then + echo "REF_ABORT_2: '$testFile' | /tmp/json_ocaml.$$" + exit 1 +fi + +python3 "$RT_PYTHON" < "$testFile" > /tmp/rt_python.$$ +if [ $? -ne 0 ] +then + echo "ABORT_1: '$testFile'" + exit 1 +fi + +diff /tmp/rt_ocaml.$$ /tmp/rt_python.$$ > /dev/null +if [ $? -ne 0 ] +then + diff -w /tmp/rt_ocaml.$$ /tmp/rt_python.$$ > /dev/null + if [ $? 
-ne 0 ] + then + diff -w /tmp/rt_ocaml.$$ /tmp/rt_python.$$ + echo "FAIL: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_python.$$" + else + diff /tmp/rt_ocaml.$$ /tmp/rt_python.$$ + echo "FAIL_WHITESPACE: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_python.$$" + fi + exit 1 +fi + +echo "PASS: '$testFile' | /tmp/rt_ocaml.$$ /tmp/rt_python.$$" diff --git a/test/failing/timing-JSON.sh b/test/failing/timing-JSON.sh new file mode 100644 index 0000000..00485e3 --- /dev/null +++ b/test/failing/timing-JSON.sh @@ -0,0 +1,27 @@ +#!/bin/sh + + +input_script='/pash/compiler/parser/libdash/ltmain.sh' + + +if [ $# -eq 1 ] +then + input_script="$1" +fi + + +echo "Input script: $input_script" +echo + +echo "OCaml:" +time (../parse_to_json.native "$input_script" | tee /tmp/json.$$ | md5sum) +echo + +echo "C:" +time (./parse_to_json2 "$input_script" | tee /tmp/json.$$ | md5sum) +echo + +echo "Python (ROUND-TRIP):" +time (python3 ceda_rt.py "$input_script" | md5sum) +echo + diff --git a/test/failing/timing.sh b/test/failing/timing.sh new file mode 100644 index 0000000..1d79364 --- /dev/null +++ b/test/failing/timing.sh @@ -0,0 +1,31 @@ +#!/bin/sh + + +input_script='/pash/compiler/parser/libdash/ltmain.sh' + + +if [ $# -eq 1 ] +then + input_script="$1" +fi + + +echo "Input script: $input_script" +echo + +echo "OCaml (dash C AST -> libdash OCaml AST -> JSON -> Pash Python AST -> JSON -> shell:" +time (../parse_to_json.native "$input_script" > /tmp/json.$$; cat /tmp/json.$$ | ../json_to_shell.native | md5sum) +echo + +echo "C (dash C AST -> libdash C AST -> JSON -> Pash Python AST -> JSON -> shell):" +time (./parse_to_json2 "$input_script" > /tmp/json.$$ 2>/dev/null; cat /tmp/json.$$ | ./json_to_shell2 | md5sum) +echo + +echo "Python (dash C AST -> libdash C AST -> JSON -> Pash Python AST -> JSON -> shell):" +time (python3 ./parse_to_json2.py "$input_script" > /tmp/json.$$ 2>/dev/null; cat /tmp/json.$$ | python3 ./json_to_shell2.py | md5sum) +echo + +echo "Python (dash C AST -> Pash Python AST -> 
shell):" +time (python3 ceda_rt.py "$input_script" | md5sum) +echo + diff --git a/test/failing/unzip-1.sh b/test/failing/unzip-1.sh new file mode 100644 index 0000000..df6ae8f --- /dev/null +++ b/test/failing/unzip-1.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# https://gist.github.com/noamross/86fba413e0769069e3955d1c9bc530ae +funzip $1| # uncompress first file in zip +tr -d '\000' | #remove null characters +sed "/^\s*$/d; s/ \{1,\}\t/\t/g; s/\t \{1,\}/\t/g; s/\r//" | #removes empty lines, whitespace around tabs, extra newlines +cut -s -f 1,3,4,5,6,8,12,13,14,15,16,17,18,19,20,21,23,24,25,26,34,35,36,38,40,42,44,45,46,85,86,87,88,89 #| #only select certain columns +pv -N Process -c | +gzip -9 | +pv -N Compress -c > $1.gz diff --git a/test/failing/up.sh b/test/failing/up.sh new file mode 100755 index 0000000..b817491 --- /dev/null +++ b/test/failing/up.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env sh + +# clone and setup pash +# N.b. This is a .sh script + +set -e + +# will install dependencies locally. +PLATFORM=$(uname | tr '[:upper:]' '[:lower:]') +URL='https://github.com/binpash/pash/archive/refs/heads/main.zip' +VERSION='latest' +DL=$(command -v curl >/dev/null 2>&1 && echo curl || echo 'wget -qO-') + +cmd_exists () { + command -v $1 >/dev/null 2>&1 && echo 'true' || echo 'false'; +} + +if [ "$PLATFORM" = "darwin" ]; then + echo 'PaSh is not yet well supported on OS X' + exit 1 +fi + +set +e +git clone git@github.com:binpash/pash.git +if [ $? 
-ne 0 ]; then + echo 'SSH clone failed; attempting HTTPS' + git clone https://github.com/andromeda/pash.git +fi +set -e + +cd pash/scripts +# git checkout s3 # FIXME only for testing while PR is up + +if [ $(groups $(whoami) | grep -c "sudo\|root\|admin") -ge 1 ]; then + # only run this if we are in the sudo group (or it's doomed to fail) + bash distro-deps.sh +fi +bash setup-pash.sh diff --git a/test/failing/utils.sh b/test/failing/utils.sh new file mode 100755 index 0000000..4c5974b --- /dev/null +++ b/test/failing/utils.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash + +# #Check that we are in the appropriate directory where setup.sh is +# #https://stackoverflow.com/a/246128 +# DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +# echo "changing to $DIR to run setup.sh" +# cd $DIR + +# another solution for capturing HTTP status code +# https://superuser.com/a/590170 + +eexit(){ + echo $1 'please email pash-devs@googlegroups.com' + exit 1 +} + +nargs(){ + echo $# $1 $2 +} + +rm-files(){ + echo "${@}" + rm -r "${@}" + exit 0 +} + +append_nl_if_not(){ + ## Adds a newline at the end of a file if it doesn't already end in a newline. + ## Used to prepare inputs for PaSh. + if [ -z "$1" ]; then + echo "No file argument given!" + exit 1 + else + if [ ! -f "$1" ]; then + echo "File $1 doesn't exist!" + exit 1 + else + tail -c 1 "$1" | od -ta | grep -q nl + if [ $? -eq 1 ]; then + echo >> "$1" + fi + fi + fi +} + +install_deps_source_setup() { + # move to the input directory + cd input/ + # check if there are dependencies + if [ -e install-deps.sh ]; then + echo "Installing dependencies" + bash install-deps.sh + fi + # source the setup file + # it contains the fetch dataset function + # and the export variable function for IN, IN_PRE + source setup.sh + # fetch the dataset + setup_dataset $1 > /dev/null + cd .. +} +######################### +# The command line help # +######################### +usage() { + echo "Usage: `basename $0` [option...] 
-- shell script to build PaSh" + echo + echo " -h, --help Show this help message" + echo " -o, --opt-agg Install g++-10 and switch to it as main compiler. Build the optimized c++ aggregators (run with sudo)" + echo " -s, --show-deps Show all the required dependencies (does not setup/deploy PaSh nor its dependencies)" + echo " -e, --install-eval Install all the dependencies needed for reproducing the evaluation figures (uses sudo, only for Ubuntu/Debian currently)" + echo + exit 1 +} + +########################################## +# Install all the required libraries and # +# dependencies for PaSh evaluation # +########################################## +install_eval_deps() { + echo "Installing evaluation dependencies (needs sudo)" + # needed for majority of the benchmarks (not available in docker instances) + sudo apt-get install unzip + paths="$(find $PASH_TOP/evaluation/benchmarks -name install-deps.sh)" + for f in $(echo $paths); do + path=$(dirname $(readlink -f $f)) + cd $path + bash install-deps.sh + cd - > /dev/null + done + echo "Generating PDF plots of the evaluation results is optional and requires R-packages" + echo "Follow Installation Guide from: $PASH_TOP/evaluation/eval_script/README.md" +} + +########################################## +# parse and read the command line args # +########################################## +read_cmd_args() { + # Transform long options to short ones + for arg in "$@"; do + shift + case "$arg" in + "--opt-agg") set -- "$@" "-o" ;; + "--show-deps") set -- "$@" "-s" ;; + "--install-eval") set -- "$@" "-e" ;; + "--help") set -- "$@" "-h" ;; + *) set -- "$@" "$arg" + esac + done + + while getopts 'opsreh' opt; do + case $opt in + # passthrough the variable to the Makefile for libdash + o) export optimized_agg_flag=1 ;; + s) export show_deps=1 ;; + r) export show_eval_deps=1 ;; + e) export install_eval=1 ;; + h) usage >&2 ;; + *) echo 'Error in command line parsing' >&2 + exit 1 + esac + done +} diff --git 
a/test/failing/wc.2.sh b/test/failing/wc.2.sh new file mode 100755 index 0000000..deb0db2 --- /dev/null +++ b/test/failing/wc.2.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +# Part of a distributed-`wc` wrapper, merging two `wc` results +# FIXME needs correct padding + +paste -d '+' + <(cat "$1" | + wc | + tr -s ' ' '\n' | + tail -n +2) + <(cat "$2" | + wc | + tr -s ' ' '\n' | + tail -n +2) | + bc | + tr -s '\n' ' ' | + sed 's/^/ /' | + sed 's/$/\ /' diff --git a/test/failing/web-log-stats.sh b/test/failing/web-log-stats.sh new file mode 100755 index 0000000..a746610 --- /dev/null +++ b/test/failing/web-log-stats.sh @@ -0,0 +1,147 @@ +#!/bin/sh +# Automatically generated file +# Source file example/web-log-stats.sh +#!/usr/bin/env sgsh -s /bin/bash +# +# SYNOPSIS Web log statistics +# DESCRIPTION +# Provides continuous statistics over web log stream data. +# Demonstrates stream processing. +# Provide as an argument either the name of a growing web log file +# or -s and a static web log file, which will be processed at a rate +# of about 10 lines per second. +# +# Copyright 2013 Diomidis Spinellis +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Size of the window to report in seconds +WINDOW=10 +WINDOW_OLD=$(expr $WINDOW \* 2) + +# Update interval in seconds +UPDATE=2 + +# Print the sum of the numbers read from the standard input +sum() +{ + awk '{ sum += $1 } END {print sum}' +} + +# Print the rate of change as a percentage +# between the first (old) and second (new) value +change() +{ + # Can't use bc, because we have numbers in scientific notation + awk "END {OFMT=\"%.2f%%\"; print ($2 - $1) * 100 / $1}" &2 + + # Stop key-value stores + + # Kill processes we have launched in the background + kill $SGPID 2>/dev/null + + # Remove temporary directory + rm -rf "$SGDIR" + + # Propagate real signals and exit with non-0 + if [ $SIGNAL != EXIT ] + then + trap - $SIGNAL EXIT + kill -s $SIGNAL $$ + fi + + # Exit with the original exit value + exit + + } + + for sig in HUP INT QUIT TERM EXIT + do + trap "cleanup $sig" $sig + done + + mkdir $SGDIR + cat <&3 3<&- >$SGDIR/npi-0.0.0 +ln $SGDIR/npi-0.0.0 $SGDIR/npi-0.1.0 +page=$( { awk -Winteractive '{print $7}' +} <$SGDIR/npi-0.0.0 ) + { awk -Winteractive '{print $10}' +} <$SGDIR/npi-0.1.0 >$SGDIR/npi-1.0.0 +ln $SGDIR/npi-1.0.0 $SGDIR/npi-1.1.0 +ln $SGDIR/npi-1.0.0 $SGDIR/npi-1.2.0 +ln $SGDIR/npi-1.0.0 $SGDIR/npi-1.3.0 +total_bytes=$( { awk -Winteractive '{ s += $1; print s}' +} <$SGDIR/npi-1.0.0 ) +total_pages=$( { awk -Winteractive '{print ++n}' +} <$SGDIR/npi-1.1.0 ) +bytes=$( { +} <$SGDIR/npi-1.2.0 ) +bytes_old=$( { +} <$SGDIR/npi-1.3.0 ) + +# Gather the results + # Produce periodic reports + while : + do + WINDOW_PAGES=$(echo ${bytes} -c | wc -l) + WINDOW_BYTES=$(echo ${bytes} -c | sum ) + WINDOW_PAGES_OLD=$(echo ${bytes_old} -c | wc -l) + WINDOW_BYTES_OLD=$(echo ${bytes_old} -c | sum) + clear + cat </dev/null) || call-with-active-ec2 "$@" diff --git a/test/pash_tests/1.sh b/test/pash_tests/1.sh new file mode 100755 index 0000000..5cfe0b9 --- /dev/null +++ b/test/pash_tests/1.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export 
IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN1=$IN_PRE/1.txt +# 1.0: extract the last name +cat $IN1 | cut -d ' ' -f 2 + diff --git a/test/pash_tests/10.sh b/test/pash_tests/10.sh new file mode 100755 index 0000000..30d1f6c --- /dev/null +++ b/test/pash_tests/10.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN4=$IN_PRE/4.txt +# 4.4: histogram of Belle's captures (-pawns) by each type of piece +cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | cut -d '.' -f 2 | grep '[KQRBN]' | cut -c 1-1 | sort | uniq -c | sort -nr + diff --git a/test/pash_tests/11.sh b/test/pash_tests/11.sh new file mode 100755 index 0000000..46954d8 --- /dev/null +++ b/test/pash_tests/11.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN4=$IN_PRE/4.txt +# 4.5: 4.4 + pawns +cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | cut -d '.' -f 2 | cut -c 1-1 | tr '[a-z]' 'P' | sort | uniq -c | sort -nr + diff --git a/test/pash_tests/12.sh b/test/pash_tests/12.sh new file mode 100755 index 0000000..8bbb75d --- /dev/null +++ b/test/pash_tests/12.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN4=$IN_PRE/4.txt +# 4.6: piece used the most by Belle +cat $IN4 | tr ' ' '\n' | grep '\.' | cut -d '.' 
-f 2 | cut -c 1-1 | tr '[a-z]' 'P' | sort -r | uniq | head -n 3 | tail -n 1 + diff --git a/test/pash_tests/13.sh b/test/pash_tests/13.sh new file mode 100755 index 0000000..6ba69f7 --- /dev/null +++ b/test/pash_tests/13.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN5=$IN_PRE/5.txt +# 5.1: extract hello world +cat $IN5 | grep 'print' | cut -d "\"" -f 2 | cut -c 1-12 + diff --git a/test/pash_tests/14.sh b/test/pash_tests/14.sh new file mode 100755 index 0000000..b7b54a8 --- /dev/null +++ b/test/pash_tests/14.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN6=$IN_PRE/6.txt +# 6.1: order the bodies by how easy it would be to land on them in Thompson's Space Travel game when playing at the highest simulation scale +cat $IN6 | awk "{print \$2, \$0}" | sort -nr | cut -d ' ' -f 2 + diff --git a/test/pash_tests/15.sh b/test/pash_tests/15.sh new file mode 100755 index 0000000..b23c044 --- /dev/null +++ b/test/pash_tests/15.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN7=$IN_PRE/7.txt +# 7.1: identify number of AT&T unix versions +cat $IN7 | cut -f 1 | grep 'AT&T' | wc -l + diff --git a/test/pash_tests/16.sh b/test/pash_tests/16.sh new file mode 100755 index 0000000..bbcebc2 --- /dev/null +++ b/test/pash_tests/16.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN7=$IN_PRE/7.txt +# 7.2: find most frequently occurring machine +cat $IN7 | cut -f 2 | sort -n | uniq -c | sort -nr | head -n 1 | tr -s ' ' '\n' | tail -n 1 + diff --git a/test/pash_tests/17.sh b/test/pash_tests/17.sh new file mode 100755 index 0000000..289baff --- /dev/null +++ b/test/pash_tests/17.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN7=$IN_PRE/7.txt +# 7.3: all the decades in which a unix version was released +cat $IN7 | 
cut -f 4 | sort -n | cut -c 3-3 | uniq | sed s/\$/'0s'/ + diff --git a/test/pash_tests/18.sh b/test/pash_tests/18.sh new file mode 100755 index 0000000..260ef13 --- /dev/null +++ b/test/pash_tests/18.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN8=$IN_PRE/8.txt +# 8.1: count unix birth-year +cat $IN8 | tr ' ' '\n' | grep 1969 | wc -l + diff --git a/test/pash_tests/19.sh b/test/pash_tests/19.sh new file mode 100755 index 0000000..f36dafe --- /dev/null +++ b/test/pash_tests/19.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN8=$IN_PRE/8.txt +# 8.2: find Bell Labs location where Dennis Ritchie had his office +cat $IN8 | grep 'Bell' | awk 'length <= 45' | cut -d ',' -f 2 | awk "{\$1=\$1};1" + diff --git a/test/pash_tests/1_1.sh b/test/pash_tests/1_1.sh new file mode 100755 index 0000000..a4eadbe --- /dev/null +++ b/test/pash_tests/1_1.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# tag: count_words + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/1_1/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort | uniq -c > ${OUT}/${input}.out +done + +echo 'done'; +rm -rf "${OUT}" diff --git a/test/pash_tests/2.sh b/test/pash_tests/2.sh new file mode 100755 index 0000000..2f95466 --- /dev/null +++ b/test/pash_tests/2.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN1=$IN_PRE/1.txt +# 1.1: extract names and sort +cat $IN1 | cut -d ' ' -f 2 | sort + diff --git a/test/pash_tests/2.unrtf.sh b/test/pash_tests/2.unrtf.sh new file mode 100755 index 0000000..ae19227 --- /dev/null +++ b/test/pash_tests/2.unrtf.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +#tag: rtf-to-txt +set -e +IN=${RTF:-$PASH_TOP/evaluation/benchmarks/aliases/input/rtf} 
+OUT=${OUT:-PASH_TOP/evaluation/benchmarks/aliases/input/out} +find $IN -name '*.rtf' | xargs -I {} unrtf {} --text > /dev/null diff --git a/test/pash_tests/20.sh b/test/pash_tests/20.sh new file mode 100755 index 0000000..50cf615 --- /dev/null +++ b/test/pash_tests/20.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN8=$IN_PRE/8.txt +# 8.3: find names of the four people most involved with unix +cat $IN8 | grep '(' | cut -d '(' -f 2 | cut -d ')' -f 1 | head -n 1 + diff --git a/test/pash_tests/21.sh b/test/pash_tests/21.sh new file mode 100755 index 0000000..c0fc2c5 --- /dev/null +++ b/test/pash_tests/21.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN8=$IN_PRE/8.txt +# 8.4: find longest words without hyphens +cat $IN8 | tr -c "[a-z][A-Z]" '\n' | sort | awk "length >= 16" + diff --git a/test/pash_tests/22.sh b/test/pash_tests/22.sh new file mode 100755 index 0000000..6a93fa9 --- /dev/null +++ b/test/pash_tests/22.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN8=$IN_PRE/8.txt +# # 8.5: Find second-most-freq 8-character word(s) without hyphens +# cat $IN8 > /dev/null + diff --git a/test/pash_tests/23.sh b/test/pash_tests/23.sh new file mode 100755 index 0000000..d23500d --- /dev/null +++ b/test/pash_tests/23.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN91=$IN_PRE/9.1.txt +# 9.1: extract the word PORT +cat $IN91 | tr ' ' '\n' | grep '[A-Z]' | tr '[a-z]' '\n' | grep '[A-Z]' | tr -d '\n' | cut -c 1-4 + diff --git a/test/pash_tests/24.sh b/test/pash_tests/24.sh new file mode 100755 index 0000000..94d8229 --- /dev/null +++ b/test/pash_tests/24.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN92=$IN_PRE/9.2.txt +# 9.2: extract the word BELL +cat $IN92 | cut -c 1-1 | tr -d '\n' + diff 
--git a/test/pash_tests/25.sh b/test/pash_tests/25.sh new file mode 100755 index 0000000..4da223c --- /dev/null +++ b/test/pash_tests/25.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN93=$IN_PRE/9.3.txt +# 9.3: animal that used to decorate the Unix room +cat $IN93 | cut -c 1-2 | tr -d '\n' + diff --git a/test/pash_tests/26.sh b/test/pash_tests/26.sh new file mode 100755 index 0000000..dd3aff0 --- /dev/null +++ b/test/pash_tests/26.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN94=$IN_PRE/9.4.txt +# 9.4: four corners with E centered, for an "X" configuration +cat $IN94 | tr ' ' '\n' | grep "\"" | sed 4d | cut -d "\"" -f 2 | tr -d '\n' + diff --git a/test/pash_tests/27.sh b/test/pash_tests/27.sh new file mode 100755 index 0000000..99a34c6 --- /dev/null +++ b/test/pash_tests/27.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN95=$IN_PRE/9.5.txt +# # 9.5: backwards running clock, in a backwards poem +# cat $IN95 > /dev/null + diff --git a/test/pash_tests/28.sh b/test/pash_tests/28.sh new file mode 100755 index 0000000..89798a7 --- /dev/null +++ b/test/pash_tests/28.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN96=$IN_PRE/9.6.txt +# 9.6: Follow the directions for grep +cat $IN96 | tr ' ' '\n' | grep '[A-Z]' | sed 1d | sed 3d | sed 3d | tr '[a-z]' '\n' | grep '[A-Z]' | sed 3d | tr -c '[A-Z]' '\n' | tr -d '\n' + diff --git a/test/pash_tests/29.sh b/test/pash_tests/29.sh new file mode 100755 index 0000000..2ecf13a --- /dev/null +++ b/test/pash_tests/29.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN97=$IN_PRE/9.7.txt +# 9.7: Four corners +cat $IN97 | sed 2d | sed 2d | tr -c '[A-Z]' '\n' | tr -d '\n' + diff --git a/test/pash_tests/2_1.sh b/test/pash_tests/2_1.sh new file mode 100755 
index 0000000..1c0f399 --- /dev/null +++ b/test/pash_tests/2_1.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# tag: merge_upper +# set -e + +# Merge upper and lower counts +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/2_1/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat $IN/$input | tr '[a-z]' '[A-Z]' | tr -sc '[A-Z]' '[\012*]' | sort | uniq -c > ${OUT}/${input}.out +done + +echo 'done'; +rm -rf "${OUT}" diff --git a/test/pash_tests/2_2.sh b/test/pash_tests/2_2.sh new file mode 100755 index 0000000..95cf055 --- /dev/null +++ b/test/pash_tests/2_2.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# tag: count_vowel_seq +# set -e + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/2_2/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat $IN/$input | tr 'a-z' '[A-Z]' | tr -sc 'AEIOU' '[\012*]'| sort | uniq -c > ${OUT}/${input}.out +done + +echo 'done'; +rm -rf "${OUT}" diff --git a/test/pash_tests/3.sh b/test/pash_tests/3.sh new file mode 100755 index 0000000..1c53bca --- /dev/null +++ b/test/pash_tests/3.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN1=$IN_PRE/1.txt +# 1.2: extract names and sort +cat $IN1 | head -n 2 | cut -d ' ' -f 2 + diff --git a/test/pash_tests/30.sh b/test/pash_tests/30.sh new file mode 100755 index 0000000..c6f6ccf --- /dev/null +++ b/test/pash_tests/30.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN98=$IN_PRE/9.8.txt +# 9.8: TELE-communications +cat $IN98 | tr -c '[a-z][A-Z]' '\n' | grep '[A-Z]' | sed 1d | sed 2d | sed 3d | sed 4d | tr -c '[A-Z]' '\n' | tr -d '\n' + diff --git a/test/pash_tests/31.sh b/test/pash_tests/31.sh new file mode 100755 index 0000000..a564879 --- /dev/null +++ b/test/pash_tests/31.sh @@ -0,0 +1,6 @@ 
+#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN99=$IN_PRE/9.9.txt +# 9.9: +cat $IN99 | tr -c '[a-z][A-Z]' '\n' | grep '[A-Z]' | sed 1d | sed 1d | sed 2d | sed 3d | sed 5d | tr -c '[A-Z]' '\n' | tr -d '\n' + diff --git a/test/pash_tests/32.sh b/test/pash_tests/32.sh new file mode 100755 index 0000000..dd041df --- /dev/null +++ b/test/pash_tests/32.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN10=$IN_PRE/10.txt +# 10.1: count Turing award recipients while working at Bell Labs +cat $IN10 | sed 1d | grep 'Bell' | cut -f 2 | wc -l + diff --git a/test/pash_tests/33.sh b/test/pash_tests/33.sh new file mode 100755 index 0000000..07f0fe9 --- /dev/null +++ b/test/pash_tests/33.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN10=$IN_PRE/10.txt +# 10.2: list Turing award recipients while working at Bell Labs +cat $IN10 | sed 1d | grep 'Bell' | cut -f 2 + diff --git a/test/pash_tests/34.sh b/test/pash_tests/34.sh new file mode 100755 index 0000000..55067fc --- /dev/null +++ b/test/pash_tests/34.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN10=$IN_PRE/10.txt +# 10.3: extract Ritchie's username +cat $IN10 | grep 'Bell' | cut -f 2 | head -n 1 | fmt -w1 | cut -c 1-1 | tr -d '\n' | tr '[A-Z]' '[a-z]' + diff --git a/test/pash_tests/35.sh b/test/pash_tests/35.sh new file mode 100755 index 0000000..421267a --- /dev/null +++ b/test/pash_tests/35.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN11=$IN_PRE/11.txt +# 11.1: year Ritchie and Thompson receive the Hamming medal +cat $IN11 | grep 'UNIX' | cut -f 1 + diff --git a/test/pash_tests/36.sh b/test/pash_tests/36.sh new file mode 100755 index 0000000..cdc3fa8 --- /dev/null +++ b/test/pash_tests/36.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export 
IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN11=$IN_PRE/11.txt +# 11.2: most repeated first name in the list? +cat $IN11 | cut -f 2 | cut -d ' ' -f 1 | sort | uniq -c | sort -nr | head -n 1 | fmt -w1 | sed 1d + diff --git a/test/pash_tests/3_1.sh b/test/pash_tests/3_1.sh new file mode 100755 index 0000000..9d32b82 --- /dev/null +++ b/test/pash_tests/3_1.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# tag: sort +# set -e + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/3_1/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort | uniq -c | sort -nr > ${OUT}/${input}.out +done + +echo 'done'; +rm -rf "${OUT}" diff --git a/test/pash_tests/3_2.sh b/test/pash_tests/3_2.sh new file mode 100755 index 0000000..0ce3011 --- /dev/null +++ b/test/pash_tests/3_2.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# tag: sort_words_by_folding +# set -e + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/3_2/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort | uniq -c | sort -f > ${OUT}/${input} +done + +echo 'done'; +rm -rf ${OUT} diff --git a/test/pash_tests/3_3.sh b/test/pash_tests/3_3.sh new file mode 100755 index 0000000..f24f2d9 --- /dev/null +++ b/test/pash_tests/3_3.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# tag: sort_words_by_rhyming.sh +# set -e + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/3_3/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort | uniq -c | rev | sort | rev > ${OUT}/${input}.out +done + +echo 'done'; +rm -rf "${OUT}" diff --git a/test/pash_tests/4.gitkernel.sh 
b/test/pash_tests/4.gitkernel.sh new file mode 100755 index 0000000..89cb678 --- /dev/null +++ b/test/pash_tests/4.gitkernel.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# First command is almost always a generator +set -e +IN=${GIT:-$PASH_TOP/evaluation/benchmarks/aliases/input/linux} + +#FIXME define a complex expression +COMPLEX="" +# linux git +cd ${IN}/linux +git ls-tree --name-only -z -r HEAD | grep -z -Z -E '\.(cc|h|cpp|hpp|c|txt|java)$' | xargs -0 -n1 git blame --line-porcelain | grep ${COMPLEX} + diff --git a/test/pash_tests/4.sh b/test/pash_tests/4.sh new file mode 100755 index 0000000..da460c0 --- /dev/null +++ b/test/pash_tests/4.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN1=$IN_PRE/1.txt +# 1.3: sort top first names +cat $IN1 | cut -d ' ' -f 1 | sort | uniq -c | sort -r + diff --git a/test/pash_tests/4_3.sh b/test/pash_tests/4_3.sh new file mode 100755 index 0000000..0ea61f2 --- /dev/null +++ b/test/pash_tests/4_3.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# tag: bigrams.sh +# set -e + +# Bigrams (contrary to our version, this uses intermediary files) +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/4_3/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' > ${OUT}/${input}.input.words + tail +2 ${OUT}/${input}.input.words > ${OUT}/${input}.input.nextwords + paste ${OUT}/${input}.input.words ${OUT}/${input}.input.nextwords | sort | uniq -c > ${OUT}/${input}.input.bigrams +done + +echo 'done'; +rm -rf ${OUT} diff --git a/test/pash_tests/4_3b.sh b/test/pash_tests/4_3b.sh new file mode 100755 index 0000000..36fcbdf --- /dev/null +++ b/test/pash_tests/4_3b.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#tag: count_trigrams.sh +# set -e + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/4_3b/} 
+ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +run_tests() { + cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' > ${OUT}/${input}.words + tail +2 ${OUT}/${input}.words > ${OUT}/${input}.nextwords + tail +2 ${OUT}/${input}.words > ${OUT}/${input}.nextwords2 + paste ${OUT}/${input}.words ${OUT}/${input}.nextwords ${OUT}/${input}.nextwords2 | + sort | uniq -c +} +export -f run_tests +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + run_tests $input > ${OUT}/${input}.trigrams +done + +echo 'done'; +rm -rf ${OUT} diff --git a/test/pash_tests/5.apachelog.sh b/test/pash_tests/5.apachelog.sh new file mode 100755 index 0000000..64247e6 --- /dev/null +++ b/test/pash_tests/5.apachelog.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# fetch hit count for each ip ? +set -e +IN=${IN:-$PASH_TOP/evaluation/benchmarks/aliases/input/} + +# 405 original +# cat ${IN}/apache.log | grep "\->" | grep -o "from [^ ]*" | cut -d ' ' -f2 | sort | uniq -c | sort -nr | less +# FIXME need apache error logs .. +cat ${IN}apache.log | grep -o "from [^ ]*" | cut -d ' ' -f2 | sort | uniq -c | sort -nr diff --git a/test/pash_tests/5.sh b/test/pash_tests/5.sh new file mode 100755 index 0000000..015f384 --- /dev/null +++ b/test/pash_tests/5.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN2=$IN_PRE/2.txt +# 2.1: get all Unix utilities +cat $IN2 | cut -d ' ' -f 4 | tr -d ',' + diff --git a/test/pash_tests/6.msg.sh b/test/pash_tests/6.msg.sh new file mode 100755 index 0000000..36b15a5 --- /dev/null +++ b/test/pash_tests/6.msg.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# First command is almost always a generator +set -e +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/aliases/input/out} + + +# grep -iv ': starting\|kernel: .*: Power Button\|watching system buttons\|Stopped Cleaning Up\|Started Crash recovery kernel' /var/log/messages /var/log/syslog /var/log/* 2> /dev/null | grep -iw 'recover[a-z]*\|power[a-z]*\|shut[a-z ]*down\|rsyslogd\|ups' > /tmp/__shutdown.log && echo 
'File written to /tmp__shutdown.log' +# doesn't do much :/ +grep -iv ': starting\|kernel: .*: Power Button\|watching system buttons\|Stopped Cleaning Up\|Started Crash recovery kernel' /var/log/messages /var/log/syslog /var/log/* 2> /dev/null | + grep --regex 'recover[a-z]*\|power[a-z]*\|shut[a-z ]*down\|rsyslogd\|ups' > ${OUT}/shutdown.log diff --git a/test/pash_tests/6.sh b/test/pash_tests/6.sh new file mode 100755 index 0000000..9e2ba9d --- /dev/null +++ b/test/pash_tests/6.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN3=$IN_PRE/3.txt +# 3.1: get lowercase first letter of last names (awk) +cat $IN3 | cut -d ' ' -f 2 | cut -c 1-1 | tr -d '\n' | tr '[A-Z]' '[a-z]' + diff --git a/test/pash_tests/6_1.sh b/test/pash_tests/6_1.sh new file mode 100755 index 0000000..1ea70e4 --- /dev/null +++ b/test/pash_tests/6_1.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# tag: trigram_rec +# set -e + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_1/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +trigrams() { + input=$1 + tr -sc '[A-Z][a-z]' '[\012*]' > ${OUT}/${input}.words + tail +2 ${OUT}/${input}.words > ${OUT}/${input}.nextwords + tail +3 ${OUT}/${input}.words > ${OUT}/${input}.nextwords2 + paste ${OUT}/${input}.words ${OUT}/${input}.nextwords ${OUT}/${input}.nextwords2 | sort | uniq -c + rm -f ${OUT}/${input}.words ${OUT}/${input}.nextwords ${OUT}/${input}.nextwords2 +} +export -f trigrams + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat $IN"/"$input | grep 'the land of' | trigrams $IN/${input} | sort -nr | sed 5q > ${OUT}/${input}.out0 + cat $IN"/"$input | grep 'And he said' | trigrams $IN/${input} | sort -nr | sed 5q > ${OUT}/${input}.out1 +done + +echo 'done'; +rm -rf "${OUT}" diff --git a/test/pash_tests/6_1_1.sh b/test/pash_tests/6_1_1.sh new file mode 100755 index 0000000..d62bec1 --- /dev/null +++ b/test/pash_tests/6_1_1.sh @@ -0,0 +1,16 @@ 
+#!/bin/bash +# tag: uppercase_by_token +# set -e + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_1_1/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | grep -c '^[A-Z]' > ${OUT}/${input}.out +done + +echo 'done'; +rm -rf ${OUT} diff --git a/test/pash_tests/6_1_2.sh b/test/pash_tests/6_1_2.sh new file mode 100755 index 0000000..2e60990 --- /dev/null +++ b/test/pash_tests/6_1_2.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# tag: uppercase_by_type +# set -e + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_1_2/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort -u | grep -c '^[A-Z]' > ${OUT}/${input}.out +done + +echo 'done'; +rm -rf ${OUT} diff --git a/test/pash_tests/6_2.sh b/test/pash_tests/6_2.sh new file mode 100755 index 0000000..09f145d --- /dev/null +++ b/test/pash_tests/6_2.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# tag: four-letter words +# set -e + +# the original script has both versions +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_2/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | grep -c '^....$' > ${OUT}/${input}.out0 + cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort -u | grep -c '^....$' > ${OUT}/${input}.out1 +done + +echo 'done'; +rm -rf "${OUT}" diff --git a/test/pash_tests/6_3.sh b/test/pash_tests/6_3.sh new file mode 100755 index 0000000..4430419 --- /dev/null +++ b/test/pash_tests/6_3.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# tag: words_no_vowels +# set -e + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} 
+OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_3/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | grep -vi '[aeiou]' | sort | uniq -c > ${OUT}/${input}.out +done + +echo 'done'; +rm -rf "${OUT}" diff --git a/test/pash_tests/6_4.sh b/test/pash_tests/6_4.sh new file mode 100755 index 0000000..bfd38a3 --- /dev/null +++ b/test/pash_tests/6_4.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# tag: 1-syllable words +# set -e + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_4/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat ${IN}/${input} | tr -sc '[A-Z][a-z]' '[\012*]' | grep -i '^[^aeiou]*[aeiou][^aeiou]*$' | sort | uniq -c | sed 5q > ${OUT}/${input}.out +done + +echo 'done'; +rm -rf "${OUT}" diff --git a/test/pash_tests/6_5.sh b/test/pash_tests/6_5.sh new file mode 100755 index 0000000..d4f8a6f --- /dev/null +++ b/test/pash_tests/6_5.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# tag: 2-syllable words +# set -e + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_5/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat $IN/$input | tr -sc '[A-Z][a-z]' ' [\012*]' | grep -i '^[^aeiou]*[aeiou][^aeiou]*[aeiou][^aeiou]$' | sort | uniq -c | sed 5q > ${OUT}${input}.out +done + +echo 'done'; +rm -rf "${OUT}" diff --git a/test/pash_tests/6_7.sh b/test/pash_tests/6_7.sh new file mode 100755 index 0000000..fa93b9a --- /dev/null +++ b/test/pash_tests/6_7.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# tag: verse_2om_3om_2instances +# set -e +# verses with 2 or more, 3 or more, exactly 2 instances of light. 
+ +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/6_7/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat $IN/$input | grep -c 'light.\*light' > ${OUT}/${input}.out0 + cat $IN/$input | grep -c 'light.\*light.\*light' > ${OUT}/${input}.out1 + cat $IN/$input | grep 'light.\*light' | grep -vc 'light.\*light.\*light' > ${OUT}/${input}.out2 +done + +echo 'done'; +rm -rf ${OUT} diff --git a/test/pash_tests/7.nginx.sh b/test/pash_tests/7.nginx.sh new file mode 100755 index 0000000..7711fd6 --- /dev/null +++ b/test/pash_tests/7.nginx.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# fetch hit count for each ip ? +set -e +IN=${IN:-${PASH_TOP}/evaluation/benchmarks/aliases/input} +# original command tail -10000 /var/log/nginx/access.log | cut -d "" "" -f1 | sort | uniq -c | sort -n | tail -n 30 | sort -nrk 1 | awk +cat ${IN}/access.log | cut -d ' ' -f1 | sort | uniq -c | sort -n | tail -n 30 | sort -nrk 1 diff --git a/test/pash_tests/7.sh b/test/pash_tests/7.sh new file mode 100755 index 0000000..7ad1b97 --- /dev/null +++ b/test/pash_tests/7.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN4=$IN_PRE/4.txt +# 4.1: find number of rounds +cat $IN4 | tr ' ' '\n' | grep '\.' 
| wc -l + diff --git a/test/pash_tests/7_1.sh b/test/pash_tests/7_1.sh new file mode 100755 index 0000000..147eac6 --- /dev/null +++ b/test/pash_tests/7_1.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# tag: count_morphs +# set -e + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/7_1/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat $IN/$input | sed 's/ly$/-ly/g' | sed 's/ .*//g' | sort | uniq -c > ${OUT}/${input}.out +done + +echo 'done'; +rm ${OUT} diff --git a/test/pash_tests/7_2.sh b/test/pash_tests/7_2.sh new file mode 100755 index 0000000..a0baff3 --- /dev/null +++ b/test/pash_tests/7_2.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# set -e +# tag: count_consonant_sequences + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/7_2/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat $IN/$input | tr '[a-z]' '[A-Z]' | tr -sc 'BCDFGHJKLMNPQRSTVWXYZ' '[\012*]' | sort | uniq -c > ${OUT}/${input}.out +done + +echo 'done'; +rm -rf ${OUT} diff --git a/test/pash_tests/8.2_1.sh b/test/pash_tests/8.2_1.sh new file mode 100755 index 0000000..b9519c7 --- /dev/null +++ b/test/pash_tests/8.2_1.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# tag: vowel_sequences_gr_1K.sh +# set -e + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/8.2_1/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | tr -sc 'AEIOUaeiou' '[\012*]' | sort | uniq -c | awk "\$1 >= 1000" > ${OUT}/${input}.out +done + +echo 'done'; +rm -rf "${OUT}" diff --git a/test/pash_tests/8.2_2.sh b/test/pash_tests/8.2_2.sh new file mode 100755 index 0000000..169a0fb --- /dev/null +++ b/test/pash_tests/8.2_2.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# tag: bigrams_appear_twice.sh +# 
set -e + +# Calculate the bigrams (based on 4_3.sh script) +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/8.2_2/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +run_tests() { + input=$1 + cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' > ${OUT}/${input}.input.words + tail +2 ${OUT}/${input}.input.words > ${OUT}/${input}.input.nextwords + paste ${OUT}/${input}.input.words ${OUT}/${input}.input.nextwords | sort | uniq -c > ${OUT}/${input}.input.bigrams + awk "\$1 == 2 {print \$2, \$3}" ${OUT}/${input}.input.bigrams +} + +export -f run_tests +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + run_tests $input > ${OUT}/${input}.out +done + +echo 'done'; +rm -rf "${OUT}" diff --git a/test/pash_tests/8.3_2.sh b/test/pash_tests/8.3_2.sh new file mode 100755 index 0000000..3450bd7 --- /dev/null +++ b/test/pash_tests/8.3_2.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# tag: find_anagrams.sh +# set -e + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/8.3_2/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +run_tests() { + input=$1 + cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort -u > ${OUT}/${input}.types + rev < ${OUT}/${input}.types > ${OUT}/${input}.types.rev + sort ${OUT}/${input}.types ${OUT}/${input}.types.rev | uniq -c | awk "\$1 >= 2 {print \$2}" +} + +export -f run_tests +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + run_tests $input > ${OUT}/${input}.out +done + +echo 'done'; +rm -rf "${OUT}" diff --git a/test/pash_tests/8.3_3.sh b/test/pash_tests/8.3_3.sh new file mode 100755 index 0000000..f774d83 --- /dev/null +++ b/test/pash_tests/8.3_3.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# tag: compare_exodus_genesis.sh +# set -e + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +INPUT2=${INPUT2:-$PASH_TOP/evaluation/benchmarks/nlp/input/exodus} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/8.3_3/} +ENTRIES=${ENTRIES:-1060} +mkdir -p 
$OUT + +run_tests() { + cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort -u > ${OUT}/${input}1.types + tr -sc '[A-Z][a-z]' '[\012*]' < ${INPUT2} | sort -u > ${OUT}/${input}2.types + sort $OUT/${input}1.types ${OUT}/${input}2.types ${OUT}/${input}2.types | uniq -c | head + +} +export -f run_tests +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + run_tests $input > ${OUT}/${input}.out +done + +echo 'done'; +rm -rf "${OUT}" diff --git a/test/pash_tests/8.sh b/test/pash_tests/8.sh new file mode 100755 index 0000000..9dde970 --- /dev/null +++ b/test/pash_tests/8.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN4=$IN_PRE/4.txt +# 4.2: find pieces captured by Belle +cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | wc -l + diff --git a/test/pash_tests/8.varlog.sh b/test/pash_tests/8.varlog.sh new file mode 100755 index 0000000..4b808f3 --- /dev/null +++ b/test/pash_tests/8.varlog.sh @@ -0,0 +1,5 @@ +#!/bin/bash +set -e +# 1308; or line above, w/ -vE +# Doesn't do much +find /var/log -type f -exec file {} \; | grep 'text' | cut -d' ' -f1 | sed -e's/:$//g' | grep -v '[0-9]$' | xargs tail diff --git a/test/pash_tests/8_1.sh b/test/pash_tests/8_1.sh new file mode 100755 index 0000000..518e5ed --- /dev/null +++ b/test/pash_tests/8_1.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# tag: sort_words_by_num_of_syllables +# set -e + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/nlp/input/pg/} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/nlp/output/8.1/} +ENTRIES=${ENTRIES:-1060} +mkdir -p "$OUT" + +run_tests() { + cat $IN/$input | tr -sc '[A-Z][a-z]' '[\012*]' | sort -u > ${OUT}/${input}.words + tr -sc '[AEIOUaeiou\012]' ' ' < ${OUT}/${input}.words | awk '{print NF}' > ${OUT}/${input}.syl + paste ${OUT}/${input}.syl ${OUT}/${input}.words | sort -nr | sed 5q +} +export -f run_tests +for input in $(ls ${IN} | head -n ${ENTRIES}) +do + run_tests $input > ${OUT}/${input}.out +done + +echo 'done'; +rm -rf "${OUT}" diff --git 
a/test/pash_tests/9.sh b/test/pash_tests/9.sh new file mode 100755 index 0000000..d6d1f07 --- /dev/null +++ b/test/pash_tests/9.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export IN_PRE=${IN_PRE:-$PASH_TOP/evaluation/benchmarks/unix50/input} +IN4=$IN_PRE/4.txt +# 4.3: find pieces captured by Belle with a pawn +cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | cut -d '.' -f 2 | grep -v '[KQRBN]' | wc -l + diff --git a/test/pash_tests/add.sh b/test/pash_tests/add.sh new file mode 100755 index 0000000..d0ecaa4 --- /dev/null +++ b/test/pash_tests/add.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +paste -d+ "$@" | bc diff --git a/test/pash_tests/alt_bigrams.sh b/test/pash_tests/alt_bigrams.sh new file mode 100644 index 0000000..121abe8 --- /dev/null +++ b/test/pash_tests/alt_bigrams.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Find all 2-grams in a piece of text +# FIXME: does not calculate frequencies + +cat $IN | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + alt_bigrams_aux + diff --git a/test/pash_tests/alt_bigrams_env_test.sh b/test/pash_tests/alt_bigrams_env_test.sh new file mode 100644 index 0000000..805e069 --- /dev/null +++ b/test/pash_tests/alt_bigrams_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt diff --git a/test/pash_tests/alt_bigrams_funs.sh b/test/pash_tests/alt_bigrams_funs.sh new file mode 100644 index 0000000..5e9bbed --- /dev/null +++ b/test/pash_tests/alt_bigrams_funs.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +alt_bigrams_aux() +{ + s2=$(mktemp -u) + ( mkfifo $s2 > /dev/null ) ; + + tee $s2 | + tail -n +2 | + paste $s2 - | + sed '$d' | + sort | + uniq + rm $s2 +} + +alt_bigram_aux_reduce() +{ + IN1=$1 + IN2=$2 + + sort -m $IN1 $IN2 | + uniq +} + +export -f alt_bigrams_aux +export -f alt_bigram_aux_reduce diff --git a/test/pash_tests/ann-agg-2.sh b/test/pash_tests/ann-agg-2.sh new file mode 100755 index 0000000..94a604a --- /dev/null +++ b/test/pash_tests/ann-agg-2.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +## Test contains command aliases with annotations that point to 
custom aggregators + +FILE="$PASH_TOP/evaluation/tests/input/ab.txt" + +test_uniq_1() { + uniq +} + +test_uniq_2() { + uniq -c +} + +cat $FILE | sort | test_uniq_1 | tr 'a' 'b' | test_uniq_2 + diff --git a/test/pash_tests/ann-agg.sh b/test/pash_tests/ann-agg.sh new file mode 100644 index 0000000..b8030ba --- /dev/null +++ b/test/pash_tests/ann-agg.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +## Test contains command aliases with annotations that point to custom aggregators + +FILE="$PASH_TOP/evaluation/tests/input/1M.txt" + +test_one() { + cat +} + +test_two() { + cat +} + +cat $FILE | test_one | test_two diff --git a/test/pash_tests/archive.sh b/test/pash_tests/archive.sh new file mode 100644 index 0000000..dbfe627 --- /dev/null +++ b/test/pash_tests/archive.sh @@ -0,0 +1,6 @@ +timestamp=`date +"%Y%m%d%H%M%S"` + +tar cf cdash.tar \ + *.c *.h *.sh *.py Makefile + +cp -p cdash.tar "cdash-${timestamp}.tar" diff --git a/test/pash_tests/args_with_spaces.sh b/test/pash_tests/args_with_spaces.sh new file mode 100644 index 0000000..283fd07 --- /dev/null +++ b/test/pash_tests/args_with_spaces.sh @@ -0,0 +1,2 @@ +echo $1 +echo $2 diff --git a/test/pash_tests/autogen.sh b/test/pash_tests/autogen.sh new file mode 100755 index 0000000..bbc5667 --- /dev/null +++ b/test/pash_tests/autogen.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +libtoolize \ +&& aclocal \ +&& autoheader \ +&& automake --add-missing \ +&& autoconf diff --git a/test/pash_tests/bam_to_sam.sh b/test/pash_tests/bam_to_sam.sh new file mode 100644 index 0000000..fec09f6 --- /dev/null +++ b/test/pash_tests/bam_to_sam.sh @@ -0,0 +1,4 @@ +INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/bam} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/bio/output} +cd ${INPUT} +find . 
-name "*.bam" | xargs -I {} samtools view -h -o ${OUTPUT} {} diff --git a/test/pash_tests/bell_grep.sh b/test/pash_tests/bell_grep.sh new file mode 100755 index 0000000..031370a --- /dev/null +++ b/test/pash_tests/bell_grep.sh @@ -0,0 +1,54 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out +file9=9.out +rm -f *.out + +testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" +batchSize=10000000 +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 + +# mkfifo $file7 +# mkfifo $file8 +# mkfifo $file9 + + +$PASH_TOP/runtime/r_split $testFile $batchSize $file1 $file2 & + + +$PASH_TOP/runtime/dgsh-tee -I -i $file1 -o $file5 -b 10M & +$PASH_TOP/runtime/dgsh-tee -I -i $file2 -o $file6 -b 10M & + +$PASH_TOP/runtime/r_wrap grep 'Bell' < $file5 > $file3 & +$PASH_TOP/runtime/r_wrap grep 'Bell' < $file6 > $file4 & +# ../r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file7 > $file8 & + +$PASH_TOP/runtime/r_merge $file3 $file4 + +# cat $testFile | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > $file6 +# if cmp -s "$file6" "$file5"; then +# printf 'The file "%s" is the same as "%s"\n' "$file6" "$file5" +# else +# printf 'The file "%s" is different from "%s"\n' "$file6" "$file5" +# fi + +rm -rf *out diff --git a/test/pash_tests/bi-gram.aux.sh b/test/pash_tests/bi-gram.aux.sh new file mode 100755 index 0000000..5f66058 --- /dev/null +++ b/test/pash_tests/bi-gram.aux.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# Auxiliary functions for bi-grams + +bigrams_aux() +{ + s2=$(mktemp -u) + mkfifo $s2 + tee $s2 | + tail -n +2 | + paste $s2 - | + sed '$d' + rm $s2 +} + +bigram_aux_map() +{ + IN=$1 + OUT=$2 + AUX_HEAD=$3 + AUX_TAIL=$4 + + s2=$(mktemp -u) + aux1=$(mktemp -u) + aux2=$(mktemp -u) + aux3=$(mktemp -u) + temp=$(mktemp -u) + + mkfifo $s2 + mkfifo $aux1 + mkfifo $aux2 + mkfifo $aux3 + + ## New 
way of doing it using an intermediate file. This is slow + ## but doesn't deadlock + cat $IN > $temp + + sed '$d' $temp > $aux3 & + cat $temp | head -n 1 > $AUX_HEAD & + cat $temp | tail -n 1 > $AUX_TAIL & + cat $temp | tail -n +2 | paste $aux3 - > $OUT & + + # ## Old way of doing it + # cat $IN | + # tee $s2 $aux1 $aux2 | + # tail -n +2 | + # paste $s2 - > $OUT & + + # ## The goal of this is to write the first line of $IN in the $AUX_HEAD + # ## stream and the last line of $IN in $AUX_TAIL + + # cat $aux1 | ( head -n 1 > $AUX_HEAD; $PASH_TOP/evaluation/tools/drain_stream.sh ) & + # # while IFS= read -r line + # # do + # # old_line=$line + # # done < $aux2 + # # echo "$old_line" > $AUX_TAIL + # ( tail -n 1 $aux2 > $AUX_TAIL; $PASH_TOP/evaluation/tools/drain_stream.sh ) & + + wait + + rm $temp + rm $s2 + rm $aux1 + rm $aux2 + rm $aux3 +} + +bigram_aux_reduce() +{ + IN1=$1 + AUX_HEAD1=$2 + AUX_TAIL1=$3 + IN2=$4 + AUX_HEAD2=$5 + AUX_TAIL2=$6 + OUT=$7 + AUX_HEAD_OUT=$8 + AUX_TAIL_OUT=$9 + + temp=$(mktemp -u) + + mkfifo $temp + + cat $AUX_HEAD1 > $AUX_HEAD_OUT & + cat $AUX_TAIL2 > $AUX_TAIL_OUT & + paste $AUX_TAIL1 $AUX_HEAD2 > $temp & + cat $IN1 $temp $IN2 > $OUT & + + wait + + rm $temp +} + +export -f bigrams_aux +export -f bigram_aux_map +export -f bigram_aux_reduce diff --git a/test/pash_tests/bi-grams.sh b/test/pash_tests/bi-grams.sh new file mode 100755 index 0000000..c98f583 --- /dev/null +++ b/test/pash_tests/bi-grams.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# Find all 2-grams in a piece of text + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} + +. 
bi-gram.aux.sh + +cat $IN | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + bigrams_aux | + sort | + uniq + + diff --git a/test/pash_tests/bigrams.sh b/test/pash_tests/bigrams.sh new file mode 100755 index 0000000..a3990e1 --- /dev/null +++ b/test/pash_tests/bigrams.sh @@ -0,0 +1,85 @@ +#!/bin/bash + +bigrams_aux() +{ + s2=$(mktemp -u) + mkfifo $s2 + tee $s2 | + tail -n +2 | + paste $s2 - | + sed "\$d" + rm $s2 +} + +bigram_aux_map() +{ + IN=$1 + OUT=$2 + AUX_HEAD=$3 + AUX_TAIL=$4 + + s2=$(mktemp -u) + aux1=$(mktemp -u) + aux2=$(mktemp -u) + aux3=$(mktemp -u) + temp=$(mktemp -u) + + mkfifo $s2 + mkfifo $aux1 + mkfifo $aux2 + mkfifo $aux3 + + cat $IN > $temp + + sed "\$d" $temp > $aux3 & + cat $temp | head -n 1 > $AUX_HEAD & + cat $temp | tail -n 1 > $AUX_TAIL & + cat $temp | tail -n +2 | paste $aux3 - > $OUT & + + wait + + rm $temp + rm $s2 + rm $aux1 + rm $aux2 + rm $aux3 +} + +bigram_aux_reduce() +{ + IN1=$1 + AUX_HEAD1=$2 + AUX_TAIL1=$3 + IN2=$4 + AUX_HEAD2=$5 + AUX_TAIL2=$6 + OUT=$7 + AUX_HEAD_OUT=$8 + AUX_TAIL_OUT=$9 + + temp=$(mktemp -u) + + mkfifo $temp + + cat $AUX_HEAD1 > $AUX_HEAD_OUT & + cat $AUX_TAIL2 > $AUX_TAIL_OUT & + paste $AUX_TAIL1 $AUX_HEAD2 > $temp & + cat $IN1 $temp $IN2 > $OUT & + + wait + + rm $temp +} + +export -f bigrams_aux +export -f bigram_aux_map +export -f bigram_aux_reduce + +cat $IN | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + bigrams_aux | + sort | + uniq + + diff --git a/test/pash_tests/bigrams_aux_map.sh b/test/pash_tests/bigrams_aux_map.sh new file mode 100755 index 0000000..bead44f --- /dev/null +++ b/test/pash_tests/bigrams_aux_map.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash + +## By making tee | tail | paste its own function, we can implement it +## as a pure command separated into a generalized map and a +## reduce. Following the ParSynt work, a generalized map also keeps +## some auxiliary variables (in our case streams) to enable parallelization. 
+ +## +## Map +## + +bigram_aux_map() +{ + IN=$1 + OUT=$2 + AUX_HEAD=$3 + AUX_TAIL=$4 + + s2=$(mktemp -u) + aux1=$(mktemp -u) + aux2=$(mktemp -u) + + mkfifo "$s2" + mkfifo "$aux1" + mkfifo "$aux2" + cat "$IN" | + tee "$s2" "$aux1" "$aux2" | + tail +2 | + paste "$s2" - > "$OUT" & + + ## The goal of this is to write the first line of $IN in the $AUX_HEAD + ## stream and the last line of $IN in $AUX_TAIL + + ## TODO: I am not sure if using head/tail like this works or breaks + ## the pipes + cat "$aux1" | ( head -n 1 > "$AUX_HEAD"; dd of=/dev/null > /dev/null 2>&1 ) & + tail -n 1 "$aux2" > "$AUX_TAIL" & + + wait + + rm "$s2" + rm "$aux1" + rm "$aux2" +} + +## +## Reduce: +## + +bigram_aux_reduce() +{ + IN1=$1 + AUX_HEAD1=$2 + AUX_TAIL1=$3 + IN2=$4 + AUX_HEAD2=$5 + AUX_TAIL2=$6 + OUT=$7 + AUX_HEAD_OUT=$8 + AUX_TAIL_OUT=$9 + + temp=$(mktemp -u) + + mkfifo "$temp" + + cat "$AUX_HEAD1" > "$AUX_HEAD_OUT" & + cat "$AUX_TAIL2" > "$AUX_TAIL_OUT" & + paste "$AUX_TAIL1" "$AUX_HEAD2" > "$temp" & + cat "$IN1" "$temp" "$IN2" > "$OUT" & + + wait + + rm "$temp" +} + +##VTODO: Deplete the aux outputs of the last reduce diff --git a/test/pash_tests/bigrams_env_test.sh b/test/pash_tests/bigrams_env_test.sh new file mode 100755 index 0000000..805e069 --- /dev/null +++ b/test/pash_tests/bigrams_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt diff --git a/test/pash_tests/bio4.sh b/test/pash_tests/bio4.sh new file mode 100755 index 0000000..9b3413b --- /dev/null +++ b/test/pash_tests/bio4.sh @@ -0,0 +1,30 @@ +# create bam files with regions +################### 1KG SAMPLES +IN=${INPUT:-$PASH_TOP/evaluation/benchmarks/bio} +IN_NAME=${IN_N:-input_all.txt} +OUT=${OUTPUT:-$PASH_TOP/evaluation/benchmarks/bio/output} +cat ${IN}/${IN_NAME}|while read s_line; + do + sample=$(echo $s_line |cut -d " " -f 2); + pop=$(echo $s_line |cut -f 1 -d " "); + link=$(echo $s_line |cut -f 3 -d " "); + ### correcting labeling of chromosomes so that all are 1,2,3.. 
instead of chr1,chr2 or chromosome1 etc + echo 'Processing Sample '${IN}/input/$sample' '; + # uniform the chromosomes in the file due to inconsistencies + samtools view -H "${IN}/input/$sample".bam | sed -e 's/SN:\([0-9XY]\)/SN:chr\1/' -e 's/SN:MT/SN:chrM/' \ + | samtools reheader - "${IN}/input/$sample".bam > "${OUT}/$sample"_corrected.bam ; + # create bai file + samtools index -b "${OUT}/$sample"_corrected.bam ; + ### Isolating each relevant chromosome based on Gen_locs + cut -f 2 ./Gene_locs.txt |sort |uniq |while read chr; + do + echo 'Isolating Chromosome '$chr' from sample '${OUT}/$sample', '; + samtools view -b "${OUT}/$sample"_corrected.bam chr"$chr" > "${OUT}/$pop"_"$sample"_"$chr".bam ; + echo 'Indexing Sample '$pop'_'${OUT}/$sample' '; + samtools index -b "${OUT}/$pop"_"$sample"_"$chr".bam; + #sleep 2 + done; + #rm "${OUT}/$sample"_corrected.bam; + #rm "${OUT}/$sample"_corrected.bam.bai; + #rm "${OUT}/$sample".bam +done; diff --git a/test/pash_tests/braces_amp.sh b/test/pash_tests/braces_amp.sh new file mode 100644 index 0000000..fc82664 --- /dev/null +++ b/test/pash_tests/braces_amp.sh @@ -0,0 +1,5 @@ +for x in foo; do a & b & c & d & done +echo a & echo b +for y in foo; do a & b & done +while false; do a & b & done +until true; do forever & ever & ever & done diff --git a/test/pash_tests/buggy_non_newline_input.sh b/test/pash_tests/buggy_non_newline_input.sh new file mode 100755 index 0000000..a97b6bc --- /dev/null +++ b/test/pash_tests/buggy_non_newline_input.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +## No newline before EOF bug +echo -n "popo" > /tmp/in +IN=/tmp/in + +cat $IN $IN | grep "popopopo" > /tmp/seq.out + +rm -f s1 s2 +mkfifo s1 s2 + +cat $IN | grep "popopopo" > s1 & +cat $IN | grep "popopopo" > s2 & +cat s1 s2 > /tmp/buggy.out + +rm -f s1 s2 + +diff /tmp/seq.out /tmp/buggy.out \ No newline at end of file diff --git a/test/pash_tests/call_distrib_planner_example.sh b/test/pash_tests/call_distrib_planner_example.sh new file mode 100644 index 
0000000..694bc60 --- /dev/null +++ b/test/pash_tests/call_distrib_planner_example.sh @@ -0,0 +1 @@ +python3 distr_plan.py "/tmp/dish_temp_ir_file0" diff --git a/test/pash_tests/cat-redir-fail.sh b/test/pash_tests/cat-redir-fail.sh new file mode 100644 index 0000000..fc6116b --- /dev/null +++ b/test/pash_tests/cat-redir-fail.sh @@ -0,0 +1 @@ +cat < no.such.file diff --git a/test/pash_tests/cat_output_files.sh b/test/pash_tests/cat_output_files.sh new file mode 100755 index 0000000..506ab1a --- /dev/null +++ b/test/pash_tests/cat_output_files.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +distr_output_dir=$1 + +cat "$distr_output_dir"/* diff --git a/test/pash_tests/circus.sh b/test/pash_tests/circus.sh new file mode 100755 index 0000000..30eed07 --- /dev/null +++ b/test/pash_tests/circus.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# https://unix.stackexchange.com/questions/193441/how-can-i-implement-a-circular-flow-of-data-among-interconnected-commands + +echo 1 >file + +rm s1 +mkfifo s1 +tail -f file | + sed -u 's/^/1 + /' | + tee -a s1 > /dev/null & + +cat s1 | + xargs -0 -n 1 -d '\n' expr | + tee -a file diff --git a/test/pash_tests/clone_compress_repo.sh b/test/pash_tests/clone_compress_repo.sh new file mode 100755 index 0000000..ecbfa9d --- /dev/null +++ b/test/pash_tests/clone_compress_repo.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +OUTPUT=${1:="pash.tar.gz"} +BRANCH=${2:="master"} + +# TODO: Make the temp_repo_dir be variable and random named + +mkdir temp_repo_dir +cd temp_repo_dir +git clone --recursive git@github.com:andromeda/pash.git +cd pash +git checkout $BRANCH +cd ../ +tar -czf ../$OUTPUT pash +cd ../ +rm -rf temp_repo_dir diff --git a/test/pash_tests/cmd_sbst.sh b/test/pash_tests/cmd_sbst.sh new file mode 100644 index 0000000..e3d8234 --- /dev/null +++ b/test/pash_tests/cmd_sbst.sh @@ -0,0 +1,6 @@ +echo $(Testvar=set + unset Testvar + echo $Testvar${Testvar-sh_352.10}${Testvar+set} + ) +x=$(set one two three; echo sh_352.11 $1 $2 $3 $# $* "$@"); echo "$x" +x=$(set one 
"twoA twoB"; echo sh_352.12 $1 "$2" $3 $# $* "$@"); echo "$x" \ No newline at end of file diff --git a/test/pash_tests/cmd_sbst_subscript.sh b/test/pash_tests/cmd_sbst_subscript.sh new file mode 100644 index 0000000..f78ef72 --- /dev/null +++ b/test/pash_tests/cmd_sbst_subscript.sh @@ -0,0 +1,2 @@ +echo 'eval echo $?' +exit 123 \ No newline at end of file diff --git a/test/pash_tests/comm-par-test.sh b/test/pash_tests/comm-par-test.sh new file mode 100644 index 0000000..6ec878b --- /dev/null +++ b/test/pash_tests/comm-par-test.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Tests the parallelization of comm with a configuration input and a stream input. +mkfifo s1 s2 + +cat $IN > s1 & +cat $IN | grep "king" > s2 & +comm -23 s1 s2 + +rm s1 s2 diff --git a/test/pash_tests/comm-par-test2.sh b/test/pash_tests/comm-par-test2.sh new file mode 100644 index 0000000..6481251 --- /dev/null +++ b/test/pash_tests/comm-par-test2.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Tests the parallelization of comm with a configuration input and a stream input. 
+mkfifo s1 s2 + +cat $IN > s1 & +cat $IN | grep "king" > s2 & +comm -23 - s2 < s1 + +rm s1 s2 diff --git a/test/pash_tests/comm-par-test2_env_test.sh b/test/pash_tests/comm-par-test2_env_test.sh new file mode 100644 index 0000000..3e777b2 --- /dev/null +++ b/test/pash_tests/comm-par-test2_env_test.sh @@ -0,0 +1,2 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt + diff --git a/test/pash_tests/comm-par-test_env_test.sh b/test/pash_tests/comm-par-test_env_test.sh new file mode 100644 index 0000000..3e777b2 --- /dev/null +++ b/test/pash_tests/comm-par-test_env_test.sh @@ -0,0 +1,2 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt + diff --git a/test/pash_tests/compile.sh b/test/pash_tests/compile.sh new file mode 100755 index 0000000..e53312a --- /dev/null +++ b/test/pash_tests/compile.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# Find markdown files in the current directory tree, compile them to HTML, and +# serve them over the network + +# Requires: pandoc + +IN=./input/ +OUT=./output/out.txt + +find $IN -name '*.md' | # Parallelizable, given a distributed FS + xargs pandoc | # xargs is higher-order, pandoc is third-party + gzip > $OUT # Compress the result +# nc -l 80 # netcat could default-but-configurably parallelizable + + diff --git a/test/pash_tests/compress_files.sh b/test/pash_tests/compress_files.sh new file mode 100755 index 0000000..e3b178c --- /dev/null +++ b/test/pash_tests/compress_files.sh @@ -0,0 +1,6 @@ +# compress all the files in a directory using dd and tar +INPUT=${INPUT:-$PASH_TOP/evaluation/aliases/input/rtf} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/aliases/output} +cd $INPUT +# get all rtf and compress them +find . 
-name "*.rtf" | xargs -P16 -I {} sh -c "dd if={} bs=1 status=none > '{}f'; tar -zcf {}.tar.gz {}f; rm {}f; mv {}.tar.gz $OUTPUT" sh {} diff --git a/test/pash_tests/concat.sh b/test/pash_tests/concat.sh new file mode 100755 index 0000000..3c0bf6d --- /dev/null +++ b/test/pash_tests/concat.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +cat "$@" diff --git a/test/pash_tests/convert_to_fast.sh b/test/pash_tests/convert_to_fast.sh new file mode 100644 index 0000000..e66a954 --- /dev/null +++ b/test/pash_tests/convert_to_fast.sh @@ -0,0 +1,6 @@ +# convert fastq to fasta format +# It recognizes the extension .fasta and it converts the input to fasta.gz format +INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/bio/output} +cd ${INPUT} +find . -maxdepth 1 -name "*.fastq" | xargs -I {} cutadapt -o ${OUTPUT}/{}.fasta.gz {} diff --git a/test/pash_tests/count.sh b/test/pash_tests/count.sh new file mode 100755 index 0000000..d1d0ef4 --- /dev/null +++ b/test/pash_tests/count.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +awk '{ count[$2] += $1 } END { for(e in count) print count[e], e }' "$@" diff --git a/test/pash_tests/count_packets.sh b/test/pash_tests/count_packets.sh new file mode 100644 index 0000000..7cba598 --- /dev/null +++ b/test/pash_tests/count_packets.sh @@ -0,0 +1,3 @@ +# count the packet number in a pcap file +INPUT=${INPUT:-$PASH_TOP/evaluation/scripts/input/201011271400.dump} +tcpdump -nn -r ${INPUT} | wc -l diff --git a/test/pash_tests/deadlock_test.sh b/test/pash_tests/deadlock_test.sh new file mode 100644 index 0000000..edbc09b --- /dev/null +++ b/test/pash_tests/deadlock_test.sh @@ -0,0 +1 @@ +cat $IN | tr A-Z a-z | head -n 1 diff --git a/test/pash_tests/deadlock_test_env_test.sh b/test/pash_tests/deadlock_test_env_test.sh new file mode 100644 index 0000000..805e069 --- /dev/null +++ b/test/pash_tests/deadlock_test_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt diff --git a/test/pash_tests/demo-spell.sh 
b/test/pash_tests/demo-spell.sh new file mode 100755 index 0000000..9883c82 --- /dev/null +++ b/test/pash_tests/demo-spell.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +cd "$(dirname $0)" + +[ -z $PASH_TOP ] && { + echo "PASH_TOP not set, maybe $(git rev-parse --show-toplevel)?" + exit +} +FILE="input/100M.txt" +DICT="input/sorted_words" + +cat "$FILE" | tr A-Z a-z | tr -cs A-Za-z '\n' | sort | uniq | comm -13 $DICT - diff --git a/test/pash_tests/dfs_split_reader.sh b/test/pash_tests/dfs_split_reader.sh new file mode 100755 index 0000000..339e948 --- /dev/null +++ b/test/pash_tests/dfs_split_reader.sh @@ -0,0 +1 @@ +"$PASH_TOP/runtime/dspash/file_reader/dfs_split_reader" --config "$@" diff --git a/test/pash_tests/dgsh-raw-sort.sh b/test/pash_tests/dgsh-raw-sort.sh new file mode 100755 index 0000000..4fd349c --- /dev/null +++ b/test/pash_tests/dgsh-raw-sort.sh @@ -0,0 +1,49 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out + +rm -f *.out + +batchSize=10000000 +testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 +mkfifo $file7 +mkfifo $file8 + +$PASH_TOP/runtime/r_split -r $testFile $batchSize $file1 $file2 & + +$PASH_TOP/runtime/dgsh-tee -I -f -i $file1 -o $file5 & +$PASH_TOP/runtime/dgsh-tee -I -f -i $file2 -o $file6 & + +sort < $file5 > $file7 & +sort < $file6 > $file8 & + +sort -m $file7 $file8 + +# cat $testFile | sort > $file8 +# if cmp -s "$file7" "$file8"; then +# printf 'The file "%s" is the same as "%s"\n' "$file7" "$file8" +# else +# printf 'The file "%s" is different from "%s"\n' "$file7" "$file8" +# fi + +rm -rf *out \ No newline at end of file diff --git a/test/pash_tests/dgsh-sort.sh b/test/pash_tests/dgsh-sort.sh new file mode 100755 index 0000000..984fb71 --- /dev/null +++ b/test/pash_tests/dgsh-sort.sh @@ -0,0 
+1,52 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out + +rm -f *.out + +batchSize=10000000 +testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 +mkfifo $file7 +mkfifo $file8 + +$PASH_TOP/runtime/r_split $testFile $batchSize $file1 $file2 & + +$PASH_TOP/runtime/dgsh-tee -I -f -i $file1 -o $file3 -b 64K & +$PASH_TOP/runtime/dgsh-tee -I -f -i $file2 -o $file4 -b 64K & + +$PASH_TOP/runtime/r_unwrap < $file3 > $file5 & +$PASH_TOP/runtime/r_unwrap < $file4 > $file6 & + +sort < $file5 > $file7 & +sort < $file6 > $file8 & + +sort -m $file7 $file8 + +# cat $testFile | sort > $file8 +# if cmp -s "$file7" "$file8"; then +# printf 'The file "%s" is the same as "%s"\n' "$file7" "$file8" +# else +# printf 'The file "%s" is different from "%s"\n' "$file7" "$file8" +# fi + +rm -rf *out \ No newline at end of file diff --git a/test/pash_tests/dgsh-wc.sh b/test/pash_tests/dgsh-wc.sh new file mode 100755 index 0000000..dbfc6f5 --- /dev/null +++ b/test/pash_tests/dgsh-wc.sh @@ -0,0 +1,45 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out + +rm -f *.out + +testFile=$PASH_TOP/evaluation/scripts/input/1G.txt +batchSize=10000000 +testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 + + +$PASH_TOP/runtime/r_split -r $testFile $batchSize $file1 $file2 & + +$PASH_TOP/runtime/dgsh-tee -I -f -i $file1 -o $file3 -b 10M & +$PASH_TOP/runtime/dgsh-tee -I -f -i $file2 -o $file4 -b 10M & +# $PASH_TOP/runtime/r_unwrap < $file1 > $file3 & +# $PASH_TOP/runtime/r_unwrap < $file2 > $file4 & + +wc $file3 
> $file5 & +wc $file4 > $file6 & + +./merge-wc.sh $file5 $file6 + + +rm -rf *out \ No newline at end of file diff --git a/test/pash_tests/diff.sh b/test/pash_tests/diff.sh new file mode 100644 index 0000000..27caa33 --- /dev/null +++ b/test/pash_tests/diff.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# Compares two streams element by element +# Taken from https://crashingdaily.wordpress.com/2008/03/06/diff-two-stdout-streams/ +# shuf() { awk 'BEGIN {srand(); OFMT="%.17f"} {print rand(), $0}' "$@" | sort -k1,1n | cut -d ' ' -f2-; } + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} + +mkfifo s1 s2 + +cat $IN | + # shuf | + tr [:lower:] [:upper:] | + sort > s1 & + +cat $IN | + # shuf | + tr [:upper:] [:lower:] | + sort > s2 & + +diff -B s1 s2 +rm s1 s2 diff --git a/test/pash_tests/diff_env_test.sh b/test/pash_tests/diff_env_test.sh new file mode 100644 index 0000000..3e777b2 --- /dev/null +++ b/test/pash_tests/diff_env_test.sh @@ -0,0 +1,2 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt + diff --git a/test/pash_tests/distributed.sh b/test/pash_tests/distributed.sh new file mode 100755 index 0000000..a57ebe6 --- /dev/null +++ b/test/pash_tests/distributed.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +# Schematic for distributing computations +# To try server client: + +./client.js local 'ls' +./client.js local | jq . +./client.js local 'sleep 5; echo "yo"' +./client.js local | jq . +sleep 5 +./client.js local | jq . 
+ +# The format is as follows: +# cat FIFOs > $OUT & +# nc -l -p STAR_RESULT_PORT > FIFO & +# ./client.js WORKER 'nc -l WORKER_DATA_PORT | PROGRAM | nc -C 158.130.4.212 STAR_RESULT_PORT' +# cat $IN | nc -N WORKER WORKER_DATA_PORT + + +cat fifo5555 fifo5556 > "$OUT" +nc -l -p 5555 > fifo5555 & +nc -l -p 5556 > fifo5556 & +# beta runs: `nc -l 5000 | grep -v "onetwo" | tr '[:lower:]' '[:upper:]' | nc -C 158.130.4.212 5555` +./client.js beta 'nc -l 5000 | grep -v "onetwo" | tr "[:lower:]" "[:upper:]" | nc -C 158.130.4.212 5555' +# gamma runs: `nc -l 5000 | grep -v "onetwo" | tr '[:lower:]' '[:upper:]' | nc -C 158.130.4.212 5555` +./client.js gamma 'nc -l 5000 | grep -v "onetwo" | tr "[:lower:]" "[:upper:]" | nc -C 158.130.4.212 5556' +cat "$IN" | nc -N beta.ndr.md 5000 +cat "$IN" | nc -N gamma.ndr.md 5000 + + +# Collect results +# Implement: `socat` can listen for multiple connections +nc -l -p 5555 > s1 & +nc -l -p 5556 > s2 & +nc -l -p 5557 > r3 & +nc -l -p 5558 > r4 & + +# Things are complicated by the fact that machines default to different +# versions of the BSD netcat (not sure why Debian and Ubuntu default to +# the BSD version of `nc`) +# -N stops after EOF + +# Receiver should run +nc -l -p 5000 | tr '[:lower:]' '[:upper:]' | nc "$DSTAR" 5555 + +# Distribute load +cat ./a/b | tr 'x' 'x' | nc "$B" 5000 +cat ./a/b | tr 'x' 'x' | nc "$C" 5000 +cat ./a/b | tr 'x' 'x' | nc "$D" 5000 diff --git a/test/pash_tests/distro-deps.sh b/test/pash_tests/distro-deps.sh new file mode 100755 index 0000000..65e1cb4 --- /dev/null +++ b/test/pash_tests/distro-deps.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash + +set -e +cd $(dirname $0) +PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel)} +. "$PASH_TOP/scripts/utils.sh" + +if [[ $(uname) == 'Darwin' ]]; then + echo 'Currently pash can run only on Linux' + exit 1 +fi + +read_cmd_args $@ +cd $PASH_TOP + +LOG_DIR=install_logs +mkdir -p $LOG_DIR + +# if we aren't running in docker, use sudo to install packages +if [ ! 
-f /.dockerenv ]; then + export SUDO="sudo" +fi + +if type lsb_release >/dev/null 2>&1 ; then + distro=$(lsb_release -i -s) +elif [ -e /etc/os-release ] ; then + distro=$(awk -F= '$1 == "ID" {print $2}' /etc/os-release) +fi + +# convert to lowercase +distro=$(printf '%s\n' "$distro" | LC_ALL=C tr '[:upper:]' '[:lower:]') +# compile the list of the shared required packages +pkgs="automake bc curl gcc git graphviz libtool m4 python sudo wget" +# now do different things depending on distro +case "$distro" in + ubuntu*) + pkgs="$pkgs bsdmainutils libffi-dev locales locales-all netcat-openbsd pkg-config python3 python3-pip python3-setuptools python3-testresources wamerican-insane" + if [[ "$show_deps" == 1 ]]; then + echo "$pkgs" | sort + exit 0 + fi + echo "Running preparation apt install:" + echo "|-- running apt update..." + $SUDO apt-get update &> $LOG_DIR/apt_update.log + echo "|-- running apt install..." + $SUDO apt-get install -y $pkgs &>> $LOG_DIR/apt_install.log + if [[ "$optimized_agg_flag" == 1 ]]; then + echo "|-- installing g++-10..." + $SUDO apt-get install software-properties-common -y &> $LOG_DIR/apt_install.log + $SUDO add-apt-repository ppa:ubuntu-toolchain-r/test -y &> $LOG_DIR/apt_install.log + $SUDO apt-get install g++-10 -y &> $LOG_DIR/apt_install.log + $SUDO update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 100 &> $LOG_DIR/apt_install.log + $SUDO update-alternatives --set g++ /usr/bin/g++-10 &> $LOG_DIR/apt_install.log + fi + ;; + debian*) + pkgs="$pkgs bsdmainutils libffi-dev locales locales-all netcat-openbsd pkg-config procps python3 python3-pip python3-setuptools python3-testresources wamerican-insane" + if [[ "$show_deps" == 1 ]]; then + echo "$pkgs" | sort + exit 0 + fi + echo "Running preparation apt install:" + echo "|-- running apt update..." + $SUDO apt-get update &> $LOG_DIR/apt_update.log + echo "|-- running apt install..." 
+ $SUDO apt-get install -y $pkgs &> $LOG_DIR/apt_install.log + ;; + fedora*) + pkgs="$pkgs autoconf diffutils gcc-c++ glibc-langpack-en hostname libjpeg-devel make nc pip procps python-devel python3-pip python3-setuptools python3-setuptools python3-testresources zlib-devel" + if [[ "$show_deps" == 1 ]]; then + echo "$pkgs" | sort + exit 0 + fi + echo "|-- running dnf install...." + $SUDO dnf install -y $pkgs &> $LOG_DIR/dnf_install.log + ;; + arch*) + pkgs="$pkgs autoconf inetutils libffi make openbsd-netcat pkg-config python-pip" + if [[ "$show_deps" == 1 ]]; then + echo "$pkgs" | sort + exit 0 + fi + echo "Updating mirrors" + $SUDO pacman -Sy &> $LOG_DIR/pacman_update.log + echo "|-- running pacman install...." + yes | $SUDO pacman -S $pkgs &> $LOG_DIR/pacman_install.log + ;; + freebsd*) + pkgs="$pkgs autoconf gmake gsed libffi py38-pip" + if [[ "$show_deps" == 1 ]]; then + echo "$pkgs" | sort + exit 0 + fi + echo "Updating mirrors" + $SUDO pkg update &> $LOG_DIR/pkg_update.log + echo "|-- running pkg install...." + # TODO add python3-testresources dep + yes | $SUDO pkg install $pkgs + ;; + *) echo "unknown distro: '$distro'" ; exit 1 ;; +esac diff --git a/test/pash_tests/distrotest.sh b/test/pash_tests/distrotest.sh new file mode 100644 index 0000000..5f0440a --- /dev/null +++ b/test/pash_tests/distrotest.sh @@ -0,0 +1,56 @@ +#!/bin/bash +# This script runs 'buildtest' on each of several distros +# via Docker. +set -o pipefail + +exec 3>&1 4>&2 + +[ -e "${SHELLCHECK_DIR}/ShellCheck.cabal" ] || die "ShellCheck.cabal not in this dir" + +# [ "$1" = "--run" ] || { +# cat << EOF +# This script pulls multiple distros via Docker and compiles +# ShellCheck and dependencies for each one. It takes hours, +# and is still highly experimental. + +# Make sure you're plugged in and have screen/tmux in place, +# then re-run with $0 --run to continue. + +# Also note that dist* will be deleted. +# EOF +# exit 0 +# } + +echo "Deleting 'dist' and 'dist-newstyle'..." 
+rm -rf dist dist-newstyle + +log=$(mktemp) || die "Can't create temp file" +date >> "$log" || die "Can't write to log" + +echo "Logging to $log" >&3 +## If I keep this on, the script output (together with Dish output) is +## redirected +# exec >> "$log" 2>&1 + +final=0 + +cat $IN | distrotest_loop + +# distrotest_loop << EOF +# # Docker tag Setup command +# debian:stable apt-get update && apt-get install -y cabal-install +# debian:testing apt-get update && apt-get install -y cabal-install +# ubuntu:latest apt-get update && apt-get install -y cabal-install +# haskell:latest true +# opensuse/leap:latest zypper install -y cabal-install ghc +# fedora:latest dnf install -y cabal-install ghc-template-haskell-devel findutils +# archlinux/base:latest pacman -S -y --noconfirm cabal-install ghc-static base-devel + +# # Other versions we want to support +# ubuntu:18.04 apt-get update && apt-get install -y cabal-install + +# # Misc Haskell including current and latest Stack build +# ubuntu:18.04 set -e; apt-get update && apt-get install -y curl && curl -sSL https://get.haskellstack.org/ | sh -s - -f && cd /mnt && exec test/stacktest +# EOF + +exit "$final" diff --git a/test/pash_tests/distrotest_env.sh b/test/pash_tests/distrotest_env.sh new file mode 100644 index 0000000..098b025 --- /dev/null +++ b/test/pash_tests/distrotest_env.sh @@ -0,0 +1,2 @@ +SHELLCHECK_DIR=/home/nikos/shellcheck/ +IN=../evaluation/usecases/shellcheck/temp_input.txt diff --git a/test/pash_tests/distrotest_funs.sh b/test/pash_tests/distrotest_funs.sh new file mode 100644 index 0000000..2849273 --- /dev/null +++ b/test/pash_tests/distrotest_funs.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +die() +{ + echo "$*" >&4; + exit 1; +} + +distrotest_loop() +{ + while read -r distro setup + do + [[ "$distro" = "#"* || -z "$distro" ]] && continue + + printf '%s ' "$distro" # >&3 + docker pull "$distro" || die "Can't pull $distro" + printf 'pulled. 
' # >&3 + + tmp=$(mktemp -d) || die "Can't make temp dir" + cp -r "${SHELLCHECK_DIR}" "$tmp/" || die "Can't populate test dir" + printf 'Result: ' # >&3 + < /dev/null docker run -v "$tmp:/mnt" "$distro" sh -c " + $setup + cd /mnt/shellcheck || exit 1 + test/buildtest + " + ret=$? + if [ "$ret" = 0 ] + then + echo "OK" # >&3 + else + echo "FAIL with $ret. See $log" # >&3 + final=1 + fi + rm -rf "$tmp" + done +} + +export -f die +export -f distrotest_loop diff --git a/test/pash_tests/diverge.sh b/test/pash_tests/diverge.sh new file mode 100755 index 0000000..1cf8ce5 --- /dev/null +++ b/test/pash_tests/diverge.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +while true; do true; done diff --git a/test/pash_tests/double_sort.sh b/test/pash_tests/double_sort.sh new file mode 100644 index 0000000..16b4c81 --- /dev/null +++ b/test/pash_tests/double_sort.sh @@ -0,0 +1,2 @@ +#!/bin/bash +cat $IN | tr A-Z a-z | sort | sort -r diff --git a/test/pash_tests/double_sort_env_test.sh b/test/pash_tests/double_sort_env_test.sh new file mode 100644 index 0000000..3e777b2 --- /dev/null +++ b/test/pash_tests/double_sort_env_test.sh @@ -0,0 +1,2 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt + diff --git a/test/pash_tests/drain_stream.sh b/test/pash_tests/drain_stream.sh new file mode 100755 index 0000000..84c05ae --- /dev/null +++ b/test/pash_tests/drain_stream.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +## This command drains a stream. It is used if we want a prefix of a +## stream that was written by tee. Since tee writes in both streams +## "almost" in lockstep, if we get a prefix on one side, the other +## side cannot progress. 
+dd of=/dev/null > /dev/null 2>&1 diff --git a/test/pash_tests/eager-no-task-par.sh b/test/pash_tests/eager-no-task-par.sh new file mode 100755 index 0000000..9c8858a --- /dev/null +++ b/test/pash_tests/eager-no-task-par.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +# $input="${1}" +# $output="${2}" +# $temp="${3}" + +touch "$3" + +cat "$1" > "$3" +cat "$3" > "$2" +rm "$3" diff --git a/test/pash_tests/eager.sh b/test/pash_tests/eager.sh new file mode 100755 index 0000000..d1a3772 --- /dev/null +++ b/test/pash_tests/eager.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +input=${1?"ERROR: Eager: No input file given"} +output=${2?"ERROR: Eager: No output file given"} +intermediate_file=${3?"ERROR: Eager: No intermediate file given"} + +# Set a default DISH_TOP in this directory if it doesn't exist +PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel)} +# TODO: Doable check if this is still needed. Turned off for distributed exection. +# PR https://github.com/binpash/pash/pull/495 might've resolved it. +# cleanup() +# { +# kill -SIGPIPE $eager_pid > /dev/null 2>&1 +# } +# trap cleanup EXIT + +# $PASH_TOP/runtime/eager "$input" "$output" "$intermediate_file" & +# eager_pid=$! 
+# wait $eager_pid +"$PASH_TOP"/runtime/eager "$input" "$output" "$intermediate_file" +rm "$intermediate_file" diff --git a/test/pash_tests/eager_test.sh b/test/pash_tests/eager_test.sh new file mode 100755 index 0000000..f62b732 --- /dev/null +++ b/test/pash_tests/eager_test.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +mkfifo s1 s2 + +# IN=test_in.txt +IN=../scripts/input/1G.txt + +cat "$IN" > s1 & +cat s2 > test_out.txt & +./eager s1 s2 intermediate & + +wait + +rm s1 s2 + +# diff -s $IN test_out.txt diff --git a/test/pash_tests/echo_args.sh b/test/pash_tests/echo_args.sh new file mode 100644 index 0000000..5938c62 --- /dev/null +++ b/test/pash_tests/echo_args.sh @@ -0,0 +1,2 @@ +echo "$# $@" +echo $0 diff --git a/test/pash_tests/encrypt_files.sh b/test/pash_tests/encrypt_files.sh new file mode 100755 index 0000000..a2b123c --- /dev/null +++ b/test/pash_tests/encrypt_files.sh @@ -0,0 +1,5 @@ +# compress and encrypt all files in a directory +INPUT=${INPUT:-$PASH_TOP/evaluation/aliases/input/rtf} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/aliases/output} +cd $INPUT +find . 
-name "*.rtf" | xargs -I {} sh -c "tar -czf - {} | openssl enc -e -pbkdf2 -out {}.enc; mv {}.enc $OUTPUT" sh {} diff --git a/test/pash_tests/escape-madness.sh b/test/pash_tests/escape-madness.sh new file mode 100644 index 0000000..80cf253 --- /dev/null +++ b/test/pash_tests/escape-madness.sh @@ -0,0 +1,3 @@ +echo "$(echo *)" +echo "$(echo "*")" +echo "${unset-*}" diff --git a/test/pash_tests/exec-redirections.sh b/test/pash_tests/exec-redirections.sh new file mode 100644 index 0000000..114e89c --- /dev/null +++ b/test/pash_tests/exec-redirections.sh @@ -0,0 +1,3 @@ +exec < exec-redirections.in > exec-redirections.out 2> exec-redirections.err +touch +cat diff --git a/test/pash_tests/execute_unix_benchmarks.sh b/test/pash_tests/execute_unix_benchmarks.sh new file mode 100755 index 0000000..e76b7db --- /dev/null +++ b/test/pash_tests/execute_unix_benchmarks.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +## Necessary to set PASH_TOP +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +## This sets up to what extent we run the evaluation. +## There are 2 levels: +## 1. Small inputs (1GB) | --width 4 +## 2. 
Big inputs (10GB) | --width 16 (EuroSys evaluation) +evaluation_level=1 + +while getopts 'slh' opt; do + case $opt in + s) evaluation_level=1 ;; + l) evaluation_level=2 ;; + h) echo "There are two possible execution levels:" + echo "option -s: Small inputs (1GB) | --width 4" + echo "option -l: Big inputs (10GB) | --width 16 (EuroSys evaluation)" + exit 0 ;; + *) echo 'Error in command line parsing' >&2 + exit 1 + esac +done +shift "$(( OPTIND - 1 ))" + + +unix50_dir="$PASH_TOP/evaluation/unix50/" +unix50_intermediary="${unix50_dir}/intermediary/" +intermediary_dir="$PASH_TOP/evaluation/intermediary/" +results_subdir_prefix="unix50" + +if [ "$evaluation_level" -eq 1 ]; then + echo "Executing Unix50 scripts with 1GB inputs and --width 4" + maximum_input_size="$((1024 * 1024 * 1024))" # 1 GB + n_in=4 +elif [ "$evaluation_level" -eq 2 ]; then + echo "Executing Unix50 scripts with 10GB inputs and --width 16" + maximum_input_size="$((10 * 1024 * 1024 * 1024))" # 10 GB + n_in=16 +else + echo "Unrecognizable execution level: $evaluation_level" + exit 1 +fi + +results_subdir="${results_subdir_prefix}_${n_in}_${maximum_input_size}" + +rm -r $unix50_intermediary +mkdir -p $unix50_intermediary +mkdir -p $intermediary_dir +mkdir -p "$PASH_TOP/evaluation/results/${results_subdir}/" + +## Make inputs larger and generate scripts and their envs +python3 generate_unix50_scripts.py $unix50_dir $unix50_intermediary $maximum_input_size + +for unix50_pipeline in $(ls ${unix50_intermediary} | grep -v "_env" | cut -f 1 -d '.' | sort); do + echo $unix50_pipeline + + echo "Generating input and intermediary scripts... be patient..." + python3 "$PASH_TOP/evaluation/generate_microbenchmark_intermediary_scripts.py" \ + $unix50_intermediary $unix50_pipeline $n_in $intermediary_dir + + echo "Executing script with bash and pash..." 
+ "$PASH_TOP/evaluation/execute_compile_evaluation_script.sh" -s -a "${unix50_pipeline}" "${n_in}" "${results_subdir}" > /dev/null 2>&1 + rm -f /tmp/eager* +done diff --git a/test/pash_tests/exit_code.sh b/test/pash_tests/exit_code.sh new file mode 100644 index 0000000..898a080 --- /dev/null +++ b/test/pash_tests/exit_code.sh @@ -0,0 +1,4 @@ +if read -r && read -r +then + exit 1 +fi < log_results/failed.log diff --git a/test/pash_tests/expand-u-positional.sh b/test/pash_tests/expand-u-positional.sh new file mode 100644 index 0000000..5b6a69d --- /dev/null +++ b/test/pash_tests/expand-u-positional.sh @@ -0,0 +1,2 @@ +set -u +echo $1 diff --git a/test/pash_tests/expand-u.sh b/test/pash_tests/expand-u.sh new file mode 100644 index 0000000..f01ee88 --- /dev/null +++ b/test/pash_tests/expand-u.sh @@ -0,0 +1,3 @@ +unset foobar +set -u +echo ${foobar} \ No newline at end of file diff --git a/test/pash_tests/export_var_script.sh b/test/pash_tests/export_var_script.sh new file mode 100755 index 0000000..5e738f6 --- /dev/null +++ b/test/pash_tests/export_var_script.sh @@ -0,0 +1,2 @@ +export N=1000 +seq 1 $N | sort -rn diff --git a/test/pash_tests/for-echo.sh b/test/pash_tests/for-echo.sh new file mode 100644 index 0000000..a8aebe3 --- /dev/null +++ b/test/pash_tests/for-echo.sh @@ -0,0 +1,6 @@ +N=${N:-100} +for i in $(seq $N) +do + echo $i +done +echo "end" diff --git a/test/pash_tests/for-loop.sh b/test/pash_tests/for-loop.sh new file mode 100644 index 0000000..090539f --- /dev/null +++ b/test/pash_tests/for-loop.sh @@ -0,0 +1,8 @@ +mkdir -p temp-out + +for i in $(seq 100) +do + cat $PASH_TOP/README.md | grep pash | grep pash > temp-out/$i.out +done + +echo done diff --git a/test/pash_tests/for_loop_simple.sh b/test/pash_tests/for_loop_simple.sh new file mode 100644 index 0000000..c267cd6 --- /dev/null +++ b/test/pash_tests/for_loop_simple.sh @@ -0,0 +1,3 @@ +for _ in $times; do + cat $IN | tr A-Z a-z | sort +done diff --git a/test/pash_tests/for_loop_simple_env_test.sh 
b/test/pash_tests/for_loop_simple_env_test.sh new file mode 100644 index 0000000..a60ab38 --- /dev/null +++ b/test/pash_tests/for_loop_simple_env_test.sh @@ -0,0 +1,2 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt +times="1 2 3" diff --git a/test/pash_tests/for_spaces.sh b/test/pash_tests/for_spaces.sh new file mode 100755 index 0000000..ed36e7a --- /dev/null +++ b/test/pash_tests/for_spaces.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +oldifs=$IFS +IFS=$(echo -e "\t") +for f in `ls *`; do + echo $f +done +IFS=$oldifs diff --git a/test/pash_tests/fun-def.sh b/test/pash_tests/fun-def.sh new file mode 100644 index 0000000..999ef74 --- /dev/null +++ b/test/pash_tests/fun-def.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +custom_sort() { + sort $@ +} + +custom_tr() { + tr A-Z a-z +} + +export -f custom_tr + +FILES="$PASH_TOP/evaluation/tests/input/1M.txt ../evaluation/tests/input/1M.txt" + +cat $FILES | custom_tr | custom_sort diff --git a/test/pash_tests/gen_data.sh b/test/pash_tests/gen_data.sh new file mode 100644 index 0000000..9101fd2 --- /dev/null +++ b/test/pash_tests/gen_data.sh @@ -0,0 +1,126 @@ +DATA=final.csv +rm -f $DATA +replace_string() { + sed -i -e 's/'$1'/'"$2"'/g' .tmp +} + +prepare_run_data() { + DATA_FILE=$1.data.csv + FILES=$1.log.dat + echo $1.tmp + rm -f $DATA_FILE $FILES + + # gather all results + find $1 -name "*.res" -type f > $FILES + + # read each line of the file + while read p; do + # echo "Fixing: $p" + python3 ../prep_temp.py $p > .tmp + # break + # lines=$(cat $p | wc -l) + # if [[ $lines -gt 3 ]]; then + # echo "Head1:" + # tail -n +2 $p | head + # file="$(tail -n +2 $p)" + # else + # echo "Head2:" + # tail -n +2 $p | head + # file="$(tail -n +2 $p)" + # fi + # file="$(tail -n +2 $p)" + # echo "file: $file" + # echo "$file" > .tmp + bench=$(echo $p | awk -F '/' '{print $3}') + mode=$(echo $p | awk -F'/' '{print $2}') + # echo "Bench: $bench, mode: $mode" + # if [[ $bench == max-temp ]]; then + # cat .tmp | sed -E 
's/^([a-zA-Z_0-9\-]+):.*([0-9]+.[0-9]+\n)$/\1\t\2/g' #| cut -f 1 + # fi + # read the contents of each execution file + while read l; do + #l=$(echo $l | sed 's/ //g') + echo $l | grep --quiet : + res=$? + if [[ $res == 1 ]]; then + perf=$(echo $l | grep -Eo '[0-9]+.[0-9]+$') + # echo "Perf 1: $perf" + script=$(echo $l | sed -e 's/'$perf'//g') + else + # get script name and performance + # strip the .sh and get fetch the script name + script=$(echo $l | awk -F ':' '{print $1}' | sed 's/...$//') + # get the execution time + perf=$(echo $l | awk -F ':' '{print $2}') + # echo "Perf 2: $perf" + fi + echo $bench,$script,$mode,$perf | sed 's/ //g'>> $DATA_FILE + done < .tmp + done < $FILES + sort $DATA_FILE > $1.tmp + rm $DATA_FILE rm -f $FILES +} + +cd eval_results +prepare_run_data run +# merge all the results +cat run.tmp | sed -s 's/,/ /g' | awk '{print $1,$2,$3,$4}' | awk ' {print $1','$2','$3','$4}' | tr ' ' ',' > .tmp +# cleanup +replace_string dependency_untangling for-loops +replace_string nlp NLP +replace_string oneliners Classics +replace_string unix50 Unix50 +replace_string analytics-mts COVID-mts +replace_string web-index WebIndex +replace_string max-temp AvgTemp +replace_string temp-analytics AvgTemp +replace_string Genomics_Computation Genomics +replace_string Program_Inference ProgInf +replace_string pash_jit_no_prof_no_du 'pash_jit -prof -par_pipe' +replace_string pash_jit_no_prof 'pash_jit -prof' +replace_string blish pash_jit +perf='' +# calculate the ratios +while read p; do + # is this the bash entry + echo $p | grep --quiet bash + res=$? 
+ # fetch the performance + if [[ $res == 0 ]]; then + perf=$(echo $p | awk -F ',' '{print $4}') + fi + # get the bench + bench=$(echo $p | awk -F ',' '{print $1}') + # get the script + script=$(echo $p | awk -F ',' '{print $2}') + # get the mode + mode=$(echo $p | awk -F ',' '{print $3}') + # get the time of the pash/blish configs + current_perf=$(echo $p | awk -F ',' '{print $4}') + # calculate the ratio + if [[ $res == 0 ]]; then + ratio=$perf + else + ratio=$(echo "$perf $current_perf" | awk '{print $1/$2}' ) + fi + # replace the pash/blish time with the ratio + echo $bench,$script,$mode,$ratio >> $DATA +done < .tmp +rm -f .tmp +mv $DATA .. + +# in docker container, we are running with the CI +if [ -f /.dockerenv ]; then + exit 0 +fi +cd .. +# replace all the lines that are not needed in figure5 +sed 's/for-loops,AurPkg,pash_aot,.*/for-loops,AurPkg,pash_aot,0/g' $DATA > data_final.csv +sed -i 's/for-loops,FileEnc1,pash_aot,.*/for-loops,FileEnc1,pash_aot,0/g' data_final.csv +sed -i 's/for-loops,FileEnc2,pash_aot,.*/for-loops,FileEnc2,pash_aot,0/g' data_final.csv +sed -i 's/for-loops,LogAnalysis1,pash_aot,.*/for-loops,LogAnalysis1,pash_aot,0/g' data_final.csv +sed -i 's/for-loops,LogAnalysis2,pash_aot,.*/for-loops,LogAnalysis2,pash_aot,0/g' data_final.csv +sed -i 's/for-loops,MediaConv1,pash_aot,.*/for-loops,MediaConv1,pash_aot,0/g' data_final.csv +sed -i 's/for-loops,MediaConv2,pash_aot,.*/for-loops,MediaConv2,pash_aot,0/g' data_final.csv +sed -i 's/for-loops,ProgInf,pash_aot,.*/for-loops,ProgInf,pash_aot,0/g' data_final.csv +sed -i 's/for-loops,Genomics,pash_aot,.*/for-loops,Genomics,pash_aot,0/g' data_final.csv diff --git a/test/pash_tests/gen_pl.sh b/test/pash_tests/gen_pl.sh new file mode 100644 index 0000000..7773f24 --- /dev/null +++ b/test/pash_tests/gen_pl.sh @@ -0,0 +1,4 @@ +# generate a playlist +INPUT=${INPUT:-$PASH_TOP/evaluation/aliases/input/} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/aliases/output} +find $1 -type f -name *.mp3 -o -name *.wav | sort 
> $OUTPUT/playlist.pls diff --git a/test/pash_tests/generate_single_chrom.sh b/test/pash_tests/generate_single_chrom.sh new file mode 100644 index 0000000..364c28a --- /dev/null +++ b/test/pash_tests/generate_single_chrom.sh @@ -0,0 +1,19 @@ +# Here are sample steps to generate a single paired read from hg19: +# https://www.biostars.org/p/150010/ +INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/bio/output} +cd ${INPUT} +# filter out a single chromosome and index it, e.g. +samtools faidx ${INPUT}/human_g1k_v37.fasta 20 > ${OUTPUT}/human_g1k_v37_chr20.fasta +bowtie2-build ${OUTPUT}/human_g1k_v37_chr20.fasta ${OUTPUT}/homo_chr20 +#simulate a single read sample, e.g. here is for a single (-N 1) paired read: +${INPUT}/wgsim/wgsim -N 1 ${OUTPUT}/human_g1k_v37_chr20.fasta ${OUTPUT}/single.read1.fq ${OUTPUT}/single.read2.fq > ${OUTPUT}/wgsim.out +#generate the sam, e.g. +bowtie2 -x ${OUTPUT}/homo_chr20 -1 ${OUTPUT}/single.read1.fq -2 ${OUTPUT}/single.read2.fq -S ${OUTPUT}/single_pair.sam +#generate a bam +samtools view -b -S -o ${OUTPUT}/single_pair.bam ${OUTPUT}/single_pair.sam +#sort and index it +samtools sort ${OUTPUT}/single_pair.bam -o ${OUTPUT}/single_pair.sorted.bam +# this seems to not affect the file, but in other cases, its indeed needed +samtools index ${OUTPUT}/single_pair.sorted.bam + diff --git a/test/pash_tests/genomics.sh b/test/pash_tests/genomics.sh new file mode 100755 index 0000000..71833c3 --- /dev/null +++ b/test/pash_tests/genomics.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# create bam files with regions +################### 1KG SAMPLES +IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input} +SAMTOOLS_BIN=${IN}/deps/samtools-1.7/samtools +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/bio} +LOGS=${OUT}/logs +IN_NAME=${IN}/bio/100G.txt +GENE_LOCS=${IN}/bio/Gene_locs.txt +mkdir -p ${LOGS} +run_tests() { + s_line=$(echo $1 | tr '@' ' ') + pop=$(echo $s_line |cut -f 1 -d " "); 
+ sample=$(echo $s_line |cut -d " " -f 2); + link=$(echo $s_line |cut -f 3 -d " "); + ### correcting labeling of chromosomes so that all are 1,2,3.. instead of chr1,chr2 or chromosome1 etc + echo 'Processing Sample '${IN}/bio/$sample' '; + # uniform the chromosomes in the file due to inconsistencies + $SAMTOOLS_BIN view -H "${IN}/bio/$sample".bam | sed -e 's/SN:\([0-9XY]\)/SN:chr\1/' -e 's/SN:MT/SN:chrM/' \ + | $SAMTOOLS_BIN reheader - "${IN}/bio/$sample".bam > "${OUT}/$sample"_corrected.bam 2> /dev/null + # create bai file + $SAMTOOLS_BIN index -b "${OUT}/$sample"_corrected.bam 2> /dev/null + ### Isolating each relevant chromosome based on Gen_locs + cut -f 2 ${IN}/bio/Gene_locs.txt |sort |uniq |while read chr; + do + echo 'Isolating Chromosome '$chr' from sample '${OUT}/$sample', '; + $SAMTOOLS_BIN view -b "${OUT}/$sample"_corrected.bam chr"$chr" > "${OUT}/$pop"_"$sample"_"$chr".bam 2> /dev/null + echo 'Indexing Sample '$pop'_'${OUT}/$sample' '; + $SAMTOOLS_BIN index -b "${OUT}/$pop"_"$sample"_"$chr".bam 2> /dev/null + done; +} + +export -f run_tests +data=$(cat ${IN_NAME} | tr ' ' '@') +pkg_count=0 +for item in $data; +do + pkg_count=$((pkg_count + 1)); + run_tests $item > "${LOGS}"/"${pkg_count}.log" +done + +echo 'done'; diff --git a/test/pash_tests/genquality.sh b/test/pash_tests/genquality.sh new file mode 100755 index 0000000..64c777f --- /dev/null +++ b/test/pash_tests/genquality.sh @@ -0,0 +1,75 @@ +#!/bin/bash + +# Identify the top 10 reasons why genome assemblies don't make it into GenBank +# -- NIH's genetic sequence database, an annotated collection of all publicly +# available DNA sequences +# http://thegenomefactory.blogspot.com/2019/09/25-reasons-assemblies-dont-make-it-into.html + +# Require: csvkit +# Data: http://ndr.md/data/bio/genbank.txt + +IN=./input/genbank.txt +OUT=./output/out.txt + +cat $IN | + csvcut -t -K 1 -c 'excluded_from_refseq' | + tail -n +2 | tr ";" "\n" | + sed -e 's/^ //' -e 's/ $//' | + grep -v '""' | + sort | + uniq -c | + 
sort -nr | + head -n 10 | + nl > $OUT + +# More bio pipelines for +# # Strains with Complete Genome +# cat assembly_summary.tsv \ +# | csvtk grep -t -f assembly_level -i -p "Complete Genome" \ +# | wc -l +# +# # Most sequenced species with Complete Genome +# cat assembly_summary.tsv \ +# | csvtk grep -t -f assembly_level -i -p "Complete Genome" \ +# | csvtk cut -t -f organism_name \ +# | cut -d ' ' -f 1,2 \ +# | csvtk freq -t -n -r | head -n 20 | csvtk pretty -t +# +# # Number of species, by organism name +# +# # Filter by species (organism_name) +# cat assembly_summary.tsv \ +# | csvtk grep -t -f organism_name -i -r -p "Mycobacterium tuberculosis" \ +# | csvtk grep -t -f assembly_level -i -p "Complete Genome" \ +# > mt.tsv +# +# # Filter (complete genome) by species_taxid +# cat assembly_summary.tsv \ +# | csvtk grep -t -f species_taxid -p 239935,1280 \ +# | csvtk grep -t -f assembly_level -i -p "Complete Genome" \ +# > bytaxid.tsv +# +# # Download genome sequence and annotation files +# cat mt.tsv | csvtk cut -t -f ftp_path | sed 1d \ +# | rush -v prefix='{}/{%}' \ +# ' \ +# wget -c {prefix}_genomic.fna.gz; \ +# wget -c {prefix}_genomic.gbff.gz; \ +# wget -c {prefix}_genomic.gff.gz; \ +# wget -c {prefix}_cds_from_genomic.fna.gz \ +# wget -c {prefix}_protein.faa.gz; \ +# ' \ +# -j 10 -c -C download.rush +# +# #Get GenBank assembly summary file +# wget ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/assembly_summary_genbank.txt +# +# #Get all lines that have "Mycobacter", if 12th field is "Complete Genome", print the 20th field (url to file). +# #But the actual filename ends _genomic.fna.gz so include that too.. 
+# grep Mycobacter assembly_summary_genbank.txt \ +# | awk 'BEGIN{FS="\t"}{if($12=="Complete Genome"){print $20}}' \ +# | awk 'BEGIN{OFS=FS="/"}{print $0,$NF"_genomic.fna.gz"}' \ +# > urls.txt +# +# #Now you can go through your urls file +# IFS=$'\n'; for NEXT in $(cat urls.txt); do wget "$NEXT"; done diff --git a/test/pash_tests/get-summary.sh b/test/pash_tests/get-summary.sh new file mode 100755 index 0000000..2d24856 --- /dev/null +++ b/test/pash_tests/get-summary.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +echo 'GNU Coreutils ('$(cat coreutils-summary.txt | wc -l | awk '{$1=$1};1') 'commands):' +echo ' S:' $(cat coreutils-summary.txt | grep ' S ' | wc -l) +echo ' P:' $(cat coreutils-summary.txt | grep ' P ' | wc -l) +echo ' N:' $(cat coreutils-summary.txt | grep ' N ' | wc -l) +echo ' E:' $(cat coreutils-summary.txt | grep ' E ' | wc -l) + +echo 'POSIX ('$(( $(cat posix-summary.txt | wc -l | awk '{$1=$1};1') + $(cat ../c_stats/posix.txt | grep -v Mandatory | wc -l) )) 'commands):' +echo ' S:' $(cat posix-summary.txt | grep ' S ' | wc -l) +echo ' P:' $(cat posix-summary.txt | grep ' P ' | wc -l) +echo ' N:' $(cat posix-summary.txt | grep ' N ' | wc -l) +echo ' E:' $(( $(cat posix-summary.txt | grep ' E ' | wc -l) + $(cat ../c_stats/posix.txt | grep -v Mandatory | wc -l) )) + diff --git a/test/pash_tests/get_hash.sh b/test/pash_tests/get_hash.sh new file mode 100644 index 0000000..1ecbf21 --- /dev/null +++ b/test/pash_tests/get_hash.sh @@ -0,0 +1,2 @@ +# calculate a hash? can we change it to calculate hashes for all the files? 
+head -c32 /dev/urandom | openssl dgst -sha256 -binary -hmac $(xxd -p -l32 -c32 /dev/urandom) | base64 | cut -b-32 diff --git a/test/pash_tests/get_results.sh b/test/pash_tests/get_results.sh new file mode 100755 index 0000000..591bbd9 --- /dev/null +++ b/test/pash_tests/get_results.sh @@ -0,0 +1,38 @@ +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} +rm -rf log_results +mkdir log_results + +stats() ( + test_results_dir=$1 + grep "are identical" "$test_results_dir"/result_status | + sed "s,^$PASH_TOP/,," > log_results/$2_passed.log + cat log_results/$2_passed.log >> log_results/passed.log + grep "are not identical" "$test_results_dir"/result_status | + sed "s,^$PASH_TOP/,," > log_results/$2_failed.log + # if the file has data, append it + if [ -s log_results/$2_failed.log ] + then + cat log_results/$2_failed.log >> log_results/failed.log + else + # remove since it's empty + rm log_results/$2_failed.log + fi + TOTAL_TESTS=$(cat "$test_results_dir"/result_status | wc -l) + PASSED_TESTS=$(grep "are identical" "$test_results_dir"/result_status | wc -l) + echo "$2: ${PASSED_TESTS}/${TOTAL_TESTS} tests passed." 
+) + +echo "Below follow the identical outputs:" > log_results/passed.log +echo "Below follow the non-identical outputs:" > log_results/failed.log +# +## intro tests +stats "$PASH_TOP/evaluation/intro/output" intro +# +## Interface Tests +stats "$PASH_TOP/evaluation/tests/interface_tests/output" interface +# +## compiler Tests +stats "${PASH_TOP}/evaluation/tests/results" compiler +# +## aggregator tests +stats "${PASH_TOP}/evaluation/tests/agg/output" agg diff --git a/test/pash_tests/get_type_count.sh b/test/pash_tests/get_type_count.sh new file mode 100644 index 0000000..54899fe --- /dev/null +++ b/test/pash_tests/get_type_count.sh @@ -0,0 +1,4 @@ +# count how many times each file type exist in a directory +INPUT=${INPUT:-$PASH_TOP/evaluation/aliases/input/} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/aliases/output} +find $INPUT -type f | while read f; do echo ""${f##*.}""; done | sed ""/^\s*$/d"" | sort | uniq -c | sort -rn > $OUTPUT/get_type_count_res diff --git a/test/pash_tests/grab_submissions.sh b/test/pash_tests/grab_submissions.sh new file mode 100755 index 0000000..3d2370c --- /dev/null +++ b/test/pash_tests/grab_submissions.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +if [ "$#" != "1" ]; then + echo "Usage: $0 [hwXX]" + exit 1 +fi + +if [ -d "$1" ]; then + echo "Grading directory already exists" + exit 2 +fi + +mkdir $1 +mkdir $1/submissions +cp ../dropbox/$1/* $1/submissions diff --git a/test/pash_tests/grade.sh b/test/pash_tests/grade.sh new file mode 100755 index 0000000..a0f2ad2 --- /dev/null +++ b/test/pash_tests/grade.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +score=0 +total=0 + +if [ -d output ]; then + echo "output directory already exists, aborting" + exit 1 +fi + +mkdir output + +echo "LEXER/PARSER AUTOGRADER RESULTS" +echo + +# check success cases +for i in right/*.lc; do + file=$(basename $i) + output=$(mktemp output/$file.XXXX) + + echo -n "$file: " + + ./Main $i >$output 2>&1 + if [ $? 
-eq 0 ] + then + let score+=1 + echo "1/1" + else + echo "0/1" + fi + + let total+=1 +done + +# check failure cases +for i in wrong/*.lc; do + file=$(basename $i) + output=$(mktemp output/$file.XXXX) + + echo -n "$file: " + + ./Main $i >$output 2>&1 + if [ $? -eq 1 ] + then + let score+=1 + echo "1/1" + else + echo "0/1" + fi + + let total+=1 +done + +echo +echo "TOTAL: $score / $total" +echo +echo "PROBLEM 1: XXX / 5" +echo +let total=total+5 +echo "FINAL GRADE: $score + XXX / $total" diff --git a/test/pash_tests/grep-test.sh b/test/pash_tests/grep-test.sh new file mode 100644 index 0000000..34efeae --- /dev/null +++ b/test/pash_tests/grep-test.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +## This test contains all occurences of tr (to test the annotation) + +FILE="$PASH_TOP/evaluation/tests/input/1M.txt" + +cat $FILE | grep "the" +cat $FILE | grep -c "the" diff --git a/test/pash_tests/grep.sh b/test/pash_tests/grep.sh new file mode 100755 index 0000000..5699e84 --- /dev/null +++ b/test/pash_tests/grep.sh @@ -0,0 +1,2 @@ +#!/bin/bash +cat $IN | grep 'the' diff --git a/test/pash_tests/grep_env_test.sh b/test/pash_tests/grep_env_test.sh new file mode 100755 index 0000000..805e069 --- /dev/null +++ b/test/pash_tests/grep_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt diff --git a/test/pash_tests/grep_f_script.sh b/test/pash_tests/grep_f_script.sh new file mode 100755 index 0000000..2a29820 --- /dev/null +++ b/test/pash_tests/grep_f_script.sh @@ -0,0 +1,28 @@ +mkfifo s1 s2 s3 s4 s5 + +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +IN="$PASH_TOP/evaluation/tests/input/1M.txt" + +sorted_in="/tmp/sorted.in" + +sort $IN > $sorted_in + +echo " king" | tee s4 >s3 & +grep -vx -f s3 - > s1 < $sorted_in & +grep -vx -f s4 - > s2 < $sorted_in & +## The eager is essential here or after tee to ensure non-deadlocks +{ "$PASH_TOP/runtime/eager.sh" s2 s5 "/tmp/eager_intermediate_#file1" & } +cat s1 s5 > grep-f.out + 
+echo " king" | tee s4 >s3 & +comm -13 s3 - > s1 < $sorted_in & +comm -13 s4 - > s2 < $sorted_in & +## The eager is essential here or after tee to ensure non-deadlocks +{ "$PASH_TOP/runtime/eager.sh" s2 s5 "/tmp/eager_intermediate_#file1" & } +cat s1 s5 > comm.out + +rm s1 s2 s3 s4 s5 + +diff grep-f.out comm.out + diff --git a/test/pash_tests/head.sh b/test/pash_tests/head.sh new file mode 100755 index 0000000..6f217eb --- /dev/null +++ b/test/pash_tests/head.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +# FIXME missing head parameters + +cat "${1}" diff --git a/test/pash_tests/head_deadlock.sh b/test/pash_tests/head_deadlock.sh new file mode 100755 index 0000000..a42fe20 --- /dev/null +++ b/test/pash_tests/head_deadlock.sh @@ -0,0 +1,9 @@ +mkfifo s1 s2 + +cat ../evaluation/scripts/input/1M.txt > s1 & +cat ../evaluation/scripts/input/1M.txt > s2 & +cat s1 s2 | head -n 1 & + +wait + +rm s1 s2 diff --git a/test/pash_tests/head_deadlock_fixed.sh b/test/pash_tests/head_deadlock_fixed.sh new file mode 100755 index 0000000..05a74de --- /dev/null +++ b/test/pash_tests/head_deadlock_fixed.sh @@ -0,0 +1,9 @@ +mkfifo s1 s2 + +cat ../evaluation/scripts/input/1M.txt > s1 & +cat ../evaluation/scripts/input/1M.txt > s2 & +cat s1 s2 | (head -n 1; ../evaluation/tools/drain_stream.sh) & + +wait + +rm s1 s2 diff --git a/test/pash_tests/head_deadlock_fixed3.sh b/test/pash_tests/head_deadlock_fixed3.sh new file mode 100755 index 0000000..e914c18 --- /dev/null +++ b/test/pash_tests/head_deadlock_fixed3.sh @@ -0,0 +1,39 @@ +mkfifo s1 s2 + +## This way of fixing the problem suffers from some issues. +## +## - First of all, gathering the children after the end of the graph +## seems to gather more than just the alive nodes. This could lead +## to killing some random pid in the system. This could potentially +## be solved by gathering all pids incrementally. +## +## - In addition, this way of getting the last pid does not work if +## there is more than one output. 
(This is never the case in our +## tests, but could be. +## +## - Finally, it is not local, since all of the monitoring happens +## globally. Ideally, it should be done by a wrapper in each - +## node. The wrapper should monitor if the node dies, and if so it - +## should send SIGPIPE to all its producers. + +cat ../evaluation/scripts/input/1M.txt > s1 & +echo "Current node: $!" +cat ../evaluation/scripts/input/1M.txt > s2 & +echo "Current node: $!" +cat s1 s2 | head -n 1 & + +last=$! + +echo "Children pids" +ps --ppid $$ | awk '{print $1}' | grep -E '[0-9]' + +echo "Alternative children pids" +jobs -l | awk '{print $1}' + +wait $last + +echo "Last pid: $last" + +ps --ppid $$ | awk '{print $1}' | grep -E '[0-9]' | xargs -n 1 kill -SIGPIPE + +rm s1 s2 diff --git a/test/pash_tests/head_deadlock_fixed_2.sh b/test/pash_tests/head_deadlock_fixed_2.sh new file mode 100755 index 0000000..fa66f85 --- /dev/null +++ b/test/pash_tests/head_deadlock_fixed_2.sh @@ -0,0 +1,9 @@ +mkfifo s1 s2 + +cat ../evaluation/scripts/input/1M.txt > s1 & +cat ../evaluation/scripts/input/1M.txt > s2 & +(cat s1 s2; head -n 1 s2 > /dev/null) | head -n 1 & + +wait + +rm s1 s2 diff --git a/test/pash_tests/hello-world.sh b/test/pash_tests/hello-world.sh new file mode 100755 index 0000000..21498d3 --- /dev/null +++ b/test/pash_tests/hello-world.sh @@ -0,0 +1,8 @@ +[ $(uname) = 'Darwin' ] && a=/usr/share/dict/web2 || a=/usr/share/dict/words + +if [ -f $a ]; then + cat $a $a $a $a $a $a $a $a | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' | wc -l +else + echo "Dictionary file $a not found.." 
+fi + diff --git a/test/pash_tests/heredoc1.sh b/test/pash_tests/heredoc1.sh new file mode 100644 index 0000000..fe5ecb3 --- /dev/null +++ b/test/pash_tests/heredoc1.sh @@ -0,0 +1,3 @@ +cat << foo +line one +foo \ No newline at end of file diff --git a/test/pash_tests/identity.sh b/test/pash_tests/identity.sh new file mode 100755 index 0000000..5e61350 --- /dev/null +++ b/test/pash_tests/identity.sh @@ -0,0 +1,36 @@ +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out + + +batchSize=10000 +testFile="/home/ubuntu/pash/evaluation/scripts/input/100M.txt" +if ![ $1 -eq 0 ]; then + testFile=@1 +fi +if ![ $2 -eq 0 ]; then + testFile=@2 +fi + + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 + +../r_split $testFile $batchSize $file1 $file2 & + +../r_wrap cat < $file1 > $file3 & +../r_wrap cat < $file2 > $file4 & + +../r_merge $file3 $file4 > $file5 + +if cmp -s "$testFile" "$file5"; then + printf 'The file "%s" is the same as "%s"\n' "$testFile" "$file5" +else + printf 'The file "%s" is different from "%s"\n' "$testFile" "$file5" +fi + +rm -rf *.out \ No newline at end of file diff --git a/test/pash_tests/img_convert.sh b/test/pash_tests/img_convert.sh new file mode 100755 index 0000000..ae908f4 --- /dev/null +++ b/test/pash_tests/img_convert.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# tag: resize image +IN=${JPG:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/jpg} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/jpg} +mkdir -p ${OUT} +for i in $IN/*.jpg; +do + out=$OUT/$(basename -- $i) + convert -resize 70% "$i" "$out"; +done + +echo 'done'; diff --git a/test/pash_tests/incr.sh b/test/pash_tests/incr.sh new file mode 100755 index 0000000..c2654e6 --- /dev/null +++ b/test/pash_tests/incr.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# https://unix.stackexchange.com/questions/193441/how-can-i-implement-a-circular-flow-of-data-among-interconnected-commands +F="temp.txt" +[ -f $F ] && (rm $F && echo 1 >$F ) +tail -f $F | while 
read n; do echo $((n+1)); sleep 1; done | tee -a $F diff --git a/test/pash_tests/innefficient_auto_split.sh b/test/pash_tests/innefficient_auto_split.sh new file mode 100755 index 0000000..f806c7d --- /dev/null +++ b/test/pash_tests/innefficient_auto_split.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +## Running it with PaSh: +## time ./pa.sh -w 4 -d 1 --output_time evaluation/scripts/innefficient_auto_split.sh +## +## is slower than running it with bash: +## time ./evaluation/scripts/innefficient_auto_split.sh +## +## because the script doesn't do a lot of processing so + +FILE="$PASH_TOP/evaluation/scripts/input/1G.txt" +cat $FILE | sed 1d | grep 'Bell' | cut -f 2 | wc -l + +## If instead we run the following, we get the expected results +# cat $FILE $FILE | grep 'Bell' | cut -f 2 | wc -l diff --git a/test/pash_tests/install-deps.sh b/test/pash_tests/install-deps.sh new file mode 100755 index 0000000..dc1a9a7 --- /dev/null +++ b/test/pash_tests/install-deps.sh @@ -0,0 +1,6 @@ +# install dependencies +pkgs='ffmpeg unrtf imagemagick' +if ! 
dpkg -s $pkgs >/dev/null 2>&1; then + sudo apt-get install $pkgs -y +fi + diff --git a/test/pash_tests/ldconfig.sh b/test/pash_tests/ldconfig.sh new file mode 100755 index 0000000..334ac7b --- /dev/null +++ b/test/pash_tests/ldconfig.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +set -e + +cd _build/lib + +trylink() { + [ -f "$2" ] || ln -sf $1 $2 +} + +trylink dlldash.so.0.0.0 dlldash.so +trylink dlldash.so.0.0.0 dlldash.so.0 + +trylink libdash.so.0.0.0 libdash.so +trylink libdash.so.0.0.0 libdash.so.0 + diff --git a/test/pash_tests/longest-man.sh b/test/pash_tests/longest-man.sh new file mode 100755 index 0000000..6adc986 --- /dev/null +++ b/test/pash_tests/longest-man.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +# Find the 10 largest man pages + +find /usr/share/man -type f | xargs du -scb | sort -rn | head -n 10 diff --git a/test/pash_tests/loop1.sh b/test/pash_tests/loop1.sh new file mode 100644 index 0000000..6e8693b --- /dev/null +++ b/test/pash_tests/loop1.sh @@ -0,0 +1,3 @@ +for idFor1 in A B ; do + echo $idFor1 +done diff --git a/test/pash_tests/make-ec2.sh b/test/pash_tests/make-ec2.sh new file mode 100755 index 0000000..f1b59bc --- /dev/null +++ b/test/pash_tests/make-ec2.sh @@ -0,0 +1,21 @@ +#! 
/bin/bash + +# Pair with ./suggest-ec2.sh + +main() { + set -x + aws ec2 run-instances \ + --output text \ + --query "Instances[0].InstanceId" \ + --image-id "$PASH_AWS_EC2_AMI" \ + --instance-type "$PASH_AWS_EC2_INSTANCE_TYPE" \ + --key-name "$PASH_AWS_EC2_KEY_NAME" \ + --security-group-ids "$PASH_AWS_EC2_SECURITY_GROUP" \ + --monitoring "Enabled=false" \ + --subnet-id "$PASH_AWS_EC2_SUBNET" \ + --query 'Instances[0].InstanceId' \ + --block-device-mappings "DeviceName=/dev/sda1,Ebs={VolumeSize=$PASH_AWS_EC2_DISK_SIZE_GB}" \ + --output text +} + +main diff --git a/test/pash_tests/max-temp-preprocess.sh b/test/pash_tests/max-temp-preprocess.sh new file mode 100755 index 0000000..e3d4b98 --- /dev/null +++ b/test/pash_tests/max-temp-preprocess.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +sed 's;^;http://ndr.md/data/noaa/;' | + sed 's;$;/;' | + xargs -r -n 1 curl -s | + grep gz | + tr -s ' \n' | + cut -d ' ' -f9 | + sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' | + sed 's;^;http://ndr.md/data/noaa/;' | + xargs -n1 curl -s | + gunzip diff --git a/test/pash_tests/max-temp-process.sh b/test/pash_tests/max-temp-process.sh new file mode 100755 index 0000000..510bb1d --- /dev/null +++ b/test/pash_tests/max-temp-process.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +## Processing +cat $IN | + cut -c 89-92 | + grep -v 999 | + sort -rn | + head -n1 diff --git a/test/pash_tests/max-temp.sh b/test/pash_tests/max-temp.sh new file mode 100755 index 0000000..b0c18aa --- /dev/null +++ b/test/pash_tests/max-temp.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +FROM=${FROM:-2015} +TO=${TO:-2015} +IN=${IN:-'http://ndr.md/data/noaa/'} +fetch=${fetch:-"curl -s"} + +seq $FROM $TO | + sed "s;^;$IN;" | + sed 's;$;/;' | + xargs -r -n 1 $fetch | + grep gz | + tr -s ' \n' | + cut -d ' ' -f9 | + sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' | + sed "s;^;$IN;" | + xargs -n1 curl -s | + gunzip | + ## Processing + cut -c 89-92 | + grep -v 999 | + sort -rn | + head -n1 diff --git a/test/pash_tests/merge-uniq.sh 
b/test/pash_tests/merge-uniq.sh new file mode 100755 index 0000000..1e53c00 --- /dev/null +++ b/test/pash_tests/merge-uniq.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +# This is how to merge results of `uniq -c`, contained in {1,2,3}.txt +# I am using 3 inputs to stress it works with more than just pairs:-) + +A=${1:-1.txt} +B=${1:-2.txt} +C=${1:-3.txt} +awk '{ count[$2] += $1 } END { for(e in count) print count[e], e }' "$A" "$B" "$C" diff --git a/test/pash_tests/merge-wc.sh b/test/pash_tests/merge-wc.sh new file mode 100755 index 0000000..1ce6779 --- /dev/null +++ b/test/pash_tests/merge-wc.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +A="paste -d '+' " +for i in "$@"; do + # cat "$i" | tr -s ' ' '\n' | tail -n +2 + A="$A <(cat $i | tr -s ' ' '\n' | tail -n +2) " +done +A="$A | head -n +3 | bc | tr -s '\n' ' ' | sed 's/$/\ /'" + +eval $A + diff --git a/test/pash_tests/micro_10.sh b/test/pash_tests/micro_10.sh new file mode 100644 index 0000000..8d70d87 --- /dev/null +++ b/test/pash_tests/micro_10.sh @@ -0,0 +1,11 @@ +cat $IN | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " diff --git a/test/pash_tests/micro_1000.sh b/test/pash_tests/micro_1000.sh new file mode 100644 index 0000000..7278a5a --- /dev/null +++ b/test/pash_tests/micro_1000.sh @@ -0,0 +1,1002 @@ +cat $IN | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | 
+tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " 
| +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " 
" | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " 
" " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " 
" " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr 
" " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | 
+tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " | +tr " " " " diff --git a/test/pash_tests/micro_1000_env_test.sh b/test/pash_tests/micro_1000_env_test.sh new file mode 100644 index 0000000..b648f0f --- /dev/null +++ b/test/pash_tests/micro_1000_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/1M.txt diff --git a/test/pash_tests/micro_10_env_test.sh b/test/pash_tests/micro_10_env_test.sh new file mode 100644 index 0000000..b648f0f --- /dev/null +++ b/test/pash_tests/micro_10_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/1M.txt diff --git a/test/pash_tests/minimal_grep.sh b/test/pash_tests/minimal_grep.sh new file mode 100644 index 
0000000..2a65106 --- /dev/null +++ b/test/pash_tests/minimal_grep.sh @@ -0,0 +1,54 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out +file9=9.out +rm -f *.out + +testFile="$PASH_TOP/evaluation/scripts/input/10M.txt" +batchSize=100000 +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 + +# mkfifo $file7 +# mkfifo $file8 +# mkfifo $file9 + + +$PASH_TOP/runtime/r_split $testFile $batchSize $file1 $file2 & + + +$PASH_TOP/runtime/dgsh-tee -I -i $file1 -o $file5 -b 10M & +$PASH_TOP/runtime/dgsh-tee -I -i $file2 -o $file6 -b 10M & + +$PASH_TOP/runtime/r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file5 > $file3 & +$PASH_TOP/runtime/r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file6 > $file4 & +# ../r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file7 > $file8 & + +$PASH_TOP/runtime/r_merge $file3 $file4 + +# cat $testFile | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > $file6 +# if cmp -s "$file6" "$file5"; then +# printf 'The file "%s" is the same as "%s"\n' "$file6" "$file5" +# else +# printf 'The file "%s" is different from "%s"\n' "$file6" "$file5" +# fi + +rm -rf *out diff --git a/test/pash_tests/minimal_grep_env_test.sh b/test/pash_tests/minimal_grep_env_test.sh new file mode 100755 index 0000000..b648f0f --- /dev/null +++ b/test/pash_tests/minimal_grep_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/1M.txt diff --git a/test/pash_tests/minimal_grep_stdin.sh b/test/pash_tests/minimal_grep_stdin.sh new file mode 100644 index 0000000..7dee616 --- /dev/null +++ b/test/pash_tests/minimal_grep_stdin.sh @@ -0,0 +1 @@ +tr A-Z a-z | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' diff --git a/test/pash_tests/minimal_sort.sh b/test/pash_tests/minimal_sort.sh new file mode 100644 index 0000000..1f4cb65 --- /dev/null +++ 
b/test/pash_tests/minimal_sort.sh @@ -0,0 +1,2 @@ +#!/bin/bash +cat $IN | tr A-Z a-z | sort diff --git a/test/pash_tests/minimal_sort_env_test.sh b/test/pash_tests/minimal_sort_env_test.sh new file mode 100755 index 0000000..805e069 --- /dev/null +++ b/test/pash_tests/minimal_sort_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt diff --git a/test/pash_tests/mk_dot_install.sh b/test/pash_tests/mk_dot_install.sh new file mode 100755 index 0000000..0ac9473 --- /dev/null +++ b/test/pash_tests/mk_dot_install.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +set -e + +libdash_files=$(ls _build/lib) +bindings_files="META dash.cmxa dash.cma dash.a dash.mli dash.cmi dash.cmo dash.cmx ast.mli ast.cmi ast.cmo ast.cmx" + +files= +for f in ${libdash_files} +do + files="${files} \"_build/lib/${f}\"" +done + +for f in ${bindings_files} +do + files="${files} \"ocaml/${f}\"" +done + +cat >libdash.install < sh_352.18tmp && echo sh_352.18 line 2 >> sh_352.18tmp && cat sh_352.18tmp ); echo "$x" \ No newline at end of file diff --git a/test/pash_tests/nfa-regex.sh b/test/pash_tests/nfa-regex.sh new file mode 100755 index 0000000..6431aa5 --- /dev/null +++ b/test/pash_tests/nfa-regex.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Match complex regular-expression over input + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} + +cat $IN | tr A-Z a-z | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' diff --git a/test/pash_tests/nginx.sh b/test/pash_tests/nginx.sh new file mode 100755 index 0000000..2a4e68e --- /dev/null +++ b/test/pash_tests/nginx.sh @@ -0,0 +1,22 @@ +############################### +### awk not working on pash ### +############################### +# sort by reponse codes +#pash 36 sec, bash 7 sec +INPUT=${PASH_TOP}/evaluation/scripts/input/access.log +cat ${INPUT} | cut -d "\"" -f3 | cut -d ' ' -f2 | sort | uniq -c | sort -rn > /dev/null +# awk alternative, too slow +awk '{print $9}' ${INPUT} | sort | uniq -c | sort -rn > /dev/null +# find broken links broken links 
+awk '($9 ~ /404/)' ${INPUT} | awk '{print $7}' | sort | uniq -c | sort -rn > /dev/null +# for 502 (bad-gateway) we can run following command: +awk '($9 ~ /502/)' ${INPUT} | awk '{print $7}' | sort | uniq -c | sort -r > /dev/null +# Who are requesting broken links (or URLs resulting in 502) +awk -F\" '($2 ~ "/wp-admin/install.php"){print $1}' ${INPUT} | awk '{print $1}' | sort | uniq -c | sort -r > /dev/null +# 404 for php files -mostly hacking attempts +awk '($9 ~ /404/)' ${INPUT} | awk -F\" '($2 ~ "^GET .*\.php")' | awk '{print $7}' | sort | uniq -c | sort -r | head -n 20 > /dev/null +############################## +# Most requested URLs ######## +awk -F\" '{print $2}' ${INPUT} | awk '{print $2}' | sort | uniq -c | sort -r > /dev/null +# Most requested URLs containing XYZ +awk -F\" '($2 ~ "ref"){print $2}' ${INPUT} | awk '{print $2}' | sort | uniq -c | sort -r > /dev/null diff --git a/test/pash_tests/no_in_script.sh b/test/pash_tests/no_in_script.sh new file mode 100755 index 0000000..f1357a9 --- /dev/null +++ b/test/pash_tests/no_in_script.sh @@ -0,0 +1,2 @@ +N=100 +seq 1 $N | sort -rn diff --git a/test/pash_tests/p1.sh b/test/pash_tests/p1.sh new file mode 100644 index 0000000..73daeb3 --- /dev/null +++ b/test/pash_tests/p1.sh @@ -0,0 +1,16 @@ +#!/bin/bash +PROXY=$([ "$(hostname)" == "deathstar" ] && echo "gamma.ndr.md" || echo "localhost") +WIKI="$HOME/wikipedia/" +export WIKI +# Squash all HTML for each URL into a single line, streaming fashion +# It also prefixes with the URL + +page_per_line () { + cat "$WIKI/$0" | tr -d "\n\r" | tr -d '\n' | sed -e '/.$/a\' +} + +export -f page_per_line + +# xargs: +# add `-t` for debugging +cat $WIKI/index_h_100.txt | xargs -0 -d '\n' -n 1 bash -c 'page_per_line "$@"' diff --git a/test/pash_tests/p2.sh b/test/pash_tests/p2.sh new file mode 100644 index 0000000..3075b98 --- /dev/null +++ b/test/pash_tests/p2.sh @@ -0,0 +1,8 @@ + sed "s#^#$HOME/wikipedia/#" | + xargs cat | + iconv -c -t ascii//TRANSLIT | + pandoc +RTS -K64m 
-RTS --from html --to plain --quiet | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + grep -vwFf ../evaluation/scripts/web-index/stopwords.txt | + ../evaluation/scripts/web-index/stem-words.js diff --git a/test/pash_tests/pa.sh b/test/pash_tests/pa.sh new file mode 100755 index 0000000..d031d73 --- /dev/null +++ b/test/pash_tests/pa.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash + +export PASH_TOP=${PASH_TOP:-${BASH_SOURCE%/*}} +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/" +# point to the local downloaded folders +export PYTHONPATH=${PASH_TOP}/python_pkgs/ +## Register the signal handlers, we can add more signals here +trap kill_all SIGTERM SIGINT + +## kill all the pending processes that are spawned by this shell +kill_all() { + # kill all my subprocesses only + kill -s SIGKILL 0 + # kill pash_daemon + kill -s SIGKILL "$daemon_pid" +} +## Save the umask to first create some files and then revert it +old_umask=$(umask) + +## Restore the umask to create files etc +umask u=rwx,g=rx,o=rx + +if [ "$#" -eq 1 ] && [ "$1" = "--init" ]; then + "$PASH_TOP"/compiler/superoptimize.sh + exit +fi + +if ! 
command -v python3 &> /dev/null +then + echo "Python >=3 could not be found" + exit 1 # fail explicitly: a bare `exit` would return echo's status (0) +fi + +## Create a temporary directory where PaSh can use for temporary files and logs +export PASH_TMP_PREFIX="$(mktemp -d /tmp/pash_XXXXXXX)/" + +## Create a timestamp that PaSh can use for log directories +## (should not be used to create critical directories/files, only logs/monitors/etc, +## all the critical pash temp files should go in PASH_TMP_PREFIX) +export PASH_TIMESTAMP="$(date +"%y-%m-%d-%T")" + +## Create the input and output fifo that the runtime will use for communication +export RUNTIME_IN_FIFO="${PASH_TMP_PREFIX}/runtime_in_fifo" +export RUNTIME_OUT_FIFO="${PASH_TMP_PREFIX}/runtime_out_fifo" +## TODO: Get rid of these two commands if possible +rm -f "$RUNTIME_IN_FIFO" "$RUNTIME_OUT_FIFO" +mkfifo "$RUNTIME_IN_FIFO" "$RUNTIME_OUT_FIFO" +export DAEMON_SOCKET="${PASH_TMP_PREFIX}/daemon_socket" +export DSPASH_SOCKET="${PASH_TMP_PREFIX}/dspash_socket" + +## Initialize all things necessary for pash to execute (logging/functions/etc) +source "$PASH_TOP/compiler/pash_init_setup.sh" "$@" + +if [ "$pash_daemon" -eq 1 ] && [ "$show_version" -eq 0 ]; then + ## TODO: If possible, move the daemon start as early as possible to reduce waiting + python3 -S "$PASH_TOP/compiler/pash_runtime_daemon.py" "$@" & + daemon_pid=$! + ## Wait until daemon has established connection + ## + ## TODO: Can we get rid of the `sleep` in this wait? + pash_wait_until_daemon_listening +fi + +## Restore the umask before executing +umask "$old_umask" +PASH_FROM_SH="pa.sh" python3 -S "$PASH_TOP/compiler/pash.py" "$@" +pash_exit_code=$? 
+if [ "$pash_daemon" -eq 1 ] && [ "$show_version" -eq 0 ]; then + ## Only wait for daemon if it lives (it might be dead, rip) + if ps -p "$daemon_pid" > /dev/null + then + ## Send and receive from daemon + msg="Done" + daemon_response=$(pash_communicate_daemon "$msg") + if [ "$distributed_exec" -eq 1 ]; then + # kill $worker_manager_pid + manager_response=$(pash_communicate_worker_manager "$msg") + fi + wait 2> /dev/null 1>&2 + fi +fi + + + +## Don't delete the temporary directory if we are debugging +if [ "$PASH_DEBUG_LEVEL" -eq 0 ]; then + rm -rf "${PASH_TMP_PREFIX}" +fi + +(exit "$pash_exit_code") diff --git a/test/pash_tests/pacaur.sh b/test/pash_tests/pacaur.sh new file mode 100755 index 0000000..5cde8dd --- /dev/null +++ b/test/pash_tests/pacaur.sh @@ -0,0 +1,40 @@ +#!/bin/bash +IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/packages} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/packages} +LOGS=${OUT}/logs +mkdir -p ${OUT} ${LOGS} + +info() { echo -e "\e[1m--> $@\e[0m"; } +mkcd() { mkdir -p "$1" && cd "$1"; } + +# check if not running as root +# test "$UID" -gt 0 || { info "don't run this as root!"; exit; } + +# set link to plaintext PKGBUILDs +pkgbuild="https://aur.archlinux.org/cgit/aur.git/plain/PKGBUILD?h" + +run_tests() { + pkg=$1 # package name; the body reads $pkg, so take it from the argument + info "create subdirectory for $pkg" + mkcd "${OUT}/$pkg" + + info "fetch PKGBUILD for $pkg" + curl --insecure -o PKGBUILD "$pkgbuild=$pkg" 2> /dev/null|| echo ' ' + + #info "fetch required pgp keys from PKGBUILD" + #gpg --recv-keys $(sed -n "s:^validpgpkeys=('\([0-9A-Fa-fx]\+\)').*$:\1:p" PKGBUILD) + info "make and install ..." 
+ timeout 100 makedeb-makepkg --format-makedeb -d 2>/dev/null|| echo 'failed' + cd - +} + +export -f run_tests +pkg_count=0 +# loop over required packages +for pkg in $(cat ${IN} | tr '\n' ' ' ); +do + pkg_count=$((pkg_count + 1)) + run_tests $pkg > "${LOGS}"/"$pkg_count.log" +done + +echo 'done'; diff --git a/test/pash_tests/page-count.sh b/test/pash_tests/page-count.sh new file mode 100755 index 0000000..b4a3326 --- /dev/null +++ b/test/pash_tests/page-count.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# A bash script for determining how many pages are in a folder of OpenOffice documents +# From "Wicked Cool Shell Scripts", 2nd Ed., pg. 7 + +# Require: libimage-exiftool-perl, bc +# Data: +# http://ndr.md/data/dummy/large.pdf +# More data: +# https://arxiv.org/help/bulk_data + +IN=./input/large.pdf +OUT=./output/out.txt + +echo "$(exiftool $IN | + grep Page-count | + cut -d ":" -f2 | + tr '\n' '+')""0" | + bc | + sed 's/^/\n/' > $OUT diff --git a/test/pash_tests/page-per-line.sh b/test/pash_tests/page-per-line.sh new file mode 100755 index 0000000..4a0c10b --- /dev/null +++ b/test/pash_tests/page-per-line.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +# Squash all HTML for each URL into a single line, streaming fashion +# It also prefixes with the URL + +page_per_line () { + curl -s "$1" | tr -d "\n\r" | tr -d '\n' | sed "s/^/$0 /" | sed -e '/.$/a\' +} + +export -f page_per_line + +# xargs: +# add `-t` for debugging +cat ./urls.txt | xargs -0 -d '\n' -n 1 bash -c 'page_per_line "$@"' _ diff --git a/test/pash_tests/parse.sh b/test/pash_tests/parse.sh new file mode 100755 index 0000000..48ab720 --- /dev/null +++ b/test/pash_tests/parse.sh @@ -0,0 +1,17 @@ +bash ./get_results.sh > out +mv out log_results +cat log_results/out +while read p; do + PASSED=$(echo $p | awk -F'[^0-9]+' '{ print $2 }') + TOTAL=$(echo $p | awk -F'[^0-9]+' '{ print $3 }') + FAILED=$((TOTAL - PASSED)) # tests that did not pass + # failed, print to stdout + if [ $PASSED -ne $TOTAL ]; then + # get the benchmark name + f=${p%% *} + # 
strip the : + f="${f%?}" + # dump the failed tests + cat log_results/${f}_failed.log + fi +done < log_results/out diff --git a/test/pash_tests/pash_declare_vars.sh b/test/pash_tests/pash_declare_vars.sh new file mode 100644 index 0000000..9b1290f --- /dev/null +++ b/test/pash_tests/pash_declare_vars.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +vars_file="${1?File not given}" + +pash_redir_output echo "Writing vars to: $vars_file" + +declare -p > "$vars_file" +## KK 2021-11-23 We don't actually need to export functions in the vars file. +## We never expand them in the compiler +## declare -f >> "$vars_file" diff --git a/test/pash_tests/pash_ptempfile_name.sh b/test/pash_tests/pash_ptempfile_name.sh new file mode 100755 index 0000000..ff815aa --- /dev/null +++ b/test/pash_tests/pash_ptempfile_name.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +distro=${1?Distro not given} +# echo "$PASH_TMP_PREFIX/pash_$RANDOM$RANDOM$RANDOM" +mktemp -u "$PASH_TMP_PREFIX/pash_XXXXXXXXXX" diff --git a/test/pash_tests/pash_runtime_complete_execution.sh b/test/pash_tests/pash_runtime_complete_execution.sh new file mode 100644 index 0000000..10cbad1 --- /dev/null +++ b/test/pash_tests/pash_runtime_complete_execution.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +## +## Completes execution by measuring and logging execution times and restoring state +## + +## +## (6) +## + +pash_exec_time_end=$(date +"%s%N") + +## TODO: Maybe remove the temp file after execution + +## We want the execution time in milliseconds +if [ "$pash_output_time_flag" -eq 1 ]; then + pash_exec_time_ms=$(echo "scale = 3; ($pash_exec_time_end-$pash_exec_time_start)/1000000" | bc) + pash_redir_output echo "Execution time: $pash_exec_time_ms ms" +fi + +## Source back the output variables of the compiled script. 
+## In all cases we should have executed a script +pash_redir_output echo "$$: (7) Recovering BaSh variables from: $pash_output_var_file" +source "$RUNTIME_DIR/pash_source_declare_vars.sh" "$pash_output_var_file" + +## Save the previous `set` state to a variable +pash_redir_output echo "$$: (7) Reading current BaSh set state from: ${pash_output_set_file}" + +pash_redir_output echo "$$: (7) Current BaSh set state: $(cat "$pash_output_set_file")" +## WARNING: This has to happen after sourcing the variables so that it overwrites it +pash_previous_set_status=$(cat "$pash_output_set_file") + +export pash_input_args +pash_redir_output echo "$$: (7) Arguments (might) have been updated to be: $pash_input_args" + +## Propagate the `set` state after running the script to the outer script +## TODO: Maybe move this to the end to avoid spurious failures +pash_redir_output echo "$$: (7) Current PaSh set state: $-" +source "$RUNTIME_DIR/pash_set_from_to.sh" "$-" "$(cat "$pash_output_set_file")" +pash_redir_output echo "$$: (7) Reverted to BaSh set state before exiting: $-" + +pash_redir_output echo "$$: (7) Reverting last BaSh exit code: $pash_runtime_final_status" +(exit "$pash_runtime_final_status") diff --git a/test/pash_tests/pash_runtime_shell_to_pash.sh b/test/pash_tests/pash_runtime_shell_to_pash.sh new file mode 100644 index 0000000..7780a9c --- /dev/null +++ b/test/pash_tests/pash_runtime_shell_to_pash.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +## +## This currently performs (5), i.e., reverting bash state to get back to pash mode. 
+## + +## TODO: Use that for (1) too + +output_vars_file=${1?Output var file not given} +output_set_file=${2?Output set file not given} + +pash_exec_status=${internal_exec_status} +pash_redir_output echo "$$: (5) BaSh script exited with ec: $pash_exec_status" + +## Save the current set options to a file so that they can be recovered +pash_final_set_vars=$- +pash_redir_output echo "$$: (5) Writing current BaSh set state to: $output_set_file" +pash_redir_output echo "$$: (5) Current BaSh shell: $-" +echo "$pash_final_set_vars" > "$output_set_file" + +## Revert to the old set state to avoid spurious fails +source "$RUNTIME_DIR/pash_set_from_to.sh" "$-" "$pash_current_set_state" +pash_redir_output echo "$$: (5) Reverted to PaSh set state to: $-" + + +## Save the current variables +source "$RUNTIME_DIR/pash_declare_vars.sh" "$output_vars_file" +# pash_redir_output echo "$$: (5) Exiting from BaSh with BaSh status: $pash_exec_status" +# (exit "$pash_exec_status") diff --git a/test/pash_tests/pcap.sh b/test/pash_tests/pcap.sh new file mode 100755 index 0000000..cc855c6 --- /dev/null +++ b/test/pash_tests/pcap.sh @@ -0,0 +1,25 @@ +#!/bin/bash +#tag: pcap analysis +IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/pcap_data} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/pcap-analysis} +LOGS=${OUT}/logs +mkdir -p ${LOGS} +run_tests() { + INPUT=$1 + /usr/sbin/tcpdump -nn -r ${INPUT} -A 'port 53' 2> /dev/null | sort | uniq |grep -Ev '(com|net|org|gov|mil|arpa)' 2> /dev/null + # extract URL + /usr/sbin/tcpdump -nn -r ${INPUT} -s 0 -v -n -l 2> /dev/null | egrep -i "POST /|GET /|Host:" 2> /dev/null + # extract passwords + /usr/sbin/tcpdump -nn -r ${INPUT} -s 0 -A -n -l 2> /dev/null | egrep -i "POST /|pwd=|passwd=|password=|Host:" 2> /dev/null +} +export -f run_tests + +pkg_count=0 + +for item in ${IN}/*; +do + pkg_count=$((pkg_count + 1)); + run_tests $item > ${LOGS}/${pkg_count}.log +done + +echo 'done'; diff --git 
a/test/pash_tests/pcap_bench.sh b/test/pash_tests/pcap_bench.sh new file mode 100755 index 0000000..a99790e --- /dev/null +++ b/test/pash_tests/pcap_bench.sh @@ -0,0 +1,8 @@ +INPUT=${INPUT:-$PASH_TOP/evaluation/scripts/input/201011271400.dump} +INPUT2=${INPUT2:-$PASH_TOP/evaluation/scripts/input/2018-07-20-17-31-20-192.168.100.108.pcap} +tcpdump -nn -r ${INPUT} -A 'port 53'| sort | uniq |grep -Ev '(com|net|org|gov|mil|arpa)' > /dev/null +tcpdump -nn -r ${INPUT} -A 'port 53'| sort |uniq |grep -Ev '(com|net|org|gov|mil|arpa)' > /dev/null +# without the pipes, bash takes 11 sec, with pipes, it takes 12 sec, same performance +# with pash +time tcpdump -nn -r ${INPUT2} -A -c 1000000 > /dev/null +time tcpdump -nn -r ${INPUT2} -A -c 1000000 | sort |uniq |grep -Ev '(com|net|org|gov|mil|arpa)' > /dev/null diff --git a/test/pash_tests/pkg.sh b/test/pash_tests/pkg.sh new file mode 100755 index 0000000..19b9028 --- /dev/null +++ b/test/pash_tests/pkg.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +# Package several versions of PaSh: +# * a shallow-clone version for a quick-install from `up` +# * a deep-clone version for other environments TODO +# * a docker image running on ubuntu 18.04 TODO + +set -ex + +echo $(pwd) +REV=0 + +REV=$(git rev-parse --short HEAD) +cd ../../ + +# # Shallow clone --- might not be ideal for development +# git clone --depth 1 git@github.com:andromeda/pash.git +# mv pash pash-shallow +# tar -cvzf pash-shallow.tar.gz pash-shallow/ > /dev/null +# # uncomment the following line to keep all versions +# # mv pash.tar.gz get/pash-${REV}.tar.gz +# # ln -sf ./pash-${REV}.tar.gz get/latest +# mv pash-shallow.tar.gz get/ +# ln -sf ./pash-shallow.tar.gz get/latest +# rm -rf pash-shallow + +cd pash +git pull +cd .. 
+tar -cvzf pash.tar.gz ./pash > /dev/null +mv pash.tar.gz get/ +ln -sf ./pash.tar.gz get/latest + +# in the future, we might want to have versions +# ln -s pash.tar.gz latest + + +# TODO: for a clear release, remove all versioning artifacts +# cp -r pash release +# cd release +# rm -rf .gitignore .gitsubmodules .git +# cd .. + diff --git a/test/pash_tests/pretty_print_json.sh b/test/pash_tests/pretty_print_json.sh new file mode 100755 index 0000000..d9da823 --- /dev/null +++ b/test/pash_tests/pretty_print_json.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +sed 's//}/g' | \ + sed 's/(/[/g' | \ + sed 's/)/]/g' | \ + python -m json.tool diff --git a/test/pash_tests/proginf.sh b/test/pash_tests/proginf.sh new file mode 100755 index 0000000..3c2a80d --- /dev/null +++ b/test/pash_tests/proginf.sh @@ -0,0 +1,18 @@ +#!/bin/bash +IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/node_modules} +MIR_BIN=${MIR_BIN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/mir-sa/.bin/mir-sa} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/mir} +mkdir -p ${OUT}/ +pkg_count=0 +run_tests() { + cd $1; + ${MIR_BIN} -p 2>>${OUT}/error.log +} +export -f run_tests +for item in ${IN}/*; +do + pkg_count=$((pkg_count + 1)); + run_tests $item > ${OUT}/$pkg_count.log +done + +echo 'done'; diff --git a/test/pash_tests/r-bell_grep.sh b/test/pash_tests/r-bell_grep.sh new file mode 100755 index 0000000..fe5f641 --- /dev/null +++ b/test/pash_tests/r-bell_grep.sh @@ -0,0 +1,50 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out +file9=9.out +rm -f *.out + +testFile="$PASH_TOP/evaluation/scripts/input/100M.txt" +batchSize=10000000 +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 + +# mkfifo $file7 +# mkfifo $file8 +# mkfifo $file9 + + 
+$PASH_TOP/runtime/r_split $testFile $batchSize $file1 $file2 & + +$PASH_TOP/runtime/r_wrap grep 'Bell' < $file1 > $file3 & +$PASH_TOP/runtime/r_wrap grep 'Bell' < $file2 > $file4 & +# ../r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file7 > $file8 & + +$PASH_TOP/runtime/r_merge $file3 $file4 + +# cat $testFile | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > $file6 +# if cmp -s "$file6" "$file5"; then +# printf 'The file "%s" is the same as "%s"\n' "$file6" "$file5" +# else +# printf 'The file "%s" is different from "%s"\n' "$file6" "$file5" +# fi + +rm -rf *out diff --git a/test/pash_tests/r-minimal_grep.sh b/test/pash_tests/r-minimal_grep.sh new file mode 100755 index 0000000..2dbc19b --- /dev/null +++ b/test/pash_tests/r-minimal_grep.sh @@ -0,0 +1,46 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out +file9=9.out +rm -f *.out + +testFile="$PASH_TOP/evaluation/scripts/input/10M.txt" +batchSize=1000000 +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 + +$PASH_TOP/runtime/r_split $testFile $batchSize $file1 $file2 & + +$PASH_TOP/runtime/r_wrap tr A-Z a-z < $file1 > $file3 & +$PASH_TOP/runtime/r_wrap tr A-Z a-z < $file2 > $file4 & + +$PASH_TOP/runtime/r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file3 > $file5 & +$PASH_TOP/runtime/r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file4 > $file6 & + +$PASH_TOP/runtime/r_merge $file5 $file6 +# cat $testFile | tr A-Z a-z | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > t2.out +# if cmp -s t1.out t2.out; then +# printf 'The file "%s" is the same as "%s"\n' "$file6" "$file5" +# else +# printf 'The file "%s" is different from "%s"\n' "$file6" "$file5" +# fi + +rm -rf *out \ No newline at end of file diff --git a/test/pash_tests/r-shortest-scripts.sh b/test/pash_tests/r-shortest-scripts.sh new file mode 
100644 index 0000000..2da7a68 --- /dev/null +++ b/test/pash_tests/r-shortest-scripts.sh @@ -0,0 +1,110 @@ +rm -f "#file2" +rm -f "#file4" +rm -f "#file6" +rm -f "#file8" +rm -f "#file10" +rm -f "#file12" +rm -f "#file14" +rm -f "#file17" +rm -f "#file18" +rm -f "#file19" +rm -f "#file20" +rm -f "#file21" +rm -f "#file22" +rm -f "#file23" +rm -f "#file24" +rm -f "#file25" +rm -f "#file26" +rm -f "#file27" +rm -f "#file28" +rm -f "#file29" +rm -f "#file30" +rm -f "#file32" +rm -f "#file31" +rm -f "#file33" +rm -f "#file34" +rm -f "#file35" +rm -f "#file36" +rm -f "#file37" +rm -f "#file38" +mkfifo "#file2" +mkfifo "#file4" +mkfifo "#file6" +mkfifo "#file8" +mkfifo "#file10" +mkfifo "#file12" +mkfifo "#file14" +mkfifo "#file17" +mkfifo "#file18" +mkfifo "#file19" +mkfifo "#file20" +mkfifo "#file21" +mkfifo "#file22" +mkfifo "#file23" +mkfifo "#file24" +mkfifo "#file25" +mkfifo "#file26" +mkfifo "#file27" +mkfifo "#file28" +mkfifo "#file29" +mkfifo "#file30" +mkfifo "#file32" +mkfifo "#file31" +mkfifo "#file33" +mkfifo "#file34" +mkfifo "#file35" +mkfifo "#file36" +mkfifo "#file37" +mkfifo "#file38" +{ cat /home/tamlu/pash/evaluation/scripts/input/all_cmds_x100.txt >"#file2" & } +{ /home/tamlu/pash/runtime/r_split "#file2" 100000 "#file17" "#file18" & } +{ /home/tamlu/pash/runtime/r_wrap xargs file <"#file17" >"#file20" & } +{ /home/tamlu/pash/runtime/r_wrap xargs file <"#file18" >"#file21" & } +{ /home/tamlu/pash/runtime/r_wrap grep "shell script" <"#file20" >"#file22" & } +{ /home/tamlu/pash/runtime/r_wrap grep "shell script" <"#file21" >"#file23" & } +{ /home/tamlu/pash/runtime/r_wrap cut -d: -f1 <"#file22" >"#file24" & } +{ /home/tamlu/pash/runtime/r_wrap cut -d: -f1 <"#file23" >"#file25" & } +{ /home/tamlu/pash/runtime/r_wrap xargs -L 1 wc -l <"#file24" >"#file26" & } +{ /home/tamlu/pash/runtime/r_wrap xargs -L 1 wc -l <"#file25" >"#file27" & } +{ /home/tamlu/pash/runtime/r_wrap grep -v "^0$" <"#file26" >"#file28" & } +{ /home/tamlu/pash/runtime/r_wrap grep -v 
"^0$" <"#file27" >"#file29" & } +{ /home/tamlu/pash/runtime/r_unwrap <"#file28" >"#file32" & } +{ sort -n <"#file35" >"#file30" & } +{ /home/tamlu/pash/runtime/r_unwrap <"#file29" >"#file33" & } +{ sort -n <"#file36" >"#file31" & } +{ sort -n -m "#file37" "#file38" >"#file14" & } +{ /home/tamlu/pash/runtime/eager.sh "#file32" "#file35" "/tmp/pash_eager_intermediate_#file1" & } +{ /home/tamlu/pash/runtime/eager.sh "#file33" "#file36" "/tmp/pash_eager_intermediate_#file2" & } +{ /home/tamlu/pash/runtime/eager.sh "#file30" "#file37" "/tmp/pash_eager_intermediate_#file3" & } +{ /home/tamlu/pash/runtime/eager.sh "#file31" "#file38" "/tmp/pash_eager_intermediate_#file4" & } +{ head -15 <"#file14" & } +source /home/tamlu/pash/runtime/wait_for_output_and_sigpipe_rest.sh ${!} +rm -f "#file2" +rm -f "#file4" +rm -f "#file6" +rm -f "#file8" +rm -f "#file10" +rm -f "#file12" +rm -f "#file14" +rm -f "#file17" +rm -f "#file18" +rm -f "#file19" +rm -f "#file20" +rm -f "#file21" +rm -f "#file22" +rm -f "#file23" +rm -f "#file24" +rm -f "#file25" +rm -f "#file26" +rm -f "#file27" +rm -f "#file28" +rm -f "#file29" +rm -f "#file30" +rm -f "#file32" +rm -f "#file31" +rm -f "#file33" +rm -f "#file34" +rm -f "#file35" +rm -f "#file36" +rm -f "#file37" +rm -f "#file38" \ No newline at end of file diff --git a/test/pash_tests/r-sort.sh b/test/pash_tests/r-sort.sh new file mode 100755 index 0000000..1978189 --- /dev/null +++ b/test/pash_tests/r-sort.sh @@ -0,0 +1,53 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out + +rm -f *.out + +testFile=../../evaluation/scripts/input/100M.txt +batchSize=10000000 +testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 +mkfifo $file7 +mkfifo $file8 + +$PASH_TOP/runtime/r_split $testFile $batchSize $file1 
$file2 & + +$PASH_TOP/runtime/r_unwrap < $file1 > $file3 & +$PASH_TOP/runtime/r_unwrap < $file2 > $file4 & + +$PASH_TOP/runtime/eager.sh $file3 $file5 "/tmp/pash_eager_intermediate_#file1" & +$PASH_TOP/runtime/eager.sh $file4 $file6 "/tmp/pash_eager_intermediate_#file2" & + +sort < $file5 > $file7 & +sort < $file6 > $file8 & + +sort -m $file7 $file8 + +# cat $testFile | sort > $file8 +# if cmp -s "$file7" "$file8"; then +# printf 'The file "%s" is the same as "%s"\n' "$file7" "$file8" +# else +# printf 'The file "%s" is different from "%s"\n' "$file7" "$file8" +# fi + +rm -rf *out \ No newline at end of file diff --git a/test/pash_tests/r-wc.sh b/test/pash_tests/r-wc.sh new file mode 100755 index 0000000..aaf1422 --- /dev/null +++ b/test/pash_tests/r-wc.sh @@ -0,0 +1,44 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out + +rm -f *.out + +testFile=$PASH_TOP/evaluation/scripts/input/1G.txt +batchSize=10000000 +testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 + + +$PASH_TOP/runtime/r_split -r $testFile $batchSize $file3 $file4 & + + +# $PASH_TOP/runtime/r_unwrap < $file1 > $file3 & +# $PASH_TOP/runtime/r_unwrap < $file2 > $file4 & + +wc $file3 > $file5 & +wc $file4 > $file6 & + +./merge-wc.sh $file5 $file6 + + +rm -rf *out \ No newline at end of file diff --git a/test/pash_tests/raw-r-sort.sh b/test/pash_tests/raw-r-sort.sh new file mode 100755 index 0000000..b9a7920 --- /dev/null +++ b/test/pash_tests/raw-r-sort.sh @@ -0,0 +1,49 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out + +rm -f *.out + +batchSize=10000000 +testFile="$PASH_TOP/evaluation/scripts/input/1G.txt" +if [ "$#" -gt "0" ] + then + testFile=$1 +fi +if [ "$#" -gt "1" ]; then + 
batchSize=$2 +fi + +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +mkfifo $file5 +mkfifo $file6 +mkfifo $file7 +mkfifo $file8 + +$PASH_TOP/runtime/r_split -r $testFile $batchSize $file1 $file2 & + +$PASH_TOP/runtime/eager.sh $file1 $file5 "/tmp/pash_eager_intermediate_#file1" & +$PASH_TOP/runtime/eager.sh $file2 $file6 "/tmp/pash_eager_intermediate_#file2" & + +sort < $file5 > $file7 & +sort < $file6 > $file8 & + +sort -m $file7 $file8 + +# cat $testFile | sort > $file8 +# if cmp -s "$file7" "$file8"; then +# printf 'The file "%s" is the same as "%s"\n' "$file7" "$file8" +# else +# printf 'The file "%s" is different from "%s"\n' "$file7" "$file8" +# fi + +rm -rf *out \ No newline at end of file diff --git a/test/pash_tests/readonly.sh b/test/pash_tests/readonly.sh new file mode 100644 index 0000000..8a3d950 --- /dev/null +++ b/test/pash_tests/readonly.sh @@ -0,0 +1,8 @@ +var1=value1 +readonly var1 var2=value2 +var1=foo +var2=foo +echo $var1 $var2 +unset var1 +unset var2 +echo $var1 $var2 diff --git a/test/pash_tests/redir-var-test.sh b/test/pash_tests/redir-var-test.sh new file mode 100644 index 0000000..e82ffd7 --- /dev/null +++ b/test/pash_tests/redir-var-test.sh @@ -0,0 +1,10 @@ +#!/bin/sh +func_emit_tests_Makefile_am () +{ + ofd=3 + { + echo hi + } >&$ofd +} +fd=1 +echo hi >&$fd diff --git a/test/pash_tests/redirect.sh b/test/pash_tests/redirect.sh new file mode 100755 index 0000000..0fa3da7 --- /dev/null +++ b/test/pash_tests/redirect.sh @@ -0,0 +1,2 @@ +echo hello 1>&9 +# ls -laL /dev/fd \ No newline at end of file diff --git a/test/pash_tests/redirect_stdin_to.sh b/test/pash_tests/redirect_stdin_to.sh new file mode 100755 index 0000000..f43c9a6 --- /dev/null +++ b/test/pash_tests/redirect_stdin_to.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +## TODO: Is this a hack? 
+{ cat > "${1?No file to redirect to}" <&3 3<&- & } 3<&0 diff --git a/test/pash_tests/redirect_wrapper.sh b/test/pash_tests/redirect_wrapper.sh new file mode 100644 index 0000000..003b9df --- /dev/null +++ b/test/pash_tests/redirect_wrapper.sh @@ -0,0 +1 @@ +exec $1 redirect.sh 9>&1 \ No newline at end of file diff --git a/test/pash_tests/remote_read.sh b/test/pash_tests/remote_read.sh new file mode 100755 index 0000000..bc4577c --- /dev/null +++ b/test/pash_tests/remote_read.sh @@ -0,0 +1 @@ +"$PASH_TOP/runtime/dspash/file_reader/datastream_client" --type read "$@" diff --git a/test/pash_tests/remote_write.sh b/test/pash_tests/remote_write.sh new file mode 100755 index 0000000..3c5e724 --- /dev/null +++ b/test/pash_tests/remote_write.sh @@ -0,0 +1 @@ +"$PASH_TOP"/runtime/dspash/file_reader/datastream_client --type write "$@" diff --git a/test/pash_tests/remove_adapter.sh b/test/pash_tests/remove_adapter.sh new file mode 100644 index 0000000..006de5d --- /dev/null +++ b/test/pash_tests/remove_adapter.sh @@ -0,0 +1,3 @@ +INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/} +# remove adapter +find ${INPUT} -name "*.fastq" | sort | uniq | xargs -I {} cutadapt -a AGATCGGAAGAGCACAC {} > /dev/null diff --git a/test/pash_tests/round_trip.sh b/test/pash_tests/round_trip.sh new file mode 100755 index 0000000..1aa1648 --- /dev/null +++ b/test/pash_tests/round_trip.sh @@ -0,0 +1,29 @@ +#!/bin/sh + +if [ $# -ne 2 ]; then + echo "Usage: ${0##*/} program target" + exit 2 +fi + +p=$1 +tgt=$2 + +orig=$(${p} ${tgt} 2>&1) +if [ "$?" -ne 0 ]; +then echo "${tgt} FAILED, couldn't run (output: ${orig})"; exit 2 +fi + +rt=$(${p} ${tgt} | ${p} 2>&1) +if [ "$?" 
-ne 0 ]; +then echo "${tgt} FAILED round trip, couldn't run (output: $rt)"; exit 3 +fi + +if [ "${orig}" = "${rt}" ]; +then echo ${tgt} OK; exit 0 +else + echo ${tgt} FAILED + echo ${orig} + echo ========== + echo ${rt} + exit 1 +fi diff --git a/test/pash_tests/run_all_benchmarks.sh b/test/pash_tests/run_all_benchmarks.sh new file mode 100755 index 0000000..ef43816 --- /dev/null +++ b/test/pash_tests/run_all_benchmarks.sh @@ -0,0 +1,70 @@ +#!/bin/bash + +## Determines whether the experimental pash flags will be tested. +## By default they are not. +export EXPERIMENTAL=0 +export DEBUG=0 + +for item in $@ +do + if [ "--experimental" == "$item" ]; then + export EXPERIMENTAL=1 + fi + + if [ "--debug" == "$item" ]; then + export DEBUG=1 + fi +done + +## This script is necessary to ensure that sourcing happens with bash +source run.seq.sh +source run.par.sh + +compare_outputs(){ + dir=$1 + outputs=$(ls $dir | grep "seq" | sed 's/.seq.out$//') + for out in $outputs; + do + seq_output="${dir}/${out}.seq.out" + pash_output="${dir}/${out}.par.out" + diff -q "$seq_output" "$pash_output" + done +} + +if [ "$EXPERIMENTAL" -eq 1 ]; then + export PASH_FLAGS="--r_split --dgsh_tee --r_split_batch_size 1000000" + # --speculation quick_abort is not maintained at the moment +else + export PASH_FLAGS="" +fi + +## Add the debug flag +if [ "$DEBUG" -eq 1 ]; then + export PASH_FLAGS="$PASH_FLAGS -d 1" +fi + + +oneliners +oneliners_pash + +compare_outputs "oneliners/outputs" + +unix50 +unix50_pash + +compare_outputs "unix50/outputs" + +nlp +nlp_pash + +compare_outputs "nlp/outputs" + +web-index +web-index_pash + +compare_outputs "web-index/outputs" + +analytics-mts +analytics-mts_pash + +compare_outputs "analytics-mts/outputs" diff --git a/test/pash_tests/run_evaluation.sh b/test/pash_tests/run_evaluation.sh new file mode 100644 index 0000000..bb9ead9 --- /dev/null +++ b/test/pash_tests/run_evaluation.sh @@ -0,0 +1,243 @@ +#!/bin/bash + +## TODO: Set up $PASH_TOP in the beginning or run 
the install script. + + +echo "This script runs the whole EuroSys 2021 PaSh evaluation" + +echo "" +echo "Section 6.1: Common Unix One-liners" + +## TODO: Also save aggregates (avg, etc) in a file + +## Note that input files that are used as inputs for this script are generated +## using the `gen*` scripts in `evaluation/scripts/input/`. +## ``` +## cd $PASH_TOP/evaluation/scripts/input/ +## ./gen.sh +## ./gen.sh # Warning: This requires more than 100GB of space. +## ``` +## +## If you just want to run the scripts with small inputs (the main conclusions still hold) +## you only need to run `./gen.sh`. +## +## The one-liner scripts are included in `evaluation/microbenchmarks` +## The list of scripts (and their correspondence to the names in the paper) are seen below: +## - minimal_grep.sh # EuroSys: nfa-regex +## - minimal_sort.sh # EuroSys: sort +## - topn.sh # EuroSys: top-n +## - wf.sh # EuroSys: wf +## - spell.sh # EuroSys: spell +## - diff.sh # EuroSys: difference +## - bigrams.sh # EuroSys: bi-grams +## - set-diff.sh # EuroSys: set-difference +## - double_sort.sh # EuroSys: sort-sort +## - shortest_scripts.sh # EuroSys: shortest-scripts +## +## The inputs that we are going to run them on are defined in +## - *_env_small.sh (for the small input) +## - *_env.sh (for the large EuroSys eval input, usually 10x larger than the small) +## +## Before running the script we first need to move to the correct directory +## `cd $PASH_TOP/evaluation/eurosys` +## +## The script that runs PaSh on these programs is: `evaluation/eurosys/execute_eurosys_one_liners.sh` +## There are three modes of execution (can be seen by calling the script with the -h flag): +## 1. Small inputs | --width 2, 16 | Only full PaSh config +## 2. Small inputs | --width 2, 16 | All PaSh configs +## 3. 
Big inputs | -- width 2, 4, 8, 16, 32, 64 | All PaSh configs +## +## The script `evaluation/eurosys/execute_eurosys_one_liners.sh` is based on +## `evaluation/execute_compile_evaluation_script.sh` that correctly sets up PaSh for the different configurations. +## +## If you just want to check that PaSh achieves speedups as presented in the paper +## you can just run 1 with option `-s`. +## +## If you are interested in seeing the improvements by PaSh's runtime primitives +## (all lines in Figure 9), you can run 2 with option `-m`. +## This should take a couple hours and should validate the trends between different PaSh +## configurations as shown in Figure 9. +## +## If you want to reproduce the complete results from Figure 9, you need to run 3 with option `-l`. +## Note that this should take more than a day to execute. +## Also this requires several hundred GBs of free space (due to intermediate inputs, outputs, and buffering). +## +## To plot the results from any of the above experiments, do the following: +## ``` +## cd $PASH_TOP/compiler +## python3 gather_results.py +## ``` +## +## This will create plots for all invocations of +## `evaluation/eurosys/execute_eurosys_one_liners.sh`, one for each flag. +## +## The plots are: +## - for `-s`: evaluation/plots/small_tiling_throughput_scaleup.pdf +## - for `-m`: evaluation/plots/medium_tiling_throughput_scaleup.pdf +## - for `-l`: evaluation/plots/tiling_throughput_scaleup.pdf +## +## Note that `-m` supersedes `-s` but `-l` does not supersede any of the two. +## +## Also note that if you run a script partially, it might end up saving partial results, +## therefore having 0 speedups in some points of the plots. + +echo "" +echo "Section 6.2: Unix50 from Bell Labs" + +## TODO: Also save aggregates (avg, etc) in a file + +## All of the Unix50 pipelines are in `evaluation/unix50/unix50.sh`. +## The inputs of the pipelines are in `evaluation/unix50/`. 
+## +## Before running the script we first need to move to the correct directory +## `cd $PASH_TOP/evaluation/eurosys` +## +## The script that runs PaSh on these programs is: `evaluation/eurosys/execute_unix_benchmarks.sh` +## There are two modes of execution (can be seen by calling the script with the -h flag): +## 1. Small inputs (1GB) | --width 4 +## 2. Big inputs (10GB) | --width 16 (EuroSys evaluation) +## +## The first one, called with `-s`, uses pash on the unix50 scripts with 1GB input and width 4 +## and should be done in less than an hour. +## The trend shown in the paper (Fig 10) should be visible in the results from this script. +## +## If you are interested in running the complete evaluation to reproduce Figure 10, +## you need to run the script with `-l`. This should take several hours. +## +## To plot the results from any of the above experiments, do the following: +## ``` +## cd $PASH_TOP/compiler +## python3 gather_results.py +## ``` +## +## This will create plots for both "1GB --width 4" and for "10GB --width 16". +## +## The plots are in: +## - for `-s`: evaluation/plots/unix50_1GB_individual_speedups_4.pdf +## - for `-l`: evaluation/plots/unix50_10GB_individual_speedups_16.pdf +## +## Note that the pipelines in the plot are sorted with respect to speedup, and not by their ID. +## So the first pipeline does not necessarily correspond to the first pipeline in `evaluation/unix50`. +## +## There are two small differences of these plots compared to Figure 10. +## These differences are due to the evolution of PaSh and the refinement of its annotations. +## - First, the first pipeline has higher speedup than 4 and 16 in both cases. This is because +## this pipeline is not very CPU intensive and contains an initial `cat`. PaSh has evolved +## to perform an optimization that removes `cat` occurrences that only contain a single file, +## and therefore removes it, improving performance significantly.
+## - Second, the slowdown in the last 3 scripts is more significant than the one reported in the paper. +## This is because these scripts contain `tr -d '\n'`, the annotation for which was refined recently due to additional testing. +## The initial annotation for `tr` considered this invocation of `tr` to be stateless while it isn't, +## since it removes all newlines and therefore cannot be parallelized based on lines. The refinement in the annotation +## leads to additional splits being added after `tr -d '\n'` (since it is non-parallelizable pure). +## The issue with these splits is that they do not manage to split the file (since there is only one line) +## leaving the rest of the script to run sequentially. +## + + +echo "" +echo "Section 6.3: Use Case: NOAA Weather Analysis" + +## Note that input files that are needed by this script +## are `curl`ed from a server in the local network and therefore +## cannot be accessed from elsewhere. +## +## Before running the script we first need to move to the correct directory +## `cd $PASH_TOP/evaluation/eurosys` +## +## The program that we run, described in Section 6.3, can be seen in `evaluation/scripts/max-temp-complete.sh`. +## It takes as input a sequence of lines each containing a year (e.g. using `seq 2000 2004`). +## +## To run the script with a single year of input use: +## `./execute_max_temp_dish_evaluation.sh -s` +## +## This should take less than 10 minutes. +## +## It runs the script on: +## - bash +## - pa.sh --width 16 +## +## The results are saved in: +## - `evaluation/results/max-temp-complete-2000-2000-seq.time` +## - `evaluation/results/max-temp-complete-2000-2000-16-pash.time` +## +## If you want to run the program with 5 years of input (as is done in Section 6.3) +## you need to use the following: +## `./execute_max_temp_dish_evaluation.sh -l` +## +## It should take less than an hour. +## It also runs the script with bash and pash --width 16.
+## +## The results are saved in: +## - `evaluation/results/max-temp-complete-2000-2004-seq.time` +## - `evaluation/results/max-temp-complete-2000-2004-16-pash.time` +## +## If you want to separate the preprocessing and processing (as done in Section 6.3) +## you need to add the `-e` flag to either the 1-year or the 5-year execution, e.g.: +## `./execute_max_temp_dish_evaluation.sh -l -e` +## +## This runs: +## - `evaluation/scripts/max-temp-preprocess.sh` +## - `evaluation/scripts/max-temp-process.sh` +## +## with bash, and pash --width 16. It saves results in: +## - `evaluation/results/max-temp-preprocess-2000-2000-seq.time` +## - `evaluation/results/max-temp-preprocess-2000-2000-16-pash.time` +## - `evaluation/results/max-temp-process-2000-2000-seq.time` +## - `evaluation/results/max-temp-process-2000-2000-16-pash.time` +## +## and similarly for the large inputs (2000-2004). +## +## Note that PaSh's speedup for the complete script 2000-2004 with width 16 +## is actually higher than what is reported in the paper since it doesn't +## have to write the intermediate files (between preprocessing and processing) to disk. +## + +echo "" +echo "Section 6.4: Use Case: Wikipedia Web Indexing" + +## Note that input files that are needed by this script (complete Wikipedia) +## are saved locally on the server and therefore this program cannot be run from elsewhere. +## +## Before running the script we first need to move to the correct directory +## `cd $PASH_TOP/evaluation/eurosys` +## +## The program that we run, described in Section 6.4, can be seen in `evaluation/scripts/web-index.sh`. +## It requires having set the `$IN`, `$WIKI`, and `$WEB_INDEX_DIR` variables. +## +## To run the script for 1000 Wikipedia links use: +## `./execute_web_index_dish_evaluation.sh -s` +## +## This sets up the required variables and should take less than 5 minutes. +## It runs the script with bash, pash --width 2, pash --width 16.
+## +## The results are saved in: +## - `evaluation/results/web-index-1000-seq.time` +## - `evaluation/results/web-index-1000-2-pash.time` +## - `evaluation/results/web-index-1000-16-pash.time` +## +## If you want to run with the EuroSys evaluation inputs (100k links), use: +## `./execute_web_index_dish_evaluation.sh -l` +## +## This should take a couple hours and the results are saved in: +## - `evaluation/results/web-index-100000-seq.time` +## - `evaluation/results/web-index-100000-2-pash.time` +## - `evaluation/results/web-index-100000-16-pash.time` + +echo "" +echo "Section 6.5: Further Micro-benchmarks" + +## To run the comparison with sort --parallel, just use `evaluation/eurosys/execute_baseline_sort.sh` +## +## Before running the script we first need to move to the correct directory +## `cd $PASH_TOP/evaluation/eurosys` +## +## There are two modes of execution: +## 1. option: -s Small input | --width 2, 16 +## 2. option: -l Big input | -- width 2, 4, 8, 16, 32, 64 +## +## Note that this script executes sort --parallel with double the value of --width +## since we noticed that it grows slightly slower (as shown in the Figure in Section 6.5). + + +## TODO(@nikos): Run and explain the GNU Parallel diff --git a/test/pash_tests/run_grader.sh b/test/pash_tests/run_grader.sh new file mode 100755 index 0000000..10017b9 --- /dev/null +++ b/test/pash_tests/run_grader.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +if [ "$#" != "1" ]; then + echo "Usage: $0 [hwXX]" + exit 1 +fi + +if [ ! -d "$1/grading" ]; then + echo "Couldn't find grading directory (looked in $1/grading)" + exit 2 +fi + +cd $1/grading + +errors="" +for s in `ls`; do + echo "GRADING $s" + (cd $s; make) + if [ "$?" 
!= "0" ]; then + errors+=" $s" + fi +done + +echo +echo "There were errors for the following students:${errors}" +echo ${errors} >"$1/grading/errors.log" diff --git a/test/pash_tests/run_lda.sh b/test/pash_tests/run_lda.sh new file mode 100755 index 0000000..a2e8698 --- /dev/null +++ b/test/pash_tests/run_lda.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +export PYTHONIOENCODING=utf8 + +if test $# -ne 0; +then + KS="$*"; +else + KS="50 75 100 125 150 175 200" +fi + +DIR=`date "+%Y-%m-%d_%H:%M"` +START=`date "+%Y-%m-%d %H:%M"` + +# TODO error handling + +echo "SETTING UP" +mkdir ${DIR} + +echo "PARSING" +python parse.py + +for dat in abstracts.dat vocab.dat docs.dat; do + mv ${dat} ${DIR} +done + +# we don't want to lose this one! +cp stopwords.dat ${DIR} + +echo "RUNNING LDA" + +ABS=${DIR}/abstracts.dat + +for k in ${KS}; do + lda est 1/50 ${k} settings.txt ${ABS} seeded ${DIR}/lda${k} & + echo lda${k} >>${DIR}/.gitignore +done + +wait +echo "PROCESSING TOPICS" + +for k in ${KS}; do + python debug_topics.py ${DIR} ${k} > ${DIR}/lda${k}_topics.txt +done + +echo "GENERATING CSV" + +for i in ${DIR}/lda*; do + test -d ${i} && python post.py ${i}/final.gamma ${DIR}/docs.dat > ${i}.csv + test -d ${i} && python by_year.py ${i}/final.gamma ${DIR}/docs.dat > ${i}_by_year.csv +done + +echo "MOVING TO OUTPUT DIRECTORY" +mv ${DIR} ../out + +echo "DONE" +echo All done. Started at ${START}, done at `date "+%Y-%m-%d %H:%M"`. diff --git a/test/pash_tests/run_tests.sh b/test/pash_tests/run_tests.sh new file mode 100755 index 0000000..08fec79 --- /dev/null +++ b/test/pash_tests/run_tests.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +set -x e + +export PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel --show-superproject-working-tree)} + +echo "Running intro tests..." +cd "$PASH_TOP/evaluation/intro" +./test.sh + +echo "Running interface tests..." +cd "$PASH_TOP/evaluation/tests/interface_tests" +./run.sh + +echo "Running compiler tests..." 
+cd "$PASH_TOP/compiler" +./test_evaluation_scripts.sh + +echo "Running aggregator tests..." +cd "$PASH_TOP/evaluation/tests/agg/" +./run.sh + +echo "Running aggregator tests..." +cd "$PASH_TOP/runtime/agg/cpp/tests" +./test.sh diff --git a/test/pash_tests/safe0.sh b/test/pash_tests/safe0.sh new file mode 100644 index 0000000..4a3e55b --- /dev/null +++ b/test/pash_tests/safe0.sh @@ -0,0 +1 @@ +echo nothing to expand diff --git a/test/pash_tests/safe1.sh b/test/pash_tests/safe1.sh new file mode 100644 index 0000000..e213fee --- /dev/null +++ b/test/pash_tests/safe1.sh @@ -0,0 +1 @@ +echo ~ is always safe diff --git a/test/pash_tests/safe2.sh b/test/pash_tests/safe2.sh new file mode 100644 index 0000000..2d66d10 --- /dev/null +++ b/test/pash_tests/safe2.sh @@ -0,0 +1 @@ +echo "quoting safe stuff is safe" diff --git a/test/pash_tests/safe3.sh b/test/pash_tests/safe3.sh new file mode 100644 index 0000000..bfc7420 --- /dev/null +++ b/test/pash_tests/safe3.sh @@ -0,0 +1 @@ +echo $((2 + 2)) = 4, safely diff --git a/test/pash_tests/safe4.sh b/test/pash_tests/safe4.sh new file mode 100644 index 0000000..5f91750 --- /dev/null +++ b/test/pash_tests/safe4.sh @@ -0,0 +1 @@ +echo ${PWD} is fine to show diff --git a/test/pash_tests/safe5.sh b/test/pash_tests/safe5.sh new file mode 100644 index 0000000..50ea6bc --- /dev/null +++ b/test/pash_tests/safe5.sh @@ -0,0 +1 @@ +echo ${#PWD} is also cool, as is ${x-default} and ${x+alt} and and ${x%%a*} ${x%a*} ${x#a*} ${x##a*} diff --git a/test/pash_tests/safe7.sh b/test/pash_tests/safe7.sh new file mode 100644 index 0000000..aa6b8d1 --- /dev/null +++ b/test/pash_tests/safe7.sh @@ -0,0 +1 @@ +echo ${#PWD} is also cool, as is ${x-default} and "${x+alt}" and ${x=set now} and ${x?won\'t run} diff --git a/test/pash_tests/search.sh b/test/pash_tests/search.sh new file mode 100755 index 0000000..12efebe --- /dev/null +++ b/test/pash_tests/search.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# Complicated grep expression + +IN=./input/1G.txt # Change G to 
M for small input +OUT=./output/out.txt + +cat $IN | + grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > $OUT diff --git a/test/pash_tests/sed-test.sh b/test/pash_tests/sed-test.sh new file mode 100644 index 0000000..f5ba0ac --- /dev/null +++ b/test/pash_tests/sed-test.sh @@ -0,0 +1,11 @@ +cat $PASH_TOP/evaluation/tests/input/1M.txt | + sed 's;^d;da;' | + sed 's;^;http://ndr.md/data/noaa/;' | + sed 's;$;/;' | + sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' | + sed 's;^;http://ndr.md/data/noaa/;' | + sed "s#^#$WIKI#" | + sed s/\$/'0s'/ | + sed 1d | + sed 4d | + sed "\$d" \ No newline at end of file diff --git a/test/pash_tests/send_emails.sh b/test/pash_tests/send_emails.sh new file mode 100755 index 0000000..9e3515f --- /dev/null +++ b/test/pash_tests/send_emails.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +if [ "$#" != "1" ]; then + echo "Usage: $0 [hwXX]" + exit 1 +fi + +if [ ! -d "$1/mail" ]; then + echo "Couldn't find mail directory (looked in $1/grading)" + exit 2 +fi + +cd $1/mail + +for s in `ls`; do + ../../mail.scpt "[cs131] $1 grade" $s +done diff --git a/test/pash_tests/set-dash-v-x.sh b/test/pash_tests/set-dash-v-x.sh new file mode 100644 index 0000000..168efba --- /dev/null +++ b/test/pash_tests/set-dash-v-x.sh @@ -0,0 +1,4 @@ +set - +echo hello +echo $# $1 $2 $3 $4 $5 + diff --git a/test/pash_tests/set-diff.sh b/test/pash_tests/set-diff.sh new file mode 100755 index 0000000..2c1afd0 --- /dev/null +++ b/test/pash_tests/set-diff.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Show the set-difference between two streams (i.e., elements in the first that are not in the second). 
+# https://stackoverflow.com/questions/2509533/bash-linux-set-difference-between-two-text-files + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} + +mkfifo s1 s2 + +cat $IN | + cut -d ' ' -f 1 | + tr [:lower:] [:upper:] | + sort > s1 & + +cat $IN | + cut -d ' ' -f 1 | + sort > s2 & + +comm -23 s1 s2 + +rm s1 s2 diff --git a/test/pash_tests/set-diff_env_test.sh b/test/pash_tests/set-diff_env_test.sh new file mode 100644 index 0000000..3e777b2 --- /dev/null +++ b/test/pash_tests/set-diff_env_test.sh @@ -0,0 +1,2 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt + diff --git a/test/pash_tests/set-e-2.sh b/test/pash_tests/set-e-2.sh new file mode 100644 index 0000000..a280386 --- /dev/null +++ b/test/pash_tests/set-e-2.sh @@ -0,0 +1,4 @@ +set -e +( { false; } + { echo one; } ) | cat +echo two diff --git a/test/pash_tests/set-e-3.sh b/test/pash_tests/set-e-3.sh new file mode 100644 index 0000000..54b10e4 --- /dev/null +++ b/test/pash_tests/set-e-3.sh @@ -0,0 +1,21 @@ +set -e +# individual command in a multi-command pipeline +false | : +echo passed pipeline +# part of a compound list of an 'elif' +if false; then :; elif false; then :; fi +echo passed elif +# non-subshell compound command whose exit status was the result +# of a failure while -e was being ignored +{ false && : ; } +echo passed compound-brace +for i in a; do false && : ; done +echo passed compound-for +# case x in x) false && : ;; esac +# echo passed compound-case +if :; then false && : ; fi +echo passed compound-if +cont=y; while [ $cont = y ]; do cont=n; false && : ; done +echo passed compound-while +end=n; until [ $end = y ]; do end=y; false && : ; done +echo passed compound-until \ No newline at end of file diff --git a/test/pash_tests/set-e.sh b/test/pash_tests/set-e.sh new file mode 100644 index 0000000..982c20c --- /dev/null +++ b/test/pash_tests/set-e.sh @@ -0,0 +1,18 @@ +set -e +# part of a compound list of a 'while', 'until' or 'if' +while false; do break; done +echo passed while 
+until false; do break; done +echo passed until +if false; then :; fi +echo passed if +# any command of an AND-OR list other than the last +false && : +echo passed AND list +false || : +echo passed OR list +: && false || false && : +echo passed AND-OR list +# part of a pipeline preceded by the '!' reserved word +! false +echo passed negated pipeline \ No newline at end of file diff --git a/test/pash_tests/set-v.sh b/test/pash_tests/set-v.sh new file mode 100644 index 0000000..f5b3edf --- /dev/null +++ b/test/pash_tests/set-v.sh @@ -0,0 +1,2 @@ +set -v +echo hello diff --git a/test/pash_tests/set.sh b/test/pash_tests/set.sh new file mode 100644 index 0000000..cae4de7 --- /dev/null +++ b/test/pash_tests/set.sh @@ -0,0 +1,7 @@ +dotFile=set.sh.tempfile +variable="value value" + +# the problem is that this returns more things (we have functions that are exported in set) +set | grep variable > $dotFile +. ./$dotFile +# set diff --git a/test/pash_tests/set_bug.sh b/test/pash_tests/set_bug.sh new file mode 100755 index 0000000..55d8d14 --- /dev/null +++ b/test/pash_tests/set_bug.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +f() { + echo "f: $@" +} + +set -- a b c +echo "$@" +f +echo "$@" + diff --git a/test/pash_tests/setup-dspash.sh b/test/pash_tests/setup-dspash.sh new file mode 100755 index 0000000..0a81473 --- /dev/null +++ b/test/pash_tests/setup-dspash.sh @@ -0,0 +1,64 @@ + +# TODO: install any extra needed python debs + +# Get PASH_TOP +if git rev-parse --git-dir > /dev/null 2>&1; then + # set PASH_TOP + PASH_TOP=${PASH_TOP:-$(git rev-parse --show-toplevel)} +else + # set PASH_TOP to the root folder of the project if it is not available + PASH_TOP=${PASH_TOP:-$PWD/..} +fi + +# Install Go +wget https://go.dev/dl/go1.17.7.linux-amd64.tar.gz +rm -rf /usr/local/go && tar -C /usr/local -xzf go1.17.7.linux-amd64.tar.gz +echo -e '\nexport PATH=$PATH:/usr/local/go/bin' >> ~/.bashrc +export PATH=$PATH:/usr/local/go/bin +rm go1.17.7.linux-amd64.tar.gz + +# Install deps +GO111MODULE=on 
go get github.com/urfave/cli/v2 + +# Protobuf +apt-get update && apt-get install -y zip +PB_REL="https://github.com/protocolbuffers/protobuf/releases" +PROTOBUF_VER="3.15.8" +PROTOBUF_PACKAGE="protoc-$PROTOBUF_VER-linux-x86_64.zip" +curl -LO $PB_REL/download/v$PROTOBUF_VER/$PROTOBUF_PACKAGE +unzip $PROTOBUF_PACKAGE -d $HOME/.local +rm $PROTOBUF_PACKAGE +export PATH="$PATH:$HOME/.local/bin" +echo -e "\nPATH=\$PATH:$HOME/.local/bin" >> ~/.bashrc + +# Go protobuf deps +go install google.golang.org/protobuf/cmd/protoc-gen-go@latest +go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest +echo -e "\nexport PATH=\$PATH:$(go env GOPATH)/bin" >> ~/.bashrc +export PATH="$PATH:$(go env GOPATH)/bin" + +# Protobuf +apt-get update && apt-get install -y zip +PB_REL="https://github.com/protocolbuffers/protobuf/releases" +PROTOBUF_VER="3.15.8" +PROTOBUF_PACKAGE="protoc-$PROTOBUF_VER-linux-x86_64.zip" +curl -LO $PB_REL/download/v$PROTOBUF_VER/$PROTOBUF_PACKAGE +unzip $PROTOBUF_PACKAGE -d $HOME/.local +rm $PROTOBUF_PACKAGE +export PATH="$PATH:$HOME/.local/bin" +echo -e "\nPATH=\$PATH:$HOME/.local/bin" >> ~/.bashrc + +# Go protobuf deps +go install google.golang.org/protobuf/cmd/protoc-gen-go@latest +go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest +echo -e "\nexport PATH=\$PATH:$(go env GOPATH)/bin" >> ~/.bashrc +export PATH="$PATH:$(go env GOPATH)/bin" + +# Compile runtime +cd $PASH_TOP/runtime/dspash +go build socket_pipe.go +cd file_reader +go build client/dfs_split_reader.go +go build -o filereader_server server/server.go +go build -o discovery_server server/discovery_server.go +go build -o datastream_client client/datastream.go diff --git a/test/pash_tests/setup.sh b/test/pash_tests/setup.sh new file mode 100755 index 0000000..351e227 --- /dev/null +++ b/test/pash_tests/setup.sh @@ -0,0 +1,10 @@ +#!/bin/bash +setup_dataset() { + echo 'This experiment is expected to fetch data from a remote server' + echo 'To fetch the original dataset, use an FTP client' 
+ echo 'e.g., "lftp ftp://ftp.ncdc.noaa.gov/pub/data/noaa"' +} + +source_var() { + export IN= +} diff --git a/test/pash_tests/shortest-scripts.sh b/test/pash_tests/shortest-scripts.sh new file mode 100755 index 0000000..92c6b87 --- /dev/null +++ b/test/pash_tests/shortest-scripts.sh @@ -0,0 +1,221 @@ +rm -f "#file2" +rm -f "#file4" +rm -f "#file6" +rm -f "#file8" +rm -f "#file10" +rm -f "#file12" +rm -f "#file14" +rm -f "#file17" +rm -f "#file18" +rm -f "#file19" +rm -f "#file20" +rm -f "#file21" +rm -f "#file22" +rm -f "#file23" +rm -f "#file24" +rm -f "#file25" +rm -f "#file26" +rm -f "#file27" +rm -f "#file28" +rm -f "#file29" +rm -f "#file30" +rm -f "#file31" +rm -f "#file32" +rm -f "#file33" +rm -f "#file34" +rm -f "#file35" +rm -f "#file36" +rm -f "#file37" +rm -f "#file38" +rm -f "#file39" +rm -f "#file40" +rm -f "#file41" +rm -f "#file42" +rm -f "#file46" +rm -f "#file43" +rm -f "#file47" +rm -f "#file44" +rm -f "#file48" +rm -f "#file45" +rm -f "#file49" +rm -f "#file50" +rm -f "#file51" +rm -f "#file52" +rm -f "#file53" +rm -f "#file54" +rm -f "#file55" +rm -f "#file56" +rm -f "#file57" +rm -f "#file58" +rm -f "#file59" +rm -f "#file60" +rm -f "#file61" +rm -f "#file62" +mkfifo "#file2" +mkfifo "#file4" +mkfifo "#file6" +mkfifo "#file8" +mkfifo "#file10" +mkfifo "#file12" +mkfifo "#file14" +mkfifo "#file17" +mkfifo "#file18" +mkfifo "#file19" +mkfifo "#file20" +mkfifo "#file21" +mkfifo "#file22" +mkfifo "#file23" +mkfifo "#file24" +mkfifo "#file25" +mkfifo "#file26" +mkfifo "#file27" +mkfifo "#file28" +mkfifo "#file29" +mkfifo "#file30" +mkfifo "#file31" +mkfifo "#file32" +mkfifo "#file33" +mkfifo "#file34" +mkfifo "#file35" +mkfifo "#file36" +mkfifo "#file37" +mkfifo "#file38" +mkfifo "#file39" +mkfifo "#file40" +mkfifo "#file41" +mkfifo "#file42" +mkfifo "#file46" +mkfifo "#file43" +mkfifo "#file47" +mkfifo "#file44" +mkfifo "#file48" +mkfifo "#file45" +mkfifo "#file49" +mkfifo "#file50" +mkfifo "#file51" +mkfifo "#file52" +mkfifo "#file53" +mkfifo 
"#file54" +mkfifo "#file55" +mkfifo "#file56" +mkfifo "#file57" +mkfifo "#file58" +mkfifo "#file59" +mkfifo "#file60" +mkfifo "#file61" +mkfifo "#file62" + +mkfifo "#file63" +mkfifo "#file64" +mkfifo "#file65" +mkfifo "#file66" + +{ cat $PASH_TOP/evaluation/scripts/input/1G.txt >"#file2" & } +{ $PASH_TOP/runtime/r_split "#file2" 10000000 "#file63" "#file64" "#file65" "#file66" & } + +{ $PASH_TOP/runtime/dgsh_tee.sh "#file63" "#file17" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file64" "#file18" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file65" "#file19" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file66" "#file20" -I -f & } + +{ $PASH_TOP/runtime/r_wrap xargs file <"#file17" >"#file22" & } +{ $PASH_TOP/runtime/r_wrap xargs file <"#file18" >"#file23" & } +{ $PASH_TOP/runtime/r_wrap xargs file <"#file19" >"#file24" & } +{ $PASH_TOP/runtime/r_wrap xargs file <"#file20" >"#file25" & } +{ $PASH_TOP/runtime/r_wrap grep "shell script" <"#file22" >"#file26" & } +{ $PASH_TOP/runtime/r_wrap grep "shell script" <"#file23" >"#file27" & } +{ $PASH_TOP/runtime/r_wrap grep "shell script" <"#file24" >"#file28" & } +{ $PASH_TOP/runtime/r_wrap grep "shell script" <"#file25" >"#file29" & } +{ $PASH_TOP/runtime/r_wrap cut -d: -f1 <"#file26" >"#file30" & } +{ $PASH_TOP/runtime/r_wrap cut -d: -f1 <"#file27" >"#file31" & } +{ $PASH_TOP/runtime/r_wrap cut -d: -f1 <"#file28" >"#file32" & } +{ $PASH_TOP/runtime/r_wrap cut -d: -f1 <"#file29" >"#file33" & } +{ $PASH_TOP/runtime/r_wrap xargs -L 1 wc -l <"#file30" >"#file34" & } +{ $PASH_TOP/runtime/r_wrap xargs -L 1 wc -l <"#file31" >"#file35" & } +{ $PASH_TOP/runtime/r_wrap xargs -L 1 wc -l <"#file32" >"#file36" & } +{ $PASH_TOP/runtime/r_wrap xargs -L 1 wc -l <"#file33" >"#file37" & } +{ $PASH_TOP/runtime/r_wrap grep -v "^0$" <"#file34" >"#file38" & } +{ $PASH_TOP/runtime/r_wrap grep -v "^0$" <"#file35" >"#file39" & } +{ $PASH_TOP/runtime/r_wrap grep -v "^0$" <"#file36" >"#file40" & } +{ $PASH_TOP/runtime/r_wrap grep -v "^0$" 
<"#file37" >"#file41" & } +{ $PASH_TOP/runtime/r_unwrap <"#file38" >"#file46" & } +{ sort -n <"#file53" >"#file42" & } +{ $PASH_TOP/runtime/r_unwrap <"#file39" >"#file47" & } +{ sort -n <"#file54" >"#file43" & } +{ $PASH_TOP/runtime/r_unwrap <"#file40" >"#file48" & } +{ sort -n <"#file55" >"#file44" & } +{ $PASH_TOP/runtime/r_unwrap <"#file41" >"#file49" & } +{ sort -n <"#file56" >"#file45" & } +{ sort -n -m "#file57" "#file58" >"#file50" & } +{ sort -n -m "#file59" "#file60" >"#file51" & } +{ sort -n -m "#file61" "#file62" >"#file14" & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file46" "#file53" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file47" "#file54" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file48" "#file55" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file49" "#file56" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file42" "#file57" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file43" "#file58" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file44" "#file59" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file45" "#file60" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file50" "#file61" -I -f & } +{ $PASH_TOP/runtime/dgsh_tee.sh "#file51" "#file62" -I -f & } +{ head -15 <"#file14" & } +source $PASH_TOP/runtime/wait_for_output_and_sigpipe_rest.sh ${!} +rm -f "#file2" +rm -f "#file4" +rm -f "#file6" +rm -f "#file8" +rm -f "#file10" +rm -f "#file12" +rm -f "#file14" +rm -f "#file17" +rm -f "#file18" +rm -f "#file19" +rm -f "#file20" +rm -f "#file21" +rm -f "#file22" +rm -f "#file23" +rm -f "#file24" +rm -f "#file25" +rm -f "#file26" +rm -f "#file27" +rm -f "#file28" +rm -f "#file29" +rm -f "#file30" +rm -f "#file31" +rm -f "#file32" +rm -f "#file33" +rm -f "#file34" +rm -f "#file35" +rm -f "#file36" +rm -f "#file37" +rm -f "#file38" +rm -f "#file39" +rm -f "#file40" +rm -f "#file41" +rm -f "#file42" +rm -f "#file46" +rm -f "#file43" +rm -f "#file47" +rm -f "#file44" +rm -f "#file48" +rm -f "#file45" +rm -f "#file49" +rm -f "#file50" +rm -f "#file51" +rm -f "#file52" +rm 
-f "#file53" +rm -f "#file54" +rm -f "#file55" +rm -f "#file56" +rm -f "#file57" +rm -f "#file58" +rm -f "#file59" +rm -f "#file60" +rm -f "#file61" +rm -f "#file62" + +rm -f "#file63" +rm -f "#file64" +rm -f "#file65" +rm -f "#file66" \ No newline at end of file diff --git a/test/pash_tests/shortest_scripts.sh b/test/pash_tests/shortest_scripts.sh new file mode 100644 index 0000000..0d39131 --- /dev/null +++ b/test/pash_tests/shortest_scripts.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# A bash script for finding the shortest scripts +# From "Wicked Cool Shell Scripts", 2nd Ed., pg. 7 +# +p.95 multiple sed +# +p.XX crawler + +cat $IN | xargs file | grep "shell script" | cut -d: -f1 | xargs -L 1 wc -l | grep -v '^0$' | sort -n | head -15 diff --git a/test/pash_tests/shortest_scripts_env_test.sh b/test/pash_tests/shortest_scripts_env_test.sh new file mode 100644 index 0000000..35e627d --- /dev/null +++ b/test/pash_tests/shortest_scripts_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/all_cmds.txt diff --git a/test/pash_tests/sine.sh b/test/pash_tests/sine.sh new file mode 100755 index 0000000..4b928e1 --- /dev/null +++ b/test/pash_tests/sine.sh @@ -0,0 +1,4 @@ +#!/bin/bash +F="temp.txt" +[ -f $F ] && (rm $F && echo 1 >$F ) +tail -f temp.txt | while read n; do echo "1+s(3*$n)" | bc -l; sleep 1; done | tee -a temp.txt diff --git a/test/pash_tests/sort-opt.sh b/test/pash_tests/sort-opt.sh new file mode 100755 index 0000000..a5af02e --- /dev/null +++ b/test/pash_tests/sort-opt.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +sort --buffer-size=30% --parallel=$1 $IN diff --git a/test/pash_tests/sort-opt_env.sh b/test/pash_tests/sort-opt_env.sh new file mode 100755 index 0000000..fdaa642 --- /dev/null +++ b/test/pash_tests/sort-opt_env.sh @@ -0,0 +1 @@ +IN=../evaluation/scripts/input/10G.txt diff --git a/test/pash_tests/sort-sort.sh b/test/pash_tests/sort-sort.sh new file mode 100755 index 0000000..a03e889 --- /dev/null +++ b/test/pash_tests/sort-sort.sh @@ -0,0 +1,6 @@ 
+#!/bin/bash +# Calculate sort twice + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} + +cat $IN | tr A-Z a-z | sort | sort -r diff --git a/test/pash_tests/sort.sh b/test/pash_tests/sort.sh new file mode 100755 index 0000000..7e457bf --- /dev/null +++ b/test/pash_tests/sort.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# Sort input + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} + +cat $IN | sort + diff --git a/test/pash_tests/sort_env.sh b/test/pash_tests/sort_env.sh new file mode 100644 index 0000000..a65bd56 --- /dev/null +++ b/test/pash_tests/sort_env.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/scripts/input/10G.txt diff --git a/test/pash_tests/sort_env_small.sh b/test/pash_tests/sort_env_small.sh new file mode 100644 index 0000000..902f841 --- /dev/null +++ b/test/pash_tests/sort_env_small.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/scripts/input/1G.txt diff --git a/test/pash_tests/sort_env_test.sh b/test/pash_tests/sort_env_test.sh new file mode 100755 index 0000000..805e069 --- /dev/null +++ b/test/pash_tests/sort_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt diff --git a/test/pash_tests/spell-grep.sh b/test/pash_tests/spell-grep.sh new file mode 100755 index 0000000..cd87ca0 --- /dev/null +++ b/test/pash_tests/spell-grep.sh @@ -0,0 +1,17 @@ +set_diff() +{ + grep -vx -f $1 - +} + +dict=$PASH_TOP/evaluation/tests/input/sorted_words +IN=$PASH_TOP/evaluation/tests/input/1M.txt + +cat $IN | + # groff -t -e -mandoc -Tascii | # remove formatting commands + col -bx | # remove backspaces / linefeeds + tr -cs A-Za-z '\n' | + tr A-Z a-z | # map upper to lower case + tr -d '[:punct:]' | # remove punctuation + sort | # put words in alphabetical order + uniq | # remove duplicate words + set_diff $dict # report words not in dictionary diff --git a/test/pash_tests/spell.sh b/test/pash_tests/spell.sh new file mode 100644 index 0000000..9e38b4b --- /dev/null +++ b/test/pash_tests/spell.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# 
Calculate mispelled words in an input +# https://dl.acm.org/doi/10.1145/3532.315102 +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} +dict=${dict:-$PASH_TOP/evaluation/benchmarks/oneliners/input/dict.txt} + +cat $IN | + iconv -f utf-8 -t ascii//translit | # remove non utf8 characters + # groff -t -e -mandoc -Tascii | # remove formatting commands + col -bx | # remove backspaces / linefeeds + tr -cs A-Za-z '\n' | + tr A-Z a-z | # map upper to lower case + tr -d '[:punct:]' | # remove punctuation + sort | # put words in alphabetical order + uniq | # remove duplicate words + comm -23 - $dict # report words not in dictionary diff --git a/test/pash_tests/spell_env_test.sh b/test/pash_tests/spell_env_test.sh new file mode 100755 index 0000000..7152781 --- /dev/null +++ b/test/pash_tests/spell_env_test.sh @@ -0,0 +1,3 @@ +dict=$PASH_TOP/evaluation/tests/input/sorted_words +IN=$PASH_TOP/evaluation/tests/input/10M.txt + diff --git a/test/pash_tests/split_pcap.sh b/test/pash_tests/split_pcap.sh new file mode 100644 index 0000000..7c5e7b1 --- /dev/null +++ b/test/pash_tests/split_pcap.sh @@ -0,0 +1,40 @@ +#!/bin/sh +# To process large pcap file, usually it is better to split it into small chunks first, +# then process every chunk in parallel. 
+INPUT=${INPUT:-$PASH_TOP/evaluation/scripts/input/201011271400.dump} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/scripts/input/out.pcap} +split_size=1000 +output_index=1 +loop_count=10 +exit_flag=0 + +command() { + echo "$1" "$2" +} + +tcpdump -r ${INPUT} -w ${OUTPUT} -C ${split_size} + +command ${OUTPUT} + +while : +do + loop_index=0 + while test ${loop_index} -lt ${loop_count} + do + if test -e ${OUTPUT}${output_index} + then + command ${OUTPUT} ${output_index} + output_index=$((output_index + 1)) + loop_index=$((loop_index + 1)) + else + exit_flag=1 + break + fi + done + wait + + if test ${exit_flag} -eq 1 + then + exit 0 + fi +done diff --git a/test/pash_tests/star-escape.sh b/test/pash_tests/star-escape.sh new file mode 100644 index 0000000..d7222cc --- /dev/null +++ b/test/pash_tests/star-escape.sh @@ -0,0 +1 @@ +x=$(echo "*" '*' \*); echo "$x" diff --git a/test/pash_tests/suggest-ec2.sh b/test/pash_tests/suggest-ec2.sh new file mode 100755 index 0000000..01e5241 --- /dev/null +++ b/test/pash_tests/suggest-ec2.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Suggests envvars for use in ./make-ec2.sh + +main() { + local vpc_id="$(aws ec2 describe-vpcs --output text --query 'Vpcs[0].VpcId')"; + local key_name="$(aws ec2 describe-key-pairs --output text --query 'KeyPairs[0].KeyName')"; + local subnet="$(aws ec2 describe-subnets --output text --query 'Subnets[0].SubnetId' --filter Name=vpc-id,Values=$vpc_id)"; + local sg="$(aws ec2 describe-security-groups --output text --filter Name=vpc-id,Values=$vpc_id --query 'SecurityGroups[0].GroupId')"; + + echo "export PASH_AWS_EC2_AMI='ami-0d739ceed1874f156';"; + echo "export PASH_AWS_EC2_INSTANCE_TYPE='t2.micro';"; + echo "export PASH_AWS_EC2_VPC_ID='$vpc_id';"; + echo "export PASH_AWS_EC2_KEY_NAME='$key_name';"; + echo "export PASH_AWS_EC2_SUBNET='$subnet';"; + echo "export PASH_AWS_EC2_SECURITY_GROUP='$sg';"; + echo "export PASH_AWS_EC2_DISK_SIZE_GB='10';"; +} + +main diff --git a/test/pash_tests/symtab-sha.sh 
b/test/pash_tests/symtab-sha.sh new file mode 100755 index 0000000..f460648 --- /dev/null +++ b/test/pash_tests/symtab-sha.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# To build and sign an SGX enclave, this script extracts the executable's symbol +# table and calculates its SHA256 hashsum. + +# Require +# Data: /usr/lib/libz3.so + +IN=/usr/lib/libz3.so +OUT=./output/out.txt + +readelf -x .symtab $IN | + tail -n +3 | + head -n -1 | # next three implement `awk '{print $2$3$4$5}'` + sed 's/^[[:space:]]*//' | + cut -d ' ' -f2-5 | + tr -d ' ' | + tr -d "\n" | + xxd -r -p | + sha256sum > $OUT diff --git a/test/pash_tests/tail.sh b/test/pash_tests/tail.sh new file mode 100755 index 0000000..e4277fe --- /dev/null +++ b/test/pash_tests/tail.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +# FIXME missing tail parameters + +cat "${@: -1}" diff --git a/test/pash_tests/tailprogs.sh b/test/pash_tests/tailprogs.sh new file mode 100755 index 0000000..eae9e15 --- /dev/null +++ b/test/pash_tests/tailprogs.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +# A bash script for finding the 10 longest scripts +# (TODO: `group_by` script type?) + +# From "Wicked Cool Shell Scripts", 2nd Ed., pg. 7 + +# Data: +# Assumes a full list of commands +# +# # simple, from a single dir: +# echo "$( +# ls /usr/bin/* +# )" > all_cmds.txt +# +# # Or more complicated, from $PATH: +# +# echo "$( +# case "$PATH" in +# (*[!:]:) PATH="$PATH:" ;; +# esac +# +# set -f; IFS=: +# for dir in $PATH; do +# set +f +# [ -z "$dir" ] && dir="." +# for file in "$dir"/*; do +# if [ -x "$file" ] && ! 
[ -d "$file" ]; then +# printf '%s = %s\n' "${file##*/}" "$file" +# fi +# done +# done +# )" > ./input/allcmds.txt + +IN=./input/cmds10x.txt +OUT=./output/out.txt + +ls /usr/bin/* > $IN + +cat $IN | + xargs file | + grep "shell script" | + cut -d: -f1 | + xargs wc -l | + sort -rn | + head -n 25 > $OUT diff --git a/test/pash_tests/tee_web_index_bug.sh b/test/pash_tests/tee_web_index_bug.sh new file mode 100644 index 0000000..05e3587 --- /dev/null +++ b/test/pash_tests/tee_web_index_bug.sh @@ -0,0 +1,25 @@ +IN=$PASH_TOP/evaluation/tests/input/1M.txt + +mkfifo {1,2,3}grams + +cat "$IN" | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + tee 3grams 2grams 1grams > /dev/null & + +cat 1grams | + sort | + uniq -c | + sort -rn > 1-grams.txt & + +cat 2grams | + sort | + uniq -c | + sort -rn > 2-grams.txt & + +cat 3grams | + sort | + uniq -c | + sort -rn # >> 3-grams.txt + +rm {1,2,3}grams {1,2,3}-grams.txt diff --git a/test/pash_tests/temp-analytics.sh b/test/pash_tests/temp-analytics.sh new file mode 100755 index 0000000..319a8f0 --- /dev/null +++ b/test/pash_tests/temp-analytics.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +FROM=${FROM:-2015} +TO=${TO:-2015} +IN=${IN:-'http://ndr.md/data/noaa/'} +fetch=${fetch:-"curl -s"} + +data_file=temperatures.txt + +## Downloading and extracting +seq $FROM $TO | + sed "s;^;$IN;" | + sed 's;$;/;' | + xargs -r -n 1 $fetch | + grep gz | + tr -s ' \n' | + cut -d ' ' -f9 | + sed 's;^\(.*\)\(20[0-9][0-9]\).gz;\2/\1\2\.gz;' | + sed "s;^;$IN;" | + xargs -n1 curl -s | + gunzip > "${data_file}" + +## Processing +cat "${data_file}" | + cut -c 89-92 | + grep -v 999 | + sort -rn | + head -n1 > max.txt + +cat "${data_file}" | + cut -c 89-92 | + grep -v 999 | + sort -n | + head -n1 > min.txt + +cat "${data_file}" | + cut -c 89-92 | + grep -v 999 | + awk "{ total += \$1; count++ } END { print total/count }" > average.txt diff --git a/test/pash_tests/temp_test.sh b/test/pash_tests/temp_test.sh new file mode 100755 index 0000000..88f78f1 --- /dev/null +++ 
b/test/pash_tests/temp_test.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +func() { read -r distro setup; echo $distro $setup; } + +export -f func + +cat ../evaluation/usecases/shellcheck/temp_input.txt | + func diff --git a/test/pash_tests/test-common.sh b/test/pash_tests/test-common.sh new file mode 100755 index 0000000..63924fa --- /dev/null +++ b/test/pash_tests/test-common.sh @@ -0,0 +1,19 @@ +CMD="$1" +FLG="$2" +AGG="$3" + +cat $IN1 $IN2 | $CMD $FLG > ./temp/reference +cat $IN1 | $CMD $FLG > ./temp/partial1 +cat $IN2 | $CMD $FLG > ./temp/partial2 + +$AGG ./temp/partial1 ./temp/partial2 $FLG > ./temp/aggregated + +diff ./temp/aggregated ./temp/reference > ./temp/log +if [ $? -ne 0 ]; then + cat ./temp/log | head + echo $CMD "$FLG ...FAIL" +else + echo $CMD "$FLG ...pass" +fi + +rm -f ./temp/partial1 ./temp/partial2 ./temp/aggregated ./temp/reference ./temp/log diff --git a/test/pash_tests/test.sh b/test/pash_tests/test.sh new file mode 100755 index 0000000..54240a9 --- /dev/null +++ b/test/pash_tests/test.sh @@ -0,0 +1,5 @@ +# this is a comment + +ls +cd .. 
+ diff --git a/test/pash_tests/test1.sh b/test/pash_tests/test1.sh new file mode 100755 index 0000000..3f091ac --- /dev/null +++ b/test/pash_tests/test1.sh @@ -0,0 +1,48 @@ +#!/bin/bash +file1=1.out +file2=2.out +file3=3.out +file4=4.out +file5=5.out +file6=6.out +file7=7.out +file8=8.out +file9=9.out +rm -f *.out +testFile=../../evaluation/scripts/input/10M.txt +batchSize=100000 +mkfifo $file1 +mkfifo $file2 +mkfifo $file3 +mkfifo $file4 +# mkfifo $file5 + +mkfifo $file7 +mkfifo $file8 +mkfifo $file9 + +# mkfifo $file6 +# cat $testFile > $file9 & +# ../auto-split.sh $file9 $file1 $file2 & +# grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file3 > $file4 & +# grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file2 > $file6 & +# ../eager.sh $file1 $file3 temp & +# ../eager.sh $file6 $file7 temp2 & +# cat $file4 $file7 > $file5 + +../r_split $testFile $batchSize $file1 $file2 $file7 & + +../r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file1 > $file3 & +../r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file2 > $file4 & +../r_wrap grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' < $file7 > $file8 & + +../r_merge $file3 $file4 $file8> $file5 + +# cat $testFile | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4' > $file6 +# if cmp -s "$file6" "$file5"; then +# printf 'The file "%s" is the same as "%s"\n' "$file6" "$file5" +# else +# printf 'The file "%s" is different from "%s"\n' "$file6" "$file5" +# fi + +# rm -rf *out \ No newline at end of file diff --git a/test/pash_tests/tilde.sh b/test/pash_tests/tilde.sh new file mode 100644 index 0000000..5265eba --- /dev/null +++ b/test/pash_tests/tilde.sh @@ -0,0 +1,4 @@ +HOME='abc xyz' +printf '%s\n' ~ +HOME='test.*' +printf '%s\n' ~ \ No newline at end of file diff --git a/test/pash_tests/to_mp3.sh b/test/pash_tests/to_mp3.sh new file mode 100755 index 0000000..79a6931 --- /dev/null +++ b/test/pash_tests/to_mp3.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# tag: wav-to-mp3 
+IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/wav} +OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/mp3} +LOGS=${OUT}/logs +mkdir -p ${LOGS} +run_tests(){ + FILE=$1 + ffmpeg -y -i $FILE -f mp3 -ab 192000 $OUT/$(basename $FILE).mp3 2>/dev/null +} + +export -f run_tests + +pkg_count=0 +for item in ${IN}/*; +do + pkg_count=$((pkg_count + 1)); + run_tests $item > ${LOGS}/${pkg_count}.log +done + +echo 'done'; diff --git a/test/pash_tests/top-n.sh b/test/pash_tests/top-n.sh new file mode 100755 index 0000000..d4373f7 --- /dev/null +++ b/test/pash_tests/top-n.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Top-N (1000) terms +# from https://dl.acm.org/doi/10.1145/5948.315654 + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} + +cat $IN | tr -cs A-Za-z '\n' | tr A-Z a-z | sort | uniq -c | sort -rn | sed 100q + diff --git a/test/pash_tests/topn.sh b/test/pash_tests/topn.sh new file mode 100755 index 0000000..6825938 --- /dev/null +++ b/test/pash_tests/topn.sh @@ -0,0 +1,2 @@ +# Top-N (1000) terms +cat $IN | tr -cs A-Za-z '\n' | tr A-Z a-z | sort | uniq -c | sort -rn | sed ${N}q diff --git a/test/pash_tests/topn_env_test.sh b/test/pash_tests/topn_env_test.sh new file mode 100644 index 0000000..0e8bdb7 --- /dev/null +++ b/test/pash_tests/topn_env_test.sh @@ -0,0 +1,3 @@ +N=1000 +IN=$PASH_TOP/evaluation/tests/input/10M.txt + diff --git a/test/pash_tests/tr-test.sh b/test/pash_tests/tr-test.sh new file mode 100644 index 0000000..becbb84 --- /dev/null +++ b/test/pash_tests/tr-test.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +## This test contains all occurences of tr (to test the annotation) + +FILE="$PASH_TOP/evaluation/tests/input/1M.txt" + +cat $FILE | tr -d ',' +cat $FILE | tr '[A-Z]' '[a-z]' +cat $FILE | tr -s ' ' '\n' +cat $FILE | tr '[a-z]' 'P' +cat $FILE | tr -c "[a-z][A-Z]" '\n' +cat $FILE | tr ' ' '\n' +cat $FILE | tr '[a-z]' '\n' +## This is a bit tricky but `tr -d '\n'` is pure because after it is done there is only 
one line. +cat $FILE | tr -d '\n' | grep "the" +cat $FILE | tr -c '[A-Z]' '\n' +cat $FILE | tr " " " " +cat $FILE | tr -cs A-Za-z '\n' +cat $FILE | tr A-Z a-z +cat $FILE | tr -d '[:punct:]' +cat $FILE | tr [:lower] [:upper] +cat $FILE | tr [:lower:] [:upper:] +cat $FILE | tr -s ' ' +cat $FILE | tr -s ' \n' +cat $FILE | tr -d '\012' | sort diff --git a/test/pash_tests/tr_cs_wc_test.sh b/test/pash_tests/tr_cs_wc_test.sh new file mode 100755 index 0000000..b8fc1fc --- /dev/null +++ b/test/pash_tests/tr_cs_wc_test.sh @@ -0,0 +1,17 @@ +## This script is used to experiment with how to get parallelism benefits from a bunch of Unix50 pipelines + +## You have to run the following before running this script. +## The output should be 439M long +## Warning: Takes a long time +## cat $PASH_TOP/evaluation/unix50/4.txt | $PASH_TOP/runtime/multiply.sh -m 1000000 | pv > $PASH_TOP/evaluation/unix50/big_4.txt + +FILE="${PASH_TOP}/evaluation/unix50/big_4.txt" + +# cat $FILE | tr -s ' ' '\n' | grep 'x' | grep '\.' | wc -l + +cat $FILE | tr ' ' '\n' | grep 'x' | grep '\.' | wc -l + +## Possible solutions: +## 1. Make an aggregator for tr -s (This is the best solutoin) +## 2. Remove the -s since it is not actually necessary +## 3. Make an aggregator for wc (?) \ No newline at end of file diff --git a/test/pash_tests/trap.sh b/test/pash_tests/trap.sh new file mode 100644 index 0000000..f959731 --- /dev/null +++ b/test/pash_tests/trap.sh @@ -0,0 +1,7 @@ +myfunction() +{ + echo myfunction invoked +} +trap myfunction EXIT +echo hello one +echo hello two diff --git a/test/pash_tests/trigrams.sh b/test/pash_tests/trigrams.sh new file mode 100755 index 0000000..5942c2e --- /dev/null +++ b/test/pash_tests/trigrams.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +# A somewhat suboptimal way of calculating 3-grams. 
+# Part of the intention is to highlight overheads of tagging each stream element + +IN=./input/1G.txt +OUT=./output/out.txt + +mkfifo s2 s3 + +cat $IN | +# head -n 2 | + sed 's/[^a-zA-Z0-9]/ /g' | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + tee s2 | + tail +2 | + paste s2 - | # At this point the stream has two elements + tee s3 | + cut -f 1 | + tail +3 | + paste s3 - | # Joining (1) the first two words , (2) + sort | + uniq > $OUT +rm s2 s3 + + + diff --git a/test/pash_tests/trim_primers.sh b/test/pash_tests/trim_primers.sh new file mode 100644 index 0000000..5254ae9 --- /dev/null +++ b/test/pash_tests/trim_primers.sh @@ -0,0 +1,6 @@ +# trim primers +INPUT=${INPUT:-$PASH_TOP/evaluation/bio/input/} +OUTPUT=${OUTPUT:-$PASH_TOP/evaluation/bio/output} +cd ${INPUT} +find . -maxdepth 1 -name "*.fastq" | xargs -I {} cutadapt -a TCCTCCGCTTATTGATAGC -o ${OUTPUT}/{}\_trimmed.fastq {}; + diff --git a/test/pash_tests/uniq-c.2.sh b/test/pash_tests/uniq-c.2.sh new file mode 100755 index 0000000..3c8a259 --- /dev/null +++ b/test/pash_tests/uniq-c.2.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +END_OF_1=$(tail -n 1 "$1") +END_NUM=$(echo "$END_OF_1" | grep -E -o '^[ ]*[0-9]*[ ]*' | tr -d "[:space:]") +END_WORD=$(echo "$END_OF_1" | sed 's/^[ ]*[0-9]*[ ]*//g') + +START_OF_2=$(head -n 1 "$2") +START_NUM=$(echo "$START_OF_2" | grep -E -o '^[ ]*[0-9]*[ ]*' | tr -d "[:space:]") +START_WORD=$(echo "$START_OF_2" | sed 's/^[ ]*[0-9]*[ ]*//g') + +if [[ $START_WORD == "$END_WORD" ]]; then + TOTAL_NUM=$((START_NUM + END_NUM)) + sed '$d' "$1" + printf "%7s %s\n" "$TOTAL_NUM" "$START_WORD" + sed '1d' "$2" +else + cat "$1" "$2" +fi diff --git a/test/pash_tests/uniq.sh b/test/pash_tests/uniq.sh new file mode 100755 index 0000000..dc75a38 --- /dev/null +++ b/test/pash_tests/uniq.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +# simply rerun uniq +cat "$@" | uniq diff --git a/test/pash_tests/unix50.sh b/test/pash_tests/unix50.sh new file mode 100755 index 0000000..7c5182b --- /dev/null +++ 
b/test/pash_tests/unix50.sh @@ -0,0 +1,151 @@ +#!/bin/bash + +# scripts from https://unixgame.io/ +# https://github.com/psinghbh/softsec.github.io +# input files https://github.com/psinghbh/softsec.github.io/tree/master/ctf/unixgame.io/challenges +# Which join is easier: http://www.theunixschool.com/2011/08/5-different-ways-to-join-all-lines-in.html +# 1 (default) + 3 + 1 + 1 + 6 + 1 + 1 + 3 + 5 + 9 + 3 + 2 + 1 = 37 (there are 3 missing) +# missing 8.5, 9.5, 12.1 + +if [[ -z "$IN_PRE" ]]; then + if [[ -z "$PASH_TOP" ]]; then + echo "Need to provide PASH_TOP, possibly $(git rev-parse --show-toplevel)" 1>&2 + exit 1 + else + export IN_PRE=$PASH_TOP/evaluation/benchmarks/unix50/input + fi +fi + +IN1=$IN_PRE/1.txt +IN2=$IN_PRE/2.txt +IN3=$IN_PRE/3.txt +IN4=$IN_PRE/4.txt +IN5=$IN_PRE/5.txt +IN6=$IN_PRE/6.txt +IN7=$IN_PRE/7.txt +IN8=$IN_PRE/8.txt +IN91=$IN_PRE/9.1.txt +IN92=$IN_PRE/9.2.txt +IN93=$IN_PRE/9.3.txt +IN94=$IN_PRE/9.4.txt +IN95=$IN_PRE/9.5.txt +IN96=$IN_PRE/9.6.txt +IN97=$IN_PRE/9.7.txt +IN98=$IN_PRE/9.8.txt +IN99=$IN_PRE/9.9.txt +IN10=$IN_PRE/10.txt +IN11=$IN_PRE/11.txt +IN12=$IN_PRE/12.txt + +# 1.0: extract the last name +cat $IN1 | cut -d ' ' -f 2 + +# 1.1: extract names and sort +cat $IN1 | cut -d ' ' -f 2 | sort + +# 1.2: extract names and sort +cat $IN1 | head -n 2 | cut -d ' ' -f 2 + +# 1.3: sort top first names +cat $IN1 | cut -d ' ' -f 1 | sort | uniq -c | sort -r + +# 2.1: get all Unix utilities +cat $IN2 | cut -d ' ' -f 4 | tr -d ',' + +# 3.1: get lowercase first letter of last names (awk) +cat $IN3 | cut -d ' ' -f 2 | cut -c 1-1 | tr -d '\n' | tr '[A-Z]' '[a-z]' + +# 4.1: find number of rounds +cat $IN4 | tr ' ' '\n' | grep '\.' | wc -l + +# 4.2: find pieces captured by Belle +cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | wc -l + +# 4.3: find pieces captured by Belle with a pawn +cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | cut -d '.' 
-f 2 | grep -v '[KQRBN]' | wc -l + +# 4.4: histogram of Belle's captures (-pawns) by each type of piece +cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | cut -d '.' -f 2 | grep '[KQRBN]' | cut -c 1-1 | sort | uniq -c | sort -nr + +# 4.5: 4.4 + pawns +cat $IN4 | tr ' ' '\n' | grep 'x' | grep '\.' | cut -d '.' -f 2 | cut -c 1-1 | tr '[a-z]' 'P' | sort | uniq -c | sort -nr + +# 4.6: piece used the most by Belle +cat $IN4 | tr ' ' '\n' | grep '\.' | cut -d '.' -f 2 | cut -c 1-1 | tr '[a-z]' 'P' | sort -r | uniq | head -n 3 | tail -n 1 + +# 5.1: extract hello world +cat $IN5 | grep 'print' | cut -d "\"" -f 2 | cut -c 1-12 + +# 6.1: order the bodies by how easy it would be to land on them in Thompson's Space Travel game when playing at the highest simulation scale +cat $IN6 | awk "{print \$2, \$0}" | sort -nr | cut -d ' ' -f 2 + +# 7.1: identify number of AT&T unix versions +cat $IN7 | cut -f 1 | grep 'AT&T' | wc -l + +# 7.2: find most frequently occurring machine +cat $IN7 | cut -f 2 | sort -n | uniq -c | sort -nr | head -n 1 | tr -s ' ' '\n' | tail -n 1 + +# 7.3: all the decades in which a unix version was released +cat $IN7 | cut -f 4 | sort -n | cut -c 3-3 | uniq | sed s/\$/'0s'/ + +# 8.1: count unix birth-year +cat $IN8 | tr ' ' '\n' | grep 1969 | wc -l + +# 8.2: find Bell Labs location where Dennis Ritchie had his office +cat $IN8 | grep 'Bell' | awk 'length <= 45' | cut -d ',' -f 2 | awk "{\$1=\$1};1" + +# 8.3: find names of the four people most involved with unix +cat $IN8 | grep '(' | cut -d '(' -f 2 | cut -d ')' -f 1 | head -n 1 + +# 8.4: find longest words without hyphens +cat $IN8 | tr -c "[a-z][A-Z]" '\n' | sort | awk "length >= 16" + +# # 8.5: Find second-most-freq 8-character word(s) without hyphens +# cat $IN8 > /dev/null + +# 9.1: extract the word PORT +cat $IN91 | tr ' ' '\n' | grep '[A-Z]' | tr '[a-z]' '\n' | grep '[A-Z]' | tr -d '\n' | cut -c 1-4 + +# 9.2: extract the word BELL +cat $IN92 | cut -c 1-1 | tr -d '\n' + +# 9.3: animal that used to decorate 
the Unix room +cat $IN93 | cut -c 1-2 | tr -d '\n' + +# 9.4: four corners with E centered, for an "X" configuration +cat $IN94 | tr ' ' '\n' | grep "\"" | sed 4d | cut -d "\"" -f 2 | tr -d '\n' + +# # 9.5: backwards running clock, in a backwards poem +# cat $IN95 > /dev/null + +# 9.6: Follow the directions for grep +cat $IN96 | tr ' ' '\n' | grep '[A-Z]' | sed 1d | sed 3d | sed 3d | tr '[a-z]' '\n' | grep '[A-Z]' | sed 3d | tr -c '[A-Z]' '\n' | tr -d '\n' + +# 9.7: Four corners +cat $IN97 | sed 2d | sed 2d | tr -c '[A-Z]' '\n' | tr -d '\n' + +# 9.8: TELE-communications +cat $IN98 | tr -c '[a-z][A-Z]' '\n' | grep '[A-Z]' | sed 1d | sed 2d | sed 3d | sed 4d | tr -c '[A-Z]' '\n' | tr -d '\n' + +# 9.9: +cat $IN99 | tr -c '[a-z][A-Z]' '\n' | grep '[A-Z]' | sed 1d | sed 1d | sed 2d | sed 3d | sed 5d | tr -c '[A-Z]' '\n' | tr -d '\n' + +# 10.1: count Turing award recipients while working at Bell Labs +cat $IN10 | sed 1d | grep 'Bell' | cut -f 2 | wc -l + +# 10.2: list Turing award recipients while working at Bell Labs +cat $IN10 | sed 1d | grep 'Bell' | cut -f 2 + +# 10.3: extract Ritchie's username +cat $IN10 | grep 'Bell' | cut -f 2 | head -n 1 | fmt -w1 | cut -c 1-1 | tr -d '\n' | tr '[A-Z]' '[a-z]' + +# 11.1: year Ritchie and Thompson receive the Hamming medal +cat $IN11 | grep 'UNIX' | cut -f 1 + +# 11.2: most repeated first name in the list? +cat $IN11 | cut -f 2 | cut -d ' ' -f 1 | sort | uniq -c | sort -nr | head -n 1 | fmt -w1 | sed 1d + + +# # 12.1: transform this list of instructions such that if the snake follows the +# # new instructions top to bottom, it ends on the location of the apple. 
+# cat $IN12 > /dev/null diff --git a/test/pash_tests/unparsing-special-chars.sh b/test/pash_tests/unparsing-special-chars.sh new file mode 100644 index 0000000..7f584ab --- /dev/null +++ b/test/pash_tests/unparsing-special-chars.sh @@ -0,0 +1,10 @@ + x=`printf '%s' \#`; printf '%s\n' "$x" + x=`printf '%s' "#"`; printf '%s\n' "$x" + x=`printf '%s' \<`; printf '%s\n' "$x" + x=`printf '%s' "<"`; printf '%s\n' "$x" + x=`printf '%s' \>`; printf '%s\n' "$x" + x=`printf '%s' ">"`; printf '%s\n' "$x" + x=`printf '%s' \~`; printf '%s\n' "$x" + x=`printf '%s' "~"`; printf '%s\n' "$x" + x=`printf '%s' \ `; printf '%s\n' "$x" + x=`printf '%s' " "`; printf '%s\n' "$x" \ No newline at end of file diff --git a/test/pash_tests/unsafe0.sh b/test/pash_tests/unsafe0.sh new file mode 100644 index 0000000..9d7ef33 --- /dev/null +++ b/test/pash_tests/unsafe0.sh @@ -0,0 +1 @@ +echo ${x=uhoh} diff --git a/test/pash_tests/unsafe1.sh b/test/pash_tests/unsafe1.sh new file mode 100644 index 0000000..a60c58c --- /dev/null +++ b/test/pash_tests/unsafe1.sh @@ -0,0 +1 @@ +echo $((x=2)) diff --git a/test/pash_tests/unsafe2.sh b/test/pash_tests/unsafe2.sh new file mode 100644 index 0000000..475e688 --- /dev/null +++ b/test/pash_tests/unsafe2.sh @@ -0,0 +1 @@ +echo ${nonesuch?uhoh} is unsafe diff --git a/test/pash_tests/update-img.sh b/test/pash_tests/update-img.sh new file mode 100755 index 0000000..ca6617c --- /dev/null +++ b/test/pash_tests/update-img.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +### +# Repackage updated pash docker image to latest commit +### + +cd $(dirname $0) + +# Assumes a pash image exists already +# curl img.pash.ndr.md | docker load; docker run --name pash-playground -it pash/18.04 + +docker start pash-playground +docker exec pash-playground bash -c 'cd /pash; git pull' +docker stop pash-playground + + +docker commit $(docker ps -a | grep pash-playground | cut -f1 -d' ') pash/18.04:latest +docker save pash/18.04:latest | gzip > pash-docker.tar.gz + +if [[ "$(hostname)" == "beta" 
]]; then + # This assumes you're on beta + mv pash-docker.tar.gz /var/www/pash-web/ +fi + +docker build -t pash-play ../ + +if [[ ./token.txt ]]; then + cat ~/token.txt | docker login https://docker.pkg.github.com -u nvasilakis --password-stdin +fi + +docker push docker.pkg.github.com/andromeda/pash/play:latest \ No newline at end of file diff --git a/test/pash_tests/var_assgn.sh b/test/pash_tests/var_assgn.sh new file mode 100644 index 0000000..0938da7 --- /dev/null +++ b/test/pash_tests/var_assgn.sh @@ -0,0 +1,3 @@ +echo "foobar: ${FOOBAR}" +echo "foobar: ${FOOBAR:=baz}" +echo "foobar: ${FOOBAR}" diff --git a/test/pash_tests/wait_for_output_and_sigpipe_rest.sh b/test/pash_tests/wait_for_output_and_sigpipe_rest.sh new file mode 100755 index 0000000..a56bfb5 --- /dev/null +++ b/test/pash_tests/wait_for_output_and_sigpipe_rest.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +## TODO: Give it the output pid as an argument +wait "$@" + +## TODO: This only works if there is only a single output node +## (and a single node given as an argument to `wait`). +export internal_exec_status=$? + +# It is assumed that $distro is set when this is called. + +# Note: We need the || true after the grep so that it doesn't exit with error if it finds nothing. + + +# This value may contains multiple pids as a whitespace-separated string, and +# we must split it as multiple pids by shell's field splitting. +# shellcheck disable=SC2086 +(> /dev/null 2>&1 kill -SIGPIPE $pids_to_kill || true) + +## +## Old way of waiting, very inefficient. 
+## + +# now do different things depending on distro + +## TODO: Delete this since it is very costly +# case "$distro" in +# freebsd*) +# # not sure at all about this one +# pids_to_kill="$(ps -efl $BASHPID |awk '{print $1}' | { grep -E '[0-9]' || true; } )" +# ;; +# *) +# pids_to_kill="$(ps --ppid $BASHPID |awk '{print $1}' | { grep -E '[0-9]' || true; } )" +# ;; +# esac +# pids_to_kill="" + +## TODO: Maybe send a signal to all pids at once +# for pid in $pids_to_kill +# do +# # wait $pid +# (> /dev/null 2>&1 kill -SIGPIPE $pid || true) +# done diff --git a/test/pash_tests/wc.sh b/test/pash_tests/wc.sh new file mode 100755 index 0000000..1020c17 --- /dev/null +++ b/test/pash_tests/wc.sh @@ -0,0 +1,3 @@ +# IN=/home/ubuntu/pash/evaluation/scripts/input/10M.txt + +cat $IN | wc \ No newline at end of file diff --git a/test/pash_tests/web-index-aux.sh b/test/pash_tests/web-index-aux.sh new file mode 100644 index 0000000..cb6fd40 --- /dev/null +++ b/test/pash_tests/web-index-aux.sh @@ -0,0 +1,141 @@ +mkfifo {1,2,3}grams + +bigrams_aux() +{ + ( mkfifo s2 > /dev/null ) ; + ( mkfifo s3 > /dev/null ) ; + + sed '$d' s2 > s3 & + tee s2 | + tail +2 | + paste s3 - + rm s2 + rm s3 +} + +bigram_aux_map() +{ + IN=$1 + OUT=$2 + AUX_HEAD=$3 + AUX_TAIL=$4 + + s2=$(mktemp -u) + aux1=$(mktemp -u) + aux2=$(mktemp -u) + aux3=$(mktemp -u) + temp=$(mktemp -u) + + mkfifo $s2 + mkfifo $aux1 + mkfifo $aux2 + mkfifo $aux3 + + ## New way of doing it using an intermediate file. 
This is slow + ## but doesn't deadlock + cat $IN > $temp + + sed '$d' $temp > $aux3 & + cat $temp | head -n 1 > $AUX_HEAD & + cat $temp | tail -n 1 > $AUX_TAIL & + cat $temp | tail +2 | paste $aux3 - > $OUT & + + wait + + rm $temp + rm $s2 + rm $aux1 + rm $aux2 + rm $aux3 +} + +bigram_aux_reduce() +{ + IN1=$1 + AUX_HEAD1=$2 + AUX_TAIL1=$3 + IN2=$4 + AUX_HEAD2=$5 + AUX_TAIL2=$6 + OUT=$7 + AUX_HEAD_OUT=$8 + AUX_TAIL_OUT=$9 + + temp=$(mktemp -u) + + mkfifo $temp + + cat $AUX_HEAD1 > $AUX_HEAD_OUT & + cat $AUX_TAIL2 > $AUX_TAIL_OUT & + paste $AUX_TAIL1 $AUX_HEAD2 > $temp & + cat $IN1 $temp $IN2 > $OUT & + + wait + + rm $temp +} + + +trigrams_aux() +{ + s2=$(mktemp -u) + s3=$(mktemp -u) + + mkfifo $s2 $s3 + + tee $s2 | + tail +2 | + paste $s2 - | + tee $s3 | + cut -f 1 | + tail +3 | + paste $s3 - | + sed "\$d" | + sed "\$d" + + rm $s2 $s3 +} + + +extract_text() +{ + while read -r line + do + cat $line | + iconv -c -t ascii//TRANSLIT | + pandoc +RTS -K64m -RTS --from html --to plain --quiet + done +} + + +cat $IN | + sed "s#^#$WIKI#" | + extract_text | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + grep -vwFf $WEB_INDEX_DIR/stopwords.txt | + $WEB_INDEX_DIR/stem-words.js | + tee 3grams 2grams 1grams > /dev/null & + +cat 1grams | + sort | + uniq -c | + sort -rn > 1-grams.txt & + +cat 2grams | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + bigrams_aux | + sort | + uniq -c | + sort -rn > 2-grams.txt & + +cat 3grams | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + trigrams_aux | + sort | + uniq -c | + sort -rn # > 3-grams.txt + +rm {1,2,3}grams diff --git a/test/pash_tests/web-index.sh b/test/pash_tests/web-index.sh new file mode 100755 index 0000000..bca753b --- /dev/null +++ b/test/pash_tests/web-index.sh @@ -0,0 +1,148 @@ +#!/bin/bash + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/web-index/input/5.txt} +WEB_INDEX_DIR=${WEB_INDEX_DIR:-$PASH_TOP/evaluation/benchmarks/web-index/input} +WIKI=${WIKI:-$PASH_TOP/evaluation/benchmarks/web-index/input/} + +mkfifo {1,2,3}grams + +bigrams_aux() +{ + ( 
mkfifo s2 > /dev/null ) ; + ( mkfifo s3 > /dev/null ) ; + + sed '$d' s2 > s3 & + tee s2 | + tail +2 | + paste s3 - + rm s2 + rm s3 +} + +bigram_aux_map() +{ + IN=$1 + OUT=$2 + AUX_HEAD=$3 + AUX_TAIL=$4 + + s2=$(mktemp -u) + aux1=$(mktemp -u) + aux2=$(mktemp -u) + aux3=$(mktemp -u) + temp=$(mktemp -u) + + mkfifo $s2 + mkfifo $aux1 + mkfifo $aux2 + mkfifo $aux3 + + ## New way of doing it using an intermediate file. This is slow + ## but doesn't deadlock + cat $IN > $temp + + sed '$d' $temp > $aux3 & + cat $temp | head -n 1 > $AUX_HEAD & + cat $temp | tail -n 1 > $AUX_TAIL & + cat $temp | tail +2 | paste $aux3 - > $OUT & + + wait + + rm $temp + rm $s2 + rm $aux1 + rm $aux2 + rm $aux3 +} + +bigram_aux_reduce() +{ + IN1=$1 + AUX_HEAD1=$2 + AUX_TAIL1=$3 + IN2=$4 + AUX_HEAD2=$5 + AUX_TAIL2=$6 + OUT=$7 + AUX_HEAD_OUT=$8 + AUX_TAIL_OUT=$9 + + temp=$(mktemp -u) + + mkfifo $temp + + cat $AUX_HEAD1 > $AUX_HEAD_OUT & + cat $AUX_TAIL2 > $AUX_TAIL_OUT & + paste $AUX_TAIL1 $AUX_HEAD2 > $temp & + cat $IN1 $temp $IN2 > $OUT & + + wait + + rm $temp +} + + +trigrams_aux() +{ + s2=$(mktemp -u) + s3=$(mktemp -u) + + mkfifo $s2 $s3 + + tee $s2 | + tail +2 | + paste $s2 - | + tee $s3 | + cut -f 1 | + tail +3 | + paste $s3 - | + sed "\$d" | + sed "\$d" + + rm $s2 $s3 +} + + +extract_text() +{ + while read -r line + do + cat $line | + iconv -c -t ascii//TRANSLIT | + pandoc +RTS -K64m -RTS --from html --to plain --quiet + done +} + +export -f extract_text + +cat $IN | + sed "s#^#$WIKI#" | + extract_text | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + grep -vwFf $WEB_INDEX_DIR/stopwords.txt | + $WEB_INDEX_DIR/stem-words.js | + tee 3grams 2grams 1grams > /dev/null & + +cat 1grams | + sort | + uniq -c | + sort -rn > 1-grams.txt & + +cat 2grams | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + bigrams_aux | + sort | + uniq -c | + sort -rn > 2-grams.txt & + +cat 3grams | + tr -cs A-Za-z '\n' | + tr A-Z a-z | + trigrams_aux | + sort | + uniq -c | + sort -rn # > 3-grams.txt + +rm {1,2,3}grams diff --git 
a/test/pash_tests/wf.sh b/test/pash_tests/wf.sh new file mode 100755 index 0000000..262e7b7 --- /dev/null +++ b/test/pash_tests/wf.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Calculate the frequency of each word in the document, and sort by frequency + +IN=${IN:-$PASH_TOP/evaluation/benchmarks/oneliners/input/1G.txt} + +cat $IN | tr -cs A-Za-z '\n' | tr A-Z a-z | sort | uniq -c | sort -rn diff --git a/test/pash_tests/wf_env_test.sh b/test/pash_tests/wf_env_test.sh new file mode 100644 index 0000000..805e069 --- /dev/null +++ b/test/pash_tests/wf_env_test.sh @@ -0,0 +1 @@ +IN=$PASH_TOP/evaluation/tests/input/10M.txt diff --git a/test/pash_tests/worker.sh b/test/pash_tests/worker.sh new file mode 100644 index 0000000..a94285a --- /dev/null +++ b/test/pash_tests/worker.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# trap ctrl-c and call ctrl_c() +trap cleanup INT + +export PASH_TOP=${PASH_TOP:-${BASH_SOURCE%/*}} +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/" +# point to the local downloaded folders +export PYTHONPATH=${PASH_TOP}/python_pkgs/ +export PASH_TIMESTAMP="$(date +"%y-%m-%d-%T")" + +# add hdfs directory if hdfs command exist +if command -v "hdfs" &> /dev/null +then + datanode_dir=$(hdfs getconf -confKey dfs.datanode.data.dir) + export HDFS_DATANODE_DIR=${datanode_dir#"file://"} # removes file:// prefix +fi + +source "$PASH_TOP/compiler/pash_init_setup.sh" "$@" --distributed_exec + +export PASH_TMP_PREFIX="$(mktemp -d /tmp/pash_XXXXXXX)/" + +cleanup() { + kill "$FILEREADER_PID" "$DISCOVERY_PID" + wait "$FILEREADER_PID" "$DISCOVERY_PID" 2>/dev/null + rm -rf "$PASH_TMP_PREFIX" +} + +"$PASH_TOP/runtime/dspash/file_reader/filereader_server" & +FILEREADER_PID=$! +"$PASH_TOP/runtime/dspash/file_reader/discovery_server" & +DISCOVERY_PID=$! 
+python3 "$PASH_TOP/compiler/dspash/worker.py" "$@" diff --git a/test/pash_tests/wrap_cat.sh b/test/pash_tests/wrap_cat.sh new file mode 100755 index 0000000..f38b981 --- /dev/null +++ b/test/pash_tests/wrap_cat.sh @@ -0,0 +1,27 @@ +file1=1.out +file2=2.out +file3=3.out +file4=4.out +testFile=../../evaluation/scripts/input/10M.txt +batchSize=70000 + +mkfifo $file1 +mkfifo $file3 + +## 1. TODO: Deadlocks on merge + split (true) +## 2. Increasing batchsize deadlock +## 3. Improving wrap performance + +../r_split $testFile $batchSize $file1 $file3 & +# ../r_wrap cat < $file1 > $file3 & +../r_merge $file1 $file3 > $file4 + +# cat $testFile > $file4 + +# if cmp -s "$testFile" "$file4"; then +# printf 'The file "%s" is the same as "%s"\n' "$file1" "$file3" +# else +# printf 'The file "%s" is different from "%s"\n' "$file1" "$file3" +# fi + +rm -rf *.out diff --git a/test/round_trip.sh b/test/round_trip.sh index 1aa1648..1fbc86b 100755 --- a/test/round_trip.sh +++ b/test/round_trip.sh @@ -8,22 +8,48 @@ fi p=$1 tgt=$2 -orig=$(${p} ${tgt} 2>&1) -if [ "$?" -ne 0 ]; -then echo "${tgt} FAILED, couldn't run (output: ${orig})"; exit 2 +orig=$(mktemp) + +"$p" "$tgt" >"$orig" +if [ "$?" -ne 0 ] +then + echo "RT_ABORT_1: '$tgt' -> '$orig'" + exit 3 fi -rt=$(${p} ${tgt} | ${p} 2>&1) -if [ "$?" -ne 0 ]; -then echo "${tgt} FAILED round trip, couldn't run (output: $rt)"; exit 3 +rt=$(mktemp) + +"$p" "$orig" >"$rt" +if [ "$?" -ne 0 ] +then + echo "RT_ABORT_2: '$tgt' -> '$orig' -> '$rt'" + exit 4 fi -if [ "${orig}" = "${rt}" ]; -then echo ${tgt} OK; exit 0 +if diff -b "$orig" "$rt" >/dev/null +then + echo "PASS '$tgt'" + exit 0 else - echo ${tgt} FAILED - echo ${orig} - echo ========== - echo ${rt} + # try one more time around the loop + rtrt=$(mktemp) + + "$p" "$rt" >"$rtrt" + if [ "$?" 
-ne 0 ] + then + echo "RT_ABORT_3: '$tgt' -> '$orig' -> '$rt' -> '$rtrt'" + exit 5 + fi + + if diff -b "$rt" "$rtrt" >/dev/null + then + echo "PASS '$tgt' (two runs to fixpoint)" + exit 0 + fi + + echo "FAIL: '$tgt' first time" + diff -ub "$orig" "$rt" + echo ">>> '$tgt' second time" + diff -ub "$rt" "$rtrt" exit 1 fi diff --git a/test/test_ocaml_python.sh b/test/test_ocaml_python.sh new file mode 100755 index 0000000..48e2c68 --- /dev/null +++ b/test/test_ocaml_python.sh @@ -0,0 +1,63 @@ +#!/bin/sh + +: ${RT_OCAML=../ocaml/rt.sh} +: ${RT_PYTHON=../python/rt.py} + +if [ $# -ne 1 ] +then + echo "Usage: $0 testFile" + echo + exit 1 +fi + +testFile="$1" + +if [ ! -f "$testFile" ] +then + echo "Error: cannot read '$testFile'!" + echo + exit 1 +fi + +ocaml_rt=$(mktemp) +ocaml_err=$(mktemp) +python_rt=$(mktemp) +python_err=$(mktemp) + +"$RT_OCAML" "$testFile" >"$ocaml_rt" 2>"$ocaml_err" +ocaml_ec=$? +"$RT_PYTHON" < "$testFile" >"$python_rt" 2>"$python_err" +python_ec=$? + +if [ "$ocaml_ec" -ne 0 ] && [ "$python_ec" -ne 0 ] +then + echo "PASS '$testFile' | both abort" + exit 0 +elif [ "$ocaml_ec" -ne 0 ] +then + echo "OCAML_ABORT: '$testFile'" + cat "$ocaml_err" >&2 + exit 1 +elif [ "$python_ec" -ne 0 ] +then + echo "PYTHON_ABORT: '$testFile'" + cat "$python_err" >&2 + exit 1 +fi + +diff "$ocaml_rt" "$python_rt" >/dev/null +if [ $? -ne 0 ] +then + diff -w "$ocaml_rt" "$python_rt" >/dev/null + if [ $? 
-ne 0 ] + then + diff -w "$ocaml_rt" "$python_rt" >/dev/null + echo "FAIL: '$testFile' | $ocaml_rt $python_rt" + else + diff "$ocaml_rt" "$python_rt" >/dev/null + echo "FAIL_WHITESPACE: '$testFile' | $ocaml_rt $python_rt" + fi + exit 1 +fi + +echo "PASS '$testFile'" diff --git a/test/failing/aaaa_single b/test/tests/aaaa_single similarity index 100% rename from test/failing/aaaa_single rename to test/tests/aaaa_single diff --git a/test/failing/backslash b/test/tests/backslash similarity index 100% rename from test/failing/backslash rename to test/tests/backslash diff --git a/test/tests/weird_tilde.sh b/test/tests/weird_tilde.sh new file mode 100644 index 0000000..82949bc --- /dev/null +++ b/test/tests/weird_tilde.sh @@ -0,0 +1,11 @@ +case $nm_file_list_spec~$to_tool_file_cmd in + *~func_convert_file_noop | *~func_convert_file_msys_to_w32 | ~*) + try_normal_branch=yes + eval cmd=\"$cmd1\" + func_len " $cmd" + len=$func_len_result + ;; + *) + try_normal_branch=no + ;; + esac From 787ba847b4ce901c632e9b9f9f24faef95bf96ac Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 26 Jul 2022 17:10:46 -0700 Subject: [PATCH 342/401] Add Python package support (#18) Python packaging with a nicer new interface. Major refactor of organization. Automatic builds of a variety of wheels, with automatic uploading to PyPi. 
Signed-off-by: Michael Greenberg --- .github/workflows/build.yml | 167 +++++++++++++++----- .gitignore | 3 + Dockerfile | 41 ----- MANIFEST.in | 23 +++ README.md | 42 +++-- TODO.md | 7 +- libdash.opam | 4 +- libdash/.gitignore | 2 + libdash/__init__.py | 2 + python/dash.py => libdash/_dash.py | 0 {python => libdash}/ast.py | 3 +- python/parse_to_ast.py => libdash/parser.py | 35 ++-- python/ast2shell.py => libdash/printer.py | 8 +- ocaml/LICENSE | 21 +++ pyproject.toml | 24 +++ python/LICENSE | 21 +++ python/Makefile | 2 +- python/rt.py | 9 +- setup.py | 46 ++++++ test/Makefile | 2 +- version.sh | 12 ++ 21 files changed, 347 insertions(+), 127 deletions(-) delete mode 100644 Dockerfile create mode 100644 MANIFEST.in create mode 100644 libdash/.gitignore create mode 100644 libdash/__init__.py rename python/dash.py => libdash/_dash.py (100%) rename {python => libdash}/ast.py (99%) rename python/parse_to_ast.py => libdash/parser.py (71%) rename python/ast2shell.py => libdash/printer.py (99%) create mode 100644 ocaml/LICENSE create mode 100644 pyproject.toml create mode 100644 python/LICENSE create mode 100644 setup.py create mode 100755 version.sh diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 34f46d2..2bbc864 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,58 +1,143 @@ name: Main workflow on: - - pull_request - - push + pull_request: + push: + schedule: + - cron: '5 14 * * *' -jobs: - build-and-test: +jobs: + check-version-numbers: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Check version numbers + run: ./version.sh + + package-python: strategy: fail-fast: false matrix: os: - - macos-latest + - macos-12 + - macos-11 + - macos-10.15 + - ubuntu-latest + + runs-on: ${{ matrix.os }} + + steps: + - name: Install dependencies (libtool, aclocal, autoconf) + run: | + if [ "$RUNNER_OS" = "Linux" ]; then + sudo apt-get install libtool automake + elif [ 
"$RUNNER_OS" = "macOS" ]; then + brew install libtool autoconf automake + else + echo Unsupported RUNNER_OS=$RUNNER_OS + exit 1 + fi + + - name: Checkout code + uses: actions/checkout@v2 + + - name: Build wheels + uses: pypa/cibuildwheel@v2.8.1 + + - name: Upload binary wheel + uses: actions/upload-artifact@v2 + with: + name: ${{ format('bdist.{0}', matrix.os) }} + path: wheelhouse/libdash-*.whl + + - name: Build source distribution (Linux only) + if: contains(matrix.os, 'ubuntu') + run: python setup.py sdist + + - name: Upload source distribution (from Linux) + uses: actions/upload-artifact@v2 + if: contains(matrix.os, 'ubuntu') + with: + name: sdist + path: dist/libdash-*.tar.gz + + build-both-and-compare: + strategy: + fail-fast: true + matrix: + os: + - macos-12 + - macos-11 + - macos-10.15 - ubuntu-latest ocaml-compiler: - - 4.12.x - - 4.13.x - 4.14.x runs-on: ${{ matrix.os }} steps: - - name: Checkout code - uses: actions/checkout@v2 + - name: Checkout code + uses: actions/checkout@v2 - - name: Use OCaml ${{ matrix.ocaml-compiler }} - uses: avsm/setup-ocaml@v2 - with: - ocaml-compiler: ${{ matrix.ocaml-compiler }} + - name: Use OCaml ${{ matrix.ocaml-compiler }} + uses: avsm/setup-ocaml@v2 + with: + ocaml-compiler: ${{ matrix.ocaml-compiler }} + + - name: Install OCaml bindings + run: opam install . - - name: Install system dependencies (via OPAM) - run: | - opam pin add libdash . --no-action - opam depext libdash --yes --with-test - - - name: Build dash - run: | - if [ $(uname) = "Darwin" ]; then glibtoolize; else libtoolize; fi - aclocal - autoheader - automake --add-missing - autoconf - ./configure - make - cp src/.libs/dlldash.so python/libdash.so - if [ $(uname) = "Darwin" ]; then cp src/.libs/libdash.dylib python/; fi - - - name: Test Python bindings - run: make -C python test - - - name: Install OCaml bindings - run: opam install . 
- - - name: Test OCaml bindings - run: opam exec -- make -C ocaml test - - - name: Compare OCaml and Python bindings - run: opam exec -- make -C test test + - name: Test OCaml bindings + run: opam exec -- make -C ocaml test + + # we don't reuse the wheels so that all of the CI runs can happen concurrently + - name: Install Python directly + run: sudo python3 setup.py install + + - name: Test Python bindings + run: make -C python test + + - name: Compare OCaml and Python bindings + run: opam exec -- make -C test test + + deploy: + needs: + - check-version-numbers + - package-python + - build-both-and-compare + runs-on: ubuntu-latest + if: ${{ github.ref == 'refs/heads/main' }} + + steps: + - name: Download distributions + uses: actions/download-artifact@v2 + + - name: Rename distributions + run: | + mkdir dist + VERSION=$(./version.sh) + ls bdist.*/ + mv bdist.*/libdash-*.whl dist/ + mv sdist/libdash-*.tar.gz dist/ + echo Look on my Works, ye Mighty, and despair! + ls dist + + - name: Deploy 'latest' release on GH + uses: marvinpinto/action-automatic-releases@latest + with: + repo_token: "${{ secrets.GITHUB_TOKEN }}" + automatic_release_tag: "latest" + prerelease: true + title: "Python source and binary distributions" + files: | + dist/* + + - name: Deploy tagged release on PyPI + if: startsWith(github.ref, 'refs/tags') + uses: pypa/gh-action-pypi-publish@master + with: + password: ${{ secrets.PYPI_API_TOKEN }} + verbose: true diff --git a/.gitignore b/.gitignore index 4376351..4ee132e 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,6 @@ libtool ltmain.sh ocamlprof.dump __pycache__ +libdash.egg-info +dist +build diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index eccbb0b..0000000 --- a/Dockerfile +++ /dev/null @@ -1,41 +0,0 @@ -# start with a reasonable image. 
Debian 9 stretch is what's on the POSIX testing VM -FROM ocaml/opam2:debian-stable - -# silence apt -# TODO this still isn't silencing it :( -ENV DEBIAN_FRONTEND=noninteractive - -# system support for libdash; libgmp for zarith for lem -RUN sudo apt-get install -y autoconf autotools-dev libtool pkg-config libffi-dev - -RUN opam update - -RUN opam switch 4.07 - -# make sure we have ocamlfind and ocamlbuild -RUN opam install ocamlfind ocamlbuild - -# set up FFI for libdash; num library for lem; extunix for shell syscalls -RUN opam pin add -n ctypes 0.11.5 -RUN opam install ctypes-foreign ctypes - -WORKDIR /home/opam - -# copy in repo files for libdash to the WORKDIR (should be /home/opam) -# we do this as late as possible so we don't have to redo the slow stuff above -ADD --chown=opam:opam . libdash - -# build libdash, expose shared object -#RUN cd libdash; ./autogen.sh && ./configure --prefix=/usr --libdir=/usr/lib/x86_64-linux-gnu -#RUN cd libdash; make -#RUN cd libdash; sudo make install -# build ocaml bindings -#RUN cd libdash/ocaml; opam config exec -- make && opam config exec -- make install - -RUN (cd libdash; eval $(opam env); opam install -v -t .) 
- -# system test -RUN cd libdash/test; opam config exec -- make && opam config exec make test - -ENTRYPOINT [ "opam", "exec", "--" ] -CMD [ "bash" ] diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..04c5720 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,23 @@ +include COPYING Makefile.am autogen.sh configure.ac +graft src +exclude src/builtins.c src/builtins.h src/builtins.def +exclude src/dash +exclude src/init.c src/mkinit +exclude src/mknodes src/mksignames src/mksyntax +exclude src/nodes.c src/nodes.h +exclude src/signames.c +exclude src/syntax.c src/syntax.h +exclude src/token.h +prune src/.deps +prune src/bltin/.deps +prune src/mkinit.dSYM +prune src/mknodes.dSYM +prune src/mksignames.dSYM +prune src/mksyntax.dSYM +global-exclude *.dSYM *.o *.lo *.la *.py[cod] __pycache__ Makefile *.log .gitignore +prune src/.libs +prune ocaml/ +prune test/ +prune build/ +exclude python/rt.py +include libdash/libdash.so libdash/libdash.dylib \ No newline at end of file diff --git a/README.md b/README.md index 93a9eac..89edf6e 100644 --- a/README.md +++ b/README.md @@ -2,37 +2,45 @@ *libdash* is a fork of the Linux Kernel's `dash` shell that builds a linkable library with extra exposed interfaces. The primary use of libdash is to parse shell scripts, but it could be used for more. -The OCaml bindings---packaged as the [`libdash` OPAM package](https://opam.ocaml.org/packages/libdash/)---include two executables, `shell_to_json` and `json_to_shell`. +The Python bindings are packaged as the [`libdash` PyPi package](https://pypi.org/project/libdash/). + +The OCaml bindings---packaged as the [`libdash` OPAM package](https://opam.ocaml.org/packages/libdash/)---include two executables, `shell_to_json` and `json_to_shell`, which let you conveniently parse POSIX shell scripts into a JSON AST. # What are the dependencies? -The C code for dash should build on a wide variety of systems. 
The library may not build on platforms with esoteric linkers; it's been tested on OS X. +The C code for dash should build on a wide variety of systems; it requires `libtool` and `autotools` (`aclocal`, `autoheader`, `automake`, `autoconf`). The library may not build on platforms with esoteric linkers; it's been tested on macOS and Linux. -The OCaml code relies on `ctypes-0.11.5` and `ctypes-foreign`; everything else should be in `base`. +The Python and OCaml bindings depend on being able to build the C code. See `libdash.opam` for details on the OCaml code's dependencies, which includes the build-time external dependencies. Python wheels have no need for these build-time dependencies, but building from a Python source distribution will only succeed when `libtool` and `autotools` are present. -# How to build and test it +The CI scripts (in `.github/workflows/build.yml`) give build details. -You should be able to simply run `docker build -t libdash .` to get a runnable environment. Everything will be in `/home/opam/libdash`. +## How to build it -## How to build it locally +### Python -Install the OPAM file: `opam pin add .` or `opam install .`. This will build the OCaml library and install it in your OPAM repository. There are tests in another directory; they will only build when libdash is actually installed. +Run `python3 setup.py install`. -You can test the OCaml bindings by running: +You can test the Python bindings by running: ``` -cd ocaml; make test +cd python; make test ``` -You can test the Python bindings by running: +### OCaml + +Install the OPAM file: `opam pin add .` or `opam install .`. This will build the OCaml library and install it in your OPAM repository. There are tests in another directory; they will only build when libdash is actually installed. 
+ +You can test the OCaml bindings by running: ``` -cd python; make test +cd ocaml; make test ``` -The tests use `test/round_trip.sh` to ensure that every tester file in `test/tests` round-trips correctly through parsing and pretty printing. The OPAM package can be installed with the `-t` flag to run the tests internally; see `ocaml/Makefile`'s testing targets. +### Testing -Additionally, you can run tests that compare the OCaml and Python implementations: +The tests use `test/round_trip.sh` to ensure that every tester file in `test/tests` round-trips correctly through parsing and pretty printing. + +Additionally, you can run tests that compare the OCaml and Python implementations (after you've installed them both): ``` cd test; make @@ -40,8 +48,14 @@ cd test; make # How to use the parser -The ideal interface to use is `parsecmd_safe` in `parser.c`. Parsing the POSIX shell is a complicated affair: beyond the usual locale issues, aliases affect the lexer, so one must use `setalias` and `unalias` to manage any aliases that ought to exist. +For Python, see [`python/rt.py`](https://github.com/mgree/libdash/blob/master/python/rt.py), an example tool that does a round-trip: shell syntax to AST back to shell syntax. + +For OCaml, see [`ocaml/shell_to_json.ml`](https://github.com/mgree/libdash/blob/master/ocaml/shell_to_json.ml), a tool that parses shell syntax and produces JSON (using the [atdgen](https://opam.ocaml.org/packages/atdgen/) bindings). + +The ideal low-level interface to use is `parsecmd_safe` in `parser.c`; you'll need to ensure that dash's initialization routines have been called and that the stack marks are managed correctly. Parsing the POSIX shell is a complicated affair: beyond the usual locale issues, aliases affect the lexer, so one must use `setalias` and `unalias` to manage any aliases that ought to exist. # How work with the parsed nodes The general AST is described in `nodes.h`. 
There are some tricky invariants around the precise formatting of control codes; the OCaml code shows some examples of working with the `args` fields in `ocaml/ast.ml`, which converts the C AST to an OCaml AST. + +The OCaml tools `shell_to_json` and `json_to_shell` will produce JSON ASTs, allowing you to work with these ASTs in any language. diff --git a/TODO.md b/TODO.md index 03e504b..96832bd 100644 --- a/TODO.md +++ b/TODO.md @@ -1,5 +1,8 @@ - [x] clear out old C stuff -- [ ] get roundtrips correct (fix OCaml pretty printing) -- [ ] correct libdash.so installation (locally) +- [x] get roundtrips correct (fix OCaml pretty printing) +- [x] correct libdash.so installation (locally) - [ ] pip setup - [ ] pash pull request +- [ ] fixup docker +- [ ] smoosh pull request + diff --git a/libdash.opam b/libdash.opam index 88c0759..165dfd5 100644 --- a/libdash.opam +++ b/libdash.opam @@ -1,6 +1,6 @@ opam-version: "2.0" name: "libdash" -version: "0.1.1" +version: "0.2" synopsis: "Bindings to the dash shell's parser" maintainer: "Michael Greenberg " authors: "Michael Greenberg " @@ -40,6 +40,6 @@ install: [ ] dev-repo: "git+https:///github.com/mgree/libdash" url { - src: "https://github.com/mgree/libdash/archive/v0.1.1.tar.gz" + src: "https://github.com/mgree/libdash/archive/v0.1.2.tar.gz" } diff --git a/libdash/.gitignore b/libdash/.gitignore new file mode 100644 index 0000000..8749261 --- /dev/null +++ b/libdash/.gitignore @@ -0,0 +1,2 @@ +libdash.dylib +libdash.so diff --git a/libdash/__init__.py b/libdash/__init__.py new file mode 100644 index 0000000..19850a4 --- /dev/null +++ b/libdash/__init__.py @@ -0,0 +1,2 @@ +from .parser import parse +from .printer import to_string diff --git a/python/dash.py b/libdash/_dash.py similarity index 100% rename from python/dash.py rename to libdash/_dash.py diff --git a/python/ast.py b/libdash/ast.py similarity index 99% rename from python/ast.py rename to libdash/ast.py index 8b517a7..7717624 100644 --- a/python/ast.py +++ 
b/libdash/ast.py @@ -1,6 +1,7 @@ import os import sys -from dash import * + +from ._dash import * # parser.h CTLESC = 129 diff --git a/python/parse_to_ast.py b/libdash/parser.py similarity index 71% rename from python/parse_to_ast.py rename to libdash/parser.py index 0604673..4c0b724 100644 --- a/python/parse_to_ast.py +++ b/libdash/parser.py @@ -1,8 +1,8 @@ import os import subprocess from ctypes import * -from ast import of_node -from dash import * +from .ast import of_node +from ._dash import * FILE_PATH = os.path.dirname(os.path.realpath(os.path.abspath(__file__))) LIBDASH_LIBRARY_PATH = os.path.join(FILE_PATH, "libdash.so") @@ -15,18 +15,23 @@ def __init__(self, message='ParseError'): super(ParsingException, self).__init__(message) # This is a mix of dash.ml:parse_next and parse_to_json.ml. -def parse_to_ast (inputPath, init=True): +def parse(inputPath, init=True): + """ + Parses the file at `inputPath` to an AST. + + `init` determines whether libdash should be initialized; set it to `False` after the first call. + """ lines = [] - libdash = CDLL (LIBDASH_LIBRARY_PATH) + libdash = CDLL(LIBDASH_LIBRARY_PATH) if (init): - initialize (libdash) + initialize(libdash) if (inputPath == "-"): - setinputtostdin (libdash) + setinputtostdin(libdash) else: - setinputfile (libdash, inputPath) + setinputfile(libdash, inputPath) fp = open (inputPath, 'r') for line in fp: @@ -35,14 +40,14 @@ def parse_to_ast (inputPath, init=True): # struct parsefile *parsefile = &basepf; /* current input file */ # Get the value of parsefile (not &parsefile)! 
- parsefile_ptr_ptr = addressof (parsefile.in_dll (libdash, "parsefile")) - parsefile_ptr = cast (parsefile_ptr_ptr, POINTER (POINTER (parsefile))) + parsefile_ptr_ptr = addressof(parsefile.in_dll (libdash, "parsefile")) + parsefile_ptr = cast(parsefile_ptr_ptr, POINTER (POINTER (parsefile))) parsefile_var = parsefile_ptr.contents - smark = init_stack (libdash) + smark = init_stack(libdash) - NEOF = addressof (c_int.in_dll (libdash, "tokpushback")) - NERR = addressof (c_int.in_dll (libdash, "lasttoken")) + NEOF = addressof(c_int.in_dll(libdash, "tokpushback")) + NERR = addressof(c_int.in_dll(libdash, "lasttoken")) while (True): linno_before = parsefile_var.contents.linno - 1; # libdash is 1-indexed @@ -65,10 +70,10 @@ def parse_to_ast (inputPath, init=True): if (inputPath != "-"): ## Both of these assertions check "our" assumption with respect to the final parser state ## and are therefore not necessary if they become an issue. - assert((linno_after == len (lines)) or (linno_after == len (lines) + 1)) + assert ((linno_after == len (lines)) or (linno_after == len (lines) + 1)) # Last line did not have a newline - assert(len (lines [-1]) > 0 and (lines [-1][-1] != '\n')) + assert (len (lines [-1]) > 0 and (lines [-1][-1] != '\n')) else: assert (nleft_after == 0); # Read whole lines @@ -83,4 +88,4 @@ def parse_to_ast (inputPath, init=True): yield (new_ast, parsedLines, linno_before, linno_after) - pop_stack (libdash, smark) + pop_stack(libdash, smark) diff --git a/python/ast2shell.py b/libdash/printer.py similarity index 99% rename from python/ast2shell.py rename to libdash/printer.py index 998d978..9c98f42 100644 --- a/python/ast2shell.py +++ b/libdash/printer.py @@ -1,9 +1,6 @@ #!/usr/bin/python3 - import os; -# from os import abort; - STRING_OF_VAR_TYPE_DICT = { "Normal" : "", @@ -138,7 +135,10 @@ def background (s): # "case " ^ string_of_arg a ^ " in " ^ # separated string_of_case cs ^ " esac" # | Defun (_,name,body) -> name ^ "() {\n" ^ to_string body ^ "\n}" 
-def to_string (ast): +def to_string(ast): + """ + Renders an AST back in shell syntax. + """ # print (ast); if (len (ast) == 0): diff --git a/ocaml/LICENSE b/ocaml/LICENSE new file mode 100644 index 0000000..e598e3c --- /dev/null +++ b/ocaml/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Michael Greenberg + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a05e0b3 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,24 @@ +[project] +name = "libdash" +version = "0.2" +authors = [ + { name="Michael Greenberg", email="michael@greenberg.science" }, +] +description = "Bindings for the dash shell as a library" +readme = "README.md" +license = { file="COPYING" } +requires-python = ">=3.7" +classifiers = [ + "Programming Language :: Python :: 3", + "Topic :: System :: System Shells", + "License :: OSI Approved :: MIT License", + "Operating System :: POSIX", +] + +[project.urls] +"Homepage" = "https://github.com/mgree/libdash" +"Bug Tracker" = "https://github.com/mgree/libdash/issues" + +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" diff --git a/python/LICENSE b/python/LICENSE new file mode 100644 index 0000000..a78c7de --- /dev/null +++ b/python/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Michael Greenberg, Konstantinos Kallas, and Thurston Dang + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/python/Makefile b/python/Makefile index 3ecb9f3..b446cce 100644 --- a/python/Makefile +++ b/python/Makefile @@ -1,6 +1,6 @@ .PHONY: test clean -test: rt.py ast.py ast2shell.py dash.py parse_to_ast.py +test: rt.py ../libdash/*.py @find ../test/tests ../test/pash_tests -type f | while read f; do ../test/round_trip.sh ./rt.py "$$f"; done | tee python.log @cat python.log | egrep '^[A-Z0-9_]+:' | cut -d ':' -f 1 | sort | uniq -c @grep ':' python.log && echo "FAILED" && exit 1 || exit 0 diff --git a/python/rt.py b/python/rt.py index 61b235b..4706df8 100755 --- a/python/rt.py +++ b/python/rt.py @@ -2,18 +2,17 @@ import sys -from parse_to_ast import parse_to_ast -from ast2shell import to_string +import libdash sys.setrecursionlimit (9001) def print_asts(new_asts): for (ast, lines, linno_before, linno_after) in new_asts: - print(to_string(ast)) + print(libdash.to_string(ast)) if (len(sys.argv) == 1): - new_asts = parse_to_ast("-", True) + new_asts = libdash.parse("-", True) else: - new_asts = parse_to_ast(sys.argv[1], True) + new_asts = libdash.parse(sys.argv[1], True) print_asts(new_asts) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..c177ed2 --- /dev/null +++ b/setup.py @@ -0,0 +1,46 @@ +from setuptools import setup +from setuptools.command.build_py import build_py + +import os +import shutil +import subprocess +import sys + +from pathlib import Path +long_description = (Path(__file__).parent / "README.md").read_text() + +def try_exec(*cmds): + proc = subprocess.run(cmds) + + if proc.returncode != 0: + print('`{}` failed'.format(' '.join(cmds)), file=sys.stderr) + proc.check_returncode() + +class libdash_build_py(build_py): + def run(self): + build_py.run(self) + + if 
sys.platform == 'darwin': + libtoolize = "glibtoolize" + else: + libtoolize = "libtoolize" + + try_exec(libtoolize) + try_exec('aclocal') + try_exec('autoheader') + try_exec('automake', '--add-missing') + try_exec('autoconf') + try_exec('./configure') + try_exec('make') + + shutil.copy2('src/.libs/dlldash.so', os.path.join(self.build_lib, 'libdash/libdash.so')) + if sys.platform == 'darwin': + shutil.copy2('src/.libs/libdash.dylib', os.path.join(self.build_lib, 'libdash/libdash.dylib')) + +setup(name='libdash', + packages=['libdash'], + cmdclass={'build_py': libdash_build_py}, + long_description=long_description, + long_description_content_type='text/markdown', + include_package_data=True, + has_ext_modules=lambda: True) diff --git a/test/Makefile b/test/Makefile index 358e409..2a642e8 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,6 +1,6 @@ SCRIPTS_DIR=$(PASH_TOP) -PYTHON_FILES=../python/rt.py ../python/ast.py ../python/ast2shell.py ../python/dash.py ../python/parse_to_ast.py +PYTHON_FILES=../python/rt.py $(addprefix ../libdash/,__init__.py _dash.py ast.py parser.py printer.py) OCAML_FILES=../ocaml/rt.sh .PHONY : test clean diff --git a/version.sh b/version.sh new file mode 100755 index 0000000..946ea2c --- /dev/null +++ b/version.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +OPAM_VERSION=$(grep -e '^version:' libdash.opam | cut -d':' -f2 | tr -d ' "') + +PYTHON_VERSION=$(grep -e '^version =' pyproject.toml | cut -d'=' -f2 | tr -d ' "') + +[ "$OPAM_VERSION" = "$PYTHON_VERSION" ] && echo "$OPAM_VERSION" && exit 0 + +echo "Version numbers don't match!" 
+echo " OPAM is '$OPAM_VERSION'" +echo " Python is '$PYTHON_VERSION'" +exit 1 From d294791f6841b0ff5eaa118ce5acc129846b5c94 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 26 Jul 2022 17:13:07 -0700 Subject: [PATCH 343/401] Fix refs in deploy stage Signed-off-by: Michael Greenberg --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2bbc864..a71b3c2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -109,7 +109,7 @@ jobs: - package-python - build-both-and-compare runs-on: ubuntu-latest - if: ${{ github.ref == 'refs/heads/main' }} + if: github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/tags') steps: - name: Download distributions From b2b29943d74731aa0f5986ad6cd0ed526ed81d9f Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 26 Jul 2022 17:36:15 -0700 Subject: [PATCH 344/401] alas macos-10.15, we hardly knew ye Signed-off-by: Michael Greenberg --- .github/workflows/build.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a71b3c2..2ab4a42 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -24,7 +24,6 @@ jobs: os: - macos-12 - macos-11 - - macos-10.15 - ubuntu-latest runs-on: ${{ matrix.os }} @@ -71,7 +70,6 @@ jobs: os: - macos-12 - macos-11 - - macos-10.15 - ubuntu-latest ocaml-compiler: - 4.14.x From d2941139699bf53321ff3fc5464f51f8a63ac9d5 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 26 Jul 2022 21:01:38 -0400 Subject: [PATCH 345/401] readme touchup, remove unnecessary call to version.sh --- .github/workflows/build.yml | 1 - README.md | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2ab4a42..73688a6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -116,7 +116,6 @@ jobs: -
name: Rename distributions run: | mkdir dist - VERSION=$(./version.sh) ls bdist.*/ mv bdist.*/libdash-*.whl dist/ mv sdist/libdash-*.tar.gz dist/ diff --git a/README.md b/README.md index 89edf6e..2db376e 100644 --- a/README.md +++ b/README.md @@ -14,11 +14,11 @@ The Python and OCaml bindings depend on being able to build the C code. See `lib The CI scripts (in `.github/workflows/build.yml`) give build details. -## How to build it +## How to build `libdash` from source ### Python -Run `python3 setup.py install`. +Run `python3 setup.py install`. On macOS, you must first install the build dependencies via `brew install libtool autoconf automake`. You can test the Python bindings by running: From 3ceae064b09f17a4aba1491e836aa7c354ddf66a Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 26 Jul 2022 21:19:42 -0400 Subject: [PATCH 346/401] no need for macos-12 either, builds fr 10.9 Signed-off-by: Michael Greenberg --- .github/workflows/build.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 73688a6..f4dd851 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -22,7 +22,6 @@ jobs: fail-fast: false matrix: os: - - macos-12 - macos-11 - ubuntu-latest From 77ac090fdac2d17c73c533f233b1100ed19bcfd8 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 26 Jul 2022 21:52:52 -0400 Subject: [PATCH 347/401] correct version number in setup.py Signed-off-by: Michael Greenberg --- setup.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/setup.py b/setup.py index c177ed2..ab9dd23 100644 --- a/setup.py +++ b/setup.py @@ -7,6 +7,13 @@ import sys from pathlib import Path +pyproject = (Path(__file__).parent / "pyproject.toml").read_text() +version_string = re.search('^version = "([^"]+)"$', pyproject) +if version_string is None: + print("Couldn't determine file version from pyproject.toml...", file=sys.stderr) + sys.exit(1) +version = version_string.group(1) + 
long_description = (Path(__file__).parent / "README.md").read_text() def try_exec(*cmds): @@ -40,6 +47,7 @@ def run(self): setup(name='libdash', packages=['libdash'], cmdclass={'build_py': libdash_build_py}, + version=version, long_description=long_description, long_description_content_type='text/markdown', include_package_data=True, From c9967cf04b1905a18a6b2d7ee63ad7b8d6c406b9 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 26 Jul 2022 21:58:32 -0400 Subject: [PATCH 348/401] include pyproject.toml --- MANIFEST.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 04c5720..d58caff 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -20,4 +20,4 @@ prune ocaml/ prune test/ prune build/ exclude python/rt.py -include libdash/libdash.so libdash/libdash.dylib \ No newline at end of file +include libdash/libdash.so libdash/libdash.dylib pyproject.toml \ No newline at end of file From a9a09e8aa18e726b10d0ec7f1171953e713754fa Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 26 Jul 2022 22:06:34 -0400 Subject: [PATCH 349/401] Hardcode version and add checking in version.sh Signed-off-by: Michael Greenberg --- MANIFEST.in | 2 +- setup.py | 9 +-------- version.sh | 7 +++++-- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index d58caff..04c5720 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -20,4 +20,4 @@ prune ocaml/ prune test/ prune build/ exclude python/rt.py -include libdash/libdash.so libdash/libdash.dylib pyproject.toml \ No newline at end of file +include libdash/libdash.so libdash/libdash.dylib \ No newline at end of file diff --git a/setup.py b/setup.py index ab9dd23..b5f3481 100644 --- a/setup.py +++ b/setup.py @@ -7,13 +7,6 @@ import sys from pathlib import Path -pyproject = (Path(__file__).parent / "pyproject.toml").read_text() -version_string = re.search('^version = "([^"]+)"$', pyproject) -if version_string is None: - print("Couldn't determine file version 
from pyproject.toml...", file=sys.stderr) - sys.exit(1) -version = version_string.group(1) - long_description = (Path(__file__).parent / "README.md").read_text() def try_exec(*cmds): @@ -47,7 +40,7 @@ def run(self): setup(name='libdash', packages=['libdash'], cmdclass={'build_py': libdash_build_py}, - version=version, + version='0.2', long_description=long_description, long_description_content_type='text/markdown', include_package_data=True, diff --git a/version.sh b/version.sh index 946ea2c..ddf5d11 100755 --- a/version.sh +++ b/version.sh @@ -4,9 +4,12 @@ OPAM_VERSION=$(grep -e '^version:' libdash.opam | cut -d':' -f2 | tr -d ' "') PYTHON_VERSION=$(grep -e '^version =' pyproject.toml | cut -d'=' -f2 | tr -d ' "') -[ "$OPAM_VERSION" = "$PYTHON_VERSION" ] && echo "$OPAM_VERSION" && exit 0 +PYTHON_VERSION2=$(grep -e 'version=' setup.py | cut -d'=' -f2 | tr -d "',") + +[ "$OPAM_VERSION" = "$PYTHON_VERSION" ] && [ "$PYTHON_VERSION" = "$PYTHON_VERSION2" ] && echo "$OPAM_VERSION" && exit 0 echo "Version numbers don't match!" 
echo " OPAM is '$OPAM_VERSION'" -echo " Python is '$PYTHON_VERSION'" +echo " Python is '$PYTHON_VERSION' in pyproject.toml" +echo " Python is '$PYTHON_VERSION2' in setup.py" exit 1 From 1a46d4ae68038f07f6220570d091e3fb79e5a434 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 26 Jul 2022 22:24:21 -0400 Subject: [PATCH 350/401] fix src url for opam Signed-off-by: Michael Greenberg --- libdash.opam | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdash.opam b/libdash.opam index 165dfd5..6765f5e 100644 --- a/libdash.opam +++ b/libdash.opam @@ -40,6 +40,6 @@ install: [ ] dev-repo: "git+https:///github.com/mgree/libdash" url { - src: "https://github.com/mgree/libdash/archive/v0.1.2.tar.gz" + src: "https://github.com/mgree/libdash/archive/v0.2.tar.gz" } From 099ae0568f18e260b4c8a8691150f0ef6b32b4e4 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 27 Jul 2022 09:31:08 -0400 Subject: [PATCH 351/401] fix python deployment --- .github/workflows/build.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f4dd851..8cc602d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -133,7 +133,8 @@ jobs: - name: Deploy tagged release on PyPI if: startsWith(github.ref, 'refs/tags') - uses: pypa/gh-action-pypi-publish@master + uses: pypa/gh-action-pypi-publish@v1 with: password: ${{ secrets.PYPI_API_TOKEN }} verbose: true + skip_existing: true From 993d01c9fd10407875b7ea75f7647e7d2dfc3957 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 27 Jul 2022 10:04:06 -0400 Subject: [PATCH 352/401] correct deploy tag arggggh --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8cc602d..f2b6a39 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -133,7 +133,7 @@ jobs: - name: Deploy tagged release on PyPI if: 
startsWith(github.ref, 'refs/tags') - uses: pypa/gh-action-pypi-publish@v1 + uses: pypa/gh-action-pypi-publish@release/v1 with: password: ${{ secrets.PYPI_API_TOKEN }} verbose: true From e04868815ce9c116dcf1738759801baac1f0ec7c Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 27 Jul 2022 10:43:58 -0400 Subject: [PATCH 353/401] drop skip_existing now that we have the green light --- .github/workflows/build.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f2b6a39..9914450 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -137,4 +137,3 @@ jobs: with: password: ${{ secrets.PYPI_API_TOKEN }} verbose: true - skip_existing: true From ea673727343e70b24cd9dfa86478fb750047588f Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 27 Jul 2022 11:03:29 -0400 Subject: [PATCH 354/401] force runtime computation of library path Signed-off-by: Michael Greenberg --- libdash/parser.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/libdash/parser.py b/libdash/parser.py index 4c0b724..e3e397f 100644 --- a/libdash/parser.py +++ b/libdash/parser.py @@ -4,9 +4,18 @@ from .ast import of_node from ._dash import * -FILE_PATH = os.path.dirname(os.path.realpath(os.path.abspath(__file__))) -LIBDASH_LIBRARY_PATH = os.path.join(FILE_PATH, "libdash.so") +LIBDASH_LIBRARY_PATH = None +def libdash_library_path(): + global LIBDASH_LIBRARY_PATH + + if LIBDASH_LIBRARY_PATH is not None: + return LIBDASH_LIBRARY_PATH + + FILE_PATH = os.path.dirname(os.path.realpath(os.path.abspath(__file__))) + LIBDASH_LIBRARY_PATH = os.path.join(FILE_PATH, "libdash.so") + return LIBDASH_LIBRARY_PATH + EOF_NLEFT = -99; # libdash/src/input.c class ParsingException(Exception): @@ -23,7 +32,7 @@ def parse(inputPath, init=True): """ lines = [] - libdash = CDLL(LIBDASH_LIBRARY_PATH) + libdash = CDLL(libdash_library_path()) if (init): initialize(libdash) From 
83d2d775b6d546897c909554ba7e5a738c27f983 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 27 Jul 2022 19:54:51 -0400 Subject: [PATCH 355/401] force 0.3 release Signed-off-by: Michael Greenberg --- libdash.opam | 4 ++-- pyproject.toml | 2 +- setup.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libdash.opam b/libdash.opam index 6765f5e..75053df 100644 --- a/libdash.opam +++ b/libdash.opam @@ -1,6 +1,6 @@ opam-version: "2.0" name: "libdash" -version: "0.2" +version: "0.3" synopsis: "Bindings to the dash shell's parser" maintainer: "Michael Greenberg " authors: "Michael Greenberg " @@ -40,6 +40,6 @@ install: [ ] dev-repo: "git+https:///github.com/mgree/libdash" url { - src: "https://github.com/mgree/libdash/archive/v0.2.tar.gz" + src: "https://github.com/mgree/libdash/archive/v0.3.tar.gz" } diff --git a/pyproject.toml b/pyproject.toml index a05e0b3..069e94c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "libdash" -version = "0.2" +version = "0.3" authors = [ { name="Michael Greenberg", email="michael@greenberg.science" }, ] diff --git a/setup.py b/setup.py index b5f3481..78501f1 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ def run(self): setup(name='libdash', packages=['libdash'], cmdclass={'build_py': libdash_build_py}, - version='0.2', + version='0.3', long_description=long_description, long_description_content_type='text/markdown', include_package_data=True, From e587a861d39a47aceb4b5cf74d47519053a56eac Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 28 Jul 2022 09:09:49 -0400 Subject: [PATCH 356/401] Add build canary (#19) Load files from actual repos and run basic smoke tests as a distribution canary. The Test PyPi fragment may not work properly with deployment scripts all the time (will it reject our updates?), but it'll certainly work on version bumps. 
--- .github/workflows/build.yml | 7 ++++ .github/workflows/canary.yml | 67 ++++++++++++++++++++++++++++++++++++ TODO.md | 4 +-- 3 files changed, 76 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/canary.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9914450..d30aff5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -131,6 +131,13 @@ jobs: files: | dist/* + - name: Deploy test distribution to Test PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.TEST_PYPI_API_TOKEN }} + verbose: true + repository_url: https://test.pypi.org/legacy/ + - name: Deploy tagged release on PyPI if: startsWith(github.ref, 'refs/tags') uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/canary.yml b/.github/workflows/canary.yml new file mode 100644 index 0000000..4630dc2 --- /dev/null +++ b/.github/workflows/canary.yml @@ -0,0 +1,67 @@ +name: Distribution canary + +on: + push: + schedule: + - cron: '20 13 * * *' + +jobs: + ocaml: + strategy: + fail-fast: true + matrix: + os: + - macos-12 + - macos-11 + - ubuntu-latest + ocaml-compiler: + - 4.14.x + + runs-on: ${{ matrix.os }} + + steps: + - name: Use OCaml ${{ matrix.ocaml-compiler }} + uses: avsm/setup-ocaml@v2 + with: + ocaml-compiler: ${{ matrix.ocaml-compiler }} + + - name: Install OCaml bindings from OPAM + run: | + opam update + opam install libdash + + - name: Test OPAM executables + run: test "$(echo hi | opam exec -- shell_to_json | opam exec json_to_shell)" = "hi" + + python: + strategy: + fail-fast: true + matrix: + os: + - macos-12 + - macos-11 + - ubuntu-latest + python-version: + - '3.7' + - '3.8' + - '3.9' + - '3.10' + repository_url: + - https://pypi.org/simple/ + - https://test.pypi.org/simple/ + + runs-on: ${{ matrix.os }} + + steps: + - name: Use Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + + - name: Install 
Python bindings from ${{ matrix.repository_url }} + run: pip install -v -i "${{ matrix.repository_url }}" --extra-index-url https://pypi.org/simple/ libdash + + - name: Test Python library + run: | + RT="$(printf 'import libdash\nasts = libdash.parse("-", True)\nfor (ast, lines, linno_before, linno_after) in asts:\n print(libdash.to_string(ast))\n')" + test "$(echo hi | python -c "$RT")" = "hi" diff --git a/TODO.md b/TODO.md index 96832bd..77dd1dd 100644 --- a/TODO.md +++ b/TODO.md @@ -1,8 +1,8 @@ - [x] clear out old C stuff - [x] get roundtrips correct (fix OCaml pretty printing) - [x] correct libdash.so installation (locally) -- [ ] pip setup +- [x] pip setup +- [x] testpypi setup - [ ] pash pull request -- [ ] fixup docker - [ ] smoosh pull request From 8910361a7a00ab0f09bd4ee03531b974fc73044e Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 28 Jul 2022 19:39:27 -0400 Subject: [PATCH 357/401] opam fixup, with thanks to kit-ty-kate --- .github/workflows/build.yml | 1 + libdash.opam | 16 +++++----------- version.sh | 2 +- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d30aff5..6f6ff6e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -137,6 +137,7 @@ jobs: password: ${{ secrets.TEST_PYPI_API_TOKEN }} verbose: true repository_url: https://test.pypi.org/legacy/ + skip_existing: true - name: Deploy tagged release on PyPI if: startsWith(github.ref, 'refs/tags') diff --git a/libdash.opam b/libdash.opam index 75053df..05c0680 100644 --- a/libdash.opam +++ b/libdash.opam @@ -1,19 +1,16 @@ opam-version: "2.0" -name: "libdash" -version: "0.3" synopsis: "Bindings to the dash shell's parser" maintainer: "Michael Greenberg " authors: "Michael Greenberg " -license: "BSD" +license: "BSD-3-Clause" homepage: "https://github.com/mgree/libdash" bug-reports: "https://github.com/mgree/libdash/issues" depends: [ - "ocaml" {>= "4.0.7"} + "ocaml" {>= "4.07"} "ocamlfind" 
{>= "1.8.0"} - "ctypes" {>= "0.11.5"} - "ctypes-foreign" {>= "0.4.0"} - "atdgen" {>= "2.2.1"} - "opam-installer" {>= "2.0.0"} + "ctypes" {>= "0.18.0"} + "ctypes-foreign" {>= "0.18.0"} + "atdgen" {>= "2.3.2"} "conf-autoconf" {build} "conf-aclocal" {build} "conf-libtool" {build} @@ -35,9 +32,6 @@ build: [ ["./ldconfig.sh"] # fix up .so files if ldconfig didn't do it [make "-C" "ocaml" "test"] {with-test} ] -install: [ - ["opam-installer" "--prefix=%{prefix}%" "libdash.install"] -] dev-repo: "git+https:///github.com/mgree/libdash" url { src: "https://github.com/mgree/libdash/archive/v0.3.tar.gz" diff --git a/version.sh b/version.sh index ddf5d11..4506991 100755 --- a/version.sh +++ b/version.sh @@ -1,6 +1,6 @@ #!/bin/sh -OPAM_VERSION=$(grep -e '^version:' libdash.opam | cut -d':' -f2 | tr -d ' "') +OPAM_VERSION=$(grep -e 'https://github.com/mgree/libdash/archive/' libdash.opam | sed -e 's/.*v\([0-9.]*\)\.tar\.gz"/\1/') PYTHON_VERSION=$(grep -e '^version =' pyproject.toml | cut -d'=' -f2 | tr -d ' "') From eca15dbc1066ff9e0d7f10a28e368e31f5662a5b Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 29 Jul 2022 09:05:41 -0400 Subject: [PATCH 358/401] run ocaml tests to completion on macos; add missing double dashes --- .github/workflows/canary.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/canary.yml b/.github/workflows/canary.yml index 4630dc2..997eddb 100644 --- a/.github/workflows/canary.yml +++ b/.github/workflows/canary.yml @@ -8,7 +8,7 @@ on: jobs: ocaml: strategy: - fail-fast: true + fail-fast: false matrix: os: - macos-12 @@ -31,7 +31,7 @@ jobs: opam install libdash - name: Test OPAM executables - run: test "$(echo hi | opam exec -- shell_to_json | opam exec json_to_shell)" = "hi" + run: test "$(echo hi | opam exec -- shell_to_json | opam exec -- json_to_shell)" = "hi" python: strategy: From 94bd0569ef438011dd1139c1f409186298a04074 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 1 Aug 2022 10:54:05 
-0400 Subject: [PATCH 359/401] add conf-automake to be explicit, run opam depext in canary as workaround per https://github.com/ocaml/opam-repository/issues/21921 --- .github/workflows/canary.yml | 1 + libdash.opam | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/canary.yml b/.github/workflows/canary.yml index 997eddb..da22b23 100644 --- a/.github/workflows/canary.yml +++ b/.github/workflows/canary.yml @@ -28,6 +28,7 @@ jobs: - name: Install OCaml bindings from OPAM run: | opam update + opam depext opam install libdash - name: Test OPAM executables diff --git a/libdash.opam b/libdash.opam index 05c0680..811fd1b 100644 --- a/libdash.opam +++ b/libdash.opam @@ -13,6 +13,7 @@ depends: [ "atdgen" {>= "2.3.2"} "conf-autoconf" {build} "conf-aclocal" {build} + "conf-automake" {build} "conf-libtool" {build} ] build: [ From 8e35bbac449f884898be867b984e5c3731b9b8a5 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 1 Aug 2022 11:34:24 -0400 Subject: [PATCH 360/401] be explicit about whose depexts --- .github/workflows/canary.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/canary.yml b/.github/workflows/canary.yml index da22b23..bba1ff9 100644 --- a/.github/workflows/canary.yml +++ b/.github/workflows/canary.yml @@ -28,7 +28,7 @@ jobs: - name: Install OCaml bindings from OPAM run: | opam update - opam depext + opam depext libdash opam install libdash - name: Test OPAM executables From a7b1ccf9439e1e086043f6ff4eb8801309244e7d Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 1 Aug 2022 19:09:32 -0400 Subject: [PATCH 361/401] run canary on ubuntu 18.04, too --- .github/workflows/canary.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/canary.yml b/.github/workflows/canary.yml index bba1ff9..75d8d90 100644 --- a/.github/workflows/canary.yml +++ b/.github/workflows/canary.yml @@ -13,7 +13,8 @@ jobs: os: - macos-12 - macos-11 - - ubuntu-latest + - ubuntu-18.04 + 
- ubuntu-20.04 ocaml-compiler: - 4.14.x From eb19acb49e1765d56263d12edf1bac213dc219f4 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 1 Aug 2022 19:15:50 -0400 Subject: [PATCH 362/401] specify ubuntu versions for python, too Signed-off-by: Michael Greenberg --- .github/workflows/canary.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/canary.yml b/.github/workflows/canary.yml index 75d8d90..dd36f16 100644 --- a/.github/workflows/canary.yml +++ b/.github/workflows/canary.yml @@ -42,7 +42,8 @@ jobs: os: - macos-12 - macos-11 - - ubuntu-latest + - ubuntu-18.04 + - ubuntu-20.04 python-version: - '3.7' - '3.8' From 6494689f27807f5c315b80ee79833b0d03add370 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 18 Aug 2022 09:13:41 -0400 Subject: [PATCH 363/401] python minimum version -> 3.6 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 069e94c..9f0b3f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ authors = [ description = "Bindings for the dash shell as a library" readme = "README.md" license = { file="COPYING" } -requires-python = ">=3.7" +requires-python = ">=3.6" classifiers = [ "Programming Language :: Python :: 3", "Topic :: System :: System Shells", From e8952c2e9be4c13966cea85559aed699e7811a9c Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 18 Aug 2022 09:15:47 -0400 Subject: [PATCH 364/401] bump version to patch 0.3.1 --- pyproject.toml | 2 +- setup.py | 2 +- version.sh | 3 --- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9f0b3f5..b81284c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "libdash" -version = "0.3" +version = "0.3.1" authors = [ { name="Michael Greenberg", email="michael@greenberg.science" }, ] diff --git a/setup.py b/setup.py index 78501f1..541a899 100644 --- a/setup.py +++ b/setup.py @@ -40,7 
+40,7 @@ def run(self): setup(name='libdash', packages=['libdash'], cmdclass={'build_py': libdash_build_py}, - version='0.3', + version='0.3.1', long_description=long_description, long_description_content_type='text/markdown', include_package_data=True, diff --git a/version.sh b/version.sh index 4506991..42cfbc3 100755 --- a/version.sh +++ b/version.sh @@ -1,7 +1,5 @@ #!/bin/sh -OPAM_VERSION=$(grep -e 'https://github.com/mgree/libdash/archive/' libdash.opam | sed -e 's/.*v\([0-9.]*\)\.tar\.gz"/\1/') - PYTHON_VERSION=$(grep -e '^version =' pyproject.toml | cut -d'=' -f2 | tr -d ' "') PYTHON_VERSION2=$(grep -e 'version=' setup.py | cut -d'=' -f2 | tr -d "',") @@ -9,7 +7,6 @@ PYTHON_VERSION2=$(grep -e 'version=' setup.py | cut -d'=' -f2 | tr -d "',") [ "$OPAM_VERSION" = "$PYTHON_VERSION" ] && [ "$PYTHON_VERSION" = "$PYTHON_VERSION2" ] && echo "$OPAM_VERSION" && exit 0 echo "Version numbers don't match!" -echo " OPAM is '$OPAM_VERSION'" echo " Python is '$PYTHON_VERSION' in pyproject.toml" echo " Python is '$PYTHON_VERSION2' in setup.py" exit 1 From e1cd090e167983a11de7544719cee52fa5d7af2b Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 18 Aug 2022 09:18:29 -0400 Subject: [PATCH 365/401] lower setuptools to accommodate 3.6 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b81284c..1741b90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,5 +20,5 @@ classifiers = [ "Bug Tracker" = "https://github.com/mgree/libdash/issues" [build-system] -requires = ["setuptools>=61.0"] +requires = ["setuptools>=59.0"] build-backend = "setuptools.build_meta" From 344f27a9fb4d60d4e78cfba8cc3e13b2996ec726 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 18 Aug 2022 09:22:43 -0400 Subject: [PATCH 366/401] don't run canary on push --- .github/workflows/canary.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/canary.yml b/.github/workflows/canary.yml 
index dd36f16..d882689 100644 --- a/.github/workflows/canary.yml +++ b/.github/workflows/canary.yml @@ -1,7 +1,7 @@ name: Distribution canary on: - push: +# push: schedule: - cron: '20 13 * * *' From 667146ca6bdd7b33a9eb08b14d0e9c5f11fb87ba Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 18 Aug 2022 09:23:17 -0400 Subject: [PATCH 367/401] fix version.sh --- version.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.sh b/version.sh index 42cfbc3..4982456 100755 --- a/version.sh +++ b/version.sh @@ -4,7 +4,7 @@ PYTHON_VERSION=$(grep -e '^version =' pyproject.toml | cut -d'=' -f2 | tr -d ' " PYTHON_VERSION2=$(grep -e 'version=' setup.py | cut -d'=' -f2 | tr -d "',") -[ "$OPAM_VERSION" = "$PYTHON_VERSION" ] && [ "$PYTHON_VERSION" = "$PYTHON_VERSION2" ] && echo "$OPAM_VERSION" && exit 0 +[ "$PYTHON_VERSION" = "$PYTHON_VERSION2" ] && echo "$PYTHON_VERSION" && exit 0 echo "Version numbers don't match!" echo " Python is '$PYTHON_VERSION' in pyproject.toml" From 10494d780f6a097dac88c26c166cf03a5ca76d70 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 18 Aug 2022 09:24:43 -0400 Subject: [PATCH 368/401] j/k keep it at 3.7 --- pyproject.toml | 6 +++--- setup.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1741b90..069e94c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,13 @@ [project] name = "libdash" -version = "0.3.1" +version = "0.3" authors = [ { name="Michael Greenberg", email="michael@greenberg.science" }, ] description = "Bindings for the dash shell as a library" readme = "README.md" license = { file="COPYING" } -requires-python = ">=3.6" +requires-python = ">=3.7" classifiers = [ "Programming Language :: Python :: 3", "Topic :: System :: System Shells", @@ -20,5 +20,5 @@ classifiers = [ "Bug Tracker" = "https://github.com/mgree/libdash/issues" [build-system] -requires = ["setuptools>=59.0"] +requires = ["setuptools>=61.0"] build-backend = 
"setuptools.build_meta" diff --git a/setup.py b/setup.py index 541a899..78501f1 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ def run(self): setup(name='libdash', packages=['libdash'], cmdclass={'build_py': libdash_build_py}, - version='0.3.1', + version='0.3', long_description=long_description, long_description_content_type='text/markdown', include_package_data=True, From 809169c3f00b97390be91d12fd5d199d11eece33 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 18 Aug 2022 10:33:19 -0400 Subject: [PATCH 369/401] pretty printing documentation [ci skip] --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 2db376e..b2cbe81 100644 --- a/README.md +++ b/README.md @@ -59,3 +59,11 @@ The ideal low-level interface to use is `parsecmd_safe` in `parser.c`; you'll ne The general AST is described in `nodes.h`. There are some tricky invariants around the precise formatting of control codes; the OCaml code shows some examples of working with the `args` fields in `ocaml/ast.ml`, which converts the C AST to an OCaml AST. The OCaml tools `shell_to_json` and `json_to_shell` will produce JSON ASTs, allowing you to work with these ASTs in any language. + +# Pretty printing + +The pretty printer does its best to produce valid shell scripts, but it's possible to manually construct AST nodes that don't directly correspond to valid scripts. + +For example, the Python AST `[[['Q', [['C', 34]]]]]` represents a quoted field containing a double quote character. Translated literally, this would yield the string `"""`, which is not a valid shell script. The pretty printer will instead automatically escape the inner quote, rendering `"\""`. + +While the printer tries to get things right either way, you should use escapes to signal to the printer when to escape: you should use the Python AST `[[['Q', [['E', 34]]]]]` to mark the inner double quote as escaped. 
From 2510eb6c728e0ef7a229caf37a97b714696307bb Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 23 Aug 2022 10:05:36 -0400 Subject: [PATCH 370/401] Quoting fixes (#20) There are now three quoting modes in pretty printing: unquoted, quoted (only escape special characters, including `"`), and heredoc (only escape special characters, excluding `"`). We now no longer treat `!` as an escaped character, which is correct on non-interactive shells but will break interactive scripts. Longer term, we need to know when pretty-printing who is consuming our output. But right now the only real client is non-interactive, so here we are. --- libdash/printer.py | 29 ++++++++++++++++++----------- ocaml/ast.ml | 30 ++++++++++++++++++------------ 2 files changed, 36 insertions(+), 23 deletions(-) diff --git a/libdash/printer.py b/libdash/printer.py index 9c98f42..4317638 100644 --- a/libdash/printer.py +++ b/libdash/printer.py @@ -15,6 +15,10 @@ "Length" : "#" }; +UNQUOTED = 0 # everything escaped +QUOTED = 1 # only escape special characters +HEREDOC = 2 # like QUOTED, but _don't_ escape double quotes +QUOTE_MODES = [UNQUOTED, QUOTED, HEREDOC] # dash.ml # @@ -324,25 +328,27 @@ def escaped (param): # "${" ^ name ^ (if nul then ":" else "") ^ string_of_var_type vt ^ string_of_arg a ^ "}" # | Q a -> "\"" ^ string_of_arg a ^ "\"" # | B t -> "$(" ^ to_string t ^ ")" -def string_of_arg_char (c, is_quoted=False): +def string_of_arg_char (c, quote_mode=UNQUOTED): (type, param) = c; if (type == "E"): char = chr (param); - ## MMG 2021-09-20 It might be safe to move everything except for " in the second list, but no need to do it if the tests pass + ## MMG 2021-09-20 It might be safe to move everything except for " in the second list, but no need to do it if the tests pass + ## '!' 
dropped for bash non-interactive bash compatibility ## Chars to escape unconditionally - chars_to_escape = ["'", '"', '`', '(', ')', '{', '}', '$', '!', '&', '|', ';'] + chars_to_escape = ["'", '"', '`', '(', ')', '{', '}', '$', '&', '|', ';'] ## Chars to escape only when not quoted chars_to_escape_when_no_quotes = ['*', '?', '[', ']', '#', '<', '>', '~', ' '] if char in chars_to_escape: return '\\' + char - elif char in chars_to_escape_when_no_quotes and not is_quoted: + elif char in chars_to_escape_when_no_quotes and quote_mode==UNQUOTED: return '\\' + char else: return escaped (param) elif (type == "C"): - if chr(param) == '"': + # HEREDOC should never escape double quotes per POSIX 2.7.4 + if quote_mode==QUOTED and chr(param) == '"': return '\\"' else: return chr (param); @@ -360,7 +366,7 @@ def string_of_arg_char (c, is_quoted=False): print ("Unexpected param for T: %s" % param); abort (); elif (type == "A"): - return "$((" + string_of_arg (param, is_quoted) + "))"; + return "$((" + string_of_arg (param, quote_mode) + "))"; elif (type == "V"): assert (len (param) == 4); if (param [0] == "Length"): @@ -381,11 +387,11 @@ def string_of_arg_char (c, is_quoted=False): else: os.abort (); # For my own sanity - stri += string_of_var_type (vt) + string_of_arg (a, is_quoted) + "}"; + stri += string_of_var_type (vt) + string_of_arg (a, quote_mode) + "}"; return stri; elif (type == "Q"): - return "\"" + string_of_arg (param, is_quoted=True) + "\""; + return "\"" + string_of_arg (param, quote_mode=QUOTED) + "\""; elif (type == "B"): return "$(" + to_string (param) + ")"; else: @@ -395,13 +401,13 @@ def string_of_arg_char (c, is_quoted=False): # and string_of_arg = function # | [] -> "" # | c :: a -> string_of_arg_char c ^ string_of_arg a -def string_of_arg (args, is_quoted=False): +def string_of_arg (args, quote_mode=UNQUOTED): # print (args); i = 0 text = [] while i < len(args): - c = string_of_arg_char(args[i], is_quoted) + c = string_of_arg_char(args[i], 
quote_mode=quote_mode) # dash will parse '$?' as # [(C, '$'), (E, '?')] @@ -526,7 +532,8 @@ def string_of_redir (redir): elif (type == "Heredoc"): (t, fd, a) = params; - heredoc = string_of_arg (a, is_quoted = True); + # MMG 2022-08-23 not quite correct + heredoc = string_of_arg (a, quote_mode=HEREDOC); marker = fresh_marker0 (heredoc); stri = show_unless (0, fd) + "<<"; diff --git a/ocaml/ast.ml b/ocaml/ast.ml index b93735c..8127b51 100644 --- a/ocaml/ast.ml +++ b/ocaml/ast.ml @@ -98,6 +98,11 @@ let skip = Command (-1,[],[],[]) let special_chars : char list = explode "|&;<>()$`\\\"'" +type quote_mode = + QUnquoted + | QQuoted + | QHeredoc + let needs_escaping c = List.mem c special_chars let rec of_node (n : node union ptr) : t = @@ -442,33 +447,34 @@ and string_of_if c t e = | If (c,t,e) -> "; el" ^ string_of_if c t e | _ -> "; else " ^ to_string e ^ "; fi") -and string_of_arg_char ?quoted:(quoted=false) = function +and string_of_arg_char ?quote_mode:(quote_mode=QUnquoted) = function | E c -> - let chars_to_escape = "'\"`(){}$!&|;" in + (* removed ! 
from chars_to_escape to have the right behavior in non-interactive shells *) + let chars_to_escape = "'\"`(){}$&|;" in let chars_to_escape_when_no_quotes = "*?[]#<>~ " in if String.contains chars_to_escape c then "\\" ^ String.make 1 c - else if String.contains chars_to_escape_when_no_quotes c && not quoted + else if String.contains chars_to_escape_when_no_quotes c && quote_mode=QUnquoted then "\\" ^ String.make 1 c else Char.escaped c - | C '"' when quoted -> "\\\"" + | C '"' when quote_mode=QQuoted -> "\\\"" | C c -> String.make 1 c | T None -> "~" | T (Some u) -> "~" ^ u - | A a -> "$((" ^ string_of_arg ~quoted a ^ "))" + | A a -> "$((" ^ string_of_arg ~quote_mode a ^ "))" | V (Length,_,name,_) -> "${#" ^ name ^ "}" | V (vt,nul,name,a) -> - "${" ^ name ^ (if nul then ":" else "") ^ string_of_var_type vt ^ string_of_arg ~quoted a ^ "}" - | Q a -> "\"" ^ string_of_arg ~quoted:true a ^ "\"" + "${" ^ name ^ (if nul then ":" else "") ^ string_of_var_type vt ^ string_of_arg ~quote_mode a ^ "}" + | Q a -> "\"" ^ string_of_arg ~quote_mode:QQuoted a ^ "\"" | B t -> "$(" ^ to_string t ^ ")" -and string_of_arg ?quoted:(quoted=false) = function +and string_of_arg ?quote_mode:(quote_mode=QUnquoted) = function | [] -> "" | c :: a -> - let char = string_of_arg_char ~quoted c in + let char = string_of_arg_char ~quote_mode c in if char = "$" && next_is_escaped a - then "\\$" ^ string_of_arg ~quoted a - else char ^ string_of_arg ~quoted a + then "\\$" ^ string_of_arg ~quote_mode a + else char ^ string_of_arg ~quote_mode a and next_is_escaped = function | E _ :: _ -> true @@ -489,7 +495,7 @@ and string_of_redir = function | Dup (ToFD,fd,tgt) -> show_unless 1 fd ^ ">&" ^ string_of_arg tgt | Dup (FromFD,fd,tgt) -> show_unless 0 fd ^ "<&" ^ string_of_arg tgt | Heredoc (t,fd,a) -> - let heredoc = string_of_arg ~quoted:true a in + let heredoc = string_of_arg ~quote_mode:QHeredoc a in let marker = fresh_marker (lines heredoc) in show_unless 0 fd ^ "<<" ^ (if t = XHere then marker else 
"'" ^ marker ^ "'") ^ "\n" ^ heredoc ^ marker ^ "\n" From 41f4faf0c25508b14cdf2ffc10560d7e60703b72 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 23 Aug 2022 10:07:07 -0400 Subject: [PATCH 371/401] readme note, bump version --- README.md | 4 ++++ pyproject.toml | 2 +- setup.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b2cbe81..a984246 100644 --- a/README.md +++ b/README.md @@ -67,3 +67,7 @@ The pretty printer does its best to produce valid shell scripts, but it's possib For example, the Python AST `[[['Q', [['C', 34]]]]]` represents a quoted field containing a double quote character. Translated literally, this would yield the string `"""`, which is not a valid shell script. The pretty printer will instead automatically escape the inner quote, rendering `"\""`. While the printer tries to get things right either way, you should use escapes to signal to the printer when to escape: you should use the Python AST `[[['Q', [['E', 34]]]]]` to mark the inner double quote as escaped. + +# Known issues + +We currently do not escape the character `!` (exclamation point). In an interactive shell, `!` is likely treated as a history substitution (and so should be escaped), but in a non-interactive shell, `!` is treated normally. We currently cater to non-interactive shells; eventually this behavior will be controllable. 
diff --git a/pyproject.toml b/pyproject.toml index 069e94c..a66f117 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "libdash" -version = "0.3" +version = "0.3.1" authors = [ { name="Michael Greenberg", email="michael@greenberg.science" }, ] diff --git a/setup.py b/setup.py index 78501f1..541a899 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ def run(self): setup(name='libdash', packages=['libdash'], cmdclass={'build_py': libdash_build_py}, - version='0.3', + version='0.3.1', long_description=long_description, long_description_content_type='text/markdown', include_package_data=True, From 64e2390e48b79dff603251e72620ad0fdd3ec0b7 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 7 Nov 2022 09:17:36 -0500 Subject: [PATCH 372/401] Fix macOS build (#23) Directly invoking `setup.py` was causing a build failure on macOS; using `pip3` solves the problem. Signed-off-by: Michael Greenberg --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6f6ff6e..0f6e295 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -92,7 +92,7 @@ jobs: # we don't reuse the wheels so that all of the CI runs can happen concurrently - name: Install Python directly - run: sudo python3 setup.py install + run: sudo pip3 install . 
- name: Test Python bindings run: make -C python test From 1277fd280435d850562adc0763fd27c7f0955ad5 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 28 Nov 2023 14:46:06 -0500 Subject: [PATCH 373/401] update checkout --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0f6e295..693e518 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Check version numbers run: ./version.sh @@ -40,7 +40,7 @@ jobs: fi - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Build wheels uses: pypa/cibuildwheel@v2.8.1 @@ -77,7 +77,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Use OCaml ${{ matrix.ocaml-compiler }} uses: avsm/setup-ocaml@v2 From 4e241000f46838fd45b00ee6e0ab8b82124c051a Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 19 Dec 2023 13:18:33 -0500 Subject: [PATCH 374/401] Static `ctypes`; building with `dune` (#28) Signed-off-by: Michael Greenberg --- .github/workflows/build.yml | 8 +- .gitignore | 7 + dune | 36 +++++ dune-project | 22 ++++ dune-workspace | 4 + libdash.opam | 56 ++++---- ocaml/ast.ml | 12 +- ocaml/ast_atd.atd | 26 ++-- ocaml/dash.ml | 241 ++-------------------------------- ocaml/dash.mli | 6 +- ocaml/dune | 62 +++++++++ ocaml/function_description.ml | 36 +++++ ocaml/json_to_shell.ml | 1 + ocaml/shell_to_json.ml | 2 + ocaml/type_description.ml | 191 +++++++++++++++++++++++++++ src/type_description.ml | 184 ++++++++++++++++++++++++++ 16 files changed, 607 insertions(+), 287 deletions(-) create mode 100644 dune create mode 100644 dune-project create mode 100644 dune-workspace create mode 100644 ocaml/dune create mode 100644 ocaml/function_description.ml create mode 100644 ocaml/type_description.ml 
create mode 100644 src/type_description.ml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 693e518..8282400 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -83,13 +83,11 @@ jobs: uses: avsm/setup-ocaml@v2 with: ocaml-compiler: ${{ matrix.ocaml-compiler }} + dune-cache: true - - name: Install OCaml bindings - run: opam install . + - name: Install and test OCaml bindings + run: opam install --with-test --working-dir . - - name: Test OCaml bindings - run: opam exec -- make -C ocaml test - # we don't reuse the wheels so that all of the CI runs can happen concurrently - name: Install Python directly run: sudo pip3 install . diff --git a/.gitignore b/.gitignore index 4ee132e..7b8d7c5 100644 --- a/.gitignore +++ b/.gitignore @@ -22,8 +22,15 @@ Makefile /stamp-h1 # generated by make +/src/builtins.h +/src/nodes.h +/src/syntax.h +/src/token.h /src/token_vars.h +# generated by dune +_build + # Apple debug symbol bundles *.dSYM/ diff --git a/dune b/dune new file mode 100644 index 0000000..7ca799c --- /dev/null +++ b/dune @@ -0,0 +1,36 @@ +(data_only_dirs src) + +(rule + (deps (source_tree src) configure.ac Makefile.am) + (targets libdash.a dlldash.so + builtins.h nodes.h syntax.h token.h token_vars.h + ) + (action + (bash + "\ + \n set -e\ + \n if [ \"$(uname -s)\" = \"Darwin\" ]; then glibtoolize; else libtoolize; fi\ + \n aclocal && autoheader && automake --add-missing && autoconf\ + \n ./configure --prefix=\"$(pwd)\"\ + \n %{make}\ + \n %{make} install\ + \n cp lib/libdash.a libdash.a\ + \n cp lib/dlldash.so dlldash.so\ + \n cp src/{builtins,nodes,syntax,token,token_vars}.h .\ + \n"))) + +(subdir src + (rule + (deps ../builtins.h ../nodes.h ../syntax.h ../token.h ../token_vars.h) + (targets builtins.h nodes.h syntax.h token.h token_vars.h) + (action + (progn + (copy ../builtins.h builtins.h) + (copy ../nodes.h nodes.h) + (copy ../syntax.h syntax.h) + (copy ../token.h token.h) + (copy ../token_vars.h 
token_vars.h))))) + +(library + (name dash) + (foreign_archives dash)) diff --git a/dune-project b/dune-project new file mode 100644 index 0000000..fd6c868 --- /dev/null +++ b/dune-project @@ -0,0 +1,22 @@ +(lang dune 3.12) +(name libdash) +(using ctypes 0.3) + +(source (github mgree/libdash)) +(license BSD-3-Clause) +(authors "Michael Greenberg") +(maintainers "michael@greenberg.science") + +(package + (name libdash) + (synopsis "Bindings to the dash shell's parser") + (depends + ("ctypes" (>= "0.21.1")) + ("ctypes-foreign" (>= "0.21.1")) + ("atdgen" (>= "2.15.0")) + ("conf-autoconf" (>= 0.1)) + ("conf-aclocal" (>= 2)) + ("conf-automake" (>= 1)) + ("conf-libtool" (>= 1)))) + +(generate_opam_files) \ No newline at end of file diff --git a/dune-workspace b/dune-workspace new file mode 100644 index 0000000..42ee224 --- /dev/null +++ b/dune-workspace @@ -0,0 +1,4 @@ +(lang dune 3.12) +(env + (dev + (flags (:standard -warn-error -27)))) \ No newline at end of file diff --git a/libdash.opam b/libdash.opam index 811fd1b..9278923 100644 --- a/libdash.opam +++ b/libdash.opam @@ -1,40 +1,34 @@ +# This file is generated by dune, edit dune-project instead opam-version: "2.0" synopsis: "Bindings to the dash shell's parser" -maintainer: "Michael Greenberg " -authors: "Michael Greenberg " +maintainer: ["michael@greenberg.science"] +authors: ["Michael Greenberg"] license: "BSD-3-Clause" homepage: "https://github.com/mgree/libdash" bug-reports: "https://github.com/mgree/libdash/issues" depends: [ - "ocaml" {>= "4.07"} - "ocamlfind" {>= "1.8.0"} - "ctypes" {>= "0.18.0"} - "ctypes-foreign" {>= "0.18.0"} - "atdgen" {>= "2.3.2"} - "conf-autoconf" {build} - "conf-aclocal" {build} - "conf-automake" {build} - "conf-libtool" {build} + "dune" {>= "3.12"} + "ctypes" {>= "0.21.1"} + "ctypes-foreign" {>= "0.21.1"} + "atdgen" {>= "2.15.0"} + "conf-autoconf" {>= "0.1"} + "conf-aclocal" {>= "2"} + "conf-automake" {>= "1"} + "conf-libtool" {>= "1"} + "odoc" {with-doc} ] build: [ - ["libtoolize"] 
{os != "macos"} - ["glibtoolize"] {os = "macos"} - ["aclocal"] - ["autoheader"] - ["automake" "--add-missing"] - ["autoconf"] - ["mkdir" "_build"] - ["./configure" "--prefix=%{build}%/_build"] - [make] - [make "install"] # into _build - ["ocaml/mk_meta.sh" "%{_:lib}%"] # pass along the lib directory for the rpath in the META - [make "-C" "ocaml" "all"] - ["./mk_dot_install.sh"] - ["./ldconfig.sh"] # fix up .so files if ldconfig didn't do it - [make "-C" "ocaml" "test"] {with-test} + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] ] -dev-repo: "git+https:///github.com/mgree/libdash" -url { - src: "https://github.com/mgree/libdash/archive/v0.3.tar.gz" -} - +dev-repo: "git+https://github.com/mgree/libdash.git" diff --git a/ocaml/ast.ml b/ocaml/ast.ml index 8127b51..9205e83 100644 --- a/ocaml/ast.ml +++ b/ocaml/ast.ml @@ -86,25 +86,20 @@ let string_of_var_type = function open Ctypes -open Foreign open Dash let rec last = function | [] -> None | [x] -> Some x - | x::xs -> last xs + | _::xs -> last xs let skip = Command (-1,[],[],[]) -let special_chars : char list = explode "|&;<>()$`\\\"'" - type quote_mode = QUnquoted | QQuoted | QHeredoc -let needs_escaping c = List.mem c special_chars - let rec of_node (n : node union ptr) : t = if nullptr n then skip @@ -225,7 +220,7 @@ and of_binary (n : node union ptr) = (of_node (getf n nbinary_ch1), of_node (getf n nbinary_ch2)) and to_arg (n : narg structure) : arg = - let a,s,bqlist,stack = parse_arg ~tilde_ok:true ~assign:false (explode (getf n narg_text)) (getf n narg_backquote) [] in + let a,s,bqlist,stack = parse_arg ~assign:false (explode (getf n narg_text)) (getf n narg_backquote) [] in (* we should have used up the string and have no backquotes left in our list *) assert (s = []); assert (nullptr bqlist); @@ -304,6 +299,7 @@ and parse_arg ?tilde_ok:(tilde_ok=false) ~assign:(assign:bool) (s : char list) ( then (* we're in arithmetic 
or double quotes, so tilde is ignored *) arg_char assign (C '~') s bqlist stack else + let _ = tilde_ok in (* unused? *) let uname,s' = parse_tilde [] s in arg_char assign (T uname) s' bqlist stack (* ordinary character *) @@ -325,7 +321,7 @@ and parse_tilde acc s = and arg_char assign c s bqlist stack = let tilde_ok = match c with - | C c -> assign && (match last s with + | C _ -> assign && (match last s with | Some ':' -> true | _ -> false) | _ -> false diff --git a/ocaml/ast_atd.atd b/ocaml/ast_atd.atd index 647a1de..989789b 100644 --- a/ocaml/ast_atd.atd +++ b/ocaml/ast_atd.atd @@ -1,8 +1,8 @@ -type char = int +type char = int -type linno = int +type linno = int -type t = [ +type t = [ Command of (linno * assign list * args * redirection list) (* assign, args, redir *) | Pipe of (bool * t list) (* background?, commands *) | Redir of (linno * t * redirection list) @@ -19,15 +19,15 @@ type t = [ | Defun of (linno * string * t) (* name, body *) ] -type assign = (string * arg) +type assign = (string * arg) -type redirection = [ +type redirection = [ File of (redir_type * int * arg) | Dup of (dup_type * int * arg) | Heredoc of (heredoc_type * int * arg) ] -type redir_type = [ +type redir_type = [ To | Clobber | From @@ -35,21 +35,21 @@ type redir_type = [ | Append ] -type dup_type = [ +type dup_type = [ ToFD | FromFD ] -type heredoc_type = [ +type heredoc_type = [ Here | XHere (* for when in a quote... not sure when this comes up *) ] -type args = arg list +type args = arg list -type arg = arg_char list +type arg = arg_char list -type arg_char = [ +type arg_char = [ C of char | E of char (* escape... 
necessary for expansion *) | T of string option (* tilde *) @@ -59,7 +59,7 @@ type arg_char = [ | B of t (* backquote *) ] -type var_type = [ +type var_type = [ Normal | Minus | Plus @@ -72,7 +72,7 @@ type var_type = [ | Length ] -type case = { +type case = { cpattern : arg list; cbody : t } \ No newline at end of file diff --git a/ocaml/dash.ml b/ocaml/dash.ml index ec0f33f..65215e9 100644 --- a/ocaml/dash.ml +++ b/ocaml/dash.ml @@ -1,246 +1,33 @@ -open Printf open Ctypes -open Ctypes_types -open Foreign +include Cdash.Functions +include Cdash.Types (* First, some dash trivia. *) - -type stackmark - -let stackmark : stackmark structure typ = structure "stackmark" -let stackp = field stackmark "stackp" (ptr void) -let nxt = field stackmark "nxt" string -let size = field stackmark "stacknleft" PosixTypes.size_t -let () = seal stackmark -let init_stack () = - let stack = make stackmark in - foreign "setstackmark" (ptr stackmark @-> returning void) (addr stack); - stack - -let pop_stack stack = - foreign "popstackmark" (ptr stackmark @-> returning void) (addr stack) +type stackmark_t = Stackmark.stackmark -let alloc_stack_string = - foreign "sstrdup" (string @-> returning (ptr char)) - -let free_stack_string s = - foreign "stunalloc" (ptr char @-> returning void) s - -let dash_init : unit -> unit = foreign "init" (void @-> returning void) -let initialize_dash_errno : unit -> unit = - foreign "initialize_dash_errno" (void @-> returning void) +let init_stack () : stackmark = + let stack = Ctypes.make stackmark in + setstackmark (addr stack); + stack -let initialize () = +let pop_stack stack : unit = + popstackmark (addr stack) + +let initialize () : unit = initialize_dash_errno (); dash_init () -let popfile : unit -> unit = - foreign "popfile" (void @-> returning void) - -let setinputstring : char ptr -> unit = - foreign "setinputstring" (ptr char @-> returning void) - let setinputtostdin () : unit = - foreign "setinputfd" (int @-> int @-> returning void) 0 0 (* don't 
both pushing the file *) + setinputfd 0 0 (* don't bother pushing the file *) let setinputfile ?push:(push=false) (s : string) : unit = - let _ = foreign "setinputfile" (string @-> int @-> returning int) s (if push then 1 else 0) in + let _ = raw_setinputfile s (if push then 1 else 0) in () let setvar (x : string) (v : string) : unit = - let _ = foreign "setvar" (string @-> string @-> int @-> returning (ptr void)) x v 0 in + let _ = raw_setvar x v 0 in () - -let setalias (name : string) (mapping : string) : unit = - foreign "setalias" (string @-> string @-> returning void) name mapping - -let unalias (name : string) : unit = - foreign "unalias" (string @-> returning void) name - -(* Next, a utility function that isn't in Unix or ExtUnix. *) - -let freshfd_ge10 (fd : int) : int = - foreign "freshfd_ge10" (int @-> returning int) fd - -(* Actual AST stuff begins here. *) -(* first, we define the node type... *) - -type node -let node : node union typ = union "node" -let node_type = field node "type" int -(* but we don't seal it yet! 
*) - -type nodelist -let nodelist : nodelist structure typ = structure "nodelist" -let nodelist_next = field nodelist "next" (ptr nodelist) -let nodelist_n = field nodelist "n" (ptr node) -let () = seal nodelist - -type ncmd - -let ncmd : ncmd structure typ = structure "ncmd" -let ncmd_type = field ncmd "type" int -let ncmd_linno = field ncmd "linno" int -let ncmd_assign = field ncmd "assign" (ptr node) -let ncmd_args = field ncmd "args" (ptr node) -let ncmd_redirect = field ncmd "redirect" (ptr node) -let () = seal ncmd - -let node_ncmd = field node "ncmd" ncmd - -type npipe - -let npipe : npipe structure typ = structure "npipe" -let npipe_type = field npipe "type" int -let npipe_backgnd = field npipe "backgnd" int -let npipe_cmdlist = field npipe "cmdlist" (ptr nodelist) -let () = seal npipe - -let node_npipe = field node "npipe" npipe - -type nredir - -let nredir : nredir structure typ = structure "nredir" -let nredir_type = field nredir "type" int -let nredir_linno = field nredir "linno" int -let nredir_n = field nredir "n" (ptr node) -let nredir_redirect = field nredir "redirect" (ptr node) -let () = seal nredir - -let node_nredir = field node "nredir" nredir - -type nbinary - -let nbinary : nbinary structure typ = structure "nbinary" -let nbinary_type = field nbinary "type" int -let nbinary_ch1 = field nbinary "ch1" (ptr node) -let nbinary_ch2 = field nbinary "ch2" (ptr node) -let () = seal nbinary - -let node_nbinary = field node "nbinary" nbinary - -type nif - -let nif : nif structure typ = structure "nif" -let nif_type = field nif "type" int -let nif_test = field nif "test" (ptr node) -let nif_ifpart = field nif "ifpart" (ptr node) -let nif_elsepart = field nif "elsepart" (ptr node) -let () = seal nif - -let node_nif = field node "nif" nif - -type nfor - -let nfor : nfor structure typ = structure "nfor" -let nfor_type = field nfor "type" int -let nfor_linno = field nfor "linno" int -let nfor_args = field nfor "args" (ptr node) -let nfor_body = field nfor 
"body" (ptr node) -let nfor_var = field nfor "var" string -let () = seal nfor - -let node_nfor = field node "nfor" nfor - -type ncase - -let ncase : ncase structure typ = structure "ncase" -let ncase_type = field ncase "type" int -let ncase_linno = field ncase "linno" int -let ncase_expr = field ncase "expr" (ptr node) -let ncase_cases = field ncase "cases" (ptr node) -let () = seal ncase - -let node_ncase = field node "ncase" ncase - -type nclist - -let nclist : nclist structure typ = structure "nclist" -let nclist_type = field nclist "type" int -let nclist_next = field nclist "next" (ptr node) -let nclist_pattern = field nclist "pattern" (ptr node) -let nclist_body = field nclist "body" (ptr node) -let () = seal nclist - -let node_nclist = field node "nclist" nclist - -type ndefun - -let ndefun : ndefun structure typ = structure "ndefun" -let ndefun_type = field ndefun "type" int -let ndefun_linno = field ndefun "linno" int -let ndefun_text = field ndefun "text" string -let ndefun_body = field ndefun "body" (ptr node) -let () = seal ndefun - -let node_ndefun = field node "ndefun" ndefun - -type narg - -let narg : narg structure typ = structure "narg" -let narg_type = field narg "type" int -let narg_next = field narg "next" (ptr node) -let narg_text = field narg "text" string -let narg_backquote = field narg "backquote" (ptr nodelist) -let () = seal narg - -let node_narg = field node "narg" narg - -type nfile - -let nfile : nfile structure typ = structure "nfile" -let nfile_type = field nfile "type" int -let nfile_next = field nfile "next" (ptr node) -let nfile_fd = field nfile "fd" int -let nfile_fname = field nfile "fname" (ptr node) -let nfile_expfname = field nfile "expfname" string -let () = seal nfile - -let node_nfile = field node "nfile" nfile - -type ndup - -let ndup : ndup structure typ = structure "ndup" -let ndup_type = field ndup "type" int -let ndup_next = field ndup "next" (ptr node) -let ndup_fd = field ndup "fd" int -let ndup_dupfd = field ndup 
"dupfd" int -let ndup_vname = field ndup "vname" (ptr node) -let () = seal ndup - -let node_ndup = field node "ndup" ndup - -type nhere - -let nhere : nhere structure typ = structure "nhere" -let nhere_type = field nhere "type" int -let nhere_next = field nhere "next" (ptr node) -let nhere_fd = field nhere "fd" int -let nhere_doc = field nhere "doc" (ptr node) -let () = seal nhere - -let node_nhere = field node "nhere" nhere - -type nnot - -let nnot : nnot structure typ = structure "nnot" -let nnot_type = field nnot "type" int -let nnot_com = field nnot "com" (ptr node) -let () = seal nnot - -let node_nnot = field node "nnot" nnot -let () = seal node - -let parsecmd_safe : int -> node union ptr = - foreign "parsecmd_safe" (int @-> returning (ptr node)) - -let parse s = - setinputstring s; (* TODO set stack mark? *) - parsecmd_safe 0 - -let neof : node union ptr = foreign_value "tokpushback" node -let nerr : node union ptr = foreign_value "lasttoken" node let addrof p = raw_address_of_ptr (to_voidp p) diff --git a/ocaml/dash.mli b/ocaml/dash.mli index f18c119..a7bf212 100644 --- a/ocaml/dash.mli +++ b/ocaml/dash.mli @@ -11,9 +11,9 @@ val initialize : unit -> unit see libdash/test/test.ml for an example usage in parse_all *) -type stackmark -val init_stack : unit -> stackmark Ctypes.structure -val pop_stack : stackmark Ctypes.structure -> unit +type stackmark_t +val init_stack : unit -> stackmark_t Ctypes.structure +val pop_stack : stackmark_t Ctypes.structure -> unit val alloc_stack_string : string -> (char Ctypes.ptr) val free_stack_string : (char Ctypes.ptr) -> unit diff --git a/ocaml/dune b/ocaml/dune new file mode 100644 index 0000000..0df89cd --- /dev/null +++ b/ocaml/dune @@ -0,0 +1,62 @@ +(executables + (names shell_to_json json_to_shell) + (public_names shell_to_json json_to_shell) + (modules shell_to_json json_to_shell ast_json) + (modes (native exe)) + (foreign_archives ../dash) + (libraries libdash yojson atdgen)) + +(library + (name libdash) + 
(public_name libdash) + (modes native) + (modules (:standard \ json_to_shell shell_to_json ast_json)) + (libraries ctypes ctypes.foreign) + (ctypes + (external_library_name dash) + (build_flags_resolver (vendored (c_flags :standard) (c_library_flags :standard))) + (deps (glob_files ../src/*.h) ../src/builtins.h ../src/nodes.h ../src/syntax.h ../src/token.h ../src/token_vars.h) + (headers (preamble + "\ + \n#include \"../src/shell.h\"\ + \n#include \"../src/memalloc.h\"\ + \n#include \"../src/mystring.h\"\ + \n#include \"../src/init.h\"\ + \n#include \"../src/main.h\"\ + \n#include \"../src/input.h\"\ + \n#include \"../src/var.h\"\ + \n#include \"../src/alias.h\"\ + \n#include \"../src/redir.h\"\ + \n#include \"../src/parser.h\"\ + \n#include \"../src/nodes.h\"\ + \n")) + (type_description + (instance Types) + (functor Type_description)) + (function_description + (instance Functions) + (functor Function_description)) + (generated_types Types_generated) + (generated_entry_point Cdash))) + +(rule + (targets ast_json.mli ast_json.ml) + (deps ast_atd.atd) + (action + (progn + (run atdgen -j -j-std ast_atd.atd) + (run sed -i -e "/type char = Libdash.Ast.char/d" ast_atd_j.ml) + (run sed -i -e "/type char = Libdash.Ast.char/d" ast_atd_j.mli) + (run mv ast_atd_j.ml ast_json.ml) + (run mv ast_atd_j.mli ast_json.mli)))) + +(rule + (alias runtest) + (deps (glob_files ../test/tests/*) (glob_files ../test/pash_tests/*) + ../test/round_trip.sh rt.sh %{bin:json_to_shell} %{bin:shell_to_json}) + (action + (setenv + JSON_TO_SHELL %{bin:json_to_shell} + (setenv + SHELL_TO_JSON %{bin:shell_to_json} + (bash "{ find ../test/tests ../test/pash_tests -type f | while read f; do ../test/round_trip.sh ./rt.sh \"$f\"; done | egrep '^[A-Z0-9_]+:' | cut -d ':' -f 1 | sort | uniq -c | grep ':' ; } && echo FAILED && exit 1 || { echo OK; exit 0; }"))))) diff --git a/ocaml/function_description.ml b/ocaml/function_description.ml new file mode 100644 index 0000000..cf65d95 --- /dev/null +++ 
b/ocaml/function_description.ml @@ -0,0 +1,36 @@ +open Ctypes + +module Types = Types_generated +open Types + +module Functions (F : Ctypes.FOREIGN) = struct + open F + + let setstackmark = foreign "setstackmark" (ptr stackmark @-> returning void) + let popstackmark = foreign "popstackmark" (ptr stackmark @-> returning void) + + let alloc_stack_string = foreign "sstrdup" (string @-> returning (ptr char)) + let free_stack_string = foreign "stunalloc" (ptr char @-> returning void) + + let dash_init = foreign "init" (void @-> returning void) + let initialize_dash_errno = foreign "initialize_dash_errno" (void @-> returning void) + + let popfile = foreign "popfile" (void @-> returning void) + let setinputstring = foreign "setinputstring" (ptr char @-> returning void) + let setinputfd = foreign "setinputfd" (int @-> int @-> returning void) + let raw_setinputfile = foreign "setinputfile" (string @-> int @-> returning int) + + let raw_setvar = foreign "setvar" (string @-> string @-> int @-> returning (ptr void)) + + let setalias = foreign "setalias" (string @-> string @-> returning void) + let unalias = foreign "unalias" (string @-> returning void) + + (* Unix/ExtUnix don't let you renumber things the way you want *) + let freshfd_ge10 = foreign "freshfd_ge10" (int @-> returning int) + + let parsecmd_safe = foreign "parsecmd_safe" (int @-> returning (ptr node)) + let neof = foreign_value "tokpushback" node + let nerr = foreign_value "lasttoken" node +end + + diff --git a/ocaml/json_to_shell.ml b/ocaml/json_to_shell.ml index 7f41033..2474e8c 100644 --- a/ocaml/json_to_shell.ml +++ b/ocaml/json_to_shell.ml @@ -1,4 +1,5 @@ (* This is straight-up copied from the libdash tests *) +open Libdash let verbose = ref false let input_src : string option ref = ref None diff --git a/ocaml/shell_to_json.ml b/ocaml/shell_to_json.ml index d170e5c..29f32ac 100644 --- a/ocaml/shell_to_json.ml +++ b/ocaml/shell_to_json.ml @@ -1,5 +1,7 @@ (* This is straight-up copied from the libdash tests *) 
+open Libdash + let verbose = ref false let input_src : string option ref = ref None diff --git a/ocaml/type_description.ml b/ocaml/type_description.ml new file mode 100644 index 0000000..ef6a134 --- /dev/null +++ b/ocaml/type_description.ml @@ -0,0 +1,191 @@ +open Ctypes + +module Types (F : Ctypes.TYPE) = struct + open F + + (* stackmarks [used for string allocation in dash] *) + module Stackmark = struct + + type stackmark + type t = stackmark Ctypes.structure + + let t : stackmark structure typ = structure "stackmark" + let stackp = field t "stackp" (ptr void) + let nxt = field t "stacknxt" string + let size = field t "stacknleft" F.size_t + let () = seal t + end + + type stackmark = Stackmark.t + let stackmark = Stackmark.t + + (* AST nodes *) + + (* define the node type... *) + type node + let node : node union typ = union "node" + let node_type = field node "type" int + (* ...but don't seal it yet! *) + + type nodelist + let nodelist : nodelist structure typ = structure "nodelist" + let nodelist_next = field nodelist "next" (ptr nodelist) + let nodelist_n = field nodelist "n" (ptr node) + let () = seal nodelist + + type ncmd + + let ncmd : ncmd structure typ = structure "ncmd" + let ncmd_type = field ncmd "type" int + let ncmd_linno = field ncmd "linno" int + let ncmd_assign = field ncmd "assign" (ptr node) + let ncmd_args = field ncmd "args" (ptr node) + let ncmd_redirect = field ncmd "redirect" (ptr node) + let () = seal ncmd + + let node_ncmd = field node "ncmd" ncmd + + type npipe + + let npipe : npipe structure typ = structure "npipe" + let npipe_type = field npipe "type" int + let npipe_backgnd = field npipe "backgnd" int + let npipe_cmdlist = field npipe "cmdlist" (ptr nodelist) + let () = seal npipe + + let node_npipe = field node "npipe" npipe + + type nredir + + let nredir : nredir structure typ = structure "nredir" + let nredir_type = field nredir "type" int + let nredir_linno = field nredir "linno" int + let nredir_n = field nredir "n" (ptr node) 
+ let nredir_redirect = field nredir "redirect" (ptr node) + let () = seal nredir + + let node_nredir = field node "nredir" nredir + + type nbinary + + let nbinary : nbinary structure typ = structure "nbinary" + let nbinary_type = field nbinary "type" int + let nbinary_ch1 = field nbinary "ch1" (ptr node) + let nbinary_ch2 = field nbinary "ch2" (ptr node) + let () = seal nbinary + + let node_nbinary = field node "nbinary" nbinary + + type nif + + let nif : nif structure typ = structure "nif" + let nif_type = field nif "type" int + let nif_test = field nif "test" (ptr node) + let nif_ifpart = field nif "ifpart" (ptr node) + let nif_elsepart = field nif "elsepart" (ptr node) + let () = seal nif + + let node_nif = field node "nif" nif + + type nfor + + let nfor : nfor structure typ = structure "nfor" + let nfor_type = field nfor "type" int + let nfor_linno = field nfor "linno" int + let nfor_args = field nfor "args" (ptr node) + let nfor_body = field nfor "body" (ptr node) + let nfor_var = field nfor "var" string + let () = seal nfor + + let node_nfor = field node "nfor" nfor + + type ncase + + let ncase : ncase structure typ = structure "ncase" + let ncase_type = field ncase "type" int + let ncase_linno = field ncase "linno" int + let ncase_expr = field ncase "expr" (ptr node) + let ncase_cases = field ncase "cases" (ptr node) + let () = seal ncase + + let node_ncase = field node "ncase" ncase + + type nclist + + let nclist : nclist structure typ = structure "nclist" + let nclist_type = field nclist "type" int + let nclist_next = field nclist "next" (ptr node) + let nclist_pattern = field nclist "pattern" (ptr node) + let nclist_body = field nclist "body" (ptr node) + let () = seal nclist + + let node_nclist = field node "nclist" nclist + + type ndefun + + let ndefun : ndefun structure typ = structure "ndefun" + let ndefun_type = field ndefun "type" int + let ndefun_linno = field ndefun "linno" int + let ndefun_text = field ndefun "text" string + let ndefun_body = 
field ndefun "body" (ptr node) + let () = seal ndefun + + let node_ndefun = field node "ndefun" ndefun + + type narg + + let narg : narg structure typ = structure "narg" + let narg_type = field narg "type" int + let narg_next = field narg "next" (ptr node) + let narg_text = field narg "text" string + let narg_backquote = field narg "backquote" (ptr nodelist) + let () = seal narg + + let node_narg = field node "narg" narg + + type nfile + + let nfile : nfile structure typ = structure "nfile" + let nfile_type = field nfile "type" int + let nfile_next = field nfile "next" (ptr node) + let nfile_fd = field nfile "fd" int + let nfile_fname = field nfile "fname" (ptr node) + let nfile_expfname = field nfile "expfname" string + let () = seal nfile + + let node_nfile = field node "nfile" nfile + + type ndup + + let ndup : ndup structure typ = structure "ndup" + let ndup_type = field ndup "type" int + let ndup_next = field ndup "next" (ptr node) + let ndup_fd = field ndup "fd" int + let ndup_dupfd = field ndup "dupfd" int + let ndup_vname = field ndup "vname" (ptr node) + let () = seal ndup + + let node_ndup = field node "ndup" ndup + + type nhere + + let nhere : nhere structure typ = structure "nhere" + let nhere_type = field nhere "type" int + let nhere_next = field nhere "next" (ptr node) + let nhere_fd = field nhere "fd" int + let nhere_doc = field nhere "doc" (ptr node) + let () = seal nhere + + let node_nhere = field node "nhere" nhere + + type nnot + + let nnot : nnot structure typ = structure "nnot" + let nnot_type = field nnot "type" int + let nnot_com = field nnot "com" (ptr node) + let () = seal nnot + + let node_nnot = field node "nnot" nnot + let () = seal node + +end diff --git a/src/type_description.ml b/src/type_description.ml new file mode 100644 index 0000000..7ee7915 --- /dev/null +++ b/src/type_description.ml @@ -0,0 +1,184 @@ +open Ctypes + +module Types (F : Ctypes.TYPE) = struct + open F + + (* stackmarks [used for string allocation in dash] *) + type 
stackmark + + let stackmark : stackmark structure typ = structure "stackmark" + let stackp = field stackmark "stackp" (ptr void) + let nxt = field stackmark "nxt" string + let size = field stackmark "stacknleft" F.size_t + let () = seal stackmark + + (* AST nodes *) + + (* define the node type... *) + type node + let node : node union typ = union "node" + let node_type = field node "type" int + (* ...but don't seal it yet! *) + + type nodelist + let nodelist : nodelist structure typ = structure "nodelist" + let nodelist_next = field nodelist "next" (ptr nodelist) + let nodelist_n = field nodelist "n" (ptr node) + let () = seal nodelist + + type ncmd + + let ncmd : ncmd structure typ = structure "ncmd" + let ncmd_type = field ncmd "type" int + let ncmd_linno = field ncmd "linno" int + let ncmd_assign = field ncmd "assign" (ptr node) + let ncmd_args = field ncmd "args" (ptr node) + let ncmd_redirect = field ncmd "redirect" (ptr node) + let () = seal ncmd + + let node_ncmd = field node "ncmd" ncmd + + type npipe + + let npipe : npipe structure typ = structure "npipe" + let npipe_type = field npipe "type" int + let npipe_backgnd = field npipe "backgnd" int + let npipe_cmdlist = field npipe "cmdlist" (ptr nodelist) + let () = seal npipe + + let node_npipe = field node "npipe" npipe + + type nredir + + let nredir : nredir structure typ = structure "nredir" + let nredir_type = field nredir "type" int + let nredir_linno = field nredir "linno" int + let nredir_n = field nredir "n" (ptr node) + let nredir_redirect = field nredir "redirect" (ptr node) + let () = seal nredir + + let node_nredir = field node "nredir" nredir + + type nbinary + + let nbinary : nbinary structure typ = structure "nbinary" + let nbinary_type = field nbinary "type" int + let nbinary_ch1 = field nbinary "ch1" (ptr node) + let nbinary_ch2 = field nbinary "ch2" (ptr node) + let () = seal nbinary + + let node_nbinary = field node "nbinary" nbinary + + type nif + + let nif : nif structure typ = structure 
"nif" + let nif_type = field nif "type" int + let nif_test = field nif "test" (ptr node) + let nif_ifpart = field nif "ifpart" (ptr node) + let nif_elsepart = field nif "elsepart" (ptr node) + let () = seal nif + + let node_nif = field node "nif" nif + + type nfor + + let nfor : nfor structure typ = structure "nfor" + let nfor_type = field nfor "type" int + let nfor_linno = field nfor "linno" int + let nfor_args = field nfor "args" (ptr node) + let nfor_body = field nfor "body" (ptr node) + let nfor_var = field nfor "var" string + let () = seal nfor + + let node_nfor = field node "nfor" nfor + + type ncase + + let ncase : ncase structure typ = structure "ncase" + let ncase_type = field ncase "type" int + let ncase_linno = field ncase "linno" int + let ncase_expr = field ncase "expr" (ptr node) + let ncase_cases = field ncase "cases" (ptr node) + let () = seal ncase + + let node_ncase = field node "ncase" ncase + + type nclist + + let nclist : nclist structure typ = structure "nclist" + let nclist_type = field nclist "type" int + let nclist_next = field nclist "next" (ptr node) + let nclist_pattern = field nclist "pattern" (ptr node) + let nclist_body = field nclist "body" (ptr node) + let () = seal nclist + + let node_nclist = field node "nclist" nclist + + type ndefun + + let ndefun : ndefun structure typ = structure "ndefun" + let ndefun_type = field ndefun "type" int + let ndefun_linno = field ndefun "linno" int + let ndefun_text = field ndefun "text" string + let ndefun_body = field ndefun "body" (ptr node) + let () = seal ndefun + + let node_ndefun = field node "ndefun" ndefun + + type narg + + let narg : narg structure typ = structure "narg" + let narg_type = field narg "type" int + let narg_next = field narg "next" (ptr node) + let narg_text = field narg "text" string + let narg_backquote = field narg "backquote" (ptr nodelist) + let () = seal narg + + let node_narg = field node "narg" narg + + type nfile + + let nfile : nfile structure typ = structure 
"nfile" + let nfile_type = field nfile "type" int + let nfile_next = field nfile "next" (ptr node) + let nfile_fd = field nfile "fd" int + let nfile_fname = field nfile "fname" (ptr node) + let nfile_expfname = field nfile "expfname" string + let () = seal nfile + + let node_nfile = field node "nfile" nfile + + type ndup + + let ndup : ndup structure typ = structure "ndup" + let ndup_type = field ndup "type" int + let ndup_next = field ndup "next" (ptr node) + let ndup_fd = field ndup "fd" int + let ndup_dupfd = field ndup "dupfd" int + let ndup_vname = field ndup "vname" (ptr node) + let () = seal ndup + + let node_ndup = field node "ndup" ndup + + type nhere + + let nhere : nhere structure typ = structure "nhere" + let nhere_type = field nhere "type" int + let nhere_next = field nhere "next" (ptr node) + let nhere_fd = field nhere "fd" int + let nhere_doc = field nhere "doc" (ptr node) + let () = seal nhere + + let node_nhere = field node "nhere" nhere + + type nnot + + let nnot : nnot structure typ = structure "nnot" + let nnot_type = field nnot "type" int + let nnot_com = field nnot "com" (ptr node) + let () = seal nnot + + let node_nnot = field node "nnot" nnot + let () = seal node + +end From 0b3814a7befd301ca24742e088dc873a71f31ba8 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Sun, 4 Feb 2024 11:03:08 -0500 Subject: [PATCH 375/401] Rename libdash library to avoid name collision in dune (#29) fix from https://github.com/ocaml/dune/issues/9773 --- dune | 3 ++- ocaml/dune | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dune b/dune index 7ca799c..ccc9ae6 100644 --- a/dune +++ b/dune @@ -6,6 +6,7 @@ builtins.h nodes.h syntax.h token.h token_vars.h ) (action + (setenv CC "%{cc}" (bash "\ \n set -e\ @@ -17,7 +18,7 @@ \n cp lib/libdash.a libdash.a\ \n cp lib/dlldash.so dlldash.so\ \n cp src/{builtins,nodes,syntax,token,token_vars}.h .\ - \n"))) + \n")))) (subdir src (rule diff --git a/ocaml/dune b/ocaml/dune index 0df89cd..0f897b9 
100644 --- a/ocaml/dune +++ b/ocaml/dune @@ -3,18 +3,21 @@ (public_names shell_to_json json_to_shell) (modules shell_to_json json_to_shell ast_json) (modes (native exe)) - (foreign_archives ../dash) (libraries libdash yojson atdgen)) +(rule (copy ../dlldash.so dlldash_native.so)) +(rule (copy ../libdash.a libdash_native.a)) + (library (name libdash) (public_name libdash) (modes native) (modules (:standard \ json_to_shell shell_to_json ast_json)) (libraries ctypes ctypes.foreign) + (foreign_archives dash_native) (ctypes (external_library_name dash) - (build_flags_resolver (vendored (c_flags :standard) (c_library_flags :standard))) + (build_flags_resolver vendored) (deps (glob_files ../src/*.h) ../src/builtins.h ../src/nodes.h ../src/syntax.h ../src/token.h ../src/token_vars.h) (headers (preamble "\ From 4bc079427ccd607cc39edec3d02f7984776661dc Mon Sep 17 00:00:00 2001 From: Bolun Thompson Date: Mon, 9 Dec 2024 07:37:47 -0800 Subject: [PATCH 376/401] Quote dollar signs when not by themself (#31) Fix up escaping of `$`; revise tests to support. --------- Signed-off-by: Bolun Thompson --- libdash/printer.py | 9 +----- ocaml/ast.ml | 2 +- test/round_trip.sh | 40 +++++++++++++++---------- test/tests/single_quoted_dollar_sign.sh | 4 +++ 4 files changed, 31 insertions(+), 24 deletions(-) create mode 100644 test/tests/single_quoted_dollar_sign.sh diff --git a/libdash/printer.py b/libdash/printer.py index 4317638..6c84f2a 100644 --- a/libdash/printer.py +++ b/libdash/printer.py @@ -408,14 +408,7 @@ def string_of_arg (args, quote_mode=UNQUOTED): text = [] while i < len(args): c = string_of_arg_char(args[i], quote_mode=quote_mode) - - # dash will parse '$?' as - # [(C, '$'), (E, '?')] - # but we don't normally want to escape ? 
- # - # so we check up after the fact: if the character after $ is escaped, - # we'll escape the $, too - if c == "$" and (i+1 < len(args)) and args[i+1][0] == "E": + if c == "$" and (i+1 < len(args)): c = "\\$" text.append(c) diff --git a/ocaml/ast.ml b/ocaml/ast.ml index 9205e83..80a77d4 100644 --- a/ocaml/ast.ml +++ b/ocaml/ast.ml @@ -468,7 +468,7 @@ and string_of_arg ?quote_mode:(quote_mode=QUnquoted) = function | [] -> "" | c :: a -> let char = string_of_arg_char ~quote_mode c in - if char = "$" && next_is_escaped a + if char = "$" && a <> [] then "\\$" ^ string_of_arg ~quote_mode a else char ^ string_of_arg ~quote_mode a diff --git a/test/round_trip.sh b/test/round_trip.sh index 1fbc86b..df7c7af 100755 --- a/test/round_trip.sh +++ b/test/round_trip.sh @@ -8,6 +8,10 @@ fi p=$1 tgt=$2 +two_roundtrips() { + [ "$(head -n1 "$tgt")" != '# TEST: single roundtrip' ] +} + orig=$(mktemp) "$p" "$tgt" >"$orig" @@ -31,25 +35,31 @@ then echo "PASS '$tgt'" exit 0 else - # try one more time around the loop - rtrt=$(mktemp) - - "$p" "$rt" >"$rtrt" - if [ "$?" -ne 0 ] + if two_roundtrips then - echo "RT_ABORT_3: '$tgt' -> '$orig' -> '$rt' -> '$rtrt'" - exit 5 - fi + # try one more time around the loop + rtrt=$(mktemp) - if diff -b "$rt" "$rtrt" >/dev/null - then - echo "PASS '$tgt' (two runs to fixpoint)" - exit 0 + "$p" "$rt" >"$rtrt" + if [ "$?" 
-ne 0 ] + then + echo "RT_ABORT_3: '$tgt' -> '$orig' -> '$rt' -> '$rtrt'" + exit 5 + fi + + if diff -b "$rt" "$rtrt" >/dev/null + then + echo "PASS '$tgt' (two runs to fixpoint)" + exit 0 + fi fi - + echo "FAIL: '$tgt' first time" diff -ub "$orig" "$rt" - echo ">>> '$tgt' second time" - diff -ub "$rt" "$rtrt" + if two_roundtrips + then + echo ">>> '$tgt' second time" + diff -ub "$rt" "$rtrt" + fi exit 1 fi diff --git a/test/tests/single_quoted_dollar_sign.sh b/test/tests/single_quoted_dollar_sign.sh new file mode 100644 index 0000000..53d1edc --- /dev/null +++ b/test/tests/single_quoted_dollar_sign.sh @@ -0,0 +1,4 @@ +# TEST: single roundtrip + +echo '$1' +echo $ a From 21d8a6bfcd8bb8f8b96fb38d6f2548cdc72ee58a Mon Sep 17 00:00:00 2001 From: Bolun Thompson Date: Sat, 4 Jan 2025 03:51:01 +0900 Subject: [PATCH 377/401] Correctly print subshells nested in a variable expansion (#32) Fix: Nested shell in subshell Signed-off-by: Bolun Thompson --- libdash/printer.py | 9 ++++++++- ocaml/ast.ml | 8 ++++++-- test/tests/nested_shell_in_subshell.sh | 4 ++++ 3 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 test/tests/nested_shell_in_subshell.sh diff --git a/libdash/printer.py b/libdash/printer.py index 6c84f2a..d0e26ac 100644 --- a/libdash/printer.py +++ b/libdash/printer.py @@ -393,7 +393,14 @@ def string_of_arg_char (c, quote_mode=UNQUOTED): elif (type == "Q"): return "\"" + string_of_arg (param, quote_mode=QUOTED) + "\""; elif (type == "B"): - return "$(" + to_string (param) + ")"; + body = to_string (param) + # to handle $( () ) + try: + if body[0] == "(" and body[-1] == ")": + body = f" {body} " + except IndexError: + pass + return "$(" + body + ")" else: abort (); diff --git a/ocaml/ast.ml b/ocaml/ast.ml index 80a77d4..3abdfb2 100644 --- a/ocaml/ast.ml +++ b/ocaml/ast.ml @@ -462,8 +462,12 @@ and string_of_arg_char ?quote_mode:(quote_mode=QUnquoted) = function | V (vt,nul,name,a) -> "${" ^ name ^ (if nul then ":" else "") ^ string_of_var_type vt ^ 
string_of_arg ~quote_mode a ^ "}" | Q a -> "\"" ^ string_of_arg ~quote_mode:QQuoted a ^ "\"" - | B t -> "$(" ^ to_string t ^ ")" - + | B t -> + let s = to_string t in + if String.length s >= 2 && s.[0] = '(' && s.[String.length s - 1] = ')' then + "$( " ^ s ^ " )" + else + "$(" ^ s ^ ")" and string_of_arg ?quote_mode:(quote_mode=QUnquoted) = function | [] -> "" | c :: a -> diff --git a/test/tests/nested_shell_in_subshell.sh b/test/tests/nested_shell_in_subshell.sh new file mode 100644 index 0000000..8bfb75b --- /dev/null +++ b/test/tests/nested_shell_in_subshell.sh @@ -0,0 +1,4 @@ +( (echo abc) ) +echo $( (echo abc) ) +echo `(echo abc)` +echo $() From 87018ad71634f3a2650666659ac8e982de44af4b Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 23 Jan 2025 09:39:57 -0500 Subject: [PATCH 378/401] update workflows --- .github/workflows/build.yml | 34 +++++++++++++++++----------------- .github/workflows/canary.yml | 17 +++++++++-------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8282400..9e31599 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,13 +16,13 @@ jobs: - name: Check version numbers run: ./version.sh - + package-python: strategy: fail-fast: false matrix: os: - - macos-11 + - macos-latest - ubuntu-latest runs-on: ${{ matrix.os }} @@ -38,15 +38,15 @@ jobs: echo Unsupported RUNNER_OS=$RUNNER_OS exit 1 fi - + - name: Checkout code uses: actions/checkout@v4 - name: Build wheels uses: pypa/cibuildwheel@v2.8.1 - + - name: Upload binary wheel - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: ${{ format('bdist.{0}', matrix.os) }} path: wheelhouse/libdash-*.whl @@ -54,14 +54,14 @@ jobs: - name: Build source distribution (Linux only) if: contains(matrix.os, 'ubuntu') run: python setup.py sdist - + - name: Upload source distribution (from Linux) - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 if: 
contains(matrix.os, 'ubuntu') with: name: sdist path: dist/libdash-*.tar.gz - + build-both-and-compare: strategy: fail-fast: true @@ -84,17 +84,17 @@ jobs: with: ocaml-compiler: ${{ matrix.ocaml-compiler }} dune-cache: true - + - name: Install and test OCaml bindings run: opam install --with-test --working-dir . - + # we don't reuse the wheels so that all of the CI runs can happen concurrently - name: Install Python directly run: sudo pip3 install . - + - name: Test Python bindings run: make -C python test - + - name: Compare OCaml and Python bindings run: opam exec -- make -C test test @@ -105,10 +105,10 @@ jobs: - build-both-and-compare runs-on: ubuntu-latest if: github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/tags') - + steps: - name: Download distributions - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 - name: Rename distributions run: | @@ -118,7 +118,7 @@ jobs: mv sdist/libdash-*.tar.gz dist/ echo Look on my Works, ye Mighty, and despair! 
ls dist - + - name: Deploy 'latest' release on GH uses: marvinpinto/action-automatic-releases@latest with: @@ -135,8 +135,8 @@ jobs: password: ${{ secrets.TEST_PYPI_API_TOKEN }} verbose: true repository_url: https://test.pypi.org/legacy/ - skip_existing: true - + skip_existing: true + - name: Deploy tagged release on PyPI if: startsWith(github.ref, 'refs/tags') uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/canary.yml b/.github/workflows/canary.yml index d882689..8522c91 100644 --- a/.github/workflows/canary.yml +++ b/.github/workflows/canary.yml @@ -4,19 +4,20 @@ on: # push: schedule: - cron: '20 13 * * *' - + jobs: ocaml: strategy: fail-fast: false matrix: os: - - macos-12 - - macos-11 - - ubuntu-18.04 - - ubuntu-20.04 + - macos-14 + - macos-15 + - ubuntu-22.04 + - ubuntu-24.04 ocaml-compiler: - 4.14.x + - 5.2.x runs-on: ${{ matrix.os }} @@ -34,7 +35,7 @@ jobs: - name: Test OPAM executables run: test "$(echo hi | opam exec -- shell_to_json | opam exec -- json_to_shell)" = "hi" - + python: strategy: fail-fast: true @@ -63,8 +64,8 @@ jobs: - name: Install Python bindings from ${{ matrix.repository_url }} run: pip install -v -i "${{ matrix.repository_url }}" --extra-index-url https://pypi.org/simple/ libdash - + - name: Test Python library run: | RT="$(printf 'import libdash\nasts = libdash.parse("-", True)\nfor (ast, lines, linno_before, linno_after) in asts:\n print(libdash.to_string(ast))\n')" - test "$(echo hi | python -c "$RT")" = "hi" + test "$(echo hi | python -c "$RT")" = "hi" From eca3f2f8997c6596ffabf58d8008c580ca5c9d62 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 27 Jan 2025 10:29:50 -0500 Subject: [PATCH 379/401] use setup-ocaml@v3 --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9e31599..4f16e34 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -80,7 +80,7 @@ jobs: uses: 
actions/checkout@v4 - name: Use OCaml ${{ matrix.ocaml-compiler }} - uses: avsm/setup-ocaml@v2 + uses: avsm/setup-ocaml@v3 with: ocaml-compiler: ${{ matrix.ocaml-compiler }} dune-cache: true From 53bbfa86542732a092fe177c5b7b208d6704642a Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Mon, 27 Jan 2025 10:33:53 -0500 Subject: [PATCH 380/401] use macos-latest --- .github/workflows/build.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4f16e34..ebf6160 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -67,8 +67,7 @@ jobs: fail-fast: true matrix: os: - - macos-12 - - macos-11 + - macos-latest - ubuntu-latest ocaml-compiler: - 4.14.x From 46b5db8eb27b33926036200c7ff0afabcc300936 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 28 Apr 2026 12:41:48 -0400 Subject: [PATCH 381/401] trying to rebuild wheels and release --- TODO.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TODO.md b/TODO.md index 77dd1dd..59d8c47 100644 --- a/TODO.md +++ b/TODO.md @@ -3,6 +3,6 @@ - [x] correct libdash.so installation (locally) - [x] pip setup - [x] testpypi setup +- [ ] version bump, fix CI - [ ] pash pull request - [ ] smoosh pull request - From ab19a6a92c220d9368eb737ee26d7e9346a02a74 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 28 Apr 2026 12:50:23 -0400 Subject: [PATCH 382/401] trying to fix atdgen dep --- dune-project | 1 + ocaml/dune | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dune-project b/dune-project index fd6c868..b100776 100644 --- a/dune-project +++ b/dune-project @@ -14,6 +14,7 @@ ("ctypes" (>= "0.21.1")) ("ctypes-foreign" (>= "0.21.1")) ("atdgen" (>= "2.15.0")) + ("atdgen-runtime" (>= "2.15.0")) ("conf-autoconf" (>= 0.1)) ("conf-aclocal" (>= 2)) ("conf-automake" (>= 1)) diff --git a/ocaml/dune b/ocaml/dune index 0f897b9..a10e513 100644 --- a/ocaml/dune +++ b/ocaml/dune @@ -3,7 +3,7 @@ 
(public_names shell_to_json json_to_shell) (modules shell_to_json json_to_shell ast_json) (modes (native exe)) - (libraries libdash yojson atdgen)) + (libraries libdash yojson atdgen-runtime)) (rule (copy ../dlldash.so dlldash_native.so)) (rule (copy ../libdash.a libdash_native.a)) From 71da41ba99e7c110a4d98c38d19974acee9485af Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 28 Apr 2026 20:29:13 -0400 Subject: [PATCH 383/401] cleanup --- .gitignore | 14 +------------- configure.ac | 3 +-- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 7b8d7c5..6064816 100644 --- a/.gitignore +++ b/.gitignore @@ -44,22 +44,10 @@ _build .DS_Store .DS_Store? ._* +.\#* .Spotlight* .Trash* *[Tt]humbs.db - -*.a -*.cmxa - -test -*.native -*~ -*.o -*.cmx -*.cmi -_build -test.err - ar-lib config.* src/libdash.a diff --git a/configure.ac b/configure.ac index 01ac722..e456c7f 100644 --- a/configure.ac +++ b/configure.ac @@ -8,11 +8,10 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)]) dnl Checks for programs. 
AC_PROG_CC - AC_USE_SYSTEM_EXTENSIONS dnl AC_GNU_SOURCE -dnl AC_PROG_YACC +AC_PROG_YACC dnl MMG 2018-09-26 support building the library AM_PROG_AR From 3ebc27aa0f8f216756f2240f620ae22639749307 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 28 Apr 2026 20:30:48 -0400 Subject: [PATCH 384/401] only build on push for main, not other branches --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ebf6160..7794948 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -3,6 +3,8 @@ name: Main workflow on: pull_request: push: + branches: + - main schedule: - cron: '5 14 * * *' From aecb5694ccaef251d3b7e7bf1ba9e6a9db884811 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 28 Apr 2026 21:53:19 -0400 Subject: [PATCH 385/401] ci not building --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7794948..462c67d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -2,6 +2,8 @@ name: Main workflow on: pull_request: + branches: + - main push: branches: - main From f6b2c6641bcf1022445cab184c0d85e4c7b0519c Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 28 Apr 2026 21:56:18 -0400 Subject: [PATCH 386/401] s/main/master/ --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 462c67d..3177e9f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -3,10 +3,10 @@ name: Main workflow on: pull_request: branches: - - main + - master push: branches: - - main + - master schedule: - cron: '5 14 * * *' From cfa39fbbc35d0768d178dda249e02f3331e7e064 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 28 Apr 2026 22:26:48 -0400 Subject: [PATCH 387/401] run on all pushes, since we cannot form a
merge commit --- .github/workflows/build.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3177e9f..2e00515 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,8 +5,6 @@ on: branches: - master push: - branches: - - master schedule: - cron: '5 14 * * *' From dd7dacd88ebd0e6538e388c45e93aecd11d56b14 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Tue, 28 Apr 2026 22:35:13 -0400 Subject: [PATCH 388/401] reconcile with upstream --- src/alias.c | 2 +- src/input.c | 4 ++-- src/main.c | 2 +- src/mktokens | 17 +++++++++-------- src/parser.c | 2 +- src/redir.c | 4 ++-- 6 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/alias.c b/src/alias.c index bce509c..b8c2704 100644 --- a/src/alias.c +++ b/src/alias.c @@ -49,7 +49,7 @@ struct alias *atab[ATABSIZE]; /* -STATIC void setalias(const char *, const char *); +STATIC void setalias(const char *, const char *); */ // libdash STATIC struct alias *freealias(struct alias *); STATIC struct alias **__lookupalias(const char *); diff --git a/src/input.c b/src/input.c index 13a8e09..7856e29 100644 --- a/src/input.c +++ b/src/input.c @@ -83,7 +83,7 @@ STATIC void pushfile(void); static void popstring(void); static int preadfd(void); /* -static void setinputfd(int fd, int push); +static void setinputfd(int fd, int push); */ // libdash static int preadbuffer(void); @@ -573,7 +573,7 @@ setinputfile(const char *fname, int flags) * interrupts off. */ -/* +/* static void */ void // libdash diff --git a/src/main.c b/src/main.c index 35a5532..7eb8b42 100644 --- a/src/main.c +++ b/src/main.c @@ -81,7 +81,7 @@ static int cmdloop(int); //libdash void -initialize_dash_errno() +initialize_dash_errno() { #ifdef __GLIBC__ dash_errno = __errno_location(); diff --git a/src/mktokens b/src/mktokens index 3ab7bc5..dcef676 100644 --- a/src/mktokens +++ b/src/mktokens @@ -37,10 +37,11 @@ # token marks the end of a list. 
The third column is the name to print in # error messages. -: ${TMPDIR:=/tmp} +: "${TMPDIR:=/tmp}" -cat > $TMPDIR/ka$$ <<\! +cat > "${TMPDIR}"/ka$$ <<\! TEOF 1 end of file +TBLANK 0 blank TNL 0 newline TSEMI 0 ";" TBACKGND 0 "&" @@ -70,28 +71,28 @@ TWHILE 0 "while" TBEGIN 0 "{" TEND 1 "}" ! -nl=`wc -l ${TMPDIR}/ka$$` +nl=`wc -l "${TMPDIR}"/ka$$` exec > token.h -awk '{print "#define " $1 " " NR-1}' ${TMPDIR}/ka$$ +awk '{print "#define " $1 " " NR-1}' "${TMPDIR}"/ka$$ exec > token_vars.h echo ' /* Array indicating which tokens mark the end of a list */ static const char tokendlist[] = {' -awk '{print "\t" $2 ","}' ${TMPDIR}/ka$$ +awk '{print "\t" $2 ","}' "${TMPDIR}"/ka$$ echo '}; static const char *const tokname[] = {' sed -e 's/"/\\"/g' \ -e 's/[^ ]*[ ][ ]*[^ ]*[ ][ ]*\(.*\)/ "\1",/' \ - ${TMPDIR}/ka$$ + "${TMPDIR}"/ka$$ echo '}; ' -sed 's/"//g' ${TMPDIR}/ka$$ | awk ' +sed 's/"//g' "${TMPDIR}"/ka$$ | awk ' /TNOT/{print "#define KWDOFFSET " NR-1; print ""; print "static const char *const parsekwd[] = {"} /TNOT/,/neverfound/{if (last) print " \"" last "\","; last = $3} END{print " \"" last "\"\n};"}' -rm ${TMPDIR}/ka$$ +rm "${TMPDIR}"/ka$$ diff --git a/src/parser.c b/src/parser.c index 531ecbc..d013817 100644 --- a/src/parser.c +++ b/src/parser.c @@ -784,7 +784,7 @@ readtoken(void) } else { t = xxreadtoken(); goto ignorenl; - } + } } } out: diff --git a/src/redir.c b/src/redir.c index 5dd990c..e823462 100644 --- a/src/redir.c +++ b/src/redir.c @@ -453,7 +453,7 @@ FORKRESET { #endif -/* +/* * Just a convenience because fcntl isn't well exposed in OCaml. */ // libdash @@ -464,7 +464,7 @@ freshfd_ge10(int fd) int err; newfd = fcntl(fd, F_DUPFD_CLOEXEC, 10); - + err = newfd < 0 ? 
errno : 0; if (err == EBADF) { newfd = -1; From 984ed807d01ff083984c26785d64faf153785d4b Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Fri, 8 May 2026 12:55:52 -0400 Subject: [PATCH 389/401] relax assertion --- libdash/parser.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/libdash/parser.py b/libdash/parser.py index e3e397f..4db7808 100644 --- a/libdash/parser.py +++ b/libdash/parser.py @@ -11,11 +11,11 @@ def libdash_library_path(): if LIBDASH_LIBRARY_PATH is not None: return LIBDASH_LIBRARY_PATH - + FILE_PATH = os.path.dirname(os.path.realpath(os.path.abspath(__file__))) LIBDASH_LIBRARY_PATH = os.path.join(FILE_PATH, "libdash.so") return LIBDASH_LIBRARY_PATH - + EOF_NLEFT = -99; # libdash/src/input.c class ParsingException(Exception): @@ -83,8 +83,9 @@ def parse(inputPath, init=True): # Last line did not have a newline assert (len (lines [-1]) > 0 and (lines [-1][-1] != '\n')) - else: - assert (nleft_after == 0); # Read whole lines + elif nleft_after != 0: + # we formerly asserted that `nleft_after != 0`, but this no longer holds + linno_after = linno_after + 1; # The last line wasn't counted n_ptr = cast (n_ptr_C, POINTER (union_node)) new_ast = of_node (n_ptr) From b1e3d09c00851cc54f21ceff532bf8e8349b78fc Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 21 May 2026 16:14:43 -0400 Subject: [PATCH 390/401] update pyproject.toml, try using venv --- .github/workflows/build.yml | 15 +++++++++++---- pyproject.toml | 8 ++++---- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2e00515..a533112 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -90,14 +90,21 @@ jobs: run: opam install --with-test --working-dir . # we don't reuse the wheels so that all of the CI runs can happen concurrently - - name: Install Python directly - run: sudo pip3 install . + - name: Install Python via venv + run: + python3 -m venv .venv + . 
.venv/bin/activate + python3 -m pip install . - name: Test Python bindings - run: make -C python test + run: + . .venv/bin/activate + make -C python test - name: Compare OCaml and Python bindings - run: opam exec -- make -C test test + run: + . .venv/bin/activate + opam exec -- make -C test test deploy: needs: diff --git a/pyproject.toml b/pyproject.toml index a66f117..d7e9923 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,21 +3,21 @@ name = "libdash" version = "0.3.1" authors = [ { name="Michael Greenberg", email="michael@greenberg.science" }, + { name="PaSh contributors" }, ] description = "Bindings for the dash shell as a library" readme = "README.md" -license = { file="COPYING" } +license-files = "COPYING" requires-python = ">=3.7" classifiers = [ "Programming Language :: Python :: 3", "Topic :: System :: System Shells", - "License :: OSI Approved :: MIT License", "Operating System :: POSIX", ] [project.urls] -"Homepage" = "https://github.com/mgree/libdash" -"Bug Tracker" = "https://github.com/mgree/libdash/issues" +"Homepage" = "https://github.com/binpash/libdash" +"Bug Tracker" = "https://github.com/binpash/libdash/issues" [build-system] requires = ["setuptools>=61.0"] From bc7b090154473ca283813da5811007697a441a5e Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 21 May 2026 16:18:45 -0400 Subject: [PATCH 391/401] fix license-files, update checkout action --- .github/workflows/build.yml | 6 +++--- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a533112..5425396 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -14,7 +14,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Check version numbers run: ./version.sh @@ -42,7 +42,7 @@ jobs: fi - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Build wheels uses: pypa/cibuildwheel@v2.8.1 
@@ -78,7 +78,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Use OCaml ${{ matrix.ocaml-compiler }} uses: avsm/setup-ocaml@v3 diff --git a/pyproject.toml b/pyproject.toml index d7e9923..ed91bb6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ authors = [ ] description = "Bindings for the dash shell as a library" readme = "README.md" -license-files = "COPYING" +license-files = ["COPYING"] requires-python = ">=3.7" classifiers = [ "Programming Language :: Python :: 3", From 0fc7b6ce5e83fdaa0c5fb5cde35b944ba3a91be5 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 21 May 2026 16:21:34 -0400 Subject: [PATCH 392/401] try to fix license-files --- pyproject.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ed91bb6..8fffe79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,9 +5,9 @@ authors = [ { name="Michael Greenberg", email="michael@greenberg.science" }, { name="PaSh contributors" }, ] +license = "BSD-3-Clause" description = "Bindings for the dash shell as a library" readme = "README.md" -license-files = ["COPYING"] requires-python = ">=3.7" classifiers = [ "Programming Language :: Python :: 3", @@ -15,6 +15,9 @@ classifiers = [ "Operating System :: POSIX", ] +[tool.setuptools] +license-files = ["COPYING"] + [project.urls] "Homepage" = "https://github.com/binpash/libdash" "Bug Tracker" = "https://github.com/binpash/libdash/issues" From c770bee22367bfaa3cb8cb7322cbe676249496e8 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 21 May 2026 16:25:21 -0400 Subject: [PATCH 393/401] try updating cibuildwheel --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5425396..4a136bf 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -45,7 +45,7 @@ jobs: uses: actions/checkout@v6 - name: 
Build wheels - uses: pypa/cibuildwheel@v2.8.1 + uses: pypa/cibuildwheel@v3.4.1 - name: Upload binary wheel uses: actions/upload-artifact@v4 From b6fa675a544d688f7ca265d4ecd737b9891af09b Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 21 May 2026 16:31:56 -0400 Subject: [PATCH 394/401] try updating setuptools version --- pyproject.toml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8fffe79..dd4a93d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,7 @@ authors = [ { name="PaSh contributors" }, ] license = "BSD-3-Clause" +license-files = ["COPYING"] description = "Bindings for the dash shell as a library" readme = "README.md" requires-python = ">=3.7" @@ -15,13 +16,11 @@ classifiers = [ "Operating System :: POSIX", ] -[tool.setuptools] -license-files = ["COPYING"] [project.urls] "Homepage" = "https://github.com/binpash/libdash" "Bug Tracker" = "https://github.com/binpash/libdash/issues" [build-system] -requires = ["setuptools>=61.0"] +requires = ["setuptools>=77.0.3"] build-backend = "setuptools.build_meta" From 7a12a538a1757167089c5ebdd33ab2c8a2fd315f Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 21 May 2026 16:54:36 -0400 Subject: [PATCH 395/401] go back to the old, deprecated format --- .github/workflows/build.yml | 6 +++--- pyproject.toml | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4a136bf..cb3bba8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -91,18 +91,18 @@ jobs: # we don't reuse the wheels so that all of the CI runs can happen concurrently - name: Install Python via venv - run: + run: | python3 -m venv .venv . .venv/bin/activate python3 -m pip install . - name: Test Python bindings - run: + run: | . .venv/bin/activate make -C python test - name: Compare OCaml and Python bindings - run: + run: | . 
.venv/bin/activate opam exec -- make -C test test diff --git a/pyproject.toml b/pyproject.toml index dd4a93d..6ef7d38 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,8 +5,7 @@ authors = [ { name="Michael Greenberg", email="michael@greenberg.science" }, { name="PaSh contributors" }, ] -license = "BSD-3-Clause" -license-files = ["COPYING"] +license = { file = "COPYING" } description = "Bindings for the dash shell as a library" readme = "README.md" requires-python = ">=3.7" From 632c3a972a2e126e32ee542bc75dba439067a472 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 21 May 2026 16:55:51 -0400 Subject: [PATCH 396/401] reasonable setuptools version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6ef7d38..2561f05 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,5 +21,5 @@ classifiers = [ "Bug Tracker" = "https://github.com/binpash/libdash/issues" [build-system] -requires = ["setuptools>=77.0.3"] +requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" From a7c8ccfa1a34e6ee67916ad078e331e66ed34755 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 21 May 2026 17:12:58 -0400 Subject: [PATCH 397/401] try setting architecture correctly in setup.py --- setup.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 541a899..11aedbc 100644 --- a/setup.py +++ b/setup.py @@ -2,6 +2,7 @@ from setuptools.command.build_py import build_py import os +import platform import shutil import subprocess import sys @@ -11,7 +12,7 @@ def try_exec(*cmds): proc = subprocess.run(cmds) - + if proc.returncode != 0: print('`{}` failed'.format(' '.join(cmds)), file=sys.stderr) proc.check_returncode() @@ -19,12 +20,20 @@ def try_exec(*cmds): class libdash_build_py(build_py): def run(self): build_py.run(self) - - if sys.platform == 'darwin': + + if sys.platform == 'darwin': libtoolize = "glibtoolize" + + target_arch = 
os.environ.get("ARCHFLAGS") + if not target_arch: + target_arch = f"-arch {platform.machine()}" + os.environ["ARCHFLAGS"] = target_arch + + if host_arch not in target_arch and "MACOSX_DEPLOYMENT_TARGET" not in os.environ: + os.environ["MACOSX_DEPLOYMENT_TARGET"] = "11.0" else: libtoolize = "libtoolize" - + try_exec(libtoolize) try_exec('aclocal') try_exec('autoheader') From 01b1a50d48ed7a0a60dea0a833e5b577f89c00ee Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 21 May 2026 17:16:45 -0400 Subject: [PATCH 398/401] whoops --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 11aedbc..628de86 100644 --- a/setup.py +++ b/setup.py @@ -25,10 +25,10 @@ def run(self): libtoolize = "glibtoolize" target_arch = os.environ.get("ARCHFLAGS") + host_arch = platform.machine() if not target_arch: - target_arch = f"-arch {platform.machine()}" + target_arch = f"-arch {host_arch}" os.environ["ARCHFLAGS"] = target_arch - if host_arch not in target_arch and "MACOSX_DEPLOYMENT_TARGET" not in os.environ: os.environ["MACOSX_DEPLOYMENT_TARGET"] = "11.0" else: From e88a67a0e752576ca6472ea571a663d784f11d75 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 21 May 2026 17:47:52 -0400 Subject: [PATCH 399/401] try skipping delocation --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index cb3bba8..e2ba507 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -46,6 +46,8 @@ jobs: - name: Build wheels uses: pypa/cibuildwheel@v3.4.1 + env: + CIBW_REPAIR_WHEEL_COMMAND_MACOS: "" - name: Upload binary wheel uses: actions/upload-artifact@v4 From 62c8e6aed7cf70b771c2a2bd4e6ae4c391e5be40 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 21 May 2026 18:14:31 -0400 Subject: [PATCH 400/401] debug CI host setup --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index 
628de86..0f3a5a4 100644 --- a/setup.py +++ b/setup.py @@ -31,9 +31,12 @@ def run(self): os.environ["ARCHFLAGS"] = target_arch if host_arch not in target_arch and "MACOSX_DEPLOYMENT_TARGET" not in os.environ: os.environ["MACOSX_DEPLOYMENT_TARGET"] = "11.0" + + print(f'ARCHFLAGS: {target_arch} MACOSX_DEPLOYMENT_TARGET: {os.environ["MACOSX_DEPLOYMENT_TARGET"]}') else: libtoolize = "libtoolize" + try_exec('arch') try_exec(libtoolize) try_exec('aclocal') try_exec('autoheader') From e3afbf2319f3fafa8008dbc7b4168fe461df938e Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Thu, 21 May 2026 18:22:14 -0400 Subject: [PATCH 401/401] fix os.environ direct index --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0f3a5a4..d248ae8 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def run(self): if host_arch not in target_arch and "MACOSX_DEPLOYMENT_TARGET" not in os.environ: os.environ["MACOSX_DEPLOYMENT_TARGET"] = "11.0" - print(f'ARCHFLAGS: {target_arch} MACOSX_DEPLOYMENT_TARGET: {os.environ["MACOSX_DEPLOYMENT_TARGET"]}') + print(f'ARCHFLAGS: {target_arch} MACOSX_DEPLOYMENT_TARGET: {os.environ.get("MACOSX_DEPLOYMENT_TARGET", "")}') else: libtoolize = "libtoolize"