diff --git a/moses/ems/support/split-sentences.perl b/moses/ems/support/split-sentences.perl index ae510ce..bae7953 100755 --- a/moses/ems/support/split-sentences.perl +++ b/moses/ems/support/split-sentences.perl @@ -96,12 +96,16 @@ if ($MODE eq "base64documents") { while () { my $line = decode_base64($_); - open(my $fh, ":encoding(UTF-8)", \$out) or die $!; + &split_single_document; + } + print(encode_base64($out, "") . "\n"); } } else { - print &split_single_document(*STDIN); + &split_single_document; } @@ -109,18 +113,16 @@ sub split_single_document { # Argument is an open file handle. Lines will be merged unless a line with # just

or similar tag or a blank line. Or unless $KEEP_LINES # is True. - my ($fh) = @_; my $text = ""; - my $out = ""; # Loop over text, add lines together until we get a blank line or a

- while (<$fh>) { + while () { chomp; if ($KEEP_LINES) { - $out .= &split_block($_,""); + print &split_block($_,""); } elsif (/^<.+>$/ || /^\s*$/) { # Time to process this block; we've hit a blank or

- $out .= &split_block($text, $_); - $out .= "

\n" if $NOP == 0 && (/^\s*$/ && $text); ## If we have text followed by

+ print &split_block($text, $_); + print "

\n" if $NOP == 0 && (/^\s*$/ && $text); ## If we have text followed by

$text = ""; } else { # Append the text, with a space. @@ -128,8 +130,7 @@ sub split_single_document { } } # Do the leftover text. - $out .= &split_block($text,"") if $text; - return $out; + print &split_block($text,"") if $text; } sub split_block { @@ -268,7 +269,7 @@ sub preprocess { # We stopped one token from the end to allow for easy look-ahead. # Append it now. - $text = $text.$words[$i]; + $text = $text.$words[$i] if scalar(@words) > 0; # Clean up spaces at head and tail of each line as well as any double-spacing $text =~ s/ +/ /g;