diff --git a/Cargo.lock b/Cargo.lock index fef2b361..bd5b057d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -376,9 +376,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "redox_syscall" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ "bitflags", ] @@ -426,7 +426,7 @@ dependencies = [ [[package]] name = "roc_std" version = "0.0.1" -source = "git+https://github.com/roc-lang/roc.git#d746e2a0414448e08183e6cf5e4765601e396af3" +source = "git+https://github.com/roc-lang/roc.git#0be7b3f7501501f6ced1648f9c2644560d338505" dependencies = [ "arrayvec", "static_assertions", @@ -435,7 +435,7 @@ dependencies = [ [[package]] name = "roc_std_heap" version = "0.0.1" -source = "git+https://github.com/roc-lang/roc.git#d746e2a0414448e08183e6cf5e4765601e396af3" +source = "git+https://github.com/roc-lang/roc.git#0be7b3f7501501f6ced1648f9c2644560d338505" dependencies = [ "memmap2", "roc_std", diff --git a/crates/roc_host/src/lib.rs b/crates/roc_host/src/lib.rs index 8a0016a9..46c1c204 100644 --- a/crates/roc_host/src/lib.rs +++ b/crates/roc_host/src/lib.rs @@ -6,7 +6,8 @@ #![allow(non_snake_case)] #![allow(improper_ctypes)] use core::ffi::c_void; -use roc_std::{RocBox, RocList, RocResult, RocStr, ReadOnlyRocList, ReadOnlyRocStr}; +use core::panic; +use roc_std::{ReadOnlyRocList, ReadOnlyRocStr, RocBox, RocList, RocRefcounted, RocResult, RocStr}; use roc_std_heap::ThreadSafeRefcountedResourceHeap; use std::borrow::{Borrow, Cow}; use std::ffi::OsStr; @@ -29,10 +30,10 @@ thread_local! 
{ .unwrap(); } -static ARGS : OnceLock> = OnceLock::new(); +static ARGS: OnceLock> = OnceLock::new(); -fn file_heap() -> &'static ThreadSafeRefcountedResourceHeap> { - static FILE_HEAP: OnceLock>> = OnceLock::new(); +fn file_heap() -> &'static ThreadSafeRefcountedResourceHeap { + static FILE_HEAP: OnceLock> = OnceLock::new(); FILE_HEAP.get_or_init(|| { let DEFAULT_MAX_FILES = 65536; let max_files = env::var("ROC_BASIC_CLI_MAX_FILES") @@ -303,6 +304,7 @@ pub fn init() { roc_fx_fileReadBytes as _, roc_fx_fileReader as _, roc_fx_fileReadLine as _, + roc_fx_fileReadByteBuf as _, roc_fx_fileDelete as _, roc_fx_cwd as _, roc_fx_posixTime as _, @@ -337,7 +339,8 @@ pub fn init() { #[no_mangle] pub extern "C" fn rust_main(args: ReadOnlyRocList) -> i32 { - ARGS.set(args).unwrap_or_else(|_| panic!("only one thread running, must be able to set args")); + ARGS.set(args) + .unwrap_or_else(|_| panic!("only one thread running, must be able to set args")); init(); extern "C" { @@ -375,7 +378,9 @@ pub extern "C" fn roc_fx_envDict() -> RocList<(RocStr, RocStr)> { #[no_mangle] pub extern "C" fn roc_fx_args() -> ReadOnlyRocList { // Note: the clone here is no-op since the refcount is readonly. Just goes from &RocList to RocList. - ARGS.get().expect("args was set during init and must be here").clone() + ARGS.get() + .expect("args was set during init and must be here") + .clone() } #[no_mangle] @@ -571,34 +576,43 @@ fn path_from_roc_path(bytes: &RocList) -> Cow<'_, Path> { #[no_mangle] pub extern "C" fn roc_fx_fileReadBytes(roc_path: &RocList) -> RocResult, RocStr> { - // TODO: write our own duplicate of `read_to_end` that directly fills a `RocList`. - // This adds an extra O(n) copy. 
- let mut bytes = Vec::new(); - match File::open(path_from_roc_path(roc_path)) { - Ok(mut file) => match file.read_to_end(&mut bytes) { - Ok(_bytes_read) => RocResult::ok(RocList::from(bytes.as_slice())), - Err(err) => RocResult::err(toRocReadError(err)), - }, + Ok(mut file) => { + let size = file + .metadata() + .map(|m| m.len()) + .expect("TODO: make robust: file has not size?"); + let mut buf_list = RocList::with_capacity(size as usize); + let buf_slice: &mut [u8] = unsafe { + std::slice::from_raw_parts_mut(buf_list.as_mut_ptr(), buf_list.capacity()) + }; + + match file.read_exact(buf_slice) { + Ok(()) => { + let out_list = unsafe { + RocList::from_raw_parts( + buf_list.as_mut_ptr(), + buf_list.capacity(), + buf_list.capacity(), + ) + }; + std::mem::forget(buf_list); + + RocResult::ok(out_list) + } + Err(err) => RocResult::err(toRocReadError(err)), + } + } Err(err) => RocResult::err(toRocReadError(err)), } } #[no_mangle] -pub extern "C" fn roc_fx_fileReader( - roc_path: &RocList, - size: u64, -) -> RocResult, RocStr> { +pub extern "C" fn roc_fx_fileReader(roc_path: &RocList) -> RocResult, RocStr> { match File::open(path_from_roc_path(roc_path)) { Ok(file) => { - let buf_reader = if size > 0 { - BufReader::with_capacity(size as usize, file) - } else { - BufReader::new(file) - }; - let heap = file_heap(); - let alloc_result = heap.alloc_for(buf_reader); + let alloc_result = heap.alloc_for(file); match alloc_result { Ok(out) => RocResult::ok(out), Err(err) => RocResult::err(toRocReadError(err)), @@ -609,47 +623,119 @@ pub extern "C" fn roc_fx_fileReader( } #[no_mangle] -pub extern "C" fn roc_fx_fileReadLine(data: RocBox<()>) -> RocResult, RocStr> { - let buf_reader: &mut BufReader = ThreadSafeRefcountedResourceHeap::box_to_resource(data); +pub extern "C" fn roc_fx_fileReadLine( + data: RocBox<()>, + //TODO: this would allow the internal buffer to get much much bigger, is this acceptable? 
Should we maybe include a warning about that? + buffer: RocList, +) -> RocResult, RocStr> { + let file: &mut File = ThreadSafeRefcountedResourceHeap::box_to_resource(data); - let mut buffer = RocList::empty(); - match read_until(buf_reader, b'\n', &mut buffer) { - Ok(..) => { + let buffer = if buffer.is_unique() { + buffer + } else { + RocList::with_capacity(8000) + }; + match read_until(file, b'\n', buffer) { + Ok(mut buffer) => { + buffer.inc(); // Note: this returns an empty list when no bytes were read, e.g. End Of File RocResult::ok(buffer) } Err(err) => RocResult::err(err.to_string().as_str().into()), } } +// We should be able to ask the user to "return" their buffer. So that if they do they get the same buffer back and we don't have to re-allocate. Should be a nice optimization. +// TODO: If the capacity is larger but the len isn't right we should be able to extend the len to match. I don't have access to a function that does that though +#[no_mangle] +pub extern "C" fn roc_fx_fileReadByteBuf( + reader: RocBox<()>, + buf: &mut RocList, +) -> RocResult, RocStr> { + let file: &mut File = ThreadSafeRefcountedResourceHeap::box_to_resource(reader); + + let canUseInternal = buf.is_unique(); + + if canUseInternal { + unsafe { + //This ensures we always expand the buffer to the full capacity of the list + let buf_slice: &mut [u8] = + std::slice::from_raw_parts_mut(buf.as_mut_ptr(), buf.capacity()); + loop { + let read = match file.read(buf_slice) { + Ok(n) => n, + Err(ref e) if matches!(e.kind(), ErrorKind::Interrupted) => continue, + Err(e) => return RocResult::err(e.to_string().as_str().into()), + }; + let mut roc_list = RocList::from_raw_parts(buf.as_mut_ptr(), read, buf.capacity()); + roc_list.inc(); + + return RocResult::ok(roc_list); + } + } + } else { + // return RocResult::err("not unique".into()); + unsafe { + //Make a new list + let mut list = RocList::with_capacity(buf.capacity()); + //get a slice to the full memory of the list + let slice: &mut [u8] = + 
std::slice::from_raw_parts_mut(list.as_mut_ptr(), list.capacity()); + loop { + let read = match file.read(slice) { + Ok(n) => n, + Err(ref e) if matches!(e.kind(), ErrorKind::Interrupted) => continue, + Err(e) => return RocResult::err(e.to_string().as_str().into()), + }; + //update the length based on amount read + let roc_list = RocList::from_raw_parts(list.as_mut_ptr(), read, list.capacity()); + std::mem::forget(list); + return RocResult::ok(roc_list); + } + } + } +} -fn read_until( +/// Reads until the provided delim expanding the roc buffer as it goes. Returns a new reference to the same roc buffer but with a length exactly as long as the data up to and including the delimiter (or everything that was read, if EOF comes first). +fn read_until( r: &mut R, delim: u8, - buf: &mut RocList, -) -> io::Result { + mut buf: RocList, +) -> io::Result> { let mut read = 0; + let og_capacity = buf.capacity(); loop { let (done, used) = { - let available = match r.fill_buf() { + //get a slice between the end of the last read and the end of the buffer + let buf_slice: &mut [u8] = unsafe { + std::slice::from_raw_parts_mut(buf.as_mut_ptr().add(read), buf.capacity() - read) + }; + let this_read = match r.read(buf_slice) { Ok(n) => n, Err(ref e) if matches!(e.kind(), ErrorKind::Interrupted) => continue, Err(e) => return Err(e), }; - match memchr::memchr(delim, available) { - Some(i) => { - buf.extend_from_slice(&available[..=i]); - (true, i + 1) - } - None => { - buf.extend_from_slice(available); - (false, available.len()) + //if we read 0 bytes we are done because that's EOF + if this_read == 0 { + (true, 0) + } else { + let readSlice: &[u8] = &buf_slice[..this_read]; + match memchr::memchr(delim, readSlice) { + Some(i) => (true, i + 1), + None => (false, this_read), } } }; - r.consume(used); read += used; if done || used == 0 { - return Ok(read); + let out = unsafe { RocList::from_raw_parts(buf.as_mut_ptr(), read, buf.capacity()) }; + //Don't drop the buffer because we are returning it + std::mem::forget(buf); + return Ok(out); + } + + // Ensure we have enough 
capacity for the next read + if buf.capacity() < read + og_capacity { + buf.reserve(og_capacity); } } } @@ -1043,9 +1129,9 @@ pub extern "C" fn roc_fx_tcpReadUntil( let stream: &mut BufReader = ThreadSafeRefcountedResourceHeap::box_to_resource(stream); - let mut buffer = RocList::empty(); - match read_until(stream, byte, &mut buffer) { - Ok(_) => RocResult::ok(buffer), + let buffer = RocList::with_capacity(8000); + match read_until(stream, byte, buffer) { + Ok(buffer) => RocResult::ok(buffer), Err(err) => RocResult::err(to_tcp_stream_err(err)), } } diff --git a/crates/roc_host_bin/src/main.rs b/crates/roc_host_bin/src/main.rs index 58922471..c4f5fce5 100644 --- a/crates/roc_host_bin/src/main.rs +++ b/crates/roc_host_bin/src/main.rs @@ -1,4 +1,4 @@ -use roc_std::{RocList, RocStr, ReadOnlyRocList, ReadOnlyRocStr}; +use roc_std::{ReadOnlyRocList, ReadOnlyRocStr, RocList, RocStr}; use std::borrow::Borrow; fn main() { diff --git a/crates/roc_host_lib/src/lib.rs b/crates/roc_host_lib/src/lib.rs index af9857d4..33bd1e8d 100644 --- a/crates/roc_host_lib/src/lib.rs +++ b/crates/roc_host_lib/src/lib.rs @@ -1,12 +1,17 @@ -use roc_std::{RocList, RocStr, ReadOnlyRocList, ReadOnlyRocStr}; +use roc_std::{ReadOnlyRocList, ReadOnlyRocStr, RocList, RocStr}; use std::borrow::Borrow; +/// # Safety +/// This function is the entry point for the program, it will be linked by roc using the legacy linker +/// to produce the final executable. +/// +/// Note we use argc and argv to pass arguments to the program instead of std::env::args(). 
#[no_mangle] pub unsafe extern "C" fn main(argc: usize, argv: *const *const i8) -> i32 { let args = std::slice::from_raw_parts(argv, argc); let mut args: RocList = args - .into_iter() + .iter() .map(|&c_ptr| { let c_str = std::ffi::CStr::from_ptr(c_ptr); let roc_str = RocStr::from(c_str.to_string_lossy().borrow()); diff --git a/platform/File.roc b/platform/File.roc index 59b7662c..114feb51 100644 --- a/platform/File.roc +++ b/platform/File.roc @@ -7,6 +7,7 @@ module [ readUtf8!, readBytes!, # read, TODO fix "Ability specialization is unknown - code generation cannot proceed!: DeriveError(UnboundVar)" + read!, delete!, isDir!, isFile!, @@ -15,10 +16,12 @@ module [ Reader, openReader!, openReaderWithCapacity!, + openReaderWithBuf!, readLine!, + readBytesToBuf!, hardLink!, ] - +# import Shared exposing [ByteReader] import Path exposing [Path, MetadataErr] import InternalFile import PlatformTasks @@ -206,47 +209,85 @@ type! : Str => Result [IsFile, IsDir, IsSymLink] [PathErr MetadataErr] type! = \path -> Path.type! (Path.fromStr path) -Reader := { reader : PlatformTasks.FileReader, path : Path } +Reader := { reader : PlatformTasks.FileReader, path : Path, buffer : List U8 } ## Try to open a `File.Reader` for buffered (= part by part) reading given a path string. ## See [examples/file-read-buffered.roc](https://github.com/roc-lang/basic-cli/blob/main/examples/file-read-buffered.roc) for example usage. ## -## This uses [rust's std::io::BufReader](https://doc.rust-lang.org/std/io/struct.BufReader.html). -## ## Use [readUtf8!] if you want to get the entire file contents at once. openReader! : Str => Result Reader [GetFileReadErr Path ReadErr] openReader! = \pathStr -> path = Path.fromStr pathStr + buffer = List.withCapacity 8000 # 0 means with default capacity - PlatformTasks.fileReader! (Str.toUtf8 pathStr) 0 + PlatformTasks.fileReader! 
(Str.toUtf8 pathStr) |> Result.mapErr \err -> GetFileReadErr path (InternalFile.handleReadErr err) - |> Result.map \reader -> @Reader { reader, path } + |> Result.map \reader -> @Reader { reader, path, buffer } ## Try to open a `File.Reader` for buffered (= part by part) reading given a path string. ## The buffer will be created with the specified capacity. ## See [examples/file-read-buffered.roc](https://github.com/roc-lang/basic-cli/blob/main/examples/file-read-buffered.roc) for example usage. ## -## This uses [rust's std::io::BufReader](https://doc.rust-lang.org/std/io/struct.BufReader.html). -## ## Use [readUtf8!] if you want to get the entire file contents at once. openReaderWithCapacity! : Str, U64 => Result Reader [GetFileReadErr Path ReadErr] openReaderWithCapacity! = \pathStr, capacity -> path = Path.fromStr pathStr + # 8k is the default in rust and seems reasonable + buffer = List.withCapacity (if capacity == 0 then 8000 else capacity) - PlatformTasks.fileReader! (Str.toUtf8 pathStr) capacity + PlatformTasks.fileReader! (Str.toUtf8 pathStr) |> Result.mapErr \err -> GetFileReadErr path (InternalFile.handleReadErr err) - |> Result.map \reader -> @Reader { reader, path } + |> Result.map \reader -> @Reader { reader, path, buffer } ## Try to read a line from a file given a Reader. ## The line will be provided as the list of bytes (`List U8`) until a newline (`0xA` byte). ## This list will be empty when we reached the end of the file. ## See [examples/file-read-buffered.roc](https://github.com/roc-lang/basic-cli/blob/main/examples/file-read-buffered.roc) for example usage. ## -## This uses [rust's `BufRead::read_line`](https://doc.rust-lang.org/std/io/trait.BufRead.html#method.read_line). -## ## Use [readUtf8!] if you want to get the entire file contents at once. readLine! : Reader => Result (List U8) [FileReadErr Path Str] -readLine! = \@Reader { reader, path } -> - PlatformTasks.fileReadLine! reader +readLine! 
= \@Reader { reader, path, buffer } -> + PlatformTasks.fileReadLine! reader buffer |> Result.mapErr \err -> FileReadErr path err + +## Try to read bytes from a file given a Reader. +## Returns a list of bytes (`List U8`) read from the file. +## The list will be empty when we reach the end of the file. +## See [examples/file-read-buffered.roc](https://github.com/roc-lang/basic-cli/blob/main/examples/file-read-buffered.roc) for example usage. +## +## NOTE: Avoid storing a reference to the buffer returned by this function beyond the next call to [read!]. +## That will allow the buffer to be reused and avoid unnecessary allocations. +## +## Use [readUtf8!] if you want to get the entire file contents at once as a UTF-8 string. +read! : Reader => Result (List U8) [FileReadErr Path Str] +read! = \@Reader { reader, path, buffer } -> + PlatformTasks.fileReadByteBuf! reader buffer + |> Result.mapErr \err -> FileReadErr path err + +## Try to open a `File.Reader` using the provided buffer as the internal buffer. +## See [examples/file-read-buffered.roc](https://github.com/roc-lang/basic-cli/blob/main/examples/file-read-buffered.roc) for example usage. +## +## Use [readUtf8!] if you want to get the entire file contents at once. +openReaderWithBuf! : Str, List U8 => Result Reader [GetFileReadErr Path ReadErr] +openReaderWithBuf! = \pathStr, buffer -> + path = Path.fromStr pathStr + + PlatformTasks.fileReader! (Str.toUtf8 pathStr) + |> Result.mapErr \err -> GetFileReadErr path (InternalFile.handleReadErr err) + |> Result.map \reader -> @Reader { reader, path, buffer } + + +## Try to read bytes from a file given a Reader. +## Returns a list of bytes (`List U8`) read from the file. +## The list will be empty when we reach the end of the file. +## This function exists for very specific use cases where you want to use multiple buffers with a single reader. +## +## Prefer [read!] which will automatically reuse the reader's internal buffer. +readBytesToBuf! 
: Reader => Result (List U8) [FileReadErr Path Str] +readBytesToBuf! = \@Reader { reader, path, buffer } -> + PlatformTasks.fileReadByteBuf! reader buffer + |> Result.mapErr \err -> FileReadErr path err + + + diff --git a/platform/PlatformTasks.roc b/platform/PlatformTasks.roc index b7b27b8a..15f5314f 100644 --- a/platform/PlatformTasks.roc +++ b/platform/PlatformTasks.roc @@ -31,6 +31,7 @@ hosted PlatformTasks fileWriteBytes!, fileReader!, fileReadLine!, + fileReadByteBuf!, pathType!, posixTime!, tcpConnect!, @@ -84,8 +85,9 @@ fileDelete! : List U8 => Result {} Str fileReadBytes! : List U8 => Result (List U8) Str FileReader := Box {} -fileReader! : List U8, U64 => Result FileReader Str -fileReadLine! : FileReader => Result (List U8) Str +fileReader! : List U8 => Result FileReader Str +fileReadLine! : FileReader,List U8 => Result (List U8) Str +fileReadByteBuf! : FileReader, List U8=> Result (List U8) Str envDict! : {} => List (Str, Str) envVar! : Str => Result Str {}