diff --git a/Cargo.lock b/Cargo.lock index b817044..b9dddc2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1586,6 +1586,7 @@ dependencies = [ "env_logger", "fn-traits", "hyperlight-host", + "hyperlight-js-common", "hyperlight-js-runtime", "lazy_static", "libc", @@ -1613,6 +1614,10 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "hyperlight-js-common" +version = "0.2.5" + [[package]] name = "hyperlight-js-runtime" version = "0.2.5" @@ -1626,7 +1631,10 @@ dependencies = [ "hashbrown 0.17.1", "hex", "hmac", + "hyperlight-common", + "hyperlight-guest", "hyperlight-guest-bin", + "hyperlight-js-common", "rquickjs", "serde", "serde_json", @@ -1885,6 +1893,7 @@ version = "0.2.5" dependencies = [ "arc-swap", "hyperlight-js", + "hyperlight-js-common", "napi", "napi-build", "napi-derive", diff --git a/Cargo.toml b/Cargo.toml index 7cdb3ff..d042e5a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [workspace] resolver = "2" -members = ["src/hyperlight-js", "src/js-host-api", "src/hyperlight-js-runtime"] +members = ["src/hyperlight-js", "src/js-host-api", "src/hyperlight-js-runtime", "src/hyperlight-js-common"] [workspace.package] version = "0.2.5" @@ -11,10 +11,13 @@ repository = "https://github.com/hyperlight-dev/hyperlight-js" readme = "README.md" [workspace.dependencies] +hyperlight-common = { version = "0.15.0", default-features = false } hyperlight-guest-bin = { version = "0.15.0", features = ["libc"] } +hyperlight-guest = { version = "0.15.0" } hyperlight-host = { version = "0.15.0", default-features = false } hyperlight-js = { version = "0.2.5", path = "src/hyperlight-js" } +hyperlight-js-common = { version = "0.2.5", path = "src/hyperlight-js-common" } hyperlight-js-runtime = { version = "0.2.5", path = "src/hyperlight-js-runtime" } [profile.dev] diff --git a/docs/release.md b/docs/release.md index fa20969..0520c38 100644 --- a/docs/release.md +++ b/docs/release.md @@ -51,7 +51,7 @@ When this job is done, a new [GitHub 
release](https://github.com/hyperlight-dev/ This release contains the benchmark results and the source code for the release along with automatically generated release notes. -In addition the hyperlight-js crates will be published to crates.io. You can verify this by going to the [hyperlight-js page on crates.io](https://crates.io/crates/hyperlight-js) and checking that the new version is listed. +In addition, the hyperlight-js crates will be published to crates.io in dependency order (`hyperlight-js-common` → `hyperlight-js-runtime` → `hyperlight-js`). You can verify this by going to the [hyperlight-js page on crates.io](https://crates.io/crates/hyperlight-js) and checking that the new version is listed. The npm packages (`@hyperlight-dev/js-host-api` and platform-specific binaries) are also published automatically as part of this workflow. Publishing uses [npm trusted publishing (OIDC)](https://docs.npmjs.com/trusted-publishers) — no `NPM_TOKEN` secret is needed for the `CreateRelease` workflow. Provenance attestations are generated automatically. diff --git a/src/hyperlight-js-common/Cargo.toml b/src/hyperlight-js-common/Cargo.toml new file mode 100644 index 0000000..decbc97 --- /dev/null +++ b/src/hyperlight-js-common/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "hyperlight-js-common" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true +repository.workspace = true +readme.workspace = true +description = """ +Shared constants and binary framing utilities for hyperlight-js. + +This crate is `no_std`-compatible (with `alloc`) so it can be used by both +the host-side `hyperlight-js` crate and the guest-side `hyperlight-js-runtime` +crate (which compiles for `x86_64-hyperlight-none`). 
+""" + +[dependencies] +# no_std + alloc only — no std, no serde, no anyhow diff --git a/src/hyperlight-js-common/src/lib.rs b/src/hyperlight-js-common/src/lib.rs new file mode 100644 index 0000000..acf8d3d --- /dev/null +++ b/src/hyperlight-js-common/src/lib.rs @@ -0,0 +1,493 @@ +/* +Copyright 2026 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Shared constants and binary framing utilities for hyperlight-js. +//! +//! This crate is the **single source of truth** for the wire-format used to +//! pass binary data (`Uint8Array` / `Buffer`) between guest JavaScript and +//! host functions. Both `hyperlight-js` (host) and `hyperlight-js-runtime` +//! (guest, `no_std`) depend on this crate instead of duplicating the logic. +//! +//! # Wire Format — Binary Sidecar +//! +//! Binary blobs are packed into a length-prefixed sidecar: +//! +//! ```text +//! [count: u32-le] [len0: u32-le] [bytes0...] [len1: u32-le] [bytes1...] ... +//! ``` +//! +//! # Wire Format — Tagged Returns +//! +//! Host function returns use a single-byte tag prefix: +//! - `0x00` + payload → JSON string follows +//! - `0x01` + payload → raw binary follows (single buffer return) +//! - `0x02` + sidecar + JSON → JSON with binary blobs in sidecar +//! +//! The `0x02` tag uses the same sidecar format as arguments: +//! `[TAG_JSON_WITH_BINARIES] [sidecar_len: u32-le] [sidecar...] 
// ── Constants ────────────────────────────────────────────────────────

/// Tag byte indicating the return payload is JSON.
pub const TAG_JSON: u8 = 0x00;

/// Tag byte indicating the return payload is raw binary.
pub const TAG_BINARY: u8 = 0x01;

/// Tag byte indicating the return payload is JSON with an embedded
/// binary sidecar. The format is:
/// `[0x02] [sidecar_len: u32-le] [sidecar_bytes...] [json_bytes...]`
///
/// The JSON may contain `{"__bin__": N}` placeholders that reference
/// blobs in the sidecar, exactly like the argument direction.
pub const TAG_JSON_WITH_BINARIES: u8 = 0x02;

/// JSON key used as a placeholder in serialised arguments to mark the
/// position of a binary blob that has been moved to the sidecar channel.
/// The value is the zero-based index into the sidecar blob array.
///
/// **Reserved key:** Do not use `"__bin__"` as a regular key in JSON
/// data passed through [`FnReturn::JsonWithBinaries`] — it will be
/// interpreted as a binary placeholder.
///
/// Example: `{"__bin__": 0}` means "insert sidecar blob 0 here".
pub const PLACEHOLDER_BIN: &str = "__bin__";

/// Maximum recursion depth for JSON tree traversal.
/// Shared across host and NAPI layers to limit stack usage.
pub const MAX_JSON_DEPTH: usize = 64;

// ── Error type ───────────────────────────────────────────────────────

/// Lightweight encoding/decoding error — `no_std`-compatible (no `anyhow`,
/// no `std`).
///
/// The error carries only a human-readable message, exposed through its
/// [`fmt::Display`] implementation; callers wrap that message in their own
/// error type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecodeError(String);

impl DecodeError {
    /// Create a new decode error with the given message.
    pub fn new(msg: impl Into<String>) -> Self {
        Self(msg.into())
    }
}

impl fmt::Display for DecodeError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.0)
    }
}

// ── Encoding ─────────────────────────────────────────────────────────

/// Converts a count or length to the `u32` used on the wire.
///
/// `what` names the quantity (e.g. `"blob count"`) in the error message.
fn wire_u32(value: usize, what: &str) -> Result<u32, DecodeError> {
    u32::try_from(value).map_err(|_| {
        DecodeError::new(alloc::format!(
            "encode_binaries: {what} ({value}) exceeds u32::MAX"
        ))
    })
}

/// Encodes multiple binary blobs into the sidecar format.
///
/// Format: `[count: u32-le] [len0: u32-le] [bytes0...] [len1: u32-le] [bytes1...] ...`
///
/// Accepts any slice of items that implement `AsRef<[u8]>` — e.g.
/// `&[Vec<u8>]`, `&[&[u8]]`, `&[Box<[u8]>]` — so callers don't need to
/// build an intermediate `Vec<&[u8]>` just to satisfy the signature.
///
/// # Errors
///
/// Returns a [`DecodeError`] if the blob count or any single blob length
/// does not fit in a `u32`, or if the total encoded size overflows `usize`.
/// All of these are detected *before* the output buffer is allocated.
pub fn encode_binaries<B: AsRef<[u8]>>(blobs: &[B]) -> Result<Vec<u8>, DecodeError> {
    let count = wire_u32(blobs.len(), "blob count")?;

    // Total size: 4 bytes for the count + (4-byte length prefix + data) per
    // blob. Every length is validated here, and the sum uses checked
    // arithmetic, so an oversized input fails without first reserving a
    // multi-gigabyte buffer.
    let mut total_size = 4usize;
    for blob in blobs {
        let len = blob.as_ref().len();
        wire_u32(len, "blob length")?;
        total_size = total_size
            .checked_add(4)
            .and_then(|size| size.checked_add(len))
            .ok_or_else(|| {
                DecodeError::new("encode_binaries: total sidecar size overflowed usize")
            })?;
    }

    let mut buf = Vec::with_capacity(total_size);
    buf.extend_from_slice(&count.to_le_bytes());

    for blob in blobs {
        let bytes = blob.as_ref();
        // Re-checked rather than cast: `AsRef` is caller-provided code, so the
        // length seen in the sizing pass is not trusted blindly here.
        let len = wire_u32(bytes.len(), "blob length")?;
        buf.extend_from_slice(&len.to_le_bytes());
        buf.extend_from_slice(bytes);
    }

    Ok(buf)
}

/// Encodes a JSON return value with the appropriate tag.
///
/// Format: `[TAG_JSON] [json...]`
pub fn encode_json_return(json: &str) -> Vec<u8> {
    let mut buf = Vec::with_capacity(1 + json.len());
    buf.push(TAG_JSON);
    buf.extend_from_slice(json.as_bytes());
    buf
}

/// Encodes a binary return value with the appropriate tag.
///
/// Format: `[TAG_BINARY] [bytes...]`
pub fn encode_binary_return(data: &[u8]) -> Vec<u8> {
    let mut buf = Vec::with_capacity(1 + data.len());
    buf.push(TAG_BINARY);
    buf.extend_from_slice(data);
    buf
}

/// Encodes a JSON return value that contains binary sidecar data.
///
/// Format: `[TAG_JSON_WITH_BINARIES] [sidecar_len: u32-le] [sidecar...] [json...]`
///
/// The `sidecar` should be the output of [`encode_binaries`] and the
/// `json` string should contain `{"__bin__": N}` placeholders that
/// reference blobs in the sidecar.
///
/// # Errors
///
/// Returns an error if the sidecar length exceeds `u32::MAX`.
pub fn encode_json_with_binaries_return(
    json: &str,
    sidecar: &[u8],
) -> Result<Vec<u8>, DecodeError> {
    let sidecar_len = u32::try_from(sidecar.len())
        .map_err(|_| DecodeError::new("sidecar length exceeds u32::MAX"))?;
    // 1 (tag) + 4 (sidecar len) + sidecar + json
    let mut buf = Vec::with_capacity(1 + 4 + sidecar.len() + json.len());
    buf.push(TAG_JSON_WITH_BINARIES);
    buf.extend_from_slice(&sidecar_len.to_le_bytes());
    buf.extend_from_slice(sidecar);
    buf.extend_from_slice(json.as_bytes());
    Ok(buf)
}

// ── Decoding ─────────────────────────────────────────────────────────

/// Decodes the sidecar format into individual binary blobs.
///
/// # Errors
///
/// Returns a [`DecodeError`] if the buffer is malformed: too short for the
/// count header, a blob count larger than the buffer could possibly hold,
/// a truncated length header or blob body, or trailing bytes after the
/// last blob.
pub fn decode_binaries(data: &[u8]) -> Result<Vec<Vec<u8>>, DecodeError> {
    if data.len() < 4 {
        return Err(DecodeError::new(
            "Binary sidecar too short for count header",
        ));
    }

    let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;

    // Sanity check: each blob needs at least 4 bytes for its length header.
    // This prevents allocation of a huge Vec when count is maliciously large.
    let max_possible_blobs = (data.len().saturating_sub(4)) / 4;
    if count > max_possible_blobs {
        return Err(DecodeError::new(alloc::format!(
            "Binary sidecar count ({count}) exceeds maximum possible ({max_possible_blobs})"
        )));
    }

    let mut offset: usize = 4;
    let mut blobs = Vec::with_capacity(count);

    for i in 0..count {
        let header_end = offset.checked_add(4).ok_or_else(|| {
            DecodeError::new(alloc::format!(
                "Binary sidecar offset overflow at blob {i} length header"
            ))
        })?;
        if header_end > data.len() {
            return Err(DecodeError::new(alloc::format!(
                "Binary sidecar truncated at blob {i} length header"
            )));
        }

        let len = u32::from_le_bytes([
            data[offset],
            data[offset + 1],
            data[offset + 2],
            data[offset + 3],
        ]) as usize;
        offset = header_end;

        let blob_end = offset.checked_add(len).ok_or_else(|| {
            DecodeError::new(alloc::format!(
                "Binary sidecar offset overflow at blob {i} data"
            ))
        })?;
        if blob_end > data.len() {
            return Err(DecodeError::new(alloc::format!(
                "Binary sidecar truncated at blob {i} data (need {len} bytes, have {})",
                data.len() - offset
            )));
        }

        blobs.push(data[offset..blob_end].to_vec());
        offset = blob_end;
    }

    // Reject trailing data — the sidecar should be fully consumed.
    // Trailing bytes could indicate a version mismatch or corruption.
    if offset != data.len() {
        return Err(DecodeError::new(alloc::format!(
            "Binary sidecar has {} trailing bytes after all {count} blobs",
            data.len() - offset
        )));
    }

    Ok(blobs)
}

/// Result of decoding a tagged return value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FnReturn {
    /// JSON string payload (no embedded binary data).
    Json(String),
    /// Raw binary payload (single buffer return).
    Binary(Vec<u8>),
    /// JSON string payload with binary sidecar.
    ///
    /// The JSON contains `{"__bin__": N}` placeholders referencing
    /// blobs in the sidecar `Vec<u8>` (packed with [`encode_binaries`]).
    /// The sidecar is returned still packed; use [`decode_binaries`] to
    /// split it into individual blobs.
    JsonWithBinaries(String, Vec<u8>),
}

/// Decodes a tagged return value from the host.
///
/// The first byte is a tag ([`TAG_JSON`], [`TAG_BINARY`] or
/// [`TAG_JSON_WITH_BINARIES`]); the rest is the payload.
///
/// # Errors
///
/// Returns a [`DecodeError`] if `data` is empty, the tag is unknown, a JSON
/// payload is not valid UTF-8, or a `TAG_JSON_WITH_BINARIES` payload is too
/// short for its sidecar length header or for the sidecar it announces.
pub fn decode_return(data: &[u8]) -> Result<FnReturn, DecodeError> {
    if data.is_empty() {
        return Err(DecodeError::new("Empty return payload"));
    }

    match data[0] {
        TAG_JSON => {
            let json = core::str::from_utf8(&data[1..]).map_err(|e| {
                DecodeError::new(alloc::format!("Invalid UTF-8 in JSON return: {e}"))
            })?;
            Ok(FnReturn::Json(json.into()))
        }
        TAG_BINARY => Ok(FnReturn::Binary(data[1..].to_vec())),
        TAG_JSON_WITH_BINARIES => {
            // [0x02] [sidecar_len: u32-le] [sidecar...] [json...]
            if data.len() < 5 {
                return Err(DecodeError::new(
                    "JSON-with-binaries return too short for sidecar length header",
                ));
            }
            let sidecar_len = u32::from_le_bytes([data[1], data[2], data[3], data[4]]) as usize;
            let sidecar_end = 5usize.checked_add(sidecar_len).ok_or_else(|| {
                DecodeError::new("JSON-with-binaries sidecar length overflows usize")
            })?;
            if data.len() < sidecar_end {
                return Err(DecodeError::new(alloc::format!(
                    "JSON-with-binaries return truncated: need {sidecar_end} bytes, have {}",
                    data.len()
                )));
            }
            let sidecar = data[5..sidecar_end].to_vec();
            let json = core::str::from_utf8(&data[sidecar_end..]).map_err(|e| {
                DecodeError::new(alloc::format!(
                    "Invalid UTF-8 in JSON-with-binaries return: {e}"
                ))
            })?;
            Ok(FnReturn::JsonWithBinaries(json.into(), sidecar))
        }
        tag => Err(DecodeError::new(alloc::format!(
            "Unknown return tag: 0x{tag:02x}"
        ))),
    }
}
super::*; + + #[test] + fn test_encode_decode_empty() { + let encoded = encode_binaries::<&[u8]>(&[]).unwrap(); + assert_eq!(encoded, vec![0, 0, 0, 0]); // count = 0 + + let decoded = decode_binaries(&encoded).unwrap(); + assert!(decoded.is_empty()); + } + + #[test] + fn test_encode_decode_single() { + let blob = b"hello"; + let encoded = encode_binaries(&[blob]).unwrap(); + + // count=1, len=5, "hello" + let expected: Vec = vec![1, 0, 0, 0, 5, 0, 0, 0, b'h', b'e', b'l', b'l', b'o']; + assert_eq!(encoded, expected); + + let decoded = decode_binaries(&encoded).unwrap(); + assert_eq!(decoded, vec![b"hello".to_vec()]); + } + + #[test] + fn test_encode_decode_multiple() { + let blobs: &[&[u8]] = &[b"abc", b"", b"xy"]; + let encoded = encode_binaries(blobs).unwrap(); + + let decoded = decode_binaries(&encoded).unwrap(); + assert_eq!(decoded, vec![b"abc".to_vec(), b"".to_vec(), b"xy".to_vec()]); + } + + #[test] + fn test_encode_decode_vec_of_vecs() { + let blobs: Vec> = vec![b"ABC".to_vec(), b"XY".to_vec()]; + let encoded = encode_binaries(&blobs).unwrap(); + + let decoded = decode_binaries(&encoded).unwrap(); + assert_eq!(decoded, blobs); + } + + #[test] + fn test_decode_truncated_count() { + let result = decode_binaries(&[1, 2, 3]); + assert!(result.is_err()); + } + + #[test] + fn test_decode_truncated_length() { + // count=1 but no length header + let result = decode_binaries(&[1, 0, 0, 0]); + assert!(result.is_err()); + } + + #[test] + fn test_decode_truncated_data() { + // count=1, len=10 but only 3 bytes of data + let result = decode_binaries(&[1, 0, 0, 0, 10, 0, 0, 0, 1, 2, 3]); + assert!(result.is_err()); + } + + #[test] + fn test_decode_trailing_data() { + // Valid sidecar with one blob "abc" followed by trailing garbage + let mut data = encode_binaries(&[b"abc" as &[u8]]).unwrap(); + data.push(0xFF); // trailing byte + let result = decode_binaries(&data); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("trailing")); + } + + 
#[test] + fn test_return_json() { + let json = r#"{"result":42}"#; + let encoded = encode_json_return(json); + assert_eq!(encoded[0], TAG_JSON); + + match decode_return(&encoded).unwrap() { + FnReturn::Json(s) => assert_eq!(s, json), + _ => panic!("Expected JSON return"), + } + } + + #[test] + fn test_return_binary() { + let data = b"\x00\x01\x02\xff"; + let encoded = encode_binary_return(data); + assert_eq!(encoded[0], TAG_BINARY); + + match decode_return(&encoded).unwrap() { + FnReturn::Binary(b) => assert_eq!(b, data), + _ => panic!("Expected binary return"), + } + } + + #[test] + fn test_return_empty() { + let result = decode_return(&[]); + assert!(result.is_err()); + } + + #[test] + fn test_return_unknown_tag() { + let result = decode_return(&[0x99, 1, 2, 3]); + assert!(result.is_err()); + } + + #[test] + fn test_return_json_with_binaries() { + let json = r#"{"data":{"__bin__":0}}"#; + let sidecar = encode_binaries(&[b"hello" as &[u8]]).unwrap(); + let encoded = encode_json_with_binaries_return(json, &sidecar).unwrap(); + assert_eq!(encoded[0], TAG_JSON_WITH_BINARIES); + + match decode_return(&encoded).unwrap() { + FnReturn::JsonWithBinaries(j, s) => { + assert_eq!(j, json); + // Verify the sidecar round-trips correctly + let blobs = decode_binaries(&s).unwrap(); + assert_eq!(blobs, vec![b"hello".to_vec()]); + } + _ => panic!("Expected JsonWithBinaries return"), + } + } + + #[test] + fn test_return_json_with_binaries_empty_sidecar() { + let json = r#"{"result":42}"#; + let sidecar = encode_binaries::<&[u8]>(&[]).unwrap(); + let encoded = encode_json_with_binaries_return(json, &sidecar).unwrap(); + + match decode_return(&encoded).unwrap() { + FnReturn::JsonWithBinaries(j, s) => { + assert_eq!(j, json); + let blobs = decode_binaries(&s).unwrap(); + assert!(blobs.is_empty()); + } + _ => panic!("Expected JsonWithBinaries return"), + } + } + + #[test] + fn test_return_json_with_binaries_truncated() { + // Tag + only 3 bytes (need 4 for sidecar length) + let result 
= decode_return(&[TAG_JSON_WITH_BINARIES, 1, 2, 3]); + assert!(result.is_err()); + } + + #[test] + fn test_decode_error_display() { + let err = DecodeError::new("something went wrong"); + assert_eq!(err.to_string(), "something went wrong"); + } +} diff --git a/src/hyperlight-js-runtime/Cargo.toml b/src/hyperlight-js-runtime/Cargo.toml index 2c4f7a1..9b9867d 100644 --- a/src/hyperlight-js-runtime/Cargo.toml +++ b/src/hyperlight-js-runtime/Cargo.toml @@ -16,6 +16,7 @@ harness = false test = false [dependencies] +hyperlight-js-common = { workspace = true } anyhow = { version = "1.0", default-features = false } base64 = {version = "0.22", default-features = false, features = ["alloc"] } fn-traits = "0.2.0" @@ -30,6 +31,8 @@ spin = "0.12" tracing = { version = "0.1.44", default-features = false, features = ["log","attributes","max_level_trace"] } [target.'cfg(hyperlight)'.dependencies] +hyperlight-common = { workspace = true, default-features = false } +hyperlight-guest = { workspace = true } hyperlight-guest-bin = { workspace = true } [target.'cfg(not(hyperlight))'.dependencies] @@ -44,7 +47,7 @@ bindgen = "0.72" [features] default = [] -trace_guest = ["hyperlight-guest-bin/trace_guest"] +trace_guest = ["hyperlight-common/trace_guest", "hyperlight-guest/trace_guest", "hyperlight-guest-bin/trace_guest"] [lints.rust] unexpected_cfgs = { level = "allow", check-cfg = ['cfg(hyperlight)'] } diff --git a/src/hyperlight-js-runtime/src/host_fn.rs b/src/hyperlight-js-runtime/src/host_fn.rs index a342126..2df2f5f 100644 --- a/src/hyperlight-js-runtime/src/host_fn.rs +++ b/src/hyperlight-js-runtime/src/host_fn.rs @@ -17,17 +17,20 @@ use alloc::format; use alloc::rc::Rc; use alloc::string::{String, ToString as _}; use alloc::sync::Arc; +use alloc::vec::Vec; use core::cell::{Ref, RefCell, RefMut}; use core::ptr::NonNull; use anyhow::{bail, ensure, Context as _}; use hashbrown::HashMap; +use hyperlight_js_common::{FnReturn, MAX_JSON_DEPTH, PLACEHOLDER_BIN}; use 
rquickjs::loader::{ImportAttributes, Loader, Resolver}; use rquickjs::module::{Declarations, Exports, ModuleDef}; use rquickjs::prelude::Rest; -use rquickjs::{Ctx, Exception, Function, JsLifetime, Module, Value}; +use rquickjs::{Array, Ctx, Exception, Function, JsLifetime, Module, TypedArray, Value}; use serde::de::DeserializeOwned; use serde::Serialize; +use serde_json::json; /// A clone of rquickjs::Module so that we can access the ctx from it by transmuting. struct NakedModule<'js> { @@ -87,6 +90,241 @@ where f } +/// Checks if a JS value is a Uint8Array and extracts its bytes. +fn try_extract_uint8array(value: &Value<'_>) -> Option> { + let obj = value.as_object()?; + let typed_array = obj.as_typed_array::()?; + typed_array.as_bytes().map(|b| b.to_vec()) +} + +/// Recursively processes a JS value, extracting binary data and replacing with placeholders. +/// Returns a serde_json::Value with placeholders and collects binary blobs. +fn value_to_json_with_binaries<'js>( + ctx: &Ctx<'js>, + value: Value<'js>, + binaries: &mut Vec>, + depth: usize, +) -> anyhow::Result { + if depth > MAX_JSON_DEPTH { + anyhow::bail!("JSON nesting depth exceeds maximum ({MAX_JSON_DEPTH})"); + } + + // Check for Uint8Array first + if let Some(bytes) = try_extract_uint8array(&value) { + let index = binaries.len(); + binaries.push(bytes); + return Ok(json!({PLACEHOLDER_BIN: index})); + } + + // Handle null/undefined + if value.is_null() || value.is_undefined() { + return Ok(serde_json::Value::Null); + } + + // Handle booleans + if let Some(b) = value.as_bool() { + return Ok(serde_json::Value::Bool(b)); + } + + // Handle numbers + // QuickJS stores numbers as doubles internally but optimises small + // integers into SMIs. We check as_int() first for integer fidelity, + // falling back to as_float() for all other numeric values. + // For floats that represent whole numbers (e.g. 
42.0 from JSON.parse), + // we emit them as integers to match JSON.stringify behaviour and + // preserve serde integer deserialization on the host side. + if let Some(n) = value.as_int() { + return Ok(serde_json::Value::Number(n.into())); + } + if let Some(n) = value.as_float() { + // Handle NaN and Infinity as null (like JSON.stringify) + if n.is_finite() { + // If the float is a whole number that fits in i64, emit as integer + // to match JSON.stringify behaviour (42.0 → 42, not 42.0) + if n == (n as i64) as f64 && n >= i64::MIN as f64 && n <= i64::MAX as f64 { + return Ok(serde_json::Value::Number((n as i64).into())); + } + if let Some(num) = serde_json::Number::from_f64(n) { + return Ok(serde_json::Value::Number(num)); + } + } + return Ok(serde_json::Value::Null); + } + + // Handle strings + if let Some(s) = value.as_string() { + let s = s.to_string()?; + return Ok(serde_json::Value::String(s)); + } + + // Handle arrays + if let Some(array) = value.as_array() { + let mut json_array = Vec::with_capacity(array.len()); + for item in array.iter::() { + let item = item?; + json_array.push(value_to_json_with_binaries(ctx, item, binaries, depth + 1)?); + } + return Ok(serde_json::Value::Array(json_array)); + } + + // Handle objects + if let Some(obj) = value.as_object() { + // Check for toJSON() method — matches JSON.stringify behaviour + // (e.g., Date.toJSON() returns an ISO string, not {}). + if let Ok(to_json) = obj.get::<_, Function>("toJSON") { + let result: Value = to_json.call((rquickjs::function::This(obj.clone()),))?; + return value_to_json_with_binaries(ctx, result, binaries, depth + 1); + } + + let mut json_obj = serde_json::Map::new(); + for entry in obj.props::() { + let (key, val) = entry?; + json_obj.insert( + key, + value_to_json_with_binaries(ctx, val, binaries, depth + 1)?, + ); + } + return Ok(serde_json::Value::Object(json_obj)); + } + + // Fallback: use JSON.stringify for anything else + let json_str = ctx + .json_stringify(value)? 
+ .map(|s| s.to_string()) + .transpose()? + .unwrap_or_else(|| "null".into()); + let parsed: serde_json::Value = serde_json::from_str(&json_str)?; + Ok(parsed) +} + +/// Extracts binary data from JS arguments, replacing with placeholders. +/// Returns the JSON string with placeholders and the collected binary blobs. +fn extract_binaries<'js>( + ctx: &Ctx<'js>, + args: Vec>, +) -> anyhow::Result<(String, Vec>)> { + let mut binaries = Vec::new(); + let mut json_args = Vec::with_capacity(args.len()); + + for arg in args { + json_args.push(value_to_json_with_binaries(ctx, arg, &mut binaries, 0)?); + } + + let json = serde_json::to_string(&json_args)?; + Ok((json, binaries)) +} + +/// Converts a `serde_json::Value` tree into an `rquickjs::Value`, delegating +/// binary-marker resolution to `resolve_binary`. +/// +/// When the traversal encounters a JSON object, it first calls +/// `resolve_binary(&obj, ctx)`. If the closure returns `Ok(Some(value))` the +/// object is replaced with that JS value (typically a `Uint8Array`). If it +/// returns `Ok(None)` the object is treated as a regular JS object. +/// +/// This keeps the conversion logic in one place while allowing different +/// strategies for resolving binary markers (`__bin__` placeholder path). 
+fn json_to_js_value<'js, F>( + ctx: &Ctx<'js>, + value: serde_json::Value, + resolve_binary: &F, + depth: usize, +) -> anyhow::Result> +where + F: Fn( + &serde_json::Map, + &Ctx<'js>, + ) -> anyhow::Result>>, +{ + if depth > MAX_JSON_DEPTH { + anyhow::bail!("JSON nesting depth exceeds maximum ({MAX_JSON_DEPTH})"); + } + + match value { + serde_json::Value::Null => Ok(Value::new_null(ctx.clone())), + serde_json::Value::Bool(b) => Ok(Value::new_bool(ctx.clone(), b)), + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() + && let Ok(i32_val) = i32::try_from(i) + { + Ok(Value::new_int(ctx.clone(), i32_val)) + } else if let Some(f) = n.as_f64() { + Ok(Value::new_float(ctx.clone(), f)) + } else { + Ok(Value::new_null(ctx.clone())) + } + } + serde_json::Value::String(s) => { + let js_str = rquickjs::String::from_str(ctx.clone(), &s)?; + Ok(js_str.into_value()) + } + serde_json::Value::Array(arr) => { + let js_array = Array::new(ctx.clone())?; + for (i, item) in arr.into_iter().enumerate() { + let js_item = json_to_js_value(ctx, item, resolve_binary, depth + 1)?; + js_array.set(i, js_item)?; + } + Ok(js_array.into_value()) + } + serde_json::Value::Object(obj) => { + // Let the caller decide if this object is a binary marker. + if let Some(resolved) = resolve_binary(&obj, ctx)? { + return Ok(resolved); + } + // Regular object + let js_obj = rquickjs::Object::new(ctx.clone())?; + for (key, val) in obj { + let js_val = json_to_js_value(ctx, val, resolve_binary, depth + 1)?; + js_obj.set(&key, js_val)?; + } + Ok(js_obj.into_value()) + } + } +} + +/// Converts a serde_json Value to a rquickjs Value without any +/// binary marker resolution (plain JSON objects pass through as-is). 
+fn json_to_plain_value<'js>( + ctx: &Ctx<'js>, + value: serde_json::Value, + depth: usize, +) -> anyhow::Result> { + json_to_js_value(ctx, value, &|_obj, _ctx| Ok(None), depth) +} + +/// Converts a serde_json Value to a rquickjs Value, resolving `{"__bin__": N}` +/// placeholders from the provided binary blobs (optimised sidecar path). +fn json_to_value_with_blobs<'js>( + ctx: &Ctx<'js>, + value: serde_json::Value, + blobs: &[Vec], + depth: usize, +) -> anyhow::Result> { + json_to_js_value( + ctx, + value, + &|obj: &serde_json::Map, ctx: &Ctx<'js>| { + // Check for {"__bin__": N} placeholder + if obj.len() == 1 + && let Some(serde_json::Value::Number(n)) = obj.get(PLACEHOLDER_BIN) + && let Some(idx) = n.as_u64() + { + let idx = idx as usize; + if idx < blobs.len() { + let array = TypedArray::::new(ctx.clone(), blobs[idx].clone())?; + return Ok(Some(array.into_value())); + } + anyhow::bail!( + "Binary placeholder index {idx} out of bounds (have {} blobs)", + blobs.len() + ); + } + Ok(None) + }, + depth, + ) +} + /// A `ModuleDef` implementation that can be used to declare and evaluate host modules. /// This module will look up the module name in the ctx userdata and declare/evaluate /// the functions in the module accordingly. @@ -129,7 +367,7 @@ impl ModuleDef for HostModuleDef { let module: &Module = unsafe { core::mem::transmute(exports) }; let module_name: String = module.name()?; - // We don't have access to self in this function, so we can pass rich data to this function. + // We don't have access to self in this function, so we can't pass rich data to this function. // Instead, we use a userdata in the context to get the list of functions to export. 
let Some(loader) = ctx.userdata::() else { return Err(Exception::throw_internal(ctx, "HostModuleLoader not found")); @@ -176,7 +414,10 @@ impl HostFunction { func(ctx, args).map_err(|e| match e.downcast::() { Ok(e) => e, Err(e) => { - Exception::throw_internal(ctx, &format!("Host function error: {e:#?}")) + // Use Display chain ({e:#}) instead of Debug struct + // ({e:#?}) to keep the message compact and avoid + // truncation at the hyperlight guest↔host boundary. + Exception::throw_internal(ctx, &format!("Host function error: {e:#}")) } }) }, @@ -188,6 +429,11 @@ impl HostFunction { /// /// This is useful for hyperlight, where we use JSON as the serialization format for communication /// with the host. + /// + /// **Note:** This variant does not support `Uint8Array`/`Buffer` arguments — + /// QuickJS's `JSON.stringify` will serialize them as plain objects with numeric + /// keys (e.g. `{ "0": 1, "1": 2 }`), not as binary blobs. Use [`new_bin`](Self::new_bin) + /// for functions that handle binary data. pub fn new_json(func: impl Fn(String) -> anyhow::Result + 'static) -> Self { Self::new( move |ctx: &Ctx, args: Rest| -> anyhow::Result { @@ -202,6 +448,62 @@ impl HostFunction { ) } + /// Create a new `HostFunction` from a closure that supports binary data. + /// + /// This variant detects `Uint8Array` arguments and passes them + /// through a sidecar binary channel instead of JSON-encoding them. The JSON + /// contains `{"__bin__": N}` placeholders that reference the sidecar blobs. 
+ /// + /// The closure receives: + /// - `args_json`: JSON string with placeholders for binary arguments + /// - `binaries`: Packed binary sidecar (length-prefixed format) + /// + /// The closure returns a tagged result: + /// - `0x00` + JSON = JSON return value + /// - `0x01` + bytes = raw binary return (becomes `Uint8Array` on JS side) + pub fn new_bin(func: impl Fn(String, Vec) -> anyhow::Result> + 'static) -> Self { + Self::new( + move |ctx: &Ctx, args: Rest| -> anyhow::Result { + // Extract binary blobs and replace with placeholders + let (json_args, binaries) = extract_binaries(ctx, args.into_inner())?; + + // Encode binaries into sidecar format — encode_binaries + // accepts &[Vec] directly, no intermediate Vec<&[u8]> needed + let packed = hyperlight_js_common::encode_binaries(&binaries) + .map_err(|e| anyhow::anyhow!("{e}"))?; + + // Call the host function + let result = func(json_args, packed).context("Calling binary host function")?; + + // Decode the tagged return value + match hyperlight_js_common::decode_return(&result) + .map_err(|e| anyhow::anyhow!("{e}"))? + { + FnReturn::Json(json) => { + // Plain JSON return — no binary markers to resolve. + let json_value: serde_json::Value = + serde_json::from_str(&json).context("Parsing JSON return from host")?; + json_to_plain_value(ctx, json_value, 0) + } + FnReturn::JsonWithBinaries(json, sidecar) => { + // Optimised sidecar path: decode blobs and resolve + // {"__bin__": N} placeholders by index lookup. 
+ let blobs = hyperlight_js_common::decode_binaries(&sidecar) + .map_err(|e| anyhow::anyhow!("{e}"))?; + let json_value: serde_json::Value = serde_json::from_str(&json) + .context("Parsing JSON-with-binaries return from host")?; + json_to_value_with_blobs(ctx, json_value, &blobs, 0) + } + FnReturn::Binary(data) => { + // Create a Uint8Array from the binary data + let array = TypedArray::::new(ctx.clone(), data)?; + Ok(array.into_value()) + } + } + }, + ) + } + /// Create a new `HostFunction` from a closure that takes and returns any type that can be /// serialized by serde. /// diff --git a/src/hyperlight-js-runtime/src/lib.rs b/src/hyperlight-js-runtime/src/lib.rs index 588065b..961d7f5 100644 --- a/src/hyperlight-js-runtime/src/lib.rs +++ b/src/hyperlight-js-runtime/src/lib.rs @@ -26,6 +26,7 @@ pub(crate) mod utils; use alloc::format; use alloc::rc::Rc; use alloc::string::{String, ToString}; +use alloc::vec::Vec; use anyhow::{anyhow, Context as _}; use hashbrown::HashMap; @@ -99,44 +100,54 @@ impl JsRuntime { } /// Register a host function in the specified module. - /// The function takes and returns a JSON string, which is deserialized and serialized by the runtime. - /// The arguments are serialized as a JSON array containing all the arguments passed to the function. - pub fn register_json_host_function( + /// The function takes and returns any type that can be (de)serialized by `serde`. + pub fn register_host_function( &mut self, module_name: impl Into, function_name: impl Into, - function: impl Fn(String) -> anyhow::Result + 'static, - ) -> anyhow::Result<()> { + function: impl fn_traits::Fn> + 'static, + ) -> anyhow::Result<()> + where + Args: DeserializeOwned, + Output: Serialize, + { self.context.with(|ctx| { ctx.userdata::() .context("HostModuleLoader not found in context")? 
.borrow_mut() .entry(module_name.into()) .or_default() - .add_function(function_name.into(), HostFunction::new_json(function)); + .add_function(function_name.into(), HostFunction::new_serde(function)); Ok(()) }) } - /// Register a host function in the specified module. - /// The function takes and returns any type that can be (de)serialized by `serde`. - pub fn register_host_function( + /// Register a binary-capable host function in the specified module. + /// + /// This variant supports `Uint8Array` arguments and returns. + /// Binary data is passed via a sidecar channel instead of JSON encoding, + /// avoiding base64 overhead. + /// + /// The function receives: + /// - `args_json`: JSON string with `{"__bin__": N}` placeholders for binary args + /// - `binaries`: Packed binary sidecar (length-prefixed format) + /// + /// The function returns a tagged result: + /// - `0x00` + JSON = JSON return value + /// - `0x01` + bytes = raw binary return (becomes `Uint8Array` on JS side) + pub fn register_binary_host_function( &mut self, module_name: impl Into, function_name: impl Into, - function: impl fn_traits::Fn> + 'static, - ) -> anyhow::Result<()> - where - Args: DeserializeOwned, - Output: Serialize, - { + function: impl Fn(String, Vec) -> anyhow::Result> + 'static, + ) -> anyhow::Result<()> { self.context.with(|ctx| { ctx.userdata::() .context("HostModuleLoader not found in context")? 
.borrow_mut() .entry(module_name.into()) .or_default() - .add_function(function_name.into(), HostFunction::new_serde(function)); + .add_function(function_name.into(), HostFunction::new_bin(function)); Ok(()) }) } diff --git a/src/hyperlight-js-runtime/src/main/hyperlight.rs b/src/hyperlight-js-runtime/src/main/hyperlight.rs index 0379468..917a022 100644 --- a/src/hyperlight-js-runtime/src/main/hyperlight.rs +++ b/src/hyperlight-js-runtime/src/main/hyperlight.rs @@ -90,7 +90,12 @@ fn register_handler( } #[host_function("CallHostJsFunction")] -fn call_host_js_function(module_name: String, func_name: String, args: String) -> Result; +fn call_host_js_function( + module_name: String, + func_name: String, + args_json: String, + binaries: Vec, +) -> Result>; #[guest_function("RegisterHostModules")] fn register_host_modules(host_modules_json: String) -> Result<()> { @@ -109,12 +114,28 @@ fn register_host_modules(host_modules_json: String) -> Result<()> { for (module_name, functions) in host_modules { for function_name in functions { let module_name = module_name.clone(); - runtime.register_json_host_function( + // Register binary-capable host function that can handle Uint8Array/Buffer + runtime.register_binary_host_function( module_name.clone(), function_name.clone(), - move |args: String| -> anyhow::Result { - call_host_js_function(module_name.clone(), function_name.clone(), args) - .map_err(|e| anyhow!("Calling host function {module_name:?} {function_name:?} failed: {e:#?}")) + move |args_json: String, binaries: Vec| -> anyhow::Result> { + call_host_js_function( + module_name.clone(), + function_name.clone(), + args_json, + binaries, + ) + .map_err(|e| { + // Use e.message directly — {e:#?} would expand into a + // huge Debug struct that exceeds the hyperlight + // guest↔host error buffer and gets truncated. + // Include the error kind for diagnostics. 
+ anyhow!( + "Calling host function {module_name:?} {function_name:?} failed ({:?}): {}", + e.kind, + e.message + ) + }) }, )?; } diff --git a/src/hyperlight-js/Cargo.toml b/src/hyperlight-js/Cargo.toml index 3cc1584..ca27545 100644 --- a/src/hyperlight-js/Cargo.toml +++ b/src/hyperlight-js/Cargo.toml @@ -14,6 +14,7 @@ It is built on top of Hyperlight. # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +hyperlight-js-common = { workspace = true } anyhow = "1.0.102" fn-traits = "0.2.0" hyperlight-host = { workspace = true } diff --git a/src/hyperlight-js/src/lib.rs b/src/hyperlight-js/src/lib.rs index 05cb209..c6e4604 100644 --- a/src/hyperlight-js/src/lib.rs +++ b/src/hyperlight-js/src/lib.rs @@ -27,6 +27,9 @@ mod script; pub mod sandbox; use hyperlight_host::func::HostFunction; +// Re-export FnReturn for the NAPI bridge (used in register_js signature). +#[doc(hidden)] +pub use sandbox::host_fn::FnReturn; /// A Hyperlight Sandbox with a JavaScript run time loaded but no guest code. pub use sandbox::js_sandbox::JSSandbox; /// A Hyperlight Sandbox with a JavaScript run time loaded and guest code loaded. diff --git a/src/hyperlight-js/src/sandbox/host_fn.rs b/src/hyperlight-js/src/sandbox/host_fn.rs index 46a1e97..a283471 100644 --- a/src/hyperlight-js/src/sandbox/host_fn.rs +++ b/src/hyperlight-js/src/sandbox/host_fn.rs @@ -18,6 +18,7 @@ use std::collections::HashMap; use serde::de::DeserializeOwned; use serde::ser::SerializeSeq; use serde::Serialize; +use serde_json::Value as JsonValue; // Unlike hyperlight-host's Function, this Function trait uses `serde`'s Serialize and DeserializeOwned traits for input and output types. @@ -48,22 +49,57 @@ where } } -type BoxFunction = Box crate::Result + Send + Sync>; +type JsonFn = std::sync::Arc crate::Result + Send + Sync>; + +/// Re-export the unified return type from the common crate. 
+pub use hyperlight_js_common::FnReturn; + +/// The closure type for JS bridge host functions. +/// +/// Receives the parsed JSON arguments (with `{"__bin__": N}` placeholders +/// still in place) and the decoded individual binary blobs. This avoids a +/// redundant stringify→parse round-trip that would occur if we passed a +/// pre-processed JSON string. +type BinaryFn = + std::sync::Arc>) -> crate::Result + Send + Sync>; + +/// A registered host function — either typed (serde) or JS bridge. +/// +/// This enum allows a single `HashMap` to store both variants, eliminating +/// the need for parallel maps and cross-removal bookkeeping. +#[derive(Clone)] +enum HostFn { + /// Typed: receives a JSON args string, deserializes via serde, + /// returns a JSON result string. Does not support binary args. + Typed(JsonFn), + /// JS bridge: receives parsed JSON args + binary blobs, returns a + /// tagged result (JSON or binary). + JsBridge(BinaryFn), +} fn type_erased( func: impl Function + Send + Sync + 'static, -) -> BoxFunction { - Box::new(move |args: String| { +) -> JsonFn { + std::sync::Arc::new(move |args: String| { let args: Args = serde_json::from_str(&args)?; let output: Output = func.call(args); Ok(serde_json::to_string(&output)?) }) } +/// Decodes the sidecar binary format into individual blobs. +/// +/// Thin wrapper around [`hyperlight_js_common::decode_binaries`] that maps +/// the common crate's `DecodeError` into the host's `HyperlightError`. +pub(crate) fn decode_binaries(data: &[u8]) -> crate::Result>> { + hyperlight_js_common::decode_binaries(data) + .map_err(|e| crate::HyperlightError::Error(e.to_string())) +} + /// A module containing host functions that can be called from the guest JavaScript code. 
-#[derive(Default)] +#[derive(Default, Clone)] pub struct HostModule { - functions: HashMap, + functions: HashMap, } // The serialization of this struct has to match the deserialization in @@ -79,7 +115,18 @@ impl Serialize for HostModule { } impl HostModule { - /// Register a host function that can be called from the guest JavaScript code. + /// Register a typed host function that can be called from the guest + /// JavaScript code. + /// + /// Arguments are deserialized from JSON via serde and the return value + /// is serialized back to JSON automatically. + /// + /// This variant does **not** support `Uint8Array`/`Buffer` arguments. + /// For binary data support, use the JS bridge API instead. + /// + /// ```text + /// module.register("add", |a: i32, b: i32| a + b); + /// ``` /// /// Registering a function with the same `name` as an existing function /// overwrites the previous registration. @@ -88,32 +135,148 @@ impl HostModule { name: impl Into, func: impl Function + Send + Sync + 'static, ) -> &mut Self { - self.functions.insert(name.into(), type_erased(func)); + self.functions + .insert(name.into(), HostFn::Typed(type_erased(func))); self } - /// Register a raw host function that operates on JSON strings directly. - /// - /// Unlike [`register`](Self::register), which handles serde serialization / - /// deserialization automatically via the [`Function`] trait, this method - /// passes the raw JSON string argument from the guest to the closure and - /// expects a JSON string result. + /// Register a host function for the JavaScript bridge (NAPI layer). /// - /// This is primarily intended for dynamic / bridge scenarios (e.g. NAPI - /// bindings) where argument types are not known at compile time. + /// This is an internal API used by the `js-host-api` NAPI bridge. + /// Rust users who do not need binary data should use [`register`](Self::register) + /// instead; `register` is JSON-only and rejects `Uint8Array`/`Buffer` arguments.
/// - /// Registering a function with the same `name` as an existing function - /// overwrites the previous registration. - pub fn register_raw( + /// The closure receives parsed `JsonValue` args and decoded binary + /// blobs directly. Return [`FnReturn::Json`], [`FnReturn::Binary`] or [`FnReturn::JsonWithBinaries`]. + #[doc(hidden)] + pub fn register_js( &mut self, name: impl Into, - func: impl Fn(String) -> crate::Result + Send + Sync + 'static, + func: impl Fn(JsonValue, Vec>) -> crate::Result + Send + Sync + 'static, ) -> &mut Self { - self.functions.insert(name.into(), Box::new(func)); + self.functions + .insert(name.into(), HostFn::JsBridge(std::sync::Arc::new(func))); self } - pub(crate) fn get(&self, name: &str) -> Option<&BoxFunction> { - self.functions.get(name) + /// Dispatch a guest→host function call. + /// + /// Decodes the binary sidecar (if present) and routes to the + /// appropriate handler variant. + /// + /// For `Typed` functions, binary blobs in the sidecar are rejected — + /// use `register_js` for functions that need binary data. + /// + /// Always returns a tagged result: + /// - `TAG_JSON (0x00)` + JSON bytes for JSON returns + /// - `TAG_BINARY (0x01)` + raw bytes for binary returns + pub(crate) fn call( + &self, + name: &str, + args_json: String, + binaries: Option>, + ) -> crate::Result> { + let blobs = if let Some(bin_data) = binaries { + decode_binaries(&bin_data)? + } else { + Vec::new() + }; + + match self.functions.get(name) { + Some(HostFn::JsBridge(func)) => { + // JS bridge path: parse JSON and pass blobs directly. + let json_value: JsonValue = serde_json::from_str(&args_json)?; + match func(json_value, blobs)?
{ + FnReturn::Json(json) => Ok(hyperlight_js_common::encode_json_return(&json)), + FnReturn::Binary(bytes) => { + Ok(hyperlight_js_common::encode_binary_return(&bytes)) + } + FnReturn::JsonWithBinaries(json, sidecar) => { + hyperlight_js_common::encode_json_with_binaries_return(&json, &sidecar) + .map_err(|e| crate::HyperlightError::Error(e.to_string())) + } + } + } + Some(HostFn::Typed(func)) => { + // Typed path: serde deserializes args from JSON. Binary + // data is not supported — reject if blobs are present. + if !blobs.is_empty() { + return Err(crate::HyperlightError::Error(format!( + concat!( + "Function '{}' received {} binary argument(s) but was registered ", + "with `register` (typed JSON-only). Use `register_js` for functions ", + "that accept Uint8Array/Buffer arguments.", + ), + name, + blobs.len() + ))); + } + let result = func(args_json)?; + Ok(hyperlight_js_common::encode_json_return(&result)) + } + None => Err(crate::HyperlightError::Error(format!( + "Function '{}' not found", + name + ))), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn call_typed_no_binaries() { + let mut module = HostModule::default(); + module.register("add", |a: i32, b: i32| a + b); + + // count=0 sidecar + let sidecar = vec![0u8, 0, 0, 0]; + let result = module + .call("add", "[3,4]".to_string(), Some(sidecar)) + .unwrap(); + assert_eq!(result[0], hyperlight_js_common::TAG_JSON); + assert_eq!(&result[1..], b"7"); + } + + #[test] + fn call_typed_rejects_binary_args() { + let mut module = HostModule::default(); + module.register("add", |a: i32, b: i32| a + b); + + // Sidecar with one blob — typed functions should reject this + let sidecar = hyperlight_js_common::encode_binaries(&[b"ABC" as &[u8]]).unwrap(); + let err = module + .call("add", "[1,2]".to_string(), Some(sidecar)) + .unwrap_err(); + assert!(err.to_string().contains("binary argument")); + assert!(err.to_string().contains("register_js")); + } + + #[test] + fn call_not_found() { + let module = 
HostModule::default(); + let err = module.call("nope", "[]".to_string(), None).unwrap_err(); + assert!(err.to_string().contains("not found")); + } + + #[test] + fn js_bridge_passes_through_bin_key_without_sidecar() { + // If a host function returns JSON containing {"__bin__": 0} but + // with an empty sidecar (FnReturn::Json), the key must pass through + // as a regular JSON object — it should NOT be treated as a binary + // placeholder because there is no sidecar to resolve against. + let mut module = HostModule::default(); + module.register_js("echo", |args, _blobs| Ok(FnReturn::Json(args.to_string()))); + + // Args contain an object with the reserved key but no actual binary + let args = r#"[{"__bin__": 0}]"#.to_string(); + let sidecar = vec![0u8, 0, 0, 0]; // count=0, no blobs + let result = module.call("echo", args.clone(), Some(sidecar)).unwrap(); + assert_eq!(result[0], hyperlight_js_common::TAG_JSON); + // The returned JSON should contain the __bin__ key as-is + let returned_json = std::str::from_utf8(&result[1..]).unwrap(); + assert!(returned_json.contains("__bin__")); } } diff --git a/src/hyperlight-js/src/sandbox/proto_js_sandbox.rs b/src/hyperlight-js/src/sandbox/proto_js_sandbox.rs index 987f2db..bce9c85 100644 --- a/src/hyperlight-js/src/sandbox/proto_js_sandbox.rs +++ b/src/hyperlight-js/src/sandbox/proto_js_sandbox.rs @@ -136,20 +136,29 @@ impl ProtoJSSandbox { let host_modules_json = serde_json::to_string(&host_modules)?; + // Register the host function that the guest calls for all host + // function invocations. Binary data (if any) is carried in a + // length-prefixed sidecar alongside the JSON args. 
self.inner.register( "CallHostJsFunction", - move |module_name: String, func_name: String, args: String| -> Result { + move |module_name: String, + func_name: String, + args_json: String, + binaries: Vec| + -> Result> { let module = host_modules .get(&module_name) .ok_or_else(|| new_error!("Host module '{}' not found", module_name))?; - let func = module.get(&func_name).ok_or_else(|| { - new_error!( - "Host function '{}' not found in module '{}'", - func_name, - module_name - ) - })?; - func(args) + module + .call(&func_name, args_json, Some(binaries)) + .map_err(|e| { + new_error!( + "Error calling host function '{}' in module '{}': {}", + func_name, + module_name, + e + ) + }) }, )?; @@ -212,27 +221,6 @@ impl ProtoJSSandbox { self.host_module(module).register(name, func); Ok(()) } - - /// Register a raw host function that operates on JSON strings directly. - /// - /// This is equivalent to calling `sbox.host_module(module).register_raw(name, func)`. - /// - /// Unlike [`register`](Self::register), which handles serde serialization / - /// deserialization automatically, this method passes the raw JSON string - /// from the guest to the callback and expects a JSON string result. - /// - /// Primarily intended for dynamic / bridge scenarios (e.g. NAPI bindings) - /// where argument types are not known at compile time. - #[instrument(err(Debug), skip(self, func), level=Level::INFO)] - pub fn register_raw( - &mut self, - module: impl Into + Debug, - name: impl Into + Debug, - func: impl Fn(String) -> Result + Send + Sync + 'static, - ) -> Result<()> { - self.host_module(module).register_raw(name, func); - Ok(()) - } } impl std::fmt::Debug for ProtoJSSandbox { diff --git a/src/hyperlight-js/tests/host_functions.rs b/src/hyperlight-js/tests/host_functions.rs index 76274f0..82d786e 100644 --- a/src/hyperlight-js/tests/host_functions.rs +++ b/src/hyperlight-js/tests/host_functions.rs @@ -17,7 +17,7 @@ limitations under the License. 
#![allow(clippy::disallowed_macros)] -use hyperlight_js::{new_error, SandboxBuilder, Script}; +use hyperlight_js::{SandboxBuilder, Script}; #[test] fn can_call_host_functions() { @@ -214,13 +214,21 @@ fn host_fn_with_unusual_names() { assert!(res == "42"); } +// ── Binary data (register_js) tests ────────────────────────────────── +// +// These test the binary sidecar round-trip through the hypervisor using +// register_js directly. register_js is #[doc(hidden)] but still pub — +// it's the foundation of the NAPI bridge and needs integration coverage. + #[test] -fn register_raw_basic() { +fn register_js_binary_arg_round_trip() { + // Guest sends Uint8Array → host receives blobs → returns length as JSON let handler = Script::from_content( r#" - import * as math from "math"; + import * as host from "host"; function handler(event) { - return { result: math.add(10, 32) }; + const data = new Uint8Array([72, 101, 108, 108, 111]); + return { len: host.byte_length(data) }; } "#, ); @@ -229,16 +237,58 @@ fn register_raw_basic() { let mut proto_js_sandbox = SandboxBuilder::new().build().unwrap(); - // register_raw receives the guest args as a JSON string "[10,32]" - // and must return a JSON string result. - proto_js_sandbox - .register_raw("math", "add", |args: String| { - let parsed: Vec = serde_json::from_str(&args)?; - let sum: i64 = parsed.iter().sum(); - Ok(serde_json::to_string(&sum)?) 
- }) + proto_js_sandbox.host_module("host").register_js( + "byte_length", + |_args: serde_json::Value, blobs: Vec>| { + // The first arg should be a placeholder {"__bin__": 0} + // and blobs should contain the Uint8Array bytes + let len = if let Some(blob) = blobs.first() { + blob.len() + } else { + // No sidecar blob was received: report a length of 0 + 0 + }; + let result = serde_json::to_string(&len) + .map_err(|e| hyperlight_js::HyperlightError::Error(format!("JSON error: {e}")))?; + Ok(hyperlight_js::FnReturn::Json(result)) + }, + ); + + let mut sandbox = proto_js_sandbox.load_runtime().unwrap(); + sandbox.add_handler("handler", handler).unwrap(); + let mut loaded_sandbox = sandbox.get_loaded_sandbox().unwrap(); + + let res = loaded_sandbox + .handle_event("handler", event.to_string(), None) .unwrap(); + assert_eq!(res, r#"{"len":5}"#); +} + +#[test] +fn register_js_binary_return() { + // Host returns FnReturn::Binary → guest sees Uint8Array + let handler = Script::from_content( r#" + import * as host from "host"; + function handler(event) { + const data = host.get_bytes(); + return { len: data.length, first: data[0], last: data[4] }; + } + "#, ); + + let event = r#"{}"#; + + let mut proto_js_sandbox = SandboxBuilder::new().build().unwrap(); + + proto_js_sandbox.host_module("host").register_js( + "get_bytes", + |_args: serde_json::Value, _blobs: Vec>| { + Ok(hyperlight_js::FnReturn::Binary(vec![10, 20, 30, 40, 50])) + }, + ); + let mut sandbox = proto_js_sandbox.load_runtime().unwrap(); sandbox.add_handler("handler", handler).unwrap(); let mut loaded_sandbox = sandbox.get_loaded_sandbox().unwrap(); @@ -247,18 +297,18 @@ fn register_raw_basic() { .handle_event("handler", event.to_string(), None) .unwrap(); - assert_eq!(res, r#"{"result":42}"#); + assert_eq!(res, r#"{"len":5,"first":10,"last":50}"#); } #[test] -fn register_raw_mixed_with_typed() { +fn register_js_mixed_args() { + // Guest sends string + Uint8Array + number → host receives all correctly let handler = 
Script::from_content( r#" - import * as math from "math"; + import * as host from "host"; function handler(event) { - let sum = math.add(10, 32); - let doubled = math.double(sum); - return { result: doubled }; + const data = new Uint8Array([1, 2, 3]); + return { result: host.describe("pfx", data, 42) }; } "#, ); @@ -267,18 +317,64 @@ fn register_raw_mixed_with_typed() { let mut proto_js_sandbox = SandboxBuilder::new().build().unwrap(); - // Typed registration via the Function trait - proto_js_sandbox - .register("math", "add", |a: i32, b: i32| a + b) + proto_js_sandbox.host_module("host").register_js( + "describe", + |args: serde_json::Value, blobs: Vec>| { + // args is [{"__bin__": 0}, "pfx", 42] or ["pfx", {"__bin__": 0}, 42] + // depending on arg order. Extract what we need. + let arr = args.as_array().unwrap(); + let mut prefix = String::new(); + let mut num = 0i64; + let blob_len = blobs.first().map(|b| b.len()).unwrap_or(0); + + for val in arr { + if let Some(s) = val.as_str() { + prefix = s.to_string(); + } else if let Some(n) = val.as_i64() { + num = n; + } + } + + let result = format!("{prefix}-{blob_len}-{num}"); + let json = serde_json::to_string(&result) + .map_err(|e| hyperlight_js::HyperlightError::Error(format!("JSON error: {e}")))?; + Ok(hyperlight_js::FnReturn::Json(json)) + }, + ); + + let mut sandbox = proto_js_sandbox.load_runtime().unwrap(); + sandbox.add_handler("handler", handler).unwrap(); + let mut loaded_sandbox = sandbox.get_loaded_sandbox().unwrap(); + + let res = loaded_sandbox + .handle_event("handler", event.to_string(), None) .unwrap(); - // Raw registration alongside typed — both in the same module + assert_eq!(res, r#"{"result":"pfx-3-42"}"#); +} + +#[test] +fn register_typed_rejects_binary_args_e2e() { + // Guest sends Uint8Array to a typed register() function — should error + let handler = Script::from_content( + r#" + import * as host from "host"; + function handler(event) { + try { + host.add(new Uint8Array([1, 2]), 3); + 
return { error: "should have thrown" }; + } catch (e) { + return { caught: true }; + } + } + "#, + ); + + let event = r#"{}"#; + + let mut proto_js_sandbox = SandboxBuilder::new().build().unwrap(); proto_js_sandbox - .register_raw("math", "double", |args: String| { - let parsed: Vec = serde_json::from_str(&args)?; - let val = parsed.first().copied().unwrap_or(0); - Ok(serde_json::to_string(&(val * 2))?) - }) + .register("host", "add", |a: i32, b: i32| a + b) .unwrap(); let mut sandbox = proto_js_sandbox.load_runtime().unwrap(); @@ -289,16 +385,19 @@ fn register_raw_mixed_with_typed() { .handle_event("handler", event.to_string(), None) .unwrap(); - assert_eq!(res, r#"{"result":84}"#); + // The guest should catch the error from the typed function rejecting binary + assert_eq!(res, r#"{"caught":true}"#); } #[test] -fn register_raw_error_propagation() { +fn register_js_empty_uint8array() { + // Guest sends empty Uint8Array — should work, blobs[0] is empty vec let handler = Script::from_content( r#" import * as host from "host"; function handler(event) { - return host.fail(); + const data = new Uint8Array(0); + return { len: host.byte_length(data) }; } "#, ); @@ -307,31 +406,79 @@ fn register_raw_error_propagation() { let mut proto_js_sandbox = SandboxBuilder::new().build().unwrap(); - proto_js_sandbox - .register_raw("host", "fail", |_args: String| { - Err(new_error!("intentional failure from raw host fn")) - }) + proto_js_sandbox.host_module("host").register_js( + "byte_length", + |_args: serde_json::Value, blobs: Vec>| { + let len = blobs.first().map(|b| b.len()).unwrap_or(0); + let result = serde_json::to_string(&len) + .map_err(|e| hyperlight_js::HyperlightError::Error(format!("{e}")))?; + Ok(hyperlight_js::FnReturn::Json(result)) + }, + ); + + let mut sandbox = proto_js_sandbox.load_runtime().unwrap(); + sandbox.add_handler("handler", handler).unwrap(); + let mut loaded_sandbox = sandbox.get_loaded_sandbox().unwrap(); + + let res = loaded_sandbox + 
.handle_event("handler", event.to_string(), None) .unwrap(); + assert_eq!(res, r#"{"len":0}"#); +} + +#[test] +fn register_js_multiple_binary_args() { + // Guest sends two separate Uint8Arrays as args + let handler = Script::from_content( + r#" + import * as host from "host"; + function handler(event) { + const a = new Uint8Array([1, 2, 3]); + const b = new Uint8Array([4, 5]); + return { total: host.total_length(a, b) }; + } + "#, + ); + + let event = r#"{}"#; + + let mut proto_js_sandbox = SandboxBuilder::new().build().unwrap(); + + proto_js_sandbox.host_module("host").register_js( + "total_length", + |_args: serde_json::Value, blobs: Vec>| { + let total: usize = blobs.iter().map(|b| b.len()).sum(); + let result = serde_json::to_string(&total) + .map_err(|e| hyperlight_js::HyperlightError::Error(format!("{e}")))?; + Ok(hyperlight_js::FnReturn::Json(result)) + }, + ); + let mut sandbox = proto_js_sandbox.load_runtime().unwrap(); sandbox.add_handler("handler", handler).unwrap(); let mut loaded_sandbox = sandbox.get_loaded_sandbox().unwrap(); - let err = loaded_sandbox + let res = loaded_sandbox .handle_event("handler", event.to_string(), None) - .unwrap_err(); + .unwrap(); - assert!(err.to_string().contains("intentional failure")); + assert_eq!(res, r#"{"total":5}"#); } #[test] -fn register_raw_via_host_module() { +fn register_js_binary_in_nested_object() { + // Guest sends an object containing a Uint8Array as a property let handler = Script::from_content( r#" - import * as utils from "utils"; + import * as host from "host"; function handler(event) { - let greeting = utils.greet("World"); - return { greeting }; + const payload = { + name: "test", + data: new Uint8Array([10, 20, 30]), + count: 3, + }; + return { result: host.process(payload) }; } "#, ); @@ -340,14 +487,21 @@ fn register_raw_via_host_module() { let mut proto_js_sandbox = SandboxBuilder::new().build().unwrap(); - // Use host_module() accessor + register_raw() directly on HostModule - proto_js_sandbox - 
.host_module("utils") - .register_raw("greet", |args: String| { - let parsed: Vec = serde_json::from_str(&args)?; - let name = parsed.first().cloned().unwrap_or_default(); - Ok(serde_json::to_string(&format!("Hello, {}!", name))?) - }); + proto_js_sandbox.host_module("host").register_js( + "process", + |args: serde_json::Value, blobs: Vec>| { + // The object should have {"name": "test", "data": {"__bin__": 0}, "count": 3} + // with blobs containing [10, 20, 30] + let arr = args.as_array().unwrap(); + let obj = arr[0].as_object().unwrap(); + let name = obj.get("name").unwrap().as_str().unwrap(); + let blob_len = blobs.first().map(|b| b.len()).unwrap_or(0); + let result = format!("{name}-{blob_len}"); + let json = serde_json::to_string(&result) + .map_err(|e| hyperlight_js::HyperlightError::Error(format!("{e}")))?; + Ok(hyperlight_js::FnReturn::Json(json)) + }, + ); let mut sandbox = proto_js_sandbox.load_runtime().unwrap(); sandbox.add_handler("handler", handler).unwrap(); @@ -357,5 +511,350 @@ fn register_raw_via_host_module() { .handle_event("handler", event.to_string(), None) .unwrap(); - assert_eq!(res, r#"{"greeting":"Hello, World!"}"#); + assert_eq!(res, r#"{"result":"test-3"}"#); +} + +// ── Numeric type tests ─────────────────────────────────────────────── +// QuickJS stores JSON-parsed numbers as doubles internally. The binary +// host function path (extract_binaries → value_to_json_with_binaries) +// must serialize whole-number floats as integers to preserve serde +// deserialization on the host side. 
+ +#[test] +fn host_fn_with_i32_arg_from_event_data() { + // event.x is parsed from JSON → stored as f64 in QuickJS → must + // arrive at the host as an integer, not 42.0 + let handler = Script::from_content( + r#" + import * as math from "math"; + function handler(event) { + return { result: math.double(event.x) }; + } + "#, + ); + + let mut proto = SandboxBuilder::new().build().unwrap(); + proto.register("math", "double", |x: i32| x * 2).unwrap(); + + let mut sandbox = proto.load_runtime().unwrap(); + sandbox.add_handler("handler", handler).unwrap(); + let mut loaded = sandbox.get_loaded_sandbox().unwrap(); + + let res = loaded + .handle_event("handler", r#"{"x": 42}"#.to_string(), None) + .unwrap(); + assert_eq!(res, r#"{"result":84}"#); +} + +#[test] +fn host_fn_with_i64_arg_from_event_data() { + let handler = Script::from_content( + r#" + import * as math from "math"; + function handler(event) { + return { result: math.negate(event.x) }; + } + "#, + ); + + let mut proto = SandboxBuilder::new().build().unwrap(); + proto.register("math", "negate", |x: i64| -x).unwrap(); + + let mut sandbox = proto.load_runtime().unwrap(); + sandbox.add_handler("handler", handler).unwrap(); + let mut loaded = sandbox.get_loaded_sandbox().unwrap(); + + let res = loaded + .handle_event("handler", r#"{"x": 100}"#.to_string(), None) + .unwrap(); + assert_eq!(res, r#"{"result":-100}"#); +} + +#[test] +fn host_fn_with_f64_arg_preserves_fractional() { + // Actual floats (3.14) must remain as floats, not be truncated + let handler = Script::from_content( + r#" + import * as math from "math"; + function handler(event) { + return { result: math.half(event.x) }; + } + "#, + ); + + let mut proto = SandboxBuilder::new().build().unwrap(); + proto.register("math", "half", |x: f64| x / 2.0).unwrap(); + + let mut sandbox = proto.load_runtime().unwrap(); + sandbox.add_handler("handler", handler).unwrap(); + let mut loaded = sandbox.get_loaded_sandbox().unwrap(); + + let res = loaded + 
.handle_event("handler", r#"{"x": 3.14}"#.to_string(), None) + .unwrap(); + + let json: serde_json::Value = serde_json::from_str(&res).unwrap(); + let result = json["result"].as_f64().unwrap(); + assert!( + (result - 1.57).abs() < 0.001, + "Expected ~1.57, got {result}" + ); +} + +#[test] +fn host_fn_with_bool_arg() { + let handler = Script::from_content( + r#" + import * as logic from "logic"; + function handler(event) { + return { result: logic.flip(event.flag) }; + } + "#, + ); + + let mut proto = SandboxBuilder::new().build().unwrap(); + proto.register("logic", "flip", |b: bool| !b).unwrap(); + + let mut sandbox = proto.load_runtime().unwrap(); + sandbox.add_handler("handler", handler).unwrap(); + let mut loaded = sandbox.get_loaded_sandbox().unwrap(); + + let res = loaded + .handle_event("handler", r#"{"flag": true}"#.to_string(), None) + .unwrap(); + assert_eq!(res, r#"{"result":false}"#); +} + +#[test] +fn host_fn_with_mixed_numeric_types() { + // i32 + f64 mix in the same call + let handler = Script::from_content( + r#" + import * as math from "math"; + function handler(event) { + return { result: math.weighted_add(event.a, event.b, event.weight) }; + } + "#, + ); + + let mut proto = SandboxBuilder::new().build().unwrap(); + proto + .register("math", "weighted_add", |a: i32, b: i32, w: f64| { + (a as f64 * w + b as f64 * (1.0 - w)) as i32 + }) + .unwrap(); + + let mut sandbox = proto.load_runtime().unwrap(); + sandbox.add_handler("handler", handler).unwrap(); + let mut loaded = sandbox.get_loaded_sandbox().unwrap(); + + let res = loaded + .handle_event( + "handler", + r#"{"a": 100, "b": 200, "weight": 0.75}"#.to_string(), + None, + ) + .unwrap(); + assert_eq!(res, r#"{"result":125}"#); +} + +#[test] +fn host_fn_with_negative_integer() { + let handler = Script::from_content( + r#" + import * as math from "math"; + function handler(event) { + return { result: math.abs(event.x) }; + } + "#, + ); + + let mut proto = SandboxBuilder::new().build().unwrap(); + 
proto.register("math", "abs", |x: i32| x.abs()).unwrap(); + + let mut sandbox = proto.load_runtime().unwrap(); + sandbox.add_handler("handler", handler).unwrap(); + let mut loaded = sandbox.get_loaded_sandbox().unwrap(); + + let res = loaded + .handle_event("handler", r#"{"x": -42}"#.to_string(), None) + .unwrap(); + assert_eq!(res, r#"{"result":42}"#); +} + +#[test] +fn host_fn_with_zero() { + let handler = Script::from_content( + r#" + import * as math from "math"; + function handler(event) { + return { result: math.inc(event.x) }; + } + "#, + ); + + let mut proto = SandboxBuilder::new().build().unwrap(); + proto.register("math", "inc", |x: i32| x + 1).unwrap(); + + let mut sandbox = proto.load_runtime().unwrap(); + sandbox.add_handler("handler", handler).unwrap(); + let mut loaded = sandbox.get_loaded_sandbox().unwrap(); + + let res = loaded + .handle_event("handler", r#"{"x": 0}"#.to_string(), None) + .unwrap(); + assert_eq!(res, r#"{"result":1}"#); +} + +// ── Nested binary return tests (JsonWithBinaries) ──────────────────── +// These test the sidecar path for host functions that return objects or +// arrays containing binary data alongside JSON fields. 
+ +#[test] +fn register_js_nested_binary_return_in_object() { + // Host returns an object with a nested buffer → guest sees Uint8Array + let handler = Script::from_content( + r#" + import * as host from "host"; + function handler(event) { + const result = host.get_payload(); + return { + name: result.name, + data_len: result.data.length, + first: result.data[0], + last: result.data[2], + }; + } + "#, + ); + + let event = r#"{}"#; + + let mut proto = SandboxBuilder::new().build().unwrap(); + + proto.host_module("host").register_js( + "get_payload", + |_args: serde_json::Value, _blobs: Vec>| { + // Return JSON + sidecar: {"name": "test", "data": {"__bin__": 0}} + let sidecar = hyperlight_js_common::encode_binaries(&[&[10u8, 20, 30][..]]) + .map_err(|e| hyperlight_js::HyperlightError::Error(format!("{e}")))?; + let json = serde_json::json!({ + "name": "test", + "data": { "__bin__": 0 }, + }); + let json_str = serde_json::to_string(&json) + .map_err(|e| hyperlight_js::HyperlightError::Error(format!("{e}")))?; + Ok(hyperlight_js::FnReturn::JsonWithBinaries(json_str, sidecar)) + }, + ); + + let mut sandbox = proto.load_runtime().unwrap(); + sandbox.add_handler("handler", handler).unwrap(); + let mut loaded = sandbox.get_loaded_sandbox().unwrap(); + + let res = loaded + .handle_event("handler", event.to_string(), None) + .unwrap(); + + assert_eq!(res, r#"{"name":"test","data_len":3,"first":10,"last":30}"#); +} + +#[test] +fn register_js_nested_binary_return_in_array() { + // Host returns an array containing buffers → guest sees Uint8Arrays + let handler = Script::from_content( + r#" + import * as host from "host"; + function handler(event) { + const items = host.get_items(); + return { + count: items.length, + first_len: items[0].length, + second_len: items[1].length, + first_byte: items[0][0], + }; + } + "#, + ); + + let event = r#"{}"#; + + let mut proto = SandboxBuilder::new().build().unwrap(); + + proto.host_module("host").register_js( + "get_items", + |_args: 
serde_json::Value, _blobs: Vec>| { + // Return an array of two binary blobs + let sidecar = hyperlight_js_common::encode_binaries(&[&[1u8, 2, 3][..], &[4u8, 5][..]]) + .map_err(|e| hyperlight_js::HyperlightError::Error(format!("{e}")))?; + let json = serde_json::json!([{"__bin__": 0}, {"__bin__": 1}]); + let json_str = serde_json::to_string(&json) + .map_err(|e| hyperlight_js::HyperlightError::Error(format!("{e}")))?; + Ok(hyperlight_js::FnReturn::JsonWithBinaries(json_str, sidecar)) + }, + ); + + let mut sandbox = proto.load_runtime().unwrap(); + sandbox.add_handler("handler", handler).unwrap(); + let mut loaded = sandbox.get_loaded_sandbox().unwrap(); + + let res = loaded + .handle_event("handler", event.to_string(), None) + .unwrap(); + + assert_eq!( + res, + r#"{"count":2,"first_len":3,"second_len":2,"first_byte":1}"# + ); +} + +#[test] +fn register_js_deeply_nested_binary_return() { + // Host returns a deeply nested structure with binary data + let handler = Script::from_content( + r#" + import * as host from "host"; + function handler(event) { + const result = host.get_nested(); + return { + inner_len: result.outer.inner.data.length, + first_byte: result.outer.inner.data[0], + label: result.outer.inner.label, + }; + } + "#, + ); + + let event = r#"{}"#; + + let mut proto = SandboxBuilder::new().build().unwrap(); + + proto.host_module("host").register_js( + "get_nested", + |_args: serde_json::Value, _blobs: Vec>| { + let sidecar = hyperlight_js_common::encode_binaries(&[&[0xABu8, 0xCD][..]]) + .map_err(|e| hyperlight_js::HyperlightError::Error(format!("{e}")))?; + let json = serde_json::json!({ + "outer": { + "inner": { + "label": "deep", + "data": { "__bin__": 0 }, + } + } + }); + let json_str = serde_json::to_string(&json) + .map_err(|e| hyperlight_js::HyperlightError::Error(format!("{e}")))?; + Ok(hyperlight_js::FnReturn::JsonWithBinaries(json_str, sidecar)) + }, + ); + + let mut sandbox = proto.load_runtime().unwrap(); + sandbox.add_handler("handler", 
handler).unwrap(); + let mut loaded = sandbox.get_loaded_sandbox().unwrap(); + + let res = loaded + .handle_event("handler", event.to_string(), None) + .unwrap(); + + assert_eq!(res, r#"{"inner_len":2,"first_byte":171,"label":"deep"}"#); } diff --git a/src/js-host-api/Cargo.toml b/src/js-host-api/Cargo.toml index 48c8931..46e9581 100644 --- a/src/js-host-api/Cargo.toml +++ b/src/js-host-api/Cargo.toml @@ -14,6 +14,7 @@ crate-type = ["cdylib"] [dependencies] arc-swap = "1" hyperlight-js = { workspace = true, features = ["monitor-wall-clock", "monitor-cpu-time", "guest-call-stats"] } +hyperlight-js-common = { workspace = true } napi = { version = "3.9", features = ["tokio_rt", "serde-json"] } napi-derive = "3.5" serde_json = "1" diff --git a/src/js-host-api/README.md b/src/js-host-api/README.md index 59cbd7f..a314c4d 100644 --- a/src/js-host-api/README.md +++ b/src/js-host-api/README.md @@ -315,15 +315,15 @@ sequenceDiagram Note over Guest,Host: Registration (before loadRuntime) Host->>Bridge: proto.hostModule('math').register('add', callback) - Bridge->>HL: register_raw('math', 'add', closure) + Bridge->>HL: Stores closure in HostModule Note over Guest,Host: Invocation (during callHandler) Guest->>HL: math.add(1, 2) - HL->>Bridge: closure("[1,2]") - Bridge->>Host: callback("[1,2]") + HL->>Bridge: Dispatch args + optional binary sidecar + Bridge->>Host: callback(1, 2) Note right of Host: sync: return immediately
async: await Promise - Host-->>Bridge: "3" - Bridge-->>HL: Ok("3") + Host-->>Bridge: 3 + Bridge-->>HL: Tagged result HL-->>Guest: 3 ``` @@ -357,12 +357,15 @@ const result = await loaded.callHandler('handler', { a: 10, b: 32 }); console.log(result); // { result: 42 } ``` -### The JSON Wire Protocol +### The Wire Protocol -All arguments and return values cross the sandbox boundary as **JSON strings**. +Arguments and return values cross the sandbox boundary as **JSON strings**, +except binary data (`Uint8Array`/`Buffer`) which are carried separately in a +raw **binary sidecar**. With `register()`, this is handled automatically — your callback receives individual arguments (parsed from the JSON array) and the return value is -automatically `JSON.stringify`'d. +automatically `JSON.stringify`'d. Top-level `Buffer` arguments and returns +are passed as raw bytes via the sidecar channel. ```javascript // Guest calls: math.add(1, 2) @@ -370,6 +373,11 @@ automatically `JSON.stringify`'d. // Your return value: 3 — automatically stringified to '3' math.register('add', (a, b) => a + b); +// Guest calls: crypto.hash(new Uint8Array([1,2,3])) +// Your callback receives: (Buffer) — a native Node.js Buffer +// Return a Buffer → becomes Uint8Array on guest side +crypto.register('hash', (data) => createHash('sha256').update(data).digest()); + // Guest calls: db.query("users") // Your callback receives: ("users") — the single string arg // Your return value is automatically stringified @@ -397,9 +405,9 @@ const math = proto.hostModule('math'); Throws `ERR_INVALID_ARG` if name is empty. -#### `builder.register(name, callback)` → `HostModule` +#### `builder.register(name, callback)` → `void` -Registers a function within the module. Returns the builder for chaining. +Registers a function within the module. Arguments are auto-parsed from the guest's JSON array and spread into your callback. The return value is automatically `JSON.stringify`'d. @@ -452,6 +460,66 @@ Node.js code. 
> Node.js main thread and waits for the result via a oneshot channel. This > allows both sync and async JS callbacks to work transparently. +### Binary Data (Buffers) + +Host functions natively support `Uint8Array`/`Buffer` arguments and returns. +Binary data travels through a dedicated sidecar channel, keeping overhead +minimal. Top-level `Buffer` arguments and returns are passed as raw bytes +with no encoding. Nested `Buffer`/`Uint8Array` values inside returned objects +and arrays are also extracted into the binary sidecar and restored on the +guest side. + +```javascript +const { SandboxBuilder } = require('@hyperlight/js-host-api'); +const { createHash } = require('crypto'); +const zlib = require('zlib'); + +const proto = await new SandboxBuilder().build(); + +// Buffer arguments: guest Uint8Array → host Buffer +proto.hostModule('crypto').register('sha256', (data) => { + // data is a Node.js Buffer + return createHash('sha256').update(data).digest(); // returns Buffer +}); + +// Mixed args: regular values and Buffers together +proto.hostModule('io').register('compress', (algorithm, data) => { + // algorithm is a string, data is a Buffer + if (algorithm === 'gzip') return zlib.gzipSync(data); + return data; +}); + +const sandbox = await proto.loadRuntime(); +sandbox.addHandler('handler', ` + import * as crypto from "host:crypto"; + import * as io from "host:io"; + function handler(event) { + const data = new Uint8Array([72, 101, 108, 108, 111]); + const hash = crypto.sha256(data); // Uint8Array + const compressed = io.compress('gzip', data); // Uint8Array + return { hashLen: hash.length, compLen: compressed.length }; + } +`); + +const loaded = await sandbox.getLoadedSandbox(); +const result = await loaded.callHandler('handler', {}); +``` + +**How it works under the hood:** + +1. Guest `Uint8Array` args are extracted from the QuickJS VM and packed + into a length-prefixed binary sidecar alongside JSON placeholders +2. 
The sidecar crosses the hypervisor boundary as raw bytes (no encoding) +3. On the host side, placeholders are replaced with native Node.js `Buffer` + objects via the NAPI API — your callback receives real Buffers +4. `Buffer` returns (including Buffers nested inside returned objects/arrays) + are detected recursively, extracted into the sidecar, and arrive on the + guest side as `Uint8Array` with JSON placeholders restored + +> **Note:** Nested `Buffer`/`Uint8Array` values in returned objects/arrays are +> fully supported and transported via the same binary sidecar mechanism as +> top-level Buffers, so you can freely mix structured JSON and binary data. + ### Error Handling If your callback throws (sync) or rejects (async), the error propagates @@ -553,8 +621,10 @@ spawn_blocking thread Node.js main thread 7. block_on(receiver) gets the Promise 8. await Promise -9. JSON stringify result -10. Return to guest +9. Return result to guest + (Buffer → binary tag, + other → JSON stringify) +10. Guest receives value ``` This design: diff --git a/src/js-host-api/lib.js b/src/js-host-api/lib.js index 64281bf..1d1e13b 100644 --- a/src/js-host-api/lib.js +++ b/src/js-host-api/lib.js @@ -181,15 +181,34 @@ ProtoJSSandbox.prototype.hostModule = wrapSync(ProtoJSSandbox.prototype.hostModu }); } -// HostModule — register() +// HostModule — register() with Buffer support { const origRegister = HostModule.prototype.register; if (!origRegister) throw new Error('Cannot wrap missing method: HostModule.register'); HostModule.prototype.register = wrapSync(function (name, callback) { - // the rust code expects the host function to return a Promise, so we wrap the callback result in Promise.resolve().then(..) to allow sync functions as well - // note that Promise.resolve(callback(...args)) would not work because if callback throws that would not return a rejected promise, it would just throw before returning the promise. + // Wrap the callback to handle Buffer returns. 
+ // Args: Rust creates native Buffer objects directly via the + // NAPI C API — no conversion needed on the JS side. + // Returns: Top-level Buffer/Uint8Array is ensured to be a Buffer + // so Rust's napi_is_buffer detects it. For other return + // values (objects, arrays), Rust's recursive NAPI walker + // extracts nested Buffers into the binary sidecar + // directly — no base64 conversion needed on the JS side. return origRegister.call(this, name, (...args) => - Promise.resolve().then(() => callback(...args)) + Promise.resolve() + .then(() => callback(...args)) + .then((result) => { + if (Buffer.isBuffer(result)) { + // Already a Buffer — return as-is to avoid an unnecessary copy. + return result; + } + if (result instanceof Uint8Array) { + // Plain Uint8Array — wrap in Buffer so Rust's + // napi_is_buffer detects it. + return Buffer.from(result); + } + return result; + }) ); }); } diff --git a/src/js-host-api/src/lib.rs b/src/js-host-api/src/lib.rs index b3c1e88..100455e 100644 --- a/src/js-host-api/src/lib.rs +++ b/src/js-host-api/src/lib.rs @@ -22,7 +22,7 @@ use hyperlight_js::{ CpuTimeMonitor, ExecutionStats, HyperlightError, InterruptHandle, JSSandbox, LoadedJSSandbox, ProtoJSSandbox, SandboxBuilder, Script, Snapshot, WallClockMonitor, }; -use napi::bindgen_prelude::{JsValuesTupleIntoVec, Promise, ToNapiValue}; +use napi::bindgen_prelude::{FromNapiValue, JsValuesTupleIntoVec, Promise, ToNapiValue}; use napi::sys::{napi_env, napi_value}; use napi::threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode}; use napi::{tokio, Status}; @@ -526,9 +526,9 @@ impl ProtoJSSandboxWrapper { module_name: String, function_name: String, func: ThreadsafeFunction< - Rest>, - Promise>, - Rest>, + Rest>, + Promise>, + Rest>, Status, false, true, @@ -552,6 +552,680 @@ impl JsValuesTupleIntoVec for Rest { } } +// ── Native Buffer marshalling ──────────────────────────────────────── +// +// These types handle binary data between Rust and JS callbacks. 
Blobs +// are extracted from the JS object tree and placed in a binary sidecar +// with `{"__bin__": N}` placeholders in the JSON. On the JS side we +// create/detect native Node.js `Buffer` objects directly via the NAPI +// C API — no base64 encoding needed. + +/// A JS argument value that can contain native Buffers in place of +/// `{"__bin__": N}` placeholder objects. +/// +/// When napi-rs calls `ToNapiValue` to convert this into a JS value, +/// the recursive converter walks the JSON tree and creates real Buffer +/// objects at placeholder positions — no base64 encoding needed. +pub struct JsArg { + /// The JSON value tree, potentially containing `{"__bin__": N}` placeholders. + value: serde_json::Value, + /// Shared reference to the decoded binary blobs. Placeholders index + /// into this Vec. + blobs: Arc>>, +} + +impl ToNapiValue for JsArg { + /// # Safety + /// + /// Must be called on the JS thread with a valid `napi_env`. + unsafe fn to_napi_value(env: napi_env, val: Self) -> napi::Result { + if val.blobs.is_empty() { + // Fast path: no binary data — delegate entirely to napi-rs's + // built-in serde_json conversion (avoids the recursive walk). + // SAFETY: env is valid, val.value is a valid serde_json::Value. + return unsafe { serde_json::Value::to_napi_value(env, val.value) }; + } + // SAFETY: env is valid, blobs contains valid byte slices. + unsafe { json_to_napi_with_buffers(env, val.value, &val.blobs, 0) } + } +} + +/// A JS return value that may contain native Buffers. +/// +/// When napi-rs converts the JS callback's return value, we recursively +/// walk the value tree, extracting any nested Buffers/Uint8Arrays into +/// a blob Vec and replacing them with `{"__bin__": N}` placeholders — +/// the same sidecar pattern used for arguments. +pub enum JsReturn { + /// JSON value with any nested Buffers extracted into blobs. + /// The JSON may contain `{"__bin__": N}` placeholders. 
+ Value(serde_json::Value, Vec>), + /// Top-level Buffer — extracted directly, no JSON wrapping. + Buffer(Vec), +} + +impl FromNapiValue for JsReturn { + /// # Safety + /// + /// Must be called on the JS thread with a valid `napi_env` and `napi_value`. + unsafe fn from_napi_value(env: napi_env, val: napi_value) -> napi::Result { + // Check for Buffer first — this is a fast C-level type check. + let mut is_buffer = false; + // SAFETY: env and val are valid napi handles. + let status = unsafe { napi::sys::napi_is_buffer(env, val, &mut is_buffer) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to check buffer type", + )); + } + if is_buffer { + // SAFETY: env and val are valid, val is confirmed to be a buffer. + let bytes = unsafe { extract_buffer_bytes(env, val)? }; + return Ok(JsReturn::Buffer(bytes)); + } + + // Not a top-level buffer — recursively walk the value tree, + // extracting any nested Buffers/Uint8Arrays into blobs. + let mut blobs = Vec::new(); + // SAFETY: env and val are valid napi handles. + let json = unsafe { napi_to_json_with_buffer_extraction(env, val, &mut blobs, 0)? }; + Ok(JsReturn::Value(json, blobs)) + } +} + +/// Recursively converts a `serde_json::Value` into a `napi_value`, +/// replacing `{"__bin__": N}` placeholders with native Node.js Buffers. +/// +/// Non-container values (strings, numbers, booleans, null) are delegated +/// to napi-rs's built-in `serde_json::Value` → JS conversion. +/// +/// # Safety +/// +/// Caller must ensure `env` is a valid napi environment. 
+unsafe fn json_to_napi_with_buffers( + env: napi_env, + value: serde_json::Value, + blobs: &[Vec], + depth: usize, +) -> napi::Result { + use hyperlight_js_common::PLACEHOLDER_BIN; + + if depth > hyperlight_js_common::MAX_JSON_DEPTH { + return Err(napi::Error::from_reason(format!( + "JSON nesting depth exceeds maximum ({})", + hyperlight_js_common::MAX_JSON_DEPTH + ))); + } + + match value { + serde_json::Value::Object(obj) => { + // Check for __bin__ placeholder: {"__bin__": N} + if obj.len() == 1 + && let Some(serde_json::Value::Number(n)) = obj.get(PLACEHOLDER_BIN) + && let Some(idx) = n.as_u64() + { + let idx = idx as usize; + if idx < blobs.len() { + // SAFETY: env is valid, blobs[idx] is a valid byte slice. + return unsafe { create_napi_buffer(env, &blobs[idx]) }; + } + return Err(napi::Error::from_reason(format!( + "Binary placeholder index {idx} out of bounds (have {} blobs)", + blobs.len() + ))); + } + + // Regular object — recursively convert properties + let mut js_obj: napi_value = std::ptr::null_mut(); + // SAFETY: env is valid. + let status = unsafe { napi::sys::napi_create_object(env, &mut js_obj) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to create JS object", + )); + } + + for (key, val) in obj { + // SAFETY: env is valid, recursive call maintains invariants. + let js_val = unsafe { json_to_napi_with_buffers(env, val, blobs, depth + 1)? }; + // Use napi_create_string_utf8 + napi_set_property instead of + // CString + napi_set_named_property so keys with embedded NUL + // bytes are supported. + let key_bytes = key.as_bytes(); + let mut key_val: napi_value = std::ptr::null_mut(); + // SAFETY: env is valid; key_bytes is valid for its length. 
+ let status = unsafe { + napi::sys::napi_create_string_utf8( + env, + key_bytes.as_ptr().cast(), + key_bytes.len(), + &mut key_val, + ) + }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to create property key string", + )); + } + // SAFETY: env, js_obj, key_val, js_val are all valid. + let status = unsafe { napi::sys::napi_set_property(env, js_obj, key_val, js_val) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to set object property", + )); + } + } + Ok(js_obj) + } + serde_json::Value::Array(arr) => { + let len = arr.len(); + if len > u32::MAX as usize { + return Err(napi::Error::from_reason(format!( + "Array length {len} exceeds u32::MAX" + ))); + } + let mut js_arr: napi_value = std::ptr::null_mut(); + // SAFETY: env is valid. + let status = unsafe { napi::sys::napi_create_array_with_length(env, len, &mut js_arr) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to create JS array", + )); + } + + for (i, val) in arr.into_iter().enumerate() { + // SAFETY: env is valid, recursive call maintains invariants. + let js_val = unsafe { json_to_napi_with_buffers(env, val, blobs, depth + 1)? }; + // SAFETY: env, js_arr, js_val are valid; i is in bounds. + let status = unsafe { napi::sys::napi_set_element(env, js_arr, i as u32, js_val) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to set array element", + )); + } + } + Ok(js_arr) + } + // Non-container values can't contain placeholders — delegate to napi-rs + // SAFETY: env is valid, other is a valid serde_json::Value. + other => unsafe { serde_json::Value::to_napi_value(env, other) }, + } +} + +/// Creates a native Node.js Buffer by copying raw bytes into V8's heap. 
+/// +/// # Safety +/// +/// Caller must ensure `env` is a valid napi environment. +unsafe fn create_napi_buffer(env: napi_env, data: &[u8]) -> napi::Result { + let mut buf: napi_value = std::ptr::null_mut(); + // SAFETY: env is valid, data is a valid byte slice. + let status = unsafe { + napi::sys::napi_create_buffer_copy( + env, + data.len(), + data.as_ptr().cast(), + std::ptr::null_mut(), // we don't need the result_data pointer + &mut buf, + ) + }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to create Buffer", + )); + } + Ok(buf) +} + +/// Extracts raw bytes from a Node.js Buffer (`napi_is_buffer` must be true). +/// +/// # Safety +/// +/// Caller must ensure `env` is valid and `val` is a confirmed buffer. +unsafe fn extract_buffer_bytes(env: napi_env, val: napi_value) -> napi::Result> { + let mut data = std::ptr::null_mut(); + let mut len = 0; + // SAFETY: env and val are valid, val is confirmed to be a buffer. + let status = unsafe { napi::sys::napi_get_buffer_info(env, val, &mut data, &mut len) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to get buffer info", + )); + } + // Handle empty buffers: napi_get_buffer_info returns data=null, len=0 + // for empty buffers. slice::from_raw_parts requires non-null pointer + // even for zero-length slices, so we handle this case specially. + if len == 0 { + return Ok(Vec::new()); + } + // Non-empty buffer: data must be valid and non-null. + if data.is_null() { + return Err(napi::Error::from_reason( + "Buffer has null data pointer with non-zero length - backing store may have been garbage collected", + )); + } + // SAFETY: data points to len bytes of valid buffer memory. + Ok(unsafe { std::slice::from_raw_parts(data as *const u8, len) }.to_vec()) +} + +/// Extracts raw bytes from a Uint8Array typed array. 
+/// +/// # Safety +/// +/// Caller must ensure `env` is valid and `val` is a confirmed typed array +/// of type `napi_uint8_array`. +unsafe fn extract_typed_array_bytes(env: napi_env, val: napi_value) -> napi::Result> { + let mut array_type = napi::sys::TypedarrayType::int8_array; + let mut length = 0; + let mut data = std::ptr::null_mut(); + let mut arraybuffer: napi_value = std::ptr::null_mut(); + let mut byte_offset = 0; + // SAFETY: env and val are valid, val is a typed array. + let status = unsafe { + napi::sys::napi_get_typedarray_info( + env, + val, + &mut array_type, + &mut length, + &mut data, + &mut arraybuffer, + &mut byte_offset, + ) + }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to get typed array info", + )); + } + if length == 0 { + return Ok(Vec::new()); + } + if data.is_null() { + return Err(napi::Error::from_reason( + "TypedArray has null data pointer with non-zero length", + )); + } + // SAFETY: data points to length bytes of valid memory (u8 elements). + Ok(unsafe { std::slice::from_raw_parts(data as *const u8, length) }.to_vec()) +} + +/// Extracts a UTF-8 string from a napi string value. +/// +/// # Safety +/// +/// Caller must ensure `env` is valid and `val` is a string-type napi_value. +unsafe fn napi_string_to_rust(env: napi_env, val: napi_value) -> napi::Result { + let mut len = 0; + // First call with null buffer to get the length (excluding null terminator). + // SAFETY: env and val are valid. + let status = unsafe { + napi::sys::napi_get_value_string_utf8(env, val, std::ptr::null_mut(), 0, &mut len) + }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to get string length", + )); + } + let mut buf = vec![0u8; len + 1]; + let mut written = 0; + // SAFETY: env and val are valid, buf is large enough. 
+ let status = unsafe { + napi::sys::napi_get_value_string_utf8( + env, + val, + buf.as_mut_ptr().cast::(), + len + 1, + &mut written, + ) + }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to get string value", + )); + } + String::from_utf8(buf[..written].to_vec()) + .map_err(|_| napi::Error::from_reason("String contains invalid UTF-8")) +} + +/// Recursively converts a `napi_value` into a `serde_json::Value`, +/// extracting any nested `Buffer`/`Uint8Array` values into `blobs` +/// and replacing them with `{"__bin__": N}` placeholders. +/// +/// This is the inverse of [`json_to_napi_with_buffers`] — used on +/// the return path to eliminate the base64 round-trip that previously +/// occurred for nested binary data in host function returns. +/// +/// # Safety +/// +/// Caller must ensure `env` is a valid napi environment and `val` +/// is a valid napi_value. +unsafe fn napi_to_json_with_buffer_extraction( + env: napi_env, + val: napi_value, + blobs: &mut Vec>, + depth: usize, +) -> napi::Result { + use hyperlight_js_common::PLACEHOLDER_BIN; + + if depth > hyperlight_js_common::MAX_JSON_DEPTH { + return Err(napi::Error::from_reason(format!( + "Return value nesting depth exceeds maximum ({})", + hyperlight_js_common::MAX_JSON_DEPTH + ))); + } + + // Get the JS type of this value. + let mut value_type = napi::sys::ValueType::napi_undefined; + // SAFETY: env and val are valid. + let status = unsafe { napi::sys::napi_typeof(env, val, &mut value_type) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to get value type", + )); + } + + match value_type { + napi::sys::ValueType::napi_undefined | napi::sys::ValueType::napi_null => { + Ok(serde_json::Value::Null) + } + napi::sys::ValueType::napi_boolean => { + let mut result = false; + // SAFETY: env and val are valid, val is a boolean. 
+ let status = unsafe { napi::sys::napi_get_value_bool(env, val, &mut result) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to get boolean value", + )); + } + Ok(serde_json::Value::Bool(result)) + } + napi::sys::ValueType::napi_number => { + let mut result = 0.0; + // SAFETY: env and val are valid, val is a number. + let status = unsafe { napi::sys::napi_get_value_double(env, val, &mut result) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to get number value", + )); + } + if result.is_finite() { + // Emit whole-number floats as integers to match JSON.stringify + // behaviour (e.g. 42.0 → 42, not 42.0). + if result == (result as i64) as f64 + && result >= i64::MIN as f64 + && result <= i64::MAX as f64 + { + return Ok(serde_json::Value::Number((result as i64).into())); + } + if let Some(n) = serde_json::Number::from_f64(result) { + return Ok(serde_json::Value::Number(n)); + } + } + // NaN / Infinity → null (like JSON.stringify) + Ok(serde_json::Value::Null) + } + napi::sys::ValueType::napi_string => { + // SAFETY: env and val are valid, val is a string. + let s = unsafe { napi_string_to_rust(env, val)? }; + Ok(serde_json::Value::String(s)) + } + napi::sys::ValueType::napi_object => { + // Check for Buffer first. + let mut is_buffer = false; + // SAFETY: env and val are valid. + let status = unsafe { napi::sys::napi_is_buffer(env, val, &mut is_buffer) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to check buffer type", + )); + } + if is_buffer { + // SAFETY: val is confirmed to be a buffer. + let bytes = unsafe { extract_buffer_bytes(env, val)? }; + let idx = blobs.len(); + blobs.push(bytes); + return Ok(serde_json::json!({ PLACEHOLDER_BIN: idx })); + } + + // Check for Uint8Array (TypedArray with u8 element type). 
+ let mut is_typedarray = false; + // SAFETY: env and val are valid. + let status = unsafe { napi::sys::napi_is_typedarray(env, val, &mut is_typedarray) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to check typed array type", + )); + } + if is_typedarray { + // Probe the typed array type — only extract Uint8Array. + let mut array_type = napi::sys::TypedarrayType::int8_array; + // N-API requires valid pointers for all out-parameters. + let mut length: usize = 0; + let mut data: *mut std::ffi::c_void = std::ptr::null_mut(); + let mut arraybuffer: napi_value = std::ptr::null_mut(); + let mut byte_offset: usize = 0; + // SAFETY: env and val are valid, val is a typed array. + let status = unsafe { + napi::sys::napi_get_typedarray_info( + env, + val, + &mut array_type, + &mut length, + &mut data, + &mut arraybuffer, + &mut byte_offset, + ) + }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to get typed array type info", + )); + } + if array_type == napi::sys::TypedarrayType::uint8_array { + // SAFETY: val is confirmed Uint8Array. + let bytes = unsafe { extract_typed_array_bytes(env, val)? }; + let idx = blobs.len(); + blobs.push(bytes); + return Ok(serde_json::json!({ PLACEHOLDER_BIN: idx })); + } + // Other typed arrays (Int16, Float32, etc.) — let them fall + // through to the standard object property iteration, which + // will serialize their numeric indices like JSON.stringify. + } + + // Check for Array. + let mut is_array = false; + // SAFETY: env and val are valid. + let status = unsafe { napi::sys::napi_is_array(env, val, &mut is_array) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to check array type", + )); + } + if is_array { + let mut length: u32 = 0; + // SAFETY: env and val are valid, val is an array. 
+ let status = unsafe { napi::sys::napi_get_array_length(env, val, &mut length) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to get array length", + )); + } + let mut arr = Vec::with_capacity(length as usize); + for i in 0..length { + let mut elem: napi_value = std::ptr::null_mut(); + // SAFETY: env, val are valid; i is in bounds. + let status = unsafe { napi::sys::napi_get_element(env, val, i, &mut elem) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to get array element", + )); + } + // SAFETY: env and elem are valid. + arr.push(unsafe { + napi_to_json_with_buffer_extraction(env, elem, blobs, depth + 1)? + }); + } + return Ok(serde_json::Value::Array(arr)); + } + + // Check for objects with toJSON() (Date, custom serializable types). + // JSON.stringify calls toJSON() when present — we do the same to + // match its behaviour (e.g. Date → ISO string, not empty object). + { + let method_name = b"toJSON\0"; + let mut has_method = false; + // SAFETY: env, val are valid; method_name is null-terminated. + let status = unsafe { + napi::sys::napi_has_named_property( + env, + val, + method_name.as_ptr().cast(), + &mut has_method, + ) + }; + if status == napi::sys::Status::napi_ok && has_method { + let mut to_json_fn: napi_value = std::ptr::null_mut(); + let mut method_key: napi_value = std::ptr::null_mut(); + // SAFETY: env is valid, "toJSON" is valid UTF-8. + let status = unsafe { + napi::sys::napi_create_string_utf8( + env, + method_name.as_ptr().cast(), + 6, // length of "toJSON" without null + &mut method_key, + ) + }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to create toJSON key string", + )); + } + // SAFETY: env, val, method_key are valid. 
+ let status = unsafe { + napi::sys::napi_get_property(env, val, method_key, &mut to_json_fn) + }; + if status == napi::sys::Status::napi_ok { + // Verify it's a function. + let mut fn_type = napi::sys::ValueType::napi_undefined; + let _ = unsafe { napi::sys::napi_typeof(env, to_json_fn, &mut fn_type) }; + if fn_type == napi::sys::ValueType::napi_function { + let mut json_result: napi_value = std::ptr::null_mut(); + // SAFETY: env, to_json_fn (function), val (this) are valid. + let status = unsafe { + napi::sys::napi_call_function( + env, + val, + to_json_fn, + 0, + std::ptr::null(), + &mut json_result, + ) + }; + if status != napi::sys::Status::napi_ok { + // toJSON() threw — propagate the exception rather + // than falling through to property iteration + // (matches JSON.stringify behaviour). + return Err(napi::Error::new( + napi::Status::from(status), + "toJSON() call failed", + )); + } + // Recurse on the toJSON() result (e.g. Date returns a string). + return unsafe { + napi_to_json_with_buffer_extraction( + env, + json_result, + blobs, + depth + 1, + ) + }; + } + } + } + } + + // Regular object — iterate own enumerable property names. + let mut property_names: napi_value = std::ptr::null_mut(); + // SAFETY: env and val are valid. + let status = + unsafe { napi::sys::napi_get_property_names(env, val, &mut property_names) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to get property names", + )); + } + let mut num_keys: u32 = 0; + // SAFETY: env and property_names are valid (property_names is an array). 
+ let status = + unsafe { napi::sys::napi_get_array_length(env, property_names, &mut num_keys) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to get property count", + )); + } + + let mut obj = serde_json::Map::with_capacity(num_keys as usize); + for i in 0..num_keys { + let mut key: napi_value = std::ptr::null_mut(); + // SAFETY: env and property_names are valid; i is in bounds. + let status = + unsafe { napi::sys::napi_get_element(env, property_names, i, &mut key) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to get property name", + )); + } + // SAFETY: key is a valid string napi_value. + let key_str = unsafe { napi_string_to_rust(env, key)? }; + let mut prop_val: napi_value = std::ptr::null_mut(); + // SAFETY: env, val, key are valid. + let status = unsafe { napi::sys::napi_get_property(env, val, key, &mut prop_val) }; + if status != napi::sys::Status::napi_ok { + return Err(napi::Error::new( + napi::Status::from(status), + "Failed to get property value", + )); + } + // SAFETY: env and prop_val are valid. + let json_val = unsafe { + napi_to_json_with_buffer_extraction(env, prop_val, blobs, depth + 1)? + }; + obj.insert(key_str, json_val); + } + Ok(serde_json::Value::Object(obj)) + } + // Symbols, functions, bigints, external — currently coerced to JSON null for host + // return values. Note: this intentionally differs from JSON.stringify, which e.g. + // throws on BigInt and omits certain properties instead of serializing them to null. + _ => Ok(serde_json::Value::Null), + } +} + // ── HostModule ─────────────────────────────────────────────────────── /// A builder for registering host functions in a named module. @@ -598,6 +1272,11 @@ impl HostModuleWrapper { /// Both sync and async callbacks are supported — if the callback /// returns a `Promise`, the bridge awaits it automatically. 
/// + /// **Binary data support**: `Uint8Array`/`Buffer` arguments from guest + /// code are automatically converted to Node.js `Buffer` objects before + /// being passed to your callback. If your callback returns a `Buffer`, + /// it will be converted back to a `Uint8Array` on the guest side. + /// /// Registering a function with the same name as an existing one in /// this module overwrites the previous registration. /// @@ -612,6 +1291,12 @@ impl HostModuleWrapper { /// const res = await fetch(url); /// return res.json(); /// }); + /// + /// // Binary data — Buffer args/returns work natively + /// math.register('compress', (data) => { + /// // data is a Buffer if guest passed Uint8Array + /// return zlib.gzipSync(data); // Return Buffer → Uint8Array on guest + /// }); /// ``` /// /// @param name - Function name within the module (must be non-empty) @@ -623,9 +1308,9 @@ impl HostModuleWrapper { &self, name: String, func: ThreadsafeFunction< - Rest>, - Promise>, - Rest>, + Rest>, + Promise>, + Rest>, Status, false, true, @@ -634,14 +1319,46 @@ impl HostModuleWrapper { if name.is_empty() { return Err(invalid_arg_error("Function name must not be empty")); } - let wrapper = move |args: String| -> hyperlight_js::Result { + + // Use binary-capable registration to support Buffer arguments. + // The closure receives parsed JsonValue args (with {"__bin__": N} + // placeholders) and decoded binary blobs. JsArg's ToNapiValue + // impl converts placeholders directly to native Node.js Buffers + // via the NAPI API — no base64 encoding needed. + let wrapper = move |args: serde_json::Value, + blobs: Vec>| + -> hyperlight_js::Result { + use hyperlight_js::FnReturn; use ThreadsafeFunctionCallMode::NonBlocking; - let args: Vec> = serde_json::from_str(&args)?; + + let blobs = Arc::new(blobs); + + // Spread the JSON array into individual JsArg values. 
+ // Each JsArg carries a reference to the blobs so its + // ToNapiValue impl can resolve __bin__ placeholders at + // any nesting depth. + let js_args: Vec> = match args { + JsonValue::Array(arr) => arr + .into_iter() + .map(|v| { + Some(JsArg { + value: v, + blobs: blobs.clone(), + }) + }) + .collect(), + other => vec![Some(JsArg { + value: other, + blobs: blobs.clone(), + })], + }; + let (tx, rx) = oneshot::channel(); - let status = func.call_with_return_value(Rest(args), NonBlocking, move |result, _| { - let _ = tx.send(result); - Ok(()) - }); + let status = + func.call_with_return_value(Rest(js_args), NonBlocking, move |result, _| { + let _ = tx.send(result); + Ok(()) + }); if status != Status::Ok { return Err(HyperlightError::Error(format!( "Host function call failed: {status:?}" @@ -657,14 +1374,28 @@ impl HostModuleWrapper { .await .map_err(|err| HyperlightError::Error(format!("{err}")))?; - let value = serde_json::to_string(&value)?; - Ok(value) + // JsReturn extracts nested Buffers into blobs via the + // recursive NAPI walker — no base64 round-trip needed. 
+ match value { + Some(JsReturn::Buffer(bytes)) => Ok(FnReturn::Binary(bytes)), + Some(JsReturn::Value(v, blobs)) => { + let json = serde_json::to_string(&v)?; + if blobs.is_empty() { + Ok(FnReturn::Json(json)) + } else { + let sidecar = hyperlight_js_common::encode_binaries(&blobs) + .map_err(|e| HyperlightError::Error(format!("{e}")))?; + Ok(FnReturn::JsonWithBinaries(json, sidecar)) + } + } + None => Ok(FnReturn::Json("null".into())), + } }) }; self.sandbox.with_inner_mut(|sandbox| { sandbox .host_module(&self.module_name) - .register_raw(name, wrapper); + .register_js(name, wrapper); Ok(()) })?; Ok(()) diff --git a/src/js-host-api/tests/host-functions.test.js b/src/js-host-api/tests/host-functions.test.js index 9626610..4e3be5a 100644 --- a/src/js-host-api/tests/host-functions.test.js +++ b/src/js-host-api/tests/host-functions.test.js @@ -543,3 +543,291 @@ describe('Multi-sandbox isolation', () => { expect(resultB).toEqual({ sum: 21, product: 10 }); }); }); + +// ── Binary data (Buffer/Uint8Array) ────────────────────────────────── + +describe('Binary data support', () => { + it('should pass Buffer args from guest Uint8Array to host', async () => { + const loaded = await buildLoadedSandbox( + (proto) => { + proto.hostModule('host').register('byte_length', (data) => { + expect(Buffer.isBuffer(data)).toBe(true); + return data.length; + }); + }, + ` + import * as host from "host:host"; + function handler() { + const data = new Uint8Array([72, 101, 108, 108, 111]); + return { len: host.byte_length(data) }; + } + ` + ); + const result = await loaded.callHandler('handler', {}); + expect(result).toEqual({ len: 5 }); + }); + + it('should return Buffer from host as Uint8Array on guest', async () => { + const loaded = await buildLoadedSandbox( + (proto) => { + proto.hostModule('host').register('get_bytes', () => { + return Buffer.from([1, 2, 3, 4, 5]); + }); + }, + ` + import * as host from "host:host"; + function handler() { + const data = host.get_bytes(); + return { 
len: data.length, first: data[0], last: data[4] }; + } + ` + ); + const result = await loaded.callHandler('handler', {}); + expect(result).toEqual({ len: 5, first: 1, last: 5 }); + }); + + it('should handle mixed Buffer and JSON args', async () => { + const loaded = await buildLoadedSandbox( + (proto) => { + proto.hostModule('host').register('describe', (prefix, data, num) => { + expect(typeof prefix).toBe('string'); + expect(Buffer.isBuffer(data)).toBe(true); + expect(typeof num).toBe('number'); + return `${prefix}-${data.length}-${num}`; + }); + }, + ` + import * as host from "host:host"; + function handler() { + const data = new Uint8Array([10, 20, 30]); + return { result: host.describe("pfx", data, 42) }; + } + ` + ); + const result = await loaded.callHandler('handler', {}); + expect(result).toEqual({ result: 'pfx-3-42' }); + }); + + it('should handle empty Uint8Array', async () => { + const loaded = await buildLoadedSandbox( + (proto) => { + proto.hostModule('host').register('check_empty', (data) => { + expect(Buffer.isBuffer(data)).toBe(true); + return data.length; + }); + }, + ` + import * as host from "host:host"; + function handler() { + return { len: host.check_empty(new Uint8Array(0)) }; + } + ` + ); + const result = await loaded.callHandler('handler', {}); + expect(result).toEqual({ len: 0 }); + }); + + it('should handle host returning empty Buffer', async () => { + // Regression: napi_get_buffer_info returns data=null, len=0 for + // empty buffers. JsReturn::from_napi_value must not panic on the + // null pointer — it should return an empty Vec instead. 
+ const loaded = await buildLoadedSandbox( + (proto) => { + proto.hostModule('host').register('empty_response', () => { + return Buffer.alloc(0); + }); + }, + ` + import * as host from "host:host"; + function handler() { + const data = host.empty_response(); + return { len: data.length, isUint8: data instanceof Uint8Array }; + } + ` + ); + const result = await loaded.callHandler('handler', {}); + expect(result).toEqual({ len: 0, isUint8: true }); + }); + + it('should round-trip binary data (send and receive)', async () => { + const loaded = await buildLoadedSandbox( + (proto) => { + proto.hostModule('host').register('echo_bytes', (data) => { + // Return the same Buffer back + return data; + }); + }, + ` + import * as host from "host:host"; + function handler() { + const input = new Uint8Array([0, 127, 128, 255]); + const output = host.echo_bytes(input); + // Verify round-trip preserves all byte values + return { + len: output.length, + b0: output[0], + b1: output[1], + b2: output[2], + b3: output[3], + }; + } + ` + ); + const result = await loaded.callHandler('handler', {}); + expect(result).toEqual({ len: 4, b0: 0, b1: 127, b2: 128, b3: 255 }); + }); + + // ── Nested binary returns ──────────────────────────────────────── + + it('should handle nested Buffer in returned object', async () => { + const loaded = await buildLoadedSandbox( + (proto) => { + proto.hostModule('host').register('get_payload', () => { + return { + name: 'test', + data: Buffer.from([10, 20, 30]), + }; + }); + }, + ` + import * as host from "host:host"; + function handler() { + const result = host.get_payload(); + return { + name: result.name, + data_len: result.data.length, + first: result.data[0], + last: result.data[2], + }; + } + ` + ); + const result = await loaded.callHandler('handler', {}); + expect(result).toEqual({ + name: 'test', + data_len: 3, + first: 10, + last: 30, + }); + }); + + it('should handle nested Buffer in returned array', async () => { + const loaded = await 
buildLoadedSandbox( + (proto) => { + proto.hostModule('host').register('get_items', () => { + return [Buffer.from([1, 2, 3]), Buffer.from([4, 5])]; + }); + }, + ` + import * as host from "host:host"; + function handler() { + const items = host.get_items(); + return { + count: items.length, + first_len: items[0].length, + second_len: items[1].length, + first_byte: items[0][0], + }; + } + ` + ); + const result = await loaded.callHandler('handler', {}); + expect(result).toEqual({ + count: 2, + first_len: 3, + second_len: 2, + first_byte: 1, + }); + }); + + it('should handle deeply nested Buffer in returned object', async () => { + const loaded = await buildLoadedSandbox( + (proto) => { + proto.hostModule('host').register('get_nested', () => { + return { + outer: { + inner: { + label: 'deep', + data: Buffer.from([0xab, 0xcd]), + }, + }, + }; + }); + }, + ` + import * as host from "host:host"; + function handler() { + const result = host.get_nested(); + return { + inner_len: result.outer.inner.data.length, + first_byte: result.outer.inner.data[0], + label: result.outer.inner.label, + }; + } + ` + ); + const result = await loaded.callHandler('handler', {}); + expect(result).toEqual({ + inner_len: 2, + first_byte: 0xab, + label: 'deep', + }); + }); + + it('should handle mixed Buffers and JSON in returned object', async () => { + const loaded = await buildLoadedSandbox( + (proto) => { + proto.hostModule('host').register('get_mixed', () => { + return { + header: Buffer.from([1, 2]), + status: 'ok', + payload: Buffer.from([3, 4, 5]), + count: 42, + }; + }); + }, + ` + import * as host from "host:host"; + function handler() { + const r = host.get_mixed(); + return { + header_len: r.header.length, + payload_len: r.payload.length, + status: r.status, + count: r.count, + h0: r.header[0], + p2: r.payload[2], + }; + } + ` + ); + const result = await loaded.callHandler('handler', {}); + expect(result).toEqual({ + header_len: 2, + payload_len: 3, + status: 'ok', + count: 42, + h0: 
1, + p2: 5, + }); + }); + + it('should handle nested Uint8Array (not just Buffer)', async () => { + const loaded = await buildLoadedSandbox( + (proto) => { + proto.hostModule('host').register('get_uint8', () => { + return { data: new Uint8Array([7, 8, 9]) }; + }); + }, + ` + import * as host from "host:host"; + function handler() { + const r = host.get_uint8(); + return { len: r.data.length, first: r.data[0] }; + } + ` + ); + const result = await loaded.callHandler('handler', {}); + expect(result).toEqual({ len: 3, first: 7 }); + }); +});