micro-optimize json_escape to avoid unnecessary mallocs

This commit is contained in:
Mike Dilger 2024-02-17 12:05:50 +13:00
parent 5254a6450d
commit 8cd3f37a8e
2 changed files with 30 additions and 42 deletions

View File

@ -118,13 +118,10 @@ impl<'a> Event<'a> {
output.extend(br#","tags":"#); output.extend(br#","tags":"#);
output.extend(self.tags()?.as_json()); output.extend(self.tags()?.as_json());
output.extend(br#","content":""#); output.extend(br#","content":""#);
// FIXME: we cannot just extend with the raw content, we have to // This is okay if it is not accurate. It generally avoids
// json_escape it first. // lots of little mallocs when the capacity is already allocated
// BUT unfortunately that currently requires a malloc. We will fix output.reserve(self.content().len() * 7 / 6);
// that in the next commit let mut output = json_escape(self.content(), output)?;
let mut escaped_content = vec![0; self.content().len() * 2];
let content_outlen = json_escape(self.content(), &mut escaped_content[..])?;
output.extend(&escaped_content[..content_outlen]);
output.extend(br#"","sig":""#); output.extend(br#"","sig":""#);
let pos = output.len(); let pos = output.len();
output.resize(pos + 128, 0); output.resize(pos + 128, 0);
@ -138,8 +135,10 @@ impl<'a> Event<'a> {
use secp256k1::schnorr::Signature; use secp256k1::schnorr::Signature;
use secp256k1::{Message, XOnlyPublicKey}; use secp256k1::{Message, XOnlyPublicKey};
let mut escaped_content = vec![0; self.content().len() * 2]; // This is okay if it is not accurate. It generally avoids
let outlen = json_escape(self.content(), &mut escaped_content[..])?; // lots of little mallocs when the capacity is already allocated
let escaped_content = Vec::with_capacity(self.content().len() * 7 / 6);
let escaped_content = json_escape(self.content(), escaped_content)?;
let signable = format!( let signable = format!(
r#"[0,"{}",{},{},{},"{}"]"#, r#"[0,"{}",{},{},{},"{}"]"#,
@ -147,7 +146,7 @@ impl<'a> Event<'a> {
self.created_at(), self.created_at(),
self.kind(), self.kind(),
self.tags()?, self.tags()?,
unsafe { std::str::from_utf8_unchecked(&escaped_content[0..outlen]) }, unsafe { std::str::from_utf8_unchecked(&escaped_content[..]) },
); );
drop(escaped_content); drop(escaped_content);

View File

@ -5,49 +5,38 @@ use crate::error::{ChorusError, Error};
// ESCAPES: \" \\ \/ \b \f \n \r \t // ESCAPES: \" \\ \/ \b \f \n \r \t
// UTF ESCAPE: \uXXXX or \uXXXX\uXXXX // UTF ESCAPE: \uXXXX or \uXXXX\uXXXX
#[allow(dead_code)] // FIXME // This escapes a string with JSON escapes. It takes an output buffer,
pub fn json_escape(input: &[u8], out: &mut [u8]) -> Result<usize, Error> { // and returns it filled with the escaped string.
// Write position in the output buffer // For performance you should try to pass in a buffer that is already
let mut write_pos = 0; // allocated big enough.
pub fn json_escape(input: &[u8], mut out: Vec<u8>) -> Result<Vec<u8>, Error> {
// closure to output bytes
let mut output = |s: &[u8]| -> Result<(), Error> {
if out.len() < write_pos + s.len() {
Err(ChorusError::BufferTooSmall.into())
} else {
out[write_pos..write_pos + s.len()].copy_from_slice(s);
write_pos += s.len();
Ok(())
}
};
let mut read_pos: usize = 0; let mut read_pos: usize = 0;
while let Some((codepoint, size)) = next_code_point(&input[read_pos..])? { while let Some((codepoint, size)) = next_code_point(&input[read_pos..])? {
if is_safe_char(codepoint) { if is_safe_char(codepoint) {
output(&input[read_pos..read_pos + size])?; out.extend(&input[read_pos..read_pos + size]);
} else { } else {
match codepoint { match codepoint {
0x08 => output("\\b".as_bytes())?, 0x08 => out.extend("\\b".as_bytes()),
0x09 => output("\\t".as_bytes())?, 0x09 => out.extend("\\t".as_bytes()),
0x0A => output("\\n".as_bytes())?, 0x0A => out.extend("\\n".as_bytes()),
0x0C => output("\\f".as_bytes())?, 0x0C => out.extend("\\f".as_bytes()),
0x0D => output("\\r".as_bytes())?, 0x0D => out.extend("\\r".as_bytes()),
0x22 => output("\\\"".as_bytes())?, 0x22 => out.extend("\\\"".as_bytes()),
0x5C => output("\\\\".as_bytes())?, 0x5C => out.extend("\\\\".as_bytes()),
_ => { _ => {
if codepoint > 0x20 { if codepoint > 0x20 {
panic!("unnecessary encoding requested"); panic!("unnecessary encoding requested");
} }
// This violates NIP-01 which doesn't allow characters like 0x00 // This violates NIP-01 which doesn't allow characters like 0x00
// even though JSON UTF-8 does. // even though JSON UTF-8 does.
output(format!("\\u{:04x}", codepoint).as_bytes())?; out.extend(format!("\\u{:04x}", codepoint).as_bytes());
} }
} }
} }
read_pos += size; read_pos += size;
} }
Ok(write_pos) Ok(out)
} }
macro_rules! output_slice { macro_rules! output_slice {
@ -165,33 +154,33 @@ mod test {
#[test] #[test]
fn test_json_escape() { fn test_json_escape() {
let mut buffer: [u8; 255] = [255; 255]; let mut buffer = Vec::with_capacity(255);
let input = "hello\t\tworld let input = "hello\t\tworld
!!!"; !!!";
let _size = json_escape(input.as_bytes(), &mut buffer).unwrap(); let buffer = json_escape(input.as_bytes(), buffer).unwrap();
assert_eq!(&buffer[0..19], br#"hello\t\tworld\n!!!"#); assert_eq!(&buffer[0..19], br#"hello\t\tworld\n!!!"#);
let input: [u8; 11] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; let input: [u8; 11] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
let _size = json_escape(input.as_slice(), &mut buffer).unwrap(); let buffer = json_escape(input.as_slice(), buffer).unwrap();
assert_eq!( assert_eq!(
&buffer[0..54], &buffer[0..54],
br#"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n"# br#"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n"#
); );
let input: [u8; 12] = [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]; let input: [u8; 12] = [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22];
let _size = json_escape(input.as_slice(), &mut buffer).unwrap(); let buffer = json_escape(input.as_slice(), buffer).unwrap();
assert_eq!( assert_eq!(
&buffer[0..64], &buffer[0..64],
br#"\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016"# br#"\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016"#
); );
let input: [u8; 4] = [32, 33, 34, 35]; let input: [u8; 4] = [32, 33, 34, 35];
let _size = json_escape(input.as_slice(), &mut buffer).unwrap(); let buffer = json_escape(input.as_slice(), buffer).unwrap();
assert_eq!(&buffer[0..5], br##" !\"#"##); assert_eq!(&buffer[0..5], br##" !\"#"##);
let input: [u8; 1] = [92]; let input: [u8; 1] = [92];
let _size = json_escape(input.as_slice(), &mut buffer).unwrap(); let buffer = json_escape(input.as_slice(), buffer).unwrap();
assert_eq!(&buffer[0..2], br#"\\"#); assert_eq!(&buffer[0..2], br#"\\"#);
} }