From 575eec6787645f31755005bad7bccf653d6a47b2 Mon Sep 17 00:00:00 2001 From: Mike Dilger Date: Wed, 13 Dec 2023 17:28:52 +1300 Subject: [PATCH] json event: JSON parsing of event --- src/types/event/json_event.rs | 230 ++++++++++++++++++++++++++++++++++ src/types/event/mod.rs | 12 ++ 2 files changed, 242 insertions(+) create mode 100644 src/types/event/json_event.rs diff --git a/src/types/event/json_event.rs b/src/types/event/json_event.rs new file mode 100644 index 0000000..d3e722b --- /dev/null +++ b/src/types/event/json_event.rs @@ -0,0 +1,230 @@ +use crate::types::parse::json_parse::*; +use crate::Error; + +/// Parses a JSON event from the `input` buffer. Places the parsed event into the `output` buffer. +/// Returns the count of consumed bytes and output bytes +pub fn parse_json_event(input: &[u8], output: &mut [u8]) -> Result<(usize, usize), Error> { + // Minimum-sized JSON event is 204 characters long + // NOTE: 152 is the minimum binary event + if input.len() < 204 { + return Err(Error::JsonBadEvent("Too Short", 0)); + } + + // This tracks where we are currently looking in the input as we scan forward. + // It is short for INput POSition. + let mut inpos = 0; + + // If tags comes before content, content can use this to know where to put itself. + // This is the length of the tags output section. 0 means it hasn't been written yet. + let mut tags_size: usize = 0; + + // If content comes before tags, we cannot write it because we don't know how much + // space Tags will take. So we instead just remember where the content string + // begins so we can write it later. + let mut content_input_start: usize = 0; + + // Remember which fields we have read using bit flags. + // We must get all seven of these fields for an event to be valid. + const HAVE_ID: u8 = 0x1 << 0; + const HAVE_PUBKEY: u8 = 0x1 << 1; + const HAVE_SIG: u8 = 0x1 << 2; + const HAVE_CREATED_AT: u8 = 0x1 << 3; + const HAVE_KIND: u8 = 0x1 << 4; + const HAVE_CONTENT: u8 = 0x1 << 5; + const HAVE_TAGS: u8 = 0x1 << 6; + let mut complete: u8 = 0; + + eat_whitespace(input, &mut inpos); + verify_char(input, b'{', &mut inpos)?; + loop { + eat_whitespace(input, &mut inpos); + + // Presuming that we must have at least one field, we don't have to look + // for the end of the object yet. + + // Move to the start of the field name + verify_char(input, b'"', &mut inpos)?; + + // No matter which field is next, we need at least 7 bytes for the smallest + // field and value: kind":1 + // This allows us to skip length tests below that are shorter than inpos+7 + if inpos + 7 > input.len() { + return Err(Error::JsonBadEvent("Too Short or Missing Fields", inpos)); + } + + if &input[inpos..inpos + 3] == b"id\"" { + if complete & HAVE_ID == HAVE_ID { + return Err(Error::JsonBadEvent("Duplicate id field", inpos)); + } + inpos += 3; + eat_colon_with_whitespace(input, &mut inpos)?; + read_id(input, &mut inpos, &mut output[16..48])?; + complete |= HAVE_ID; + } else if &input[inpos..inpos + 4] == b"sig\"" { + if complete & HAVE_SIG == HAVE_SIG { + return Err(Error::JsonBadEvent("Duplicate sig field", inpos)); + } + inpos += 4; + eat_colon_with_whitespace(input, &mut inpos)?; + read_sig(input, &mut inpos, output)?; + complete |= HAVE_SIG; + } else if &input[inpos..inpos + 5] == b"kind\"" { + if complete & HAVE_KIND == HAVE_KIND { + return Err(Error::JsonBadEvent("Duplicate kind field", inpos)); + } + inpos += 5; + eat_colon_with_whitespace(input, &mut inpos)?; + let kind = read_kind(input, &mut inpos)?; + output[4..6].copy_from_slice(kind.to_ne_bytes().as_slice()); + complete |= HAVE_KIND; + } else if &input[inpos..inpos + 5] == b"tags\"" { + if complete & HAVE_TAGS == HAVE_TAGS { + return Err(Error::JsonBadEvent("Duplicate tags field", inpos)); + } + inpos += 5; + eat_colon_with_whitespace(input, &mut inpos)?; + tags_size = read_tags_array(input, &mut inpos, &mut output[144..])?; + complete |= HAVE_TAGS; + if content_input_start != 0 { + // Content was found earlier than tags. + // Now that tags have been read, we should read the content + read_content(input, &mut content_input_start, output, 144 + tags_size)?; + complete |= HAVE_CONTENT; + } + } else if &input[inpos..inpos + 7] == b"pubkey\"" { + if complete & HAVE_PUBKEY == HAVE_PUBKEY { + return Err(Error::JsonBadEvent("Duplicate pubkey field", inpos)); + } + inpos += 7; + eat_colon_with_whitespace(input, &mut inpos)?; + read_pubkey(input, &mut inpos, &mut output[48..80])?; + complete |= HAVE_PUBKEY; + } else if inpos + 8 <= input.len() && &input[inpos..inpos + 8] == b"content\"" { + if complete & HAVE_CONTENT == HAVE_CONTENT { + return Err(Error::JsonBadEvent("Duplicate pubkey field", inpos)); + } + inpos += 8; + eat_colon_with_whitespace(input, &mut inpos)?; + if tags_size == 0 { + // Oops, we haven't read the tags yet. That means we don't yet know where + // to place the content. In this case we just remember the offset where + // this needs to be done, so we can do this later. + content_input_start = inpos; + // skip past it so we can read the subsequent fields + verify_char(input, b'"', &mut inpos)?; + burn_string(input, &mut inpos)?; + } else { + read_content(input, &mut inpos, output, 144 + tags_size)?; + complete |= HAVE_CONTENT; + } + } else if inpos + 11 <= input.len() && &input[inpos..inpos + 11] == b"created_at\"" { + if complete & HAVE_CREATED_AT == HAVE_CREATED_AT { + return Err(Error::JsonBadEvent("Duplicate created_at field", inpos)); + } + inpos += 11; + eat_colon_with_whitespace(input, &mut inpos)?; + let u = read_u64(input, &mut inpos)?; + output[8..16].copy_from_slice(u.to_ne_bytes().as_slice()); + complete |= HAVE_CREATED_AT; + } else { + burn_key_and_value(input, &mut inpos)?; + } + + // get past the comma, or detect the close brace and exit + if next_object_field(input, &mut inpos)? { + break; + } + } + + if complete == 0b0111_1111 { + Ok(( + inpos, + u32::from_ne_bytes(output[0..4].try_into().unwrap()) as usize, + )) + } else { + Err(Error::JsonBadEvent("Missing Fields", inpos)) + } +} + +#[cfg(test)] +mod test { + use super::*; + + // FIXME this will fail on big-endian machines + #[test] + fn test_parse_json_event() { + let json = br#"{"id":"a9663055164ab8b30d9524656370c4bf93393bb051b7edf4556f40c5298dc0c7","pubkey":"ee11a5dff40c19a555f41fe42b48f00e618c91225622ae37b6c2bb67b76c4e49","created_at":1681778790,"kind":1,"sig":"4dfea1a6f73141d5691e43afc3234dbe73016db0fb207cf247e0127cc2591ee6b4be5b462272030a9bde75882aae810f359682b1b6ce6cbb97201141c576db42","content":"He got snowed in","tags":[["client","gossip"],["p","e2ccf7cf20403f3f2a4a55b328f0de3be38558a7d5f33632fdaaefc726c1c8eb"],["e","2c86abcc98f7fd8a6750aab8df6c1863903f107206cc2d72e8afeb6c38357aed","wss://nostr-pub.wellorder.net/","root"]]}"#; + let mut buffer: Vec = Vec::with_capacity(4096); + buffer.resize(4096, 0); + let (_insize, size) = parse_json_event(&json[..], &mut buffer).unwrap(); + assert_eq!(size, 372); + assert_eq!( + &buffer[0..size], + &[ + 116, 1, 0, 0, // 372 bytes long + 1, 0, 0, 0, // kind 1 + 102, 232, 61, 100, 0, 0, 0, 0, // created at 1681778790 + 169, 102, 48, 85, 22, 74, 184, 179, 13, 149, 36, 101, 99, 112, 196, 191, 147, 57, + 59, 176, 81, 183, 237, 244, 85, 111, 64, 197, 41, 141, 192, 199, // id + 238, 17, 165, 223, 244, 12, 25, 165, 85, 244, 31, 228, 43, 72, 240, 14, 97, 140, + 145, 34, 86, 34, 174, 55, 182, 194, 187, 103, 183, 108, 78, 73, // pubkey + 77, 254, 161, 166, 247, 49, 65, 213, 105, 30, 67, 175, 195, 35, 77, 190, 115, 1, + 109, 176, 251, 32, 124, 242, 71, 224, 18, 124, 194, 89, 30, 230, 180, 190, 91, 70, + 34, 114, 3, 10, 155, 222, 117, 136, 42, 174, 129, 15, 53, 150, 130, 177, 182, 206, + 108, 187, 151, 32, 17, 65, 197, 118, 219, 66, // sig + // 144: + 208, 0, // tags section is 208 bytes long + 3, 0, // there are three tags + 10, 0, // first tag is at offset 10 + 28, 0, // second tag is at offset 28 + 99, 0, // third tag is at offset 99 + // 154: (144+10) + 2, 0, // the first tag has 2 strings + 6, 0, // the first string is 6 bytes long + 99, 108, 105, 101, 110, 116, // "client" + 6, 0, // the second string is 6 bytes long + 103, 111, 115, 115, 105, 112, // "gossip" + // 172: (144+28) + 2, 0, // the second tag has two strings + 1, 0, // the first string is 1 char long + 112, // "p" + 64, 0, // the second string is 64 bytes long + 101, 50, 99, 99, 102, 55, 99, 102, 50, 48, 52, 48, 51, 102, 51, 102, 50, 97, 52, + 97, 53, 53, 98, 51, 50, 56, 102, 48, 100, 101, 51, 98, 101, 51, 56, 53, 53, 56, 97, + 55, 100, 53, 102, 51, 51, 54, 51, 50, 102, 100, 97, 97, 101, 102, 99, 55, 50, 54, + 99, 49, 99, 56, 101, + 98, // "e2ccf7cf20403f3f2a4a55b328f0de3be38558a7d5f33632fdaaefc726c1c8eb" + // 243: (144+99) + 4, 0, // the third tag has 4 strings + 1, 0, // the first string is 1 char long + 101, // "e" + 64, 0, // the second string is 64 bytes long + 50, 99, 56, 54, 97, 98, 99, 99, 57, 56, 102, 55, 102, 100, 56, 97, 54, 55, 53, 48, + 97, 97, 98, 56, 100, 102, 54, 99, 49, 56, 54, 51, 57, 48, 51, 102, 49, 48, 55, 50, + 48, 54, 99, 99, 50, 100, 55, 50, 101, 56, 97, 102, 101, 98, 54, 99, 51, 56, 51, 53, + 55, 97, 101, + 100, // "2c86abcc98f7fd8a6750aab8df6c1863903f107206cc2d72e8afeb6c38357aed" + 30, 0, // the third string is 30 bytes long + 119, 115, 115, 58, 47, 47, 110, 111, 115, 116, 114, 45, 112, 117, 98, 46, 119, 101, + 108, 108, 111, 114, 100, 101, 114, 46, 110, 101, 116, + 47, // "wss://nostr-pub.wellorder.net/" + 4, 0, // the fourth string is 4 bytes long + 114, 111, 111, 116, // "root" + // 352: (144+208) + 16, 0, 0, 0, // the content is 16 bytes long + 72, 101, 32, 103, 111, 116, 32, 115, 110, 111, 119, 101, 100, 32, 105, + 110, // "He got snowed in" + + // 372: + ] + ); + + // Same event in a different order + let json2 = br#"{"kind":1,"pubkey":"ee11a5dff40c19a555f41fe42b48f00e618c91225622ae37b6c2bb67b76c4e49","created_at":1681778790,"sig":"4dfea1a6f73141d5691e43afc3234dbe73016db0fb207cf247e0127cc2591ee6b4be5b462272030a9bde75882aae810f359682b1b6ce6cbb97201141c576db42","tags":[["client","gossip"],["p","e2ccf7cf20403f3f2a4a55b328f0de3be38558a7d5f33632fdaaefc726c1c8eb"],["e","2c86abcc98f7fd8a6750aab8df6c1863903f107206cc2d72e8afeb6c38357aed","wss://nostr-pub.wellorder.net/","root"]],"id":"a9663055164ab8b30d9524656370c4bf93393bb051b7edf4556f40c5298dc0c7","content":"He got snowed in"}"#; + let mut buffer2: Vec = Vec::with_capacity(4096); + buffer2.resize(4096, 0); + let (_insize, size) = parse_json_event(&json2[..], &mut buffer2).unwrap(); + assert_eq!(size, 372); + assert_eq!(&buffer[..372], &buffer2[..372]); + } +} diff --git a/src/types/event/mod.rs b/src/types/event/mod.rs index f22a40c..a384d20 100644 --- a/src/types/event/mod.rs +++ b/src/types/event/mod.rs @@ -2,6 +2,9 @@ use super::{Id, Kind, Pubkey, Sig, Tags, Time}; use crate::Error; use std::fmt; +mod json_event; +use json_event::parse_json_event; + /* * 0 [4 bytes] length of the event structure * 4 [2 bytes] kind @@ -20,6 +23,15 @@ use std::fmt; pub struct Event<'a>(&'a [u8]); impl<'a> Event<'a> { + // Parse json into an Event. Returns the count of consumed input bytes and the Event + pub fn from_json( + json: &[u8], + output_buffer: &'a mut [u8], + ) -> Result<(usize, Event<'a>), Error> { + let (incount, outcount) = parse_json_event(json, output_buffer)?; + Ok((incount, Event(&output_buffer[..outcount]))) + } + // this marks off the slice of bytes that represent an event from a potentially longer input pub fn delineate(input: &'a [u8]) -> Result, Error> { if input.len() < 144 + 4 + 4 {