pub use private::{
MerkleNode, MerkleStream, MerkleTree, MerkleTreeKind, Sha2_256Node, VariantMerkleTree,
VecMerkleTree,
};
mod private {
use crate::{
bterr,
crypto::{Error, HashKind, Result},
trailered::Trailered,
BoxInIoErr, Decompose, FlushMeta, MetaReader, Sectored, WriteInteg, SECTOR_SZ_DEFAULT,
};
use positioned_io::{ReadAt, Size, WriteAt};
use serde::{Deserialize, Serialize};
use std::io;
use strum::EnumDiscriminants;
pub(super) fn log2(n: usize) -> isize {
if 0 == n {
-1
} else {
n.ilog2().try_into().unwrap()
}
}
pub(super) fn exp2(x: isize) -> usize {
if x < 0 {
0
} else {
1 << x
}
}
pub trait MerkleNode: Default + Serialize + for<'de> Deserialize<'de> {
const KIND: HashKind;
fn new<'a, I: Iterator<Item = &'a [u8]>>(parts: I) -> Result<Self>;
fn combine<'a, I: Iterator<Item = &'a [u8]>>(
&mut self,
prefix: I,
left: Option<&'a Self>,
right: Option<&'a Self>,
) -> Result<()>;
fn assert_contains(&self, hash_data: Option<&[u8]>) -> Result<()>;
fn assert_contains_hash_of<'a, I: Iterator<Item = &'a [u8]>>(&self, parts: I)
-> Result<()>;
fn assert_parent_of<'a, I: Iterator<Item = &'a [u8]>>(
&self,
prefix: I,
left: Option<&'a Self>,
right: Option<&'a Self>,
) -> Result<()>;
fn try_as_slice(&self) -> Result<&[u8]>;
fn digest<'a, I: Iterator<Item = &'a [u8]>>(dest: &mut [u8], parts: I) -> Result<()> {
Self::KIND.digest(dest, parts)
}
}
#[derive(Default, Serialize, Deserialize)]
pub struct Sha2_256Node(Option<[u8; HashKind::Sha2_256.len()]>);
impl Sha2_256Node {
fn as_slice(&self) -> Option<&[u8]> {
self.0.as_ref().map(|e| e.as_slice())
}
fn mut_or_init(&mut self) -> &mut [u8] {
if self.0.is_none() {
self.0 = Some([0; HashKind::Sha2_256.len()])
}
self.0.as_mut().unwrap()
}
fn combine_hash_data<'a, I: Iterator<Item = &'a [u8]>, F: FnOnce() -> Result<()>>(
dest: &mut [u8],
prefix: I,
left: Option<&'a [u8]>,
right: Option<&'a [u8]>,
when_neither: F,
) -> Result<()> {
match (left, right) {
(Some(left), Some(right)) => {
Self::digest(dest, prefix.chain([left, right].into_iter()))
}
(Some(left), None) => Self::digest(dest, prefix.chain([left, b"None"].into_iter())),
(None, Some(right)) => {
Self::digest(dest, prefix.chain([b"None", right].into_iter()))
}
(None, None) => when_neither(),
}
}
}
impl MerkleNode for Sha2_256Node {
const KIND: HashKind = HashKind::Sha2_256;
fn new<'a, I: Iterator<Item = &'a [u8]>>(parts: I) -> Result<Self> {
let mut array = [0u8; Self::KIND.len()];
Self::digest(&mut array, parts)?;
Ok(Sha2_256Node(Some(array)))
}
fn combine<'a, I: Iterator<Item = &'a [u8]>>(
&mut self,
prefix: I,
left: Option<&'a Self>,
right: Option<&'a Self>,
) -> Result<()> {
let left = left.and_then(|e| e.as_slice());
let right = right.and_then(|e| e.as_slice());
Self::combine_hash_data(self.mut_or_init(), prefix, left, right, || {
Err(bterr!(
"at least one argument to combine needs to supply data",
))
})
}
fn assert_contains(&self, hash_data: Option<&[u8]>) -> Result<()> {
if self.as_slice() == hash_data {
Ok(())
} else {
Err(bterr!(Error::HashCmpFailure))
}
}
fn assert_contains_hash_of<'a, I: Iterator<Item = &'a [u8]>>(
&self,
parts: I,
) -> Result<()> {
let mut buf = [0u8; Self::KIND.len()];
Self::digest(&mut buf, parts)?;
self.assert_contains(Some(&buf))
}
fn assert_parent_of<'a, I: Iterator<Item = &'a [u8]>>(
&self,
prefix: I,
left: Option<&'a Self>,
right: Option<&'a Self>,
) -> Result<()> {
let slice = match self.as_slice() {
Some(slice) => slice,
None => return Err(bterr!(Error::HashCmpFailure)),
};
let buf = {
let mut buf = [0u8; Self::KIND.len()];
let left = left.and_then(|e| e.as_slice());
let right = right.and_then(|e| e.as_slice());
Self::combine_hash_data(&mut buf, prefix, left, right, || {
Err(bterr!(
"logic error encountered, left or right should have been Some"
))
})?;
buf
};
if slice == buf {
Ok(())
} else {
Err(bterr!(Error::HashCmpFailure))
}
}
fn try_as_slice(&self) -> Result<&[u8]> {
self.0
.as_ref()
.map(|arr| arr.as_slice())
.ok_or_else(|| bterr!("this merkle node is empty"))
}
}
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct BinTreeIndex(usize);
impl BinTreeIndex {
fn left(self) -> Self {
Self(2 * self.0 + 1)
}
fn right(self) -> Self {
Self(2 * (self.0 + 1))
}
fn parent(self) -> Option<Self> {
if self.0 > 0 {
Some(Self((self.0 - 1) / 2))
} else {
None
}
}
fn ancestors(self) -> impl Iterator<Item = BinTreeIndex> {
struct ParentIter(Option<BinTreeIndex>);
impl Iterator for ParentIter {
type Item = BinTreeIndex;
fn next(&mut self) -> Option<Self::Item> {
let parent = match self.0 {
Some(curr) => curr.parent(),
None => None,
};
self.0 = parent;
parent
}
}
ParentIter(Some(self))
}
}
pub trait MerkleTree: Sectored {
fn assert_root_contains(&mut self, hash_data: Option<&[u8]>) -> Result<()>;
fn write(&mut self, offset: usize, data: &[u8]) -> Result<()>;
fn verify(&self, offset: usize, data: &[u8]) -> Result<()>;
fn root_hash(&self) -> Result<&[u8]>;
}
#[derive(Serialize, Deserialize)]
pub struct VecMerkleTree<T> {
nodes: Vec<T>,
sector_sz: usize,
#[serde(skip)]
root_verified: bool,
}
impl<T> VecMerkleTree<T> {
const LEAF_PREFIX: &'static [u8] = b"Leaf";
const INTERIOR_PREFIX: &'static [u8] = b"Interior";
pub fn empty(sector_sz: usize) -> VecMerkleTree<T> {
VecMerkleTree {
nodes: Vec::new(),
sector_sz,
root_verified: true,
}
}
fn generations(&self) -> isize {
log2(self.nodes.len())
}
fn len(generations: isize) -> usize {
if generations >= 0 {
exp2(generations + 1) - 1
} else {
0
}
}
fn hash_at(&self, index: BinTreeIndex) -> Result<&T> {
self.nodes.get(index.0).ok_or_else(|| {
bterr!(Error::IndexOutOfBounds {
index: index.0,
limit: self.nodes.len(),
})
})
}
fn offset_to_index(&self, offset: usize) -> Result<BinTreeIndex> {
let gens = self.generations();
let sector_index = offset / self.sector_sz;
let index_limit = exp2(gens);
if sector_index >= index_limit {
return Err(bterr!(Error::InvalidOffset {
actual: offset,
limit: index_limit * self.sector_sz,
}));
}
Ok(BinTreeIndex(exp2(gens) - 1 + sector_index))
}
fn leaf_parts(data: &[u8]) -> impl Iterator<Item = &[u8]> {
[Self::LEAF_PREFIX, data].into_iter()
}
fn interior_prefix<'a>() -> impl Iterator<Item = &'a [u8]> {
[Self::INTERIOR_PREFIX].into_iter()
}
}
impl<T: MerkleNode> VecMerkleTree<T> {
fn perc_up(&mut self, start: BinTreeIndex) -> Result<()> {
for index in start.ancestors() {
self.combine_children(index)?;
}
Ok(())
}
fn combine_children(&mut self, index: BinTreeIndex) -> Result<()> {
let left = index.left();
let right = index.right();
let split = index.0 + 1;
let (front, back) = self.nodes.split_at_mut(split);
let dest = &mut front[front.len() - 1];
let left = back.get(left.0 - split);
let right = back.get(right.0 - split);
dest.combine(Self::interior_prefix(), left, right)
.map_err(|_| {
bterr!(Error::IndexOutOfBounds {
index: index.0,
limit: Self::len(self.generations() - 1),
})
})
}
}
impl<T: MerkleNode> MerkleTree for VecMerkleTree<T> {
fn assert_root_contains(&mut self, hash_data: Option<&[u8]>) -> Result<()> {
match self.hash_at(BinTreeIndex(0)) {
Ok(root) => {
root.assert_contains(hash_data)?;
self.root_verified = true;
Ok(())
}
Err(err) => {
let err = err.downcast::<Error>()?;
match err {
Error::IndexOutOfBounds { .. } => {
if hash_data.is_none() {
Ok(())
} else {
Err(bterr!(Error::HashCmpFailure))
}
}
_ => Err(bterr!(err)),
}
}
}
}
fn write(&mut self, offset: usize, data: &[u8]) -> Result<()> {
self.assert_sector_sz(data.len())?;
let sector_index = offset / self.sector_sz;
let generations = self.generations();
let sector_index_sup = exp2(generations);
if sector_index >= sector_index_sup {
let generations_new = log2(sector_index) + 1;
let new_cap = Self::len(generations_new) - self.nodes.len();
self.nodes.reserve_exact(new_cap);
let leaf_ct = self.nodes.len() - Self::len(generations - 1);
let new_len = Self::len(generations_new - 1) + sector_index + 1;
self.nodes.resize_with(new_len, T::default);
let generation_gap = generations_new - generations;
for gen in (0..(generations + 1)).rev() {
let shift = exp2(gen + generation_gap) - exp2(gen);
let start = exp2(gen) - 1;
let end = start
+ if gen == generations {
leaf_ct
} else {
exp2(gen)
};
for index in start..end {
let new_index = index + shift;
self.nodes.swap(index, new_index);
}
}
if generation_gap > 1 && generations >= 0 {
self.perc_up(BinTreeIndex(exp2(generation_gap) - 1))?;
}
}
let index = self.offset_to_index(offset)?;
if index.0 >= self.nodes.len() {
self.nodes.resize_with(index.0 + 1, T::default);
}
self.nodes[index.0] = T::new(Self::leaf_parts(data))?;
self.perc_up(index)
}
fn verify(&self, offset: usize, data: &[u8]) -> Result<()> {
if !self.root_verified {
return Err(bterr!(Error::RootHashNotVerified));
}
self.assert_sector_sz(data.len())?;
let start = self.offset_to_index(offset)?;
self.hash_at(start)?
.assert_contains_hash_of(Self::leaf_parts(data))?;
for index in start.ancestors() {
let parent = self.hash_at(index)?;
let left = self.hash_at(index.left()).ok();
let right = self.hash_at(index.right()).ok();
parent.assert_parent_of(Self::interior_prefix(), left, right)?;
}
Ok(())
}
fn root_hash(&self) -> Result<&[u8]> {
self.nodes
.first()
.map(|node| node.try_as_slice())
.ok_or_else(|| bterr!("the Merkle tree is empty"))?
}
}
impl<T> Sectored for VecMerkleTree<T> {
fn sector_sz(&self) -> usize {
self.sector_sz
}
}
impl<T> Default for VecMerkleTree<T> {
fn default() -> Self {
Self::empty(SECTOR_SZ_DEFAULT)
}
}
#[derive(Serialize, Deserialize, EnumDiscriminants)]
#[strum_discriminants(name(MerkleTreeKind))]
pub enum VariantMerkleTree {
Sha2_256(VecMerkleTree<Sha2_256Node>),
}
impl VariantMerkleTree {
pub fn empty(kind: MerkleTreeKind, sector_sz: usize) -> VariantMerkleTree {
match kind {
MerkleTreeKind::Sha2_256 => {
Self::Sha2_256(VecMerkleTree::<Sha2_256Node>::empty(sector_sz))
}
}
}
}
impl Sectored for VariantMerkleTree {
fn sector_sz(&self) -> usize {
match self {
Self::Sha2_256(tree) => tree.sector_sz(),
}
}
}
impl MerkleTree for VariantMerkleTree {
fn assert_root_contains(&mut self, hash_data: Option<&[u8]>) -> Result<()> {
match self {
Self::Sha2_256(tree) => tree.assert_root_contains(hash_data),
}
}
fn root_hash(&self) -> Result<&[u8]> {
match self {
Self::Sha2_256(tree) => tree.root_hash(),
}
}
fn verify(&self, offset: usize, data: &[u8]) -> Result<()> {
match self {
Self::Sha2_256(tree) => tree.verify(offset, data),
}
}
fn write(&mut self, offset: usize, data: &[u8]) -> Result<()> {
match self {
Self::Sha2_256(tree) => tree.write(offset, data),
}
}
}
impl Default for MerkleTreeKind {
fn default() -> Self {
Self::Sha2_256
}
}
pub struct MerkleStream<T> {
trailered: Trailered<T, VariantMerkleTree>,
tree: VariantMerkleTree,
}
impl<T: MetaReader> MerkleStream<T> {
pub fn assert_root_integrity(&mut self) -> Result<()> {
let hash_data = self.trailered.meta_body().integrity();
self.tree.assert_root_contains(hash_data)
}
}
impl<T: ReadAt + Size + Sectored> MerkleStream<T> {
pub fn new(inner: T) -> Result<MerkleStream<T>> {
let (trailered, tree) = Trailered::new(inner)?;
let tree = tree.unwrap_or_else(|| {
VariantMerkleTree::empty(MerkleTreeKind::default(), trailered.sector_sz())
});
Ok(MerkleStream { trailered, tree })
}
pub fn with_tree(inner: T, tree: VariantMerkleTree) -> Result<MerkleStream<T>> {
let (trailered, trailer) = Trailered::new(inner)?;
if trailer.is_some() {
return Err(bterr!("stream already contained a serialized merkle tree",));
}
Ok(MerkleStream { trailered, tree })
}
}
impl<T> Sectored for MerkleStream<T> {
fn sector_sz(&self) -> usize {
self.tree.sector_sz()
}
}
impl<T> Decompose<T> for MerkleStream<T> {
fn into_inner(self) -> T {
self.trailered.into_inner()
}
}
impl<T: WriteInteg + Size> WriteAt for MerkleStream<T> {
fn write_at(&mut self, pos: u64, buf: &[u8]) -> io::Result<usize> {
self.assert_sector_sz(buf.len())?;
let pos_usize: usize = pos.try_into().box_err()?;
self.tree.write(pos_usize, buf)?;
let written = self.trailered.write_at(pos, buf)?;
Ok(written)
}
fn flush(&mut self) -> io::Result<()> {
let root = self.tree.root_hash()?;
self.trailered.flush_integ(&self.tree, root)
}
}
impl<T: ReadAt + Size> ReadAt for MerkleStream<T> {
fn read_at(&self, pos: u64, buf: &mut [u8]) -> io::Result<usize> {
self.assert_sector_sz(buf.len())?;
self.trailered.read_exact_at(pos, buf)?;
let pos: usize = pos.try_into().box_err()?;
self.tree.verify(pos, buf)?;
Ok(self.sector_sz())
}
}
impl<U, T: AsRef<U>> AsRef<U> for MerkleStream<T> {
fn as_ref(&self) -> &U {
self.trailered.as_ref()
}
}
impl<U, T: AsMut<U>> AsMut<U> for MerkleStream<T> {
fn as_mut(&mut self) -> &mut U {
self.trailered.as_mut()
}
}
impl<T> Size for MerkleStream<T> {
fn size(&self) -> io::Result<Option<u64>> {
self.trailered.size()
}
}
impl<T: FlushMeta> FlushMeta for MerkleStream<T> {
fn flush_meta(&mut self) -> crate::Result<()> {
self.trailered.flush_meta()
}
}
}
#[cfg(test)]
pub(crate) mod tests {
use btserde::{from_vec, to_vec};
use std::io::{Read, Seek, SeekFrom, Write};
use super::{
private::{exp2, log2},
*,
};
use crate::{
test_helpers::{Randomizer, SectoredCursor, SECTOR_SZ_DEFAULT},
Cursor,
};
#[test]
fn log2_test() {
assert_eq!(-1, log2(0));
assert_eq!(0, log2(1));
assert_eq!(1, log2(2));
assert_eq!(2, log2(4));
assert_eq!(2, log2(5));
assert_eq!(3, log2(8));
assert_eq!(9, log2(1023));
assert_eq!(10, log2(1025));
assert_eq!(63, log2(usize::MAX));
}
fn make_tree_with<const SZ: usize>(
num_sects: usize,
) -> (VecMerkleTree<Sha2_256Node>, Vec<[u8; SZ]>) {
let mut tree = VecMerkleTree::<Sha2_256Node>::empty(SZ);
let mut sectors = Vec::with_capacity(num_sects);
for k in 1..(num_sects + 1) {
let offset = SZ * (k - 1);
let sector = [k as u8; SZ];
sectors.push(sector);
tree.write(offset, §or).expect("append sector failed");
}
(tree, sectors)
}
fn merkle_tree_build_verify_test_case<const SZ: usize>(num_sects: usize) {
let (tree, sectors) = make_tree_with::<SZ>(num_sects);
for (k, sector) in sectors.into_iter().enumerate() {
tree.verify(k * SZ, §or).expect("verify failed");
}
}
#[test]
fn merkle_tree_append_verify() {
merkle_tree_build_verify_test_case::<SECTOR_SZ_DEFAULT>(exp2(0));
merkle_tree_build_verify_test_case::<SECTOR_SZ_DEFAULT>(exp2(1));
merkle_tree_build_verify_test_case::<SECTOR_SZ_DEFAULT>(exp2(2));
merkle_tree_build_verify_test_case::<SECTOR_SZ_DEFAULT>(exp2(3));
merkle_tree_build_verify_test_case::<SECTOR_SZ_DEFAULT>(exp2(4));
merkle_tree_build_verify_test_case::<SECTOR_SZ_DEFAULT>(exp2(0) + 1);
merkle_tree_build_verify_test_case::<SECTOR_SZ_DEFAULT>(exp2(1) + 1);
merkle_tree_build_verify_test_case::<SECTOR_SZ_DEFAULT>(exp2(2) + 1);
merkle_tree_build_verify_test_case::<SECTOR_SZ_DEFAULT>(exp2(3) + 1);
merkle_tree_build_verify_test_case::<SECTOR_SZ_DEFAULT>(exp2(4) + 1);
merkle_tree_build_verify_test_case::<SECTOR_SZ_DEFAULT>(exp2(0) - 1);
merkle_tree_build_verify_test_case::<SECTOR_SZ_DEFAULT>(exp2(1) - 1);
merkle_tree_build_verify_test_case::<SECTOR_SZ_DEFAULT>(exp2(2) - 1);
merkle_tree_build_verify_test_case::<SECTOR_SZ_DEFAULT>(exp2(3) - 1);
merkle_tree_build_verify_test_case::<SECTOR_SZ_DEFAULT>(exp2(4) - 1);
merkle_tree_build_verify_test_case::<SECTOR_SZ_DEFAULT>(1337);
merkle_tree_build_verify_test_case::<512>(37);
}
#[test]
fn merkle_tree_data_changed_verify_fails() {
const SZ: usize = SECTOR_SZ_DEFAULT;
let mut tree = VecMerkleTree::<Sha2_256Node>::empty(SZ);
let one = [1u8; SZ];
let mut two = [2u8; SZ];
let three = [3u8; SZ];
tree.write(0, &one).expect("append one failed");
tree.write(SZ, &two).expect("append two failed");
tree.write(2 * SZ, &three).expect("append three failed");
two[0] = 7u8;
tree.verify(0, &one).expect("failed to verify one");
tree.verify(SZ, &two)
.expect_err("verify two was expected to fail");
tree.verify(2 * SZ, &three).expect("failed to verify three");
}
#[test]
fn merkle_tree_root_not_verified_verify_fails() {
const SZ: usize = SECTOR_SZ_DEFAULT;
let mut tree = VecMerkleTree::<Sha2_256Node>::empty(SZ);
let one = [1u8; SZ];
let two = [2u8; SZ];
let three = [3u8; SZ];
tree.write(0, &one).expect("append one failed");
tree.write(SZ, &two).expect("append two failed");
tree.write(2 * SZ, &three).expect("append three failed");
let vec = to_vec(&tree).expect("to_vec failed");
let tree: VecMerkleTree<Sha2_256Node> = from_vec(&vec).expect("from_vec failed");
tree.verify(SZ, &two)
.expect_err("verify succeeded, though it should have failed");
}
fn merkle_stream_sequential_test_case(sect_sz: usize, sect_ct: usize) {
let tree = VariantMerkleTree::empty(MerkleTreeKind::Sha2_256, sect_sz);
let stream = MerkleStream::with_tree(SectoredCursor::new(Vec::new(), sect_sz), tree)
.expect("read from end failed");
let mut stream = Cursor::new(stream);
for k in 1..(sect_ct + 1) {
let sector = vec![k as u8; sect_sz];
stream.write(§or).expect("write failed");
}
stream.seek(SeekFrom::Start(0)).expect("seek failed");
for k in 1..(sect_ct + 1) {
let expected = vec![k as u8; sect_sz];
let mut actual = vec![0u8; sect_sz];
stream.read(&mut actual).expect("read failed");
assert_eq!(expected, actual);
}
}
#[test]
fn merkle_stream_sequential() {
merkle_stream_sequential_test_case(SECTOR_SZ_DEFAULT, 20);
merkle_stream_sequential_test_case(SECTOR_SZ_DEFAULT, 200);
merkle_stream_sequential_test_case(SECTOR_SZ_DEFAULT, 800);
merkle_stream_sequential_test_case(512, 25);
merkle_stream_sequential_test_case(8192, 20);
}
pub(crate) fn make_merkle_stream_filled_with_zeros(
sect_sz: usize,
sect_ct: usize,
) -> Cursor<MerkleStream<SectoredCursor<Vec<u8>>>> {
let tree = VariantMerkleTree::empty(MerkleTreeKind::Sha2_256, sect_sz);
let stream = MerkleStream::with_tree(SectoredCursor::new(Vec::new(), sect_sz), tree)
.expect("read from end failed");
let zeros = vec![0u8; sect_sz];
let mut stream = Cursor::new(stream);
for _ in 0..sect_ct {
stream.write(&zeros).expect("write zeros failed");
}
stream.seek(SeekFrom::Start(0)).expect("seek failed");
stream
}
fn merkle_stream_random_test_case(rando: Randomizer, sect_sz: usize, sect_ct: usize) {
let mut stream = make_merkle_stream_filled_with_zeros(sect_sz, sect_ct);
let indices: Vec<usize> = rando.take(sect_ct).map(|e| e % sect_ct).collect();
for index in indices.iter().map(|e| *e) {
let offset = sect_sz * index;
stream
.seek(SeekFrom::Start(offset as u64))
.expect("seek to write failed");
let sector = vec![index as u8; sect_sz];
stream.write(§or).expect("write failed");
}
stream.flush().expect("flush failed");
for index in indices.iter().map(|e| *e) {
let offset = sect_sz * index;
stream
.seek(SeekFrom::Start(offset as u64))
.expect("seek to read failed");
let expected = vec![index as u8; sect_sz];
let mut actual = vec![0u8; sect_sz];
stream.read(&mut actual).expect("read failed");
assert_eq!(expected, actual);
}
}
#[test]
fn merkle_stream_random() {
const SEED: [u8; Randomizer::HASH.len()] = [3u8; Randomizer::HASH.len()];
merkle_stream_random_test_case(Randomizer::new(SEED), SECTOR_SZ_DEFAULT, 2);
merkle_stream_random_test_case(Randomizer::new(SEED), SECTOR_SZ_DEFAULT, 4);
merkle_stream_random_test_case(Randomizer::new(SEED), SECTOR_SZ_DEFAULT, 8);
merkle_stream_random_test_case(Randomizer::new(SEED), SECTOR_SZ_DEFAULT, 20);
merkle_stream_random_test_case(Randomizer::new(SEED), SECTOR_SZ_DEFAULT, 200);
merkle_stream_random_test_case(Randomizer::new(SEED), SECTOR_SZ_DEFAULT, 800);
merkle_stream_random_test_case(Randomizer::new(SEED), 8192, 63);
}
}