Skip to content

Commit

Permalink
Use base58 encoded UUIDs for document IDs
Browse files Browse the repository at this point in the history
Problem: the automerge-repo JS implementation uses base68 encoded UUIDs
for it's document IDs. In this library we accept arbitrary strings.

Solution: In preparation for full compatibility with the JS implementation
constrain document IDs to be a UUID. This shouldn't have any
compatibility problems for most users because by default when we create
a document ID we use a UUID.
  • Loading branch information
alexjg committed Dec 13, 2023
1 parent 3ecdcdb commit a393021
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 19 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ tracing = "0.1.37"
ring = "0.16.20"
hex = "0.4.3"
tempfile = "3.6.0"
bs58 = { version = "0.5.0", features = ["check"] }

[dev-dependencies]
clap = { version = "4.2.5", features = ["derive"] }
Expand Down
7 changes: 4 additions & 3 deletions src/fs_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use std::{
fs::File,
io::Write,
path::{Path, PathBuf},
str,
};

use crate::DocumentId;
Expand Down Expand Up @@ -314,8 +313,10 @@ impl DocIdPaths {

let level2 = level2.file_name()?.to_str()?;
let doc_id_bytes = hex::decode(level2).ok()?;
let doc_id_str = str::from_utf8(&doc_id_bytes).ok()?;
let doc_id = DocumentId::from(doc_id_str);
let Ok(doc_id) = DocumentId::try_from(doc_id_bytes) else {
tracing::error!(level2_path=%level2, "invalid document ID");
return None;
};
let result = Self::from(&doc_id);
if result.prefix != prefix {
None
Expand Down
69 changes: 60 additions & 9 deletions src/interfaces.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use futures::future::BoxFuture;
use serde::{Deserialize, Serialize};
use std::fmt::{Display, Formatter};
use std::{
fmt::{Display, Formatter},
str::FromStr,
};

#[derive(Debug, Eq, Hash, PartialEq, Clone)]
pub struct RepoId(pub String);
Expand All @@ -17,24 +20,72 @@ impl<'a> From<&'a str> for RepoId {
}
}

#[derive(Debug, Eq, Hash, PartialEq, Clone, Deserialize, Serialize)]
pub struct DocumentId(pub String);
#[derive(Eq, Hash, PartialEq, Clone, Deserialize, Serialize)]
pub struct DocumentId([u8; 16]);

impl DocumentId {
pub fn random() -> Self {
Self(uuid::Uuid::new_v4().into_bytes())
}

// This is necessary to make the interop tests work, we'll remove it once
// we upgrade to the latest version of automerge-repo for the interop tests
pub fn as_uuid_str(&self) -> String {
uuid::Uuid::from_slice(self.0.as_ref()).unwrap().to_string()
}
}

impl AsRef<[u8]> for DocumentId {
fn as_ref(&self) -> &[u8] {
self.0.as_ref()
}
}

impl Display for DocumentId {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
#[derive(Debug, thiserror::Error)]
#[error("Invalid document ID: {0}")]
pub struct BadDocumentId(String);

impl TryFrom<Vec<u8>> for DocumentId {
type Error = BadDocumentId;

fn try_from(v: Vec<u8>) -> Result<Self, Self::Error> {
match uuid::Uuid::from_slice(v.as_slice()) {
Ok(id) => Ok(Self(id.into_bytes())),
Err(e) => Err(BadDocumentId(format!("invalid uuid: {}", e))),
}
}
}

impl<'a> From<&'a str> for DocumentId {
fn from(s: &'a str) -> Self {
Self(s.to_string())
impl FromStr for DocumentId {
type Err = BadDocumentId;

fn from_str(s: &str) -> Result<Self, Self::Err> {
match bs58::decode(s).with_check(None).into_vec() {
Ok(bytes) => Self::try_from(bytes),
Err(_) => {
// attempt to parse legacy UUID format
let uuid = uuid::Uuid::parse_str(s).map_err(|_| {
BadDocumentId(
"expected either a bs58-encoded document ID or a UUID".to_string(),
)
})?;
Ok(Self(uuid.into_bytes()))
}
}
}
}

impl std::fmt::Debug for DocumentId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let as_string = bs58::encode(&self.0).with_check().into_string();
write!(f, "{}", as_string)
}
}

impl Display for DocumentId {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let as_string = bs58::encode(&self.0).with_check().into_string();
write!(f, "{}", as_string)
}
}

Expand Down
16 changes: 14 additions & 2 deletions src/message.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,17 @@ impl Message {
"targetId" => target_id = Some(decoder.str()?.into()),
"channelId" => {
if decoder.probe().str().is_ok() {
document_id = Some(decoder.str()?.into());
let doc_str = decoder.str()?;
if doc_str == "sync" {
// automerge-repo-network-websocket encodes the channel id as "sync"
// for join messages, we just ignore this
continue;
}
document_id = Some(
doc_str
.parse()
.map_err(|_| DecodeError::InvalidDocumentId)?,
);
}
}
"type" => type_name = Some(decoder.str()?),
Expand Down Expand Up @@ -77,7 +87,7 @@ impl Message {
encoder.str("targetId").unwrap();
encoder.str(to_repo_id.0.as_str()).unwrap();
encoder.str("channelId").unwrap();
encoder.str(document_id.0.as_str()).unwrap();
encoder.str(document_id.as_uuid_str().as_str()).unwrap();
encoder.str("message").unwrap();
encoder.tag(minicbor::data::Tag::Unassigned(64)).unwrap();
encoder.bytes(message.as_slice()).unwrap();
Expand Down Expand Up @@ -117,6 +127,8 @@ pub enum DecodeError {
MissingBroadcast,
#[error("unknown type {0}")]
UnknownType(String),
#[error("invalid document id")]
InvalidDocumentId,
}

impl From<minicbor::decode::Error> for DecodeError {
Expand Down
2 changes: 1 addition & 1 deletion src/repo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ impl RepoHandle {

/// Create a new document.
pub fn new_document(&self) -> DocHandle {
let document_id = DocumentId(Uuid::new_v4().to_string());
let document_id = DocumentId::random();
let document = new_document();
let doc_info = self.new_document_info(document, DocState::LocallyCreatedNotEdited);
let handle = DocHandle::new(
Expand Down
4 changes: 2 additions & 2 deletions tests/fs_storage/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ fn fs_store_crud() {
doc.put(automerge::ObjId::Root, "key", "value").unwrap();
let mut change1 = doc.get_last_local_change().unwrap().clone();

let doc_id = automerge_repo::DocumentId::from("somedoc");
let doc_id = automerge_repo::DocumentId::random();
store.append(&doc_id, change1.bytes().as_ref()).unwrap();
let result = store.get(&doc_id).unwrap().unwrap();
assert_eq!(&result, change1.bytes().as_ref());
Expand All @@ -55,7 +55,7 @@ fn fs_store_crud() {
assert_eq!(result, expected);

// check nonexistent docs don't upset anyone
let nonexistent_doc_id = automerge_repo::DocumentId::from("nonexistent");
let nonexistent_doc_id = automerge_repo::DocumentId::random();
let result = store.get(&nonexistent_doc_id).unwrap();
assert!(result.is_none());
}
Expand Down
4 changes: 2 additions & 2 deletions tests/network/document_load.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ async fn test_loading_document_immediately_not_found() {
let repo_handle = repo.run();

// Spawn a task that awaits the requested doc handle.
let doc_id = DocumentId::from("doc1");
let doc_id = DocumentId::random();
assert!(repo_handle.load(doc_id).await.unwrap().is_none());
// Shut down the repo.
repo_handle.stop().unwrap();
Expand All @@ -142,7 +142,7 @@ async fn test_loading_document_not_found_async() {
let repo_handle = repo.run();

// Spawn a task that awaits the requested doc handle.
let doc_id = DocumentId::from("doc1");
let doc_id = DocumentId::random();
assert!(repo_handle.load(doc_id).await.unwrap().is_none());
// Shut down the repo.
tokio::task::spawn_blocking(|| {
Expand Down

0 comments on commit a393021

Please sign in to comment.