4 files changed, 103 insertions, 0 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 1b1a448..e971da4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1081,6 +1081,7 @@ dependencies = [
  "tokio",
  "tracing",
  "tracing-subscriber",
+ "unicode-normalization",
 ]
 
 [[package]]
@@ -1506,6 +1507,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
 
 [[package]]
+name = "unicode-normalization"
+version = "0.1.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
+dependencies = [
+ "tinyvec",
+]
+
+[[package]]
 name = "untrusted"
 version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index 70b1e11..efd6672 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,3 +13,4 @@ serde_json = "1.0"
 tokio = { version = "1.49", features = ["rt-multi-thread", "macros"] }
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
+unicode-normalization = "0.1"
diff --git a/src/epub.rs b/src/epub.rs
new file mode 100644
index 0000000..c984dde
--- /dev/null
+++ b/src/epub.rs
@@ -0,0 +1,91 @@
+use std::path::{Path, PathBuf};
+use unicode_normalization::UnicodeNormalization;
+
+pub struct EpubSkeleton {
+    /// Book root as built by `plan`: `<books dir>/<title> (<book id>)/`, without the title if empty.
+    pub root: PathBuf,
+    pub meta_inf: PathBuf, // `plan` sets this to `<root>/META-INF`
+    pub oebps: PathBuf, // `plan` sets this to `<root>/OEBPS`
+}
+
+impl EpubSkeleton {
+    /// Plans the directory layout `<base_books_dir>/<title> (<bookid>)/{META-INF,OEBPS}` without
+    /// touching the filesystem. The title is sanitized and cut so the name fits in 255 bytes when
+    /// `bookid` leaves room; an empty title yields `(<bookid>)`. `bookid` itself is used verbatim.
+    pub fn plan(base_books_dir: &Path, title: &str, bookid: &str) -> Self {
+        const MAX_BYTES: usize = 255; // maximum number of bytes in a filename
+        // Besides the title, the name carries a space, "(", `bookid` and ")".
+        let title_budget = MAX_BYTES.saturating_sub(3 + bookid.len());
+        let clean_title = sanitize_filename(title);
+        // Trim again: the cut can land right after a space, which would double the separator.
+        let short_title = truncate_utf8_by_byte(&clean_title, title_budget).trim_end();
+        // Without a title there is nothing to separate, so the name is just `(<bookid>)`.
+        let sep = if short_title.is_empty() { "" } else { " " };
+        let root = base_books_dir.join(format!("{}{}({})", short_title, sep, bookid));
+        Self {
+            meta_inf: root.join("META-INF"),
+            oebps: root.join("OEBPS"),
+            root,
+        }
+    }
+}
+
+/// Turns `input` into a name that is safe to use as one path component on common platforms.
+///
+/// The input is normalized to Unicode NFC, then processed character by character:
+/// - control characters and any of `<>:"/\|?*` are replaced with `_` (not dropped);
+/// - each run of the remaining whitespace is collapsed into a single ASCII space;
+/// - leading and trailing whitespace is trimmed from the result.
+///
+/// Tabs and newlines are control characters, so they become `_` instead of joining a
+/// whitespace run.
+///
+/// Returns an empty string when the input is empty or holds only non-control whitespace.
+fn sanitize_filename(input: &str) -> String {
+    // Characters that Windows/FAT file names may not contain.
+    const ILLEGAL: &[char] = &['<', '>', ':', '"', '/', '\\', '|', '?', '*'];
+
+    let mut out = String::with_capacity(input.len());
+    // True while the previous character was whitespace that already produced a space.
+    let mut in_space_run = false;
+
+    // NFC first, so text that is displayed the same is also stored the same.
+    for ch in input.nfc() {
+        if ch.is_control() || ILLEGAL.contains(&ch) {
+            // Checked before whitespace: '\t' and '\n' land here.
+            out.push('_');
+            in_space_run = false;
+        } else if ch.is_whitespace() {
+            // Only the first character of a run emits a space.
+            if !in_space_run {
+                out.push(' ');
+            }
+            in_space_run = true;
+        } else {
+            out.push(ch);
+            in_space_run = false;
+        }
+    }
+
+    // Runs were collapsed above, so at most one space remains at either end.
+    out.trim().to_string()
+}
+
+/// Returns the longest prefix of `s` that is at most `max_bytes` bytes long and ends on a
+/// character boundary, so a multi-byte UTF-8 sequence is never cut in half.
+///
+/// - `s` is returned unchanged when it already fits in `max_bytes`.
+/// - The result is empty when not even the first character of `s` fits in `max_bytes`.
+fn truncate_utf8_by_byte(s: &str, max_bytes: usize) -> &str {
+    if max_bytes >= s.len() {
+        return s;
+    }
+
+    // `max_bytes` may point into the middle of a multi-byte character, so scan down from it
+    // to the nearest character boundary.
+    let cut = (0..=max_bytes).rev().find(|&idx| s.is_char_boundary(idx));
+
+    // Offset 0 is always a boundary, so the search always succeeds; the fallback of 0 only
+    // exists to unwrap the `Option` without a panic path.
+    &s[..cut.unwrap_or(0)]
+}
diff --git a/src/main.rs b/src/main.rs
index d393a55..b963850 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,6 +2,7 @@ mod cli;
 mod config;
 mod cookies;
 mod display;
+mod epub;
 mod http_client;
 mod orly;