diff options
Diffstat (limited to 'src/epub.rs')
| -rw-r--r-- | src/epub.rs | 91 |
1 files changed, 91 insertions, 0 deletions
diff --git a/src/epub.rs b/src/epub.rs new file mode 100644 index 0000000..c984dde --- /dev/null +++ b/src/epub.rs @@ -0,0 +1,91 @@ +use std::path::{Path, PathBuf}; +use unicode_normalization::UnicodeNormalization; + +pub struct EpubSkeleton { + /// Books/<book_title (book_id)>/ + pub root: PathBuf, + pub meta_inf: PathBuf, + pub oebps: PathBuf, +} + +impl EpubSkeleton { + pub fn plan(base_books_dir: &Path, title: &str, bookid: &str) -> Self { + // Maximum number of bytes in a filename. + const MAX_BYTES: usize = 255; + let clean_title = sanitize_filename(title); + let root_name = if !clean_title.is_empty() { + // Title length should take into account the bookid, space, and () characters. + let title_max_length = MAX_BYTES.saturating_sub(3 + bookid.len()); + let truncated_title = truncate_utf8_by_byte(&clean_title, title_max_length); + format!("{} ({})", truncated_title, bookid) + } else { + format!("({})", bookid) + }; + let root_dir = base_books_dir.join(root_name); + Self { + meta_inf: root_dir.join("META-INF"), + oebps: root_dir.join("OEBPS"), + root: root_dir, + } + } +} + +/// Sanitize a filename component for cross‑platform compatibility. +/// Applies sensible defaults: +/// - Normalize to NFC +/// - Replace illegal characters: <>:"/\\|?* +/// - Remove control characters +/// - Collapse whitespace +/// - Trim whitespace +fn sanitize_filename(input: &str) -> String { + // Normalize to NFC to ensure consistency - characters displayed the same are stored the same. + let mut s = input.nfc().collect::<String>(); + + // Replace illegal Windows/FAT characters + control chars + const ILLEGAL: &[char] = &['<', '>', ':', '"', '/', '\\', '|', '?', '*']; + let mut cleaned = String::with_capacity(s.len()); + + for ch in s.chars() { + if ch.is_control() || ILLEGAL.contains(&ch) { + cleaned.push('_'); + } else { + cleaned.push(ch); + } + } + s = cleaned; + + // Collapse whitespace + let mut cleaned = String::with_capacity(s.len()); + let mut prev_was_whitespace = false; + for ch in s.chars() { + if ch.is_whitespace() { + if !prev_was_whitespace { + cleaned.push(' '); + prev_was_whitespace = true; + } + } else { + cleaned.push(ch); + prev_was_whitespace = false; + } + } + cleaned.trim().to_string() +} + +/// Truncate a UTF‑8 string safely without splitting codepoints. +fn truncate_utf8_by_byte(s: &str, max_bytes: usize) -> &str { + if s.len() <= max_bytes { + return s; + } + + let mut end = max_bytes; + // Back up until we end with a non-continuation byte. + while end > 0 && !s.is_char_boundary(end) { + end -= 1; + } + + if end == 0 { + return ""; + } + + &s[..end] +} |
