1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
|
use anyhow::{Context, Result};
use std::fs;
use std::path::{Path, PathBuf};
use unicode_normalization::UnicodeNormalization;
/// Planned on-disk layout of an unpacked EPUB for a single book.
///
/// This is plain path data: holding a value does not create anything on disk.
/// `EpubSkeleton::plan` fills the fields in; `EpubSkeleton::initialize` creates
/// the directories and the required files.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EpubSkeleton {
    /// Book root directory: `Books/<book_title (book_id)>/`.
    pub root: PathBuf,
    /// The `META-INF` directory (placed directly under `root` by `plan`).
    pub meta_inf: PathBuf,
    /// The `OEBPS` directory (placed directly under `root` by `plan`).
    pub oebps: PathBuf,
}
impl EpubSkeleton {
    /// Plan the output directory structure for a book without touching the filesystem.
    ///
    /// The root directory is named `<title> (<bookid>)`, or just `(<bookid>)` when no
    /// usable title remains, and is placed under `base_books_dir`. Both `title` and
    /// `bookid` go through `sanitize_filename`, so neither can introduce path
    /// separators into the directory name. The title is truncated so that the whole
    /// name fits in 255 bytes whenever the id leaves room for it; the id itself is
    /// never truncated.
    pub fn plan(base_books_dir: &Path, title: &str, bookid: &str) -> Self {
        // Maximum number of bytes in a filename.
        const MAX_BYTES: usize = 255;
        // Bytes added around the id by the `" (…)"` decoration: space + two parentheses.
        const DECORATION_BYTES: usize = 3;

        // The id ends up inside a single path component, so it must be cleaned like the
        // title; otherwise a `/` or `\` in it would silently create nested directories.
        let clean_id = sanitize_filename(bookid);
        let clean_title = sanitize_filename(title);

        let title_budget = MAX_BYTES.saturating_sub(DECORATION_BYTES + clean_id.len());
        // Truncation may cut right after a space; drop it so the name never contains a
        // double space before the parenthesis.
        let short_title = truncate_utf8_by_byte(&clean_title, title_budget).trim_end();

        let root_name = if short_title.is_empty() {
            // Covers both an empty title and a title truncated down to nothing.
            format!("({clean_id})")
        } else {
            format!("{short_title} ({clean_id})")
        };

        let root = base_books_dir.join(root_name);
        Self {
            meta_inf: root.join("META-INF"),
            oebps: root.join("OEBPS"),
            root,
        }
    }

    /// Initialize the skeleton by creating its directories and required files.
    ///
    /// Runs `create_dirs`, `write_mimetype` and `write_container_xml` in that order.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by any of those steps.
    pub fn initialize(&self) -> Result<()> {
        self.create_dirs()?;
        self.write_mimetype()?;
        self.write_container_xml()
    }

    /// Create the `OEBPS` and `META-INF` directories, including missing parents.
    ///
    /// # Errors
    ///
    /// Fails if a directory cannot be created; the error context names the path.
    pub fn create_dirs(&self) -> Result<()> {
        for dir in [&self.oebps, &self.meta_inf] {
            fs::create_dir_all(dir)
                .with_context(|| format!("Creating directory {}", dir.display()))?;
        }
        Ok(())
    }

    /// Write `META-INF/container.xml` pointing to `OEBPS/content.opf`.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be written (for example when `META-INF` does not
    /// exist yet); the error context names the path.
    pub fn write_container_xml(&self) -> Result<()> {
        const CONTAINER_XML: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
<rootfiles>
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>
"#;
        let path = self.meta_inf.join("container.xml");
        fs::write(&path, CONTAINER_XML)
            .with_context(|| format!("Writing file {}", path.display()))
    }

    /// Write the plaintext `mimetype` file at the root (no trailing newline).
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be written; the error context names the path.
    pub fn write_mimetype(&self) -> Result<()> {
        let path = self.root.join("mimetype");
        // EXACT bytes required by OCF; do not add '\n'.
        fs::write(&path, b"application/epub+zip")
            .with_context(|| format!("Writing file {}", path.display()))
    }
}
/// Turn arbitrary text into a filename component that is safe across platforms.
///
/// What happens to the input:
/// - It is normalized to NFC.
/// - Control characters and any of `<>:"/\|?*` are each replaced with `_`.
/// - Every run of whitespace is collapsed into a single space.
/// - Leading and trailing whitespace is trimmed.
fn sanitize_filename(input: &str) -> String {
    // Characters that are illegal on Windows/FAT filesystems.
    const FORBIDDEN: [char; 9] = ['<', '>', ':', '"', '/', '\\', '|', '?', '*'];

    let mut out = String::with_capacity(input.len());
    let mut in_whitespace_run = false;

    // NFC first, so that text which is displayed the same is stored the same.
    for ch in input.nfc() {
        if ch.is_control() || FORBIDDEN.contains(&ch) {
            // Checked before the whitespace test: control characters that are also
            // whitespace (tab, newline, ...) become `_`, not a space.
            out.push('_');
            in_whitespace_run = false;
        } else if ch.is_whitespace() {
            // Only the first character of a whitespace run emits a space.
            if !in_whitespace_run {
                out.push(' ');
            }
            in_whitespace_run = true;
        } else {
            out.push(ch);
            in_whitespace_run = false;
        }
    }

    out.trim().to_owned()
}
/// Return the longest prefix of `s` that is at most `max_bytes` bytes long and
/// ends on a character boundary, so no UTF-8 code point is ever split.
///
/// The input is returned unchanged when it already fits; the result is empty
/// when not even the first character fits.
fn truncate_utf8_by_byte(s: &str, max_bytes: usize) -> &str {
    if max_bytes >= s.len() {
        return s;
    }
    // Walk back from the byte limit to the nearest character boundary. Offset 0
    // is always a boundary, so the search cannot come up empty.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
#[cfg(test)]
mod tests {
    use super::EpubSkeleton;
    use tempfile::TempDir;
    use quick_xml::{Reader, events::Event};
    use std::fs;

    /// Make a temp directory with a predictable prefix.
    fn temp(label: &str) -> TempDir {
        tempfile::Builder::new()
            .prefix(&format!("safaribooks-rs-{}", label))
            .tempdir()
            .unwrap_or_else(|_| panic!("Create tempdir with label: {}", label))
    }

    /// `initialize` must create the whole layout: directories and both required files.
    #[test]
    fn initialize_skeleton() {
        // GIVEN
        let tmp = temp("initialize");
        let base = tmp.path();
        let skel = EpubSkeleton::plan(base, "A Title", "1234567890123");

        // The planned root is named after the title and id, directly under the base.
        assert_eq!(skel.root, base.join("A Title (1234567890123)"));

        // WHEN
        skel.initialize().expect("Initialize skeleton");

        // THEN: directory structure exists (and really consists of directories)
        assert!(skel.root.is_dir(), "Root dir missing: {}", skel.root.display());
        assert!(skel.oebps.is_dir(), "OEBPS dir missing: {}", skel.oebps.display());
        assert!(skel.meta_inf.is_dir(), "META-INF dir missing: {}", skel.meta_inf.display());

        // THEN: the required files were written as well
        assert!(skel.root.join("mimetype").is_file(), "Mimetype file not found");
        assert!(
            skel.meta_inf.join("container.xml").is_file(),
            "META-INF/container.xml not found"
        );
    }

    /// The `mimetype` file must contain exactly the media type, with no trailing newline.
    #[test]
    fn mimetype_exact() {
        // GIVEN
        let tmp = temp("mimetype");
        let base = tmp.path();
        let skel = EpubSkeleton::plan(base, "A Title", "1234567890123");

        // WHEN
        skel.create_dirs().expect("Create skeleton dirs");
        skel.write_mimetype().expect("Write mimetype");

        // THEN: file exists
        let mimetype = skel.root.join("mimetype");
        assert!(mimetype.is_file(), "Mimetype file not found");

        // mimetype has *exact* bytes with *no* trailing newline.
        let bytes = fs::read(&mimetype).expect("Read mimetype");
        assert_eq!(
            bytes.as_slice(),
            b"application/epub+zip",
            "mimetype must be exactly 'application/epub+zip' with NO trailing newline"
        );
    }

    /// `container.xml` must parse and must declare the expected `<rootfile>`.
    #[test]
    fn container_xml_well_formed() {
        // GIVEN
        let tmp = temp("container");
        let base = tmp.path();
        let skel = EpubSkeleton::plan(base, "Another Title", "9876543210");

        // WHEN
        skel.create_dirs().expect("Create skeleton dirs");
        skel.write_container_xml().expect("Write container.xml");

        // THEN: file exists
        let container = skel.meta_inf.join("container.xml");
        assert!(container.is_file(), "META-INF/container.xml not found");

        // Parse with quick-xml to ensure it is well-formed and to inspect elements.
        let xml = fs::read_to_string(&container).expect("Read container.xml");
        let mut reader = Reader::from_str(xml.trim());

        // Walk events; ensure <container> and expected <rootfile> are present with correct attributes.
        let mut saw_container = false;
        let mut saw_rootfiles = false;
        let mut saw_rootfile_ok = false;
        let mut buf = Vec::<u8>::new();
        loop {
            match reader.read_event_into(&mut buf) {
                Ok(Event::Start(e) | Event::Empty(e)) => {
                    let name_tmp = e.name();
                    let name = name_tmp.as_ref();
                    if name == b"container" {
                        saw_container = true;
                    } else if name == b"rootfiles" {
                        saw_rootfiles = true;
                    } else if name == b"rootfile" {
                        // Check attributes on rootfile
                        let mut full_path_ok = false;
                        let mut media_type_ok = false;
                        for a in e.attributes() {
                            // A malformed attribute is a well-formedness failure; do not
                            // skip over it silently.
                            let a = a.expect("Malformed attribute on <rootfile>");
                            if a.key.as_ref() == b"full-path" && a.value.as_ref() == b"OEBPS/content.opf" {
                                full_path_ok = true;
                            }
                            else if a.key.as_ref() == b"media-type"
                                && a.value.as_ref() == b"application/oebps-package+xml"
                            {
                                media_type_ok = true;
                            }
                        }
                        if full_path_ok && media_type_ok {
                            saw_rootfile_ok = true;
                        }
                    }
                }
                Ok(Event::Eof) => break,
                Ok(_) => {}
                Err(e) => panic!("XML parse error at position {}: {e}", reader.buffer_position()),
            }
            buf.clear();
        }
        assert!(saw_container, "container.xml is missing <container> root element");
        assert!(saw_rootfiles, "container.xml is missing <rootfiles> element");
        assert!(
            saw_rootfile_ok,
            "container.xml <rootfile> must have full-path='OEBPS/content.opf' \
             and media-type='application/oebps-package+xml'"
        );
    }
}
|