aboutsummaryrefslogtreecommitdiff
path: root/src/epub.rs
diff options
context:
space:
mode:
authorA Farzat <a@farzat.xyz>2026-02-13 19:50:52 +0300
committerA Farzat <a@farzat.xyz>2026-02-13 19:59:25 +0300
commitedf46d8965752725ef3305c9d01decc038457db0 (patch)
treea5fc876f65dc53f350a58d3816c41942dc3ce570 /src/epub.rs
parent57bc69a7f9af497526695e5a0bfbc60939f667e9 (diff)
downloadsafaribooks-rs-edf46d8965752725ef3305c9d01decc038457db0.tar.gz
safaribooks-rs-edf46d8965752725ef3305c9d01decc038457db0.zip
Create the basic directory structure for epub
The directory name needs to be sanitized so that it is valid on all major operating systems and filesystems. This includes replacing invalid characters and limiting the filename to 255 bytes.
Diffstat (limited to 'src/epub.rs')
-rw-r--r--src/epub.rs91
1 files changed, 91 insertions, 0 deletions
diff --git a/src/epub.rs b/src/epub.rs
new file mode 100644
index 0000000..c984dde
--- /dev/null
+++ b/src/epub.rs
@@ -0,0 +1,91 @@
+use std::path::{Path, PathBuf};
+use unicode_normalization::UnicodeNormalization;
+
/// Directory layout of an unpacked EPUB for a single book.
///
/// The struct only holds paths; it performs no filesystem access itself.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EpubSkeleton {
    /// Root directory of the book: `Books/<book_title (book_id)>/`.
    pub root: PathBuf,
    /// The `META-INF` directory located directly under `root`.
    pub meta_inf: PathBuf,
    /// The `OEBPS` directory located directly under `root`.
    pub oebps: PathBuf,
}
+
+impl EpubSkeleton {
+ pub fn plan(base_books_dir: &Path, title: &str, bookid: &str) -> Self {
+ // Maximum number of bytes in a filename.
+ const MAX_BYTES: usize = 255;
+ let clean_title = sanitize_filename(title);
+ let root_name = if !clean_title.is_empty() {
+ // Title length should take into account the bookid, space, and () characters.
+ let title_max_length = MAX_BYTES.saturating_sub(3 + bookid.len());
+ let truncated_title = truncate_utf8_by_byte(&clean_title, title_max_length);
+ format!("{} ({})", truncated_title, bookid)
+ } else {
+ format!("({})", bookid)
+ };
+ let root_dir = base_books_dir.join(root_name);
+ Self {
+ meta_inf: root_dir.join("META-INF"),
+ oebps: root_dir.join("OEBPS"),
+ root: root_dir,
+ }
+ }
+}
+
+/// Sanitize a filename component for cross‑platform compatibility.
+/// Applies sensible defaults:
+/// - Normalize to NFC
+/// - Replace illegal characters: <>:"/\\|?*
+/// - Remove control characters
+/// - Collapse whitespace
+/// - Trim whitespace
+fn sanitize_filename(input: &str) -> String {
+ // Normalize to NFC to ensure consistency - characters displayed the same are stored the same.
+ let mut s = input.nfc().collect::<String>();
+
+ // Replace illegal Windows/FAT characters + control chars
+ const ILLEGAL: &[char] = &['<', '>', ':', '"', '/', '\\', '|', '?', '*'];
+ let mut cleaned = String::with_capacity(s.len());
+
+ for ch in s.chars() {
+ if ch.is_control() || ILLEGAL.contains(&ch) {
+ cleaned.push('_');
+ } else {
+ cleaned.push(ch);
+ }
+ }
+ s = cleaned;
+
+ // Collapse whitespace
+ let mut cleaned = String::with_capacity(s.len());
+ let mut prev_was_whitespace = false;
+ for ch in s.chars() {
+ if ch.is_whitespace() {
+ if !prev_was_whitespace {
+ cleaned.push(' ');
+ prev_was_whitespace = true;
+ }
+ } else {
+ cleaned.push(ch);
+ prev_was_whitespace = false;
+ }
+ }
+ cleaned.trim().to_string()
+}
+
/// Return the longest prefix of `s` that is at most `max_bytes` bytes long
/// and ends on a `char` boundary, so no code point is ever split.
///
/// `s` is returned unchanged when it already fits. The result is empty when
/// not even the first character fits into `max_bytes`.
fn truncate_utf8_by_byte(s: &str, max_bytes: usize) -> &str {
    if max_bytes >= s.len() {
        return s;
    }

    // Walk back from the byte limit to the nearest char boundary. Index 0 is
    // always a boundary, so the search always finds a cut point.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);

    &s[..cut]
}