diff options
| author | A Farzat <a@farzat.xyz> | 2026-03-04 21:14:32 +0300 |
|---|---|---|
| committer | A Farzat <a@farzat.xyz> | 2026-03-04 21:14:32 +0300 |
| commit | a31db5a182c7d45ca6720a99c409b5c35fad69bb (patch) | |
| tree | c590e42fb84847fd4639124a28174fb9306abb7b /src | |
| parent | d34403eddc75188b1657a7076442b48da76bb727 (diff) | |
| download | oreilly-epub-a31db5a182c7d45ca6720a99c409b5c35fad69bb.tar.gz oreilly-epub-a31db5a182c7d45ca6720a99c409b5c35fad69bb.zip | |
Convert URLs pointing upstream to local relative paths
The EPUB standard only recognizes relative paths; rewriting the references fixes image rendering.
Diffstat (limited to 'src')
| -rw-r--r-- | src/epub.rs | 36 | ||||
| -rw-r--r-- | src/main.rs | 5 |
2 files changed, 37 insertions, 4 deletions
diff --git a/src/epub.rs b/src/epub.rs index 7eb47f3..4fc4915 100644 --- a/src/epub.rs +++ b/src/epub.rs @@ -1,7 +1,9 @@ -use crate::models::FileEntry; +use crate::models::{Chapter, FileEntry}; use anyhow::{Context, Result}; -use reqwest::Client; +use relative_path::RelativePath; +use reqwest::{Client, Url}; use std::{ + collections::HashMap, io::{Read, Write}, path::Path, }; @@ -66,6 +68,7 @@ pub fn create_epub_archive( epub_root: &Path, output_epub: &Path, file_entries: &[FileEntry], + chapters: &HashMap<String, Chapter>, ) -> Result<()> { let out_file = std::fs::File::create(output_epub)?; let mut zip = ZipWriter::new(out_file); @@ -83,6 +86,12 @@ pub fn create_epub_archive( .context("No OPF file with the correct MIME type was found.")?; write_container_xml_to_zip(&mut zip, &opf_entry.full_path)?; + // Prepare url path to local path mapping to clean xhtml files from external dependencies. + let url_to_local: HashMap<String, String> = file_entries + .iter() + .map(url_path_to_local) + .collect::<Result<HashMap<_, _>>>()?; + // Add the rest of the files according to file_entries. let options: FileOptions<()> = FileOptions::default().compression_method(CompressionMethod::Deflated); @@ -91,10 +100,31 @@ pub fn create_epub_archive( let mut src_file = std::fs::File::open(epub_root.join(&entry.full_path))?; let mut buffer = Vec::new(); src_file.read_to_end(&mut buffer)?; - zip.write_all(&buffer)?; + if chapters.contains_key(&entry.ourn) { + let mut html = String::from_utf8(buffer)?; + let chapter_dir = RelativePath::new(&entry.full_path) + .parent() + .unwrap_or(RelativePath::new("")); + for (url_path, local_path) in &url_to_local { + let rel_path = chapter_dir + .to_relative_path_buf() + .relative(RelativePath::new(local_path)); + html = html.replace(url_path, rel_path.as_str()); + } + zip.write_all(html.as_bytes())?; + } else { + zip.write_all(&buffer)?; + } } zip.finish()?; Ok(()) } + +/// Helper function. Maps FileEntry to (url path, full_path) pair. 
+fn url_path_to_local(entry: &FileEntry) -> Result<(String, String)> { + let url = Url::parse(&entry.url).with_context(|| format!("Could not parse: {}", entry.url))?; + let url_path = url.path().to_string(); + Ok((url_path, entry.full_path.clone())) +} diff --git a/src/main.rs b/src/main.rs index 3c5956e..80f81e4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,7 @@ mod epub; mod http_client; mod models; +use std::collections::HashMap; use std::path::Path; use crate::epub::{create_epub_archive, download_all_files}; @@ -107,6 +108,8 @@ async fn main() -> Result<()> { println!("Fetching book structure..."); let chapters: Vec<Chapter> = fetch_all_pages(&client, epub_data.chapters.clone()).await?; + let chapters: HashMap<String, Chapter> = + chapters.into_iter().map(|c| (c.ourn.clone(), c)).collect(); let file_entries: Vec<FileEntry> = fetch_all_pages(&client, epub_data.files.clone()).await?; let spine_items: Vec<SpineItem> = fetch_all_pages(&client, epub_data.spine.clone()).await?; let toc_vec: Vec<TocNode> = fetch_direct_array(&client, &epub_data.table_of_contents).await?; @@ -116,7 +119,7 @@ async fn main() -> Result<()> { download_all_files(&client, &file_entries, dest_root).await?; let epub_path = format!("Books/{0}/{0}.epub", args.bookid); let epub_path = Path::new(&epub_path); - create_epub_archive(dest_root, &epub_path, &file_entries)?; + create_epub_archive(dest_root, epub_path, &file_entries, &chapters)?; Ok(()) } |
