diff options
| author | A Farzat <a@farzat.xyz> | 2026-03-07 21:50:56 +0300 |
|---|---|---|
| committer | A Farzat <a@farzat.xyz> | 2026-03-07 21:50:56 +0300 |
| commit | c181afaf936ba0d5cd19c38869422fc9351af2d1 (patch) | |
| tree | a8361f6c40bfa0f4d4883a89dc418ece5b1ce8fc /src | |
| parent | a1ffe295ade3026f8712c2608eacc285c595f08c (diff) | |
| download | oreilly-epub-c181afaf936ba0d5cd19c38869422fc9351af2d1.tar.gz oreilly-epub-c181afaf936ba0d5cd19c38869422fc9351af2d1.zip | |
Avoid using string buffers when modifying chapters
This saves on memory. Bytes are read from the file as needed, and
written to zip as soon as they are ready.
Diffstat (limited to 'src')
| -rw-r--r-- | src/epub.rs | 13 | ||||
| -rw-r--r-- | src/xml.rs | 62 |
2 files changed, 41 insertions, 34 deletions
diff --git a/src/epub.rs b/src/epub.rs index f9a5aac..c455671 100644 --- a/src/epub.rs +++ b/src/epub.rs @@ -8,7 +8,7 @@ use relative_path::{RelativePath, RelativePathBuf}; use reqwest::Client; use std::{ collections::HashMap, - io::{Read, Write}, + io::{BufReader, Read, Write}, path::Path, }; use tokio::{ @@ -107,21 +107,20 @@ pub fn create_epub_archive( for entry in file_entries { zip.start_file(&entry.full_path, options)?; let mut src_file = std::fs::File::open(entry.full_path.to_path(epub_root))?; - let mut buffer = Vec::new(); - src_file.read_to_end(&mut buffer)?; if let Some(chapter) = chapters.get(&entry.ourn) { let chapter_dir = entry.full_path.parent().unwrap_or(RelativePath::new("")); - let html = String::from_utf8(buffer)?; - let html = build_epub_chapter( + build_epub_chapter( epub_data, chapter, chapter_dir, - &html, + BufReader::new(src_file), &url_to_file, &url_path_to_local, + &mut zip, )?; - zip.write_all(html.as_bytes())?; } else { + let mut buffer = Vec::new(); + src_file.read_to_end(&mut buffer)?; zip.write_all(&buffer)?; } } @@ -4,7 +4,7 @@ use quick_xml::events::{BytesStart, Event}; use quick_xml::{Reader, Writer}; use relative_path::{RelativePath, RelativePathBuf}; use std::collections::HashMap; -use std::io::Cursor; +use std::io::{BufRead, Write}; use url::Url; use crate::models::{Chapter, EpubResponse, FileEntry}; @@ -31,25 +31,50 @@ fn is_html_void_tag(name: &[u8]) -> bool { } /// Processes the fragment and outputs a complete, EPUB-ready XHTML document. -pub fn build_epub_chapter( +pub fn build_epub_chapter<R: BufRead, W: Write>( epub_data: &EpubResponse, chapter: &Chapter, chapter_dir: &RelativePath, - fragment: &str, + fragment_input: R, url_to_file: &HashMap<&Url, &FileEntry>, url_path_to_local: &HashMap<&str, &RelativePathBuf>, -) -> Result<String> { + mut out: &mut W, +) -> Result<()> { + // EPUB XHTML Boilerplate wrapper. + // EPUBs strictly require the w3 and idpf namespaces to validate properly. + let wrapper_xhtml = xml!( + <?xml version="1.0" encoding="UTF-8"?> + <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" + lang={epub_data.language} xml:lang={epub_data.language}> + <head> + <title>{chapter.title}</title> + {|doc| make_stylesheet_links(doc, chapter, chapter_dir, url_to_file)} + </head> + <body> + </body> + </html> + ); + let wrapper_suffix = "</body></html>"; + let wrapper_prefix = wrapper_xhtml + .as_str() + .strip_suffix(wrapper_suffix) + .context("Wrapper must end with </body></html>")?; + + // Write wrapper prefix to output first. + out.write_all(wrapper_prefix.as_bytes())?; + // Setup the XML Reader and Writer. - let mut reader = Reader::from_str(fragment); + let mut reader = Reader::from_reader(fragment_input); // Preserve spacing for EPUB text formatting. reader.config_mut().trim_text(false); // Fragments could have unmatched tags - tell the parser not to panic if so. reader.config_mut().check_end_names = false; - let mut writer = Writer::new(Cursor::new(Vec::new())); + let mut writer = Writer::new(&mut out); // Loop through the XML events and rewrite tags. + let mut buffer = Vec::new(); loop { - match reader.read_event() { + match reader.read_event_into(&mut buffer) { Ok(Event::Start(tag_data)) => { // If it is a void tag, convert it to a self-closing XML tag. let tag_type = if is_html_void_tag(tag_data.name().as_ref()) { @@ -83,27 +108,10 @@ pub fn build_epub_chapter( } } - // Extract the modified fragment - let processed_fragment = String::from_utf8(writer.into_inner().into_inner())?; - - // Wrap in EPUB XHTML Boilerplate. - // EPUBs strictly require the w3 and idpf namespaces to validate properly. - let wrapper_xhtml = xml!( - <?xml version="1.0" encoding="UTF-8"?> - <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" - lang={epub_data.language} xml:lang={epub_data.language}> - <head> - <title>{chapter.title}</title> - {|doc| make_stylesheet_links(doc, chapter, chapter_dir, url_to_file)} - </head> - <body> - </body> - </html> - ); - let wrapper_suffix = "</body></html>"; - let wrapper_prefix = wrapper_xhtml.as_str().strip_suffix(wrapper_suffix).context("Wrapper must end with </body></html>")?; + // Finish by flushing wrapper suffix to output. + out.write_all(wrapper_suffix.as_bytes())?; - Ok(format!("{}\n{}\n{}", wrapper_prefix, processed_fragment, wrapper_suffix)) + Ok(()) } /// Helper function add link elements for stylesheets to an xml Document. |
