From c181afaf936ba0d5cd19c38869422fc9351af2d1 Mon Sep 17 00:00:00 2001 From: A Farzat Date: Sat, 7 Mar 2026 21:50:56 +0300 Subject: Avoid using string buffers when modifying chapters This saves on memory. Bytes are read from the file as needed, and written to zip as soon as they are ready. --- src/xml.rs | 62 +++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 27 deletions(-) (limited to 'src/xml.rs') diff --git a/src/xml.rs b/src/xml.rs index 6944cb9..058cb1b 100644 --- a/src/xml.rs +++ b/src/xml.rs @@ -4,7 +4,7 @@ use quick_xml::events::{BytesStart, Event}; use quick_xml::{Reader, Writer}; use relative_path::{RelativePath, RelativePathBuf}; use std::collections::HashMap; -use std::io::Cursor; +use std::io::{BufRead, Write}; use url::Url; use crate::models::{Chapter, EpubResponse, FileEntry}; @@ -31,25 +31,50 @@ fn is_html_void_tag(name: &[u8]) -> bool { } /// Processes the fragment and outputs a complete, EPUB-ready XHTML document. -pub fn build_epub_chapter( +pub fn build_epub_chapter( epub_data: &EpubResponse, chapter: &Chapter, chapter_dir: &RelativePath, - fragment: &str, + fragment_input: R, url_to_file: &HashMap<&Url, &FileEntry>, url_path_to_local: &HashMap<&str, &RelativePathBuf>, -) -> Result { + mut out: &mut W, +) -> Result<()> { + // EPUB XHTML Boilerplate wrapper. + // EPUBs strictly require the w3 and idpf namespaces to validate properly. + let wrapper_xhtml = xml!( + + + + {chapter.title} + {|doc| make_stylesheet_links(doc, chapter, chapter_dir, url_to_file)} + + + + + ); + let wrapper_suffix = ""; + let wrapper_prefix = wrapper_xhtml + .as_str() + .strip_suffix(wrapper_suffix) + .context("Wrapper must end with ")?; + + // Write wrapper prefix to output first. + out.write_all(wrapper_prefix.as_bytes())?; + // Setup the XML Reader and Writer. - let mut reader = Reader::from_str(fragment); + let mut reader = Reader::from_reader(fragment_input); // Preserve spacing for EPUB text formatting. reader.config_mut().trim_text(false); // Fragments could have unmatched tags - tell the parser not to panic if so. reader.config_mut().check_end_names = false; - let mut writer = Writer::new(Cursor::new(Vec::new())); + let mut writer = Writer::new(&mut out); // Loop through the XML events and rewrite tags. + let mut buffer = Vec::new(); loop { - match reader.read_event() { + match reader.read_event_into(&mut buffer) { Ok(Event::Start(tag_data)) => { // If it is a void tag, convert it to a self-closing XML tag. let tag_type = if is_html_void_tag(tag_data.name().as_ref()) { @@ -83,27 +108,10 @@ pub fn build_epub_chapter( } } - // Extract the modified fragment - let processed_fragment = String::from_utf8(writer.into_inner().into_inner())?; - - // Wrap in EPUB XHTML Boilerplate. - // EPUBs strictly require the w3 and idpf namespaces to validate properly. - let wrapper_xhtml = xml!( - - - - {chapter.title} - {|doc| make_stylesheet_links(doc, chapter, chapter_dir, url_to_file)} - - - - - ); - let wrapper_suffix = ""; - let wrapper_prefix = wrapper_xhtml.as_str().strip_suffix(wrapper_suffix).context("Wrapper must end with ")?; + // Finish by flushing wrapper suffix to output. + out.write_all(wrapper_suffix.as_bytes())?; - Ok(format!("{}\n{}\n{}", wrapper_prefix, processed_fragment, wrapper_suffix)) + Ok(()) } /// Helper function add link elements for stylesheets to an xml Document. -- cgit v1.2.3-70-g09d2