diff --git a/Cargo.lock b/Cargo.lock index 3a99750..de3999b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -189,9 +189,19 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "form_urlencoded" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" +dependencies = [ + "matches", + "percent-encoding", +] + [[package]] name = "futhorc" -version = "0.1.7" +version = "0.1.8" dependencies = [ "atom_syndication", "chrono", @@ -203,6 +213,7 @@ dependencies = [ "serde", "serde_yaml", "slug", + "url", ] [[package]] @@ -259,6 +270,17 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" +[[package]] +name = "idna" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8" +dependencies = [ + "matches", + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "itertools" version = "0.10.0" @@ -286,6 +308,12 @@ version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" +[[package]] +name = "matches" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" + [[package]] name = "memchr" version = "2.3.4" @@ -447,6 +475,21 @@ dependencies = [ "winapi", ] +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" + [[package]] name = "unicase" version = "2.6.0" @@ -456,6 +499,21 @@ dependencies = [ "version_check", ] +[[package]] +name = "unicode-bidi" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" + +[[package]] +name = "unicode-normalization" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854cbdc4f7bc6ae19c820d44abdc3277ac3e1b2b93db20a636825d9322fb60e6" +dependencies = [ + "tinyvec", +] + [[package]] name = "unicode-width" version = "0.1.8" @@ -468,6 +526,19 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" +[[package]] +name = "url" +version = "2.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c" +dependencies = [ + "form_urlencoded", + "idna", + "matches", + "percent-encoding", + "serde", +] + [[package]] name = "vec_map" version = "0.8.2" diff --git a/Cargo.toml b/Cargo.toml index e1eeab0..f95c647 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ slug = "0.1.4" clap = "2.33.3" atom_syndication = "0.11.0" chrono = "0.4.19" +url = { version = "2.2.2", features = ["serde"] } [features] fail-on-warnings = [] diff --git a/src/build.rs b/src/build.rs index c526e3e..8beb7f0 100644 --- a/src/build.rs +++ b/src/build.rs @@ -8,6 +8,7 @@ use crate::config::Config; use crate::feed::{Error as FeedError, *}; use crate::post::{Error as ParseError, Parser as PostParser}; use crate::write::{Error as WriteError, *}; +use crate::url::{Url, UrlBuf}; use gtmpl::Template; use std::fmt; use std::fs::File; @@ -25,7 +26,8 @@ pub fn build_site(config: Config) -> Result<()> { ); // collect all posts - let posts = post_parser.parse_posts(&config.posts_source_directory)?; + let (posts, static_files) = + post_parser.parse_posts(&config.posts_source_directory)?; // Parse the template files. let index_template = parse_template(config.index_template.iter())?; @@ -48,13 +50,14 @@ pub fn build_site(config: Config) -> Result<()> { posts_template: &posts_template, index_template: &index_template, index_page_size: config.index_page_size, - index_base_url: &config.index_url, + index_base_url: Url::from_url(&config.index_url), index_output_directory: &config.index_output_directory, - home_page: &config.home_page, - static_url: &config.static_url, - atom_url: &config.atom_url, + home_page: Url::from_url(&config.home_page), + static_url: Url::from_url(&config.static_url), + atom_url: Url::from_url(&config.atom_url), }; writer.write_posts(&posts)?; + writer.write_static_files(&static_files)?; // copy static directory copy_dir( @@ -74,7 +77,7 @@ pub fn build_site(config: Config) -> Result<()> { title: config.title, id: config.home_page.to_string(), author: config.author, - home_page: config.home_page, + home_page: UrlBuf::from(&config.home_page), }, &posts, File::create(config.root_output_directory.join("feed.atom"))?, diff --git a/src/config.rs b/src/config.rs index 60587ac..935412d 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,11 +1,11 @@ //! Contains the logic for collecting and consolidating the program's //! configuration. -use crate::url::UrlBuf; use serde::Deserialize; use std::fmt; use std::fs::File; use std::path::{Path, PathBuf}; +use url::Url; #[derive(Deserialize)] struct PageSize(usize); @@ -28,9 +28,8 @@ pub struct Author { #[derive(Deserialize)] struct Profile { pub name: String, - #[serde(default)] - pub site_root: UrlBuf, - pub home_page: UrlBuf, + pub site_root: Url, + pub home_page: String, pub author: Option, pub title: String, @@ -70,13 +69,13 @@ pub struct Config { /// The fully-qualified URL to the site's home page. This comes from the /// `futhorc.yaml` project file and is intended to be provided to the /// index and post templates, e.g., to create a site-header link. - pub home_page: UrlBuf, + pub home_page: Url, /// The fully-qualified base URL for the index pages. The main index pages /// will live at `{index_url}/index.html`, `{index_url}/1.html`, etc. The /// tag index pages will live at `{index_url}/{tag_name}/index.html`, /// `{index_url}/{tag_name}/1.html`, etc. - pub index_url: UrlBuf, + pub index_url: Url, /// The paths to index template files which will be concatenated and the /// result parsed into a [`gtmpl::Template`] object. @@ -96,7 +95,7 @@ pub struct Config { /// The fully-qualified base URL for post pages. E.g., for a post whose /// source file is located at `{posts_source_directory}/foo/bar.md`, the /// URL will be `{posts_url}/foo/bar.html`. - pub posts_url: UrlBuf, + pub posts_url: Url, /// The paths to post template files which will be concatenated and the /// result parsed into a [`gtmpl::Template`] object. @@ -110,7 +109,7 @@ pub struct Config { /// The fully-qualified base URL for static assets. E.g., a static asset /// whose source file is located at `{static_source_directory}/style.css` /// will have the URL, `{static_url}/style.css`. - pub static_url: UrlBuf, + pub static_url: Url, /// The absolute path to the source directory for static assets. pub static_source_directory: PathBuf, @@ -119,7 +118,7 @@ pub struct Config { pub static_output_directory: PathBuf, /// The fully-qualified URL for the atom feed. - pub atom_url: UrlBuf, + pub atom_url: Url, /// The absolute path to the atom output file. pub atom_output_path: PathBuf, @@ -188,10 +187,10 @@ impl Config { title: profile.title, author: profile.author, root_output_directory: output_directory.to_owned(), - home_page: profile.site_root.join(&profile.home_page), + home_page: profile.site_root.join(&profile.home_page)?, posts_source_directory: project_root.join("posts"), - index_url: (&profile.site_root).join("pages"), - posts_url: (&profile.site_root).join("posts"), + index_url: (&profile.site_root).join("pages/")?, + posts_url: (&profile.site_root).join("posts/")?, index_template: theme .index_template .iter() @@ -204,11 +203,11 @@ impl Config { .collect(), index_output_directory: output_directory.join("pages"), posts_output_directory: output_directory.join("posts"), - static_url: (&profile.site_root).join("static"), + static_url: (&profile.site_root).join("static/")?, static_source_directory: theme_dir.join("static"), static_output_directory: output_directory.join("static"), index_page_size: profile.index_page_size.0, - atom_url: profile.site_root.join("feed.atom"), + atom_url: profile.site_root.join("feed.atom")?, atom_output_path: output_directory.join("feed.atom"), }) } @@ -243,6 +242,9 @@ pub enum Error { /// Returned when there is a problem opening the project file. OpenProjectFile { path: PathBuf, err: std::io::Error }, + /// Returned when there is a problem parsing URLs. + UrlParse(url::ParseError), + /// Returned for other I/O errors. Io(std::io::Error), } @@ -275,6 +277,7 @@ impl fmt::Display for Error { Error::OpenProjectFile { path, err } => { write!(f, "Opening project file '{}': {}", path.display(), err) } + Error::UrlParse(err) => err.fmt(f), Error::Io(err) => err.fmt(f), } } @@ -290,11 +293,20 @@ impl std::error::Error for Error { Error::UnknownProfile(_) => None, Error::OpenThemeFile { path: _, err } => Some(err), Error::OpenProjectFile { path: _, err } => Some(err), + Error::UrlParse(err) => Some(err), Error::Io(err) => Some(err), } } } +impl From for Error { + /// Converts [`url::ParseError`] into [`Error`]. This allows us to use + /// the `?` operator on fallible config parsing operations. + fn from(err: url::ParseError) -> Error { + Error::UrlParse(err) + } +} + impl From for Error { /// Converts [`serde_yaml::Error`] into [`Error`]. This allows us to use /// the `?` operator on fallible config parsing operations. diff --git a/src/htmlrenderer.rs b/src/htmlrenderer.rs index 9d2b4ea..3844bcf 100644 --- a/src/htmlrenderer.rs +++ b/src/htmlrenderer.rs @@ -74,7 +74,7 @@ enum TableState { /// Renders markdown [`Event`]s into HTML. This is largely modeled after /// [`pulldown_cmark`]'s private [`HtmlWriter` /// struct](https://github.com/raphlinus/pulldown-cmark/blob/bf0a1a4938dbd2ec41c3add069b3d361d11731f4/src/html.rs#L36-L50). -struct HtmlRenderer { +pub struct HtmlRenderer { table_alignments: Vec, table_state: TableState, table_cell_index: usize, @@ -84,7 +84,7 @@ struct HtmlRenderer { } impl<'a> HtmlRenderer { - fn on_event( + pub fn on_event( &mut self, w: &mut W, event: Event<'a>, @@ -113,7 +113,7 @@ impl<'a> HtmlRenderer { } impl<'a> HtmlRenderer { - fn new() -> Self { + pub fn new() -> Self { HtmlRenderer { table_alignments: Vec::default(), table_state: TableState::Head, @@ -304,17 +304,18 @@ impl<'a> HtmlRenderer { /// Converts [`Event`]s into an HTML string much like /// `pulldown_cmark::html::push_html` except that this also supports footnote /// prefixes. See the module description for more details. -pub fn push_html<'a, I>( +pub fn push_html<'a, I, E>( out: &mut String, events: I, footnote_prefix: &str, -) -> io::Result<()> +) -> Result<(), E> where - I: Iterator>, + E: From, + I: Iterator, E>>, { let mut renderer = HtmlRenderer::with_footnote_prefix(footnote_prefix); for event in events { - renderer.on_event(out, event)?; + renderer.on_event(out, event?)?; } Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index 5ee3ed0..53b97d9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,6 +27,7 @@ pub mod build; pub mod config; pub mod feed; pub mod htmlrenderer; +pub mod normalize_url; pub mod post; pub mod tag; pub mod url; diff --git a/src/normalize_url.rs b/src/normalize_url.rs new file mode 100644 index 0000000..199a8e1 --- /dev/null +++ b/src/normalize_url.rs @@ -0,0 +1,208 @@ +use crate::post::{HTML_EXTENSION, MARKDOWN_EXTENSION}; +use std::borrow::Cow; +use url::{ParseError, Url}; + +/// `posts_url` contains the URL to the `posts` directory, e.g., `https://example.org/posts/`. +/// `base` must be a path to a file inside of `posts_url`. `url` must be either an absolute path +/// (e.g., `https://example.org/posts/foo.md`) or a path relative to `base` (e.g., `foo.md`). +pub fn convert<'a>( + posts_url: &Url, + base: &str, + url: &'a str, +) -> Result { + println!("posts_url: {}", posts_url); + println!("base: {}", base); + println!("url: {}", url); + // `base_in_url` is the url referencing the `url` + let base_in_url = posts_url.join(base)?; + + // `absolute` is the absolute URL for `url`. + let absolute = match Url::parse(url) { + Ok(u) => u, + Err(ParseError::RelativeUrlWithoutBase) => base_in_url.join(url)?, + Err(e) => return Err(e), + }; + + // make the absolute URL relative to the posts directory URL as required + // by md2html (`base` could be in a post-bundle directory, and if we passed + // md2html a URL that was base-relative, then it may not properly detect + // post-bundles). + Ok(match posts_url.make_relative(&absolute) { + Some(posts_url_rel) => { + // Should never fail; we should always be able to join a + // post-relative URL to the posts directory URL. + posts_url.join(&md2html(&posts_url_rel)).unwrap().to_string() + } + + // if we get here, `absolute` cannot be made relative to `posts_url` + // and thus it is probably on some other host. We could probably return + // `url`, but by returning `absolute`, we can guarantee that the URL is + // normalized (e.g., if `url` is `http://foo.com/./bar`, we still want + // to return `http://foo.com/bar`). + None => absolute.to_string(), + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + struct TestCase { + posts_url: Url, + base: &'static str, + url: &'static str, + wanted: &'static str, + } + + fn convert_test(test_case: &TestCase) -> Result<(), ParseError> { + let result = + convert(&test_case.posts_url, &test_case.base, test_case.url)?; + assert_eq!( + test_case.wanted, result, + "wanted \"{}\"; found \"{}\"", + test_case.wanted, result + ); + Ok(()) + } + + #[test] + fn test_convert_absolute_url_to_unbundled_post() -> Result<(), ParseError> + { + convert_test(&TestCase { + posts_url: Url::parse("https://example.org/posts/")?, + base: "base.html", + url: "https://example.org/posts/post.md", + wanted: "https://example.org/posts/post.html", + }) + } + + #[test] + fn test_convert_relative_url_to_unbundled_post() -> Result<(), ParseError> + { + convert_test(&TestCase { + posts_url: Url::parse("https://example.org/posts/")?, + base: "base.html", + url: "post.md", + wanted: "https://example.org/posts/post.html", + }) + } + + #[test] + fn test_convert_relative_url_to_unbundled_post_leading_dotslash( + ) -> Result<(), ParseError> { + convert_test(&TestCase { + posts_url: Url::parse("https://example.org/posts/")?, + base: "base.html", + url: "./post.md", + wanted: "https://example.org/posts/post.html", + }) + } + + #[test] + fn test_convert_absolute_url_to_bundled_post() -> Result<(), ParseError> { + convert_test(&TestCase { + posts_url: Url::parse("https://example.org/posts/")?, + base: ".", + url: "https://example.org/posts/post/index.md", + wanted: "https://example.org/posts/post.html", + }) + } + + #[test] + fn test_convert_relative_url_to_bundled_post() -> Result<(), ParseError> { + convert_test(&TestCase { + posts_url: Url::parse("https://example.org/posts/")?, + base: "base.html", + url: "post/index.md", + wanted: "https://example.org/posts/post.html", + }) + } + + #[test] + fn test_convert_relative_url_to_bundled_post_leading_dotslash( + ) -> Result<(), ParseError> { + convert_test(&TestCase { + posts_url: Url::parse("https://example.org/posts/")?, + base: "base.html", + url: "./post/index.md", + wanted: "https://example.org/posts/post.html", + }) + } + + #[test] + fn test_convert_relative_url_to_unbundled_post_from_bundle( + ) -> Result<(), ParseError> { + convert_test(&TestCase { + posts_url: Url::parse("https://example.org/posts/")?, + base: "post/index.md", + url: "../foo.md", + wanted: "https://example.org/posts/foo.html", + }) + } + + #[test] + fn test_convert_relative_url_to_bundled_post_from_bundle( + ) -> Result<(), ParseError> { + convert_test(&TestCase { + posts_url: Url::parse("https://example.org/posts/")?, + base: "post/index.md", + url: "foo.jpg", + wanted: "https://example.org/posts/post/foo.jpg", + }) + } + + #[test] + fn test_convert_indirect_relative_url_to_bundle_from_bundle( + ) -> Result<(), ParseError> { + convert_test(&TestCase { + posts_url: Url::parse("https://example.org/posts/")?, + base: "post/index.md", + url: "../post/foo.jpg", + wanted: "https://example.org/posts/post/foo.jpg", + }) + } + + #[test] + fn test_convert_from_bundle_to_same_bundle() -> Result<(), ParseError> { + convert_test(&TestCase { + posts_url: Url::parse("https://example.org/posts/")?, + base: "post/", + url: "index.md", + wanted: "https://example.org/posts/post.html", + }) + } + + #[test] + fn test_convert_from_bundle_to_same_bundle_indirect( + ) -> Result<(), ParseError> { + convert_test(&TestCase { + posts_url: Url::parse("https://example.org/posts/")?, + base: "post/", + url: "../post/index.md", + wanted: "https://example.org/posts/post.html", + }) + } +} + +// `relative` must be a normalized URL path relative to the output posts URL. +// Returns `None` if `relative` isn't a markdown file at all. +fn md2html(relative: &str) -> Cow { + let path = relative.trim_start_matches('/'); + if let Some(out) = replace_suffix(path, "/index.md", HTML_EXTENSION) { + return Cow::Owned(out); + } + if let Some(out) = replace_suffix(path, MARKDOWN_EXTENSION, HTML_EXTENSION) + { + return Cow::Owned(out); + } + return Cow::Borrowed(relative); +} + +fn replace_suffix(input: &str, before: &str, after: &str) -> Option { + if input.ends_with(before) { + let mut out = String::from(&input[..input.len() - before.len()]); + out.push_str(after); + return Some(out); + } + return None; +} diff --git a/src/post.rs b/src/post.rs index b7e69d2..edf4cff 100644 --- a/src/post.rs +++ b/src/post.rs @@ -4,8 +4,9 @@ //! converted into template values. use crate::htmlrenderer::*; +use crate::normalize_url; use crate::tag::Tag; -use crate::url::*; +use crate::url::UrlBuf; use gtmpl::Value; use pulldown_cmark::{self, *}; use serde::Deserialize; @@ -14,6 +15,17 @@ use std::fmt; use std::fs::{read_dir, File}; use std::path::{Path, PathBuf}; +#[derive(Default, Deserialize, Clone)] +pub struct StaticFile { + /// The path to the source data. + #[serde(default)] + pub source: PathBuf, + + /// The output path where the final post file will be rendered. + #[serde(default)] + pub destination: PathBuf, +} + /// Represents a blog post. #[derive(Deserialize, Clone)] pub struct Post { @@ -111,12 +123,12 @@ pub struct Parser<'a> { /// `index_url` is the base URL for index pages. It's used to prefix tag /// page URLs (i.e., the URL for the first page of a tag is /// `{index_url}/{tag_name}/index.html`). - index_url: &'a Url, + index_url: &'a url::Url, /// `posts_url` is the base URL for post pages. It's used to prefix post /// page URLs (i.e., the URL for a post is /// `{posts_url}/{post_id}.html`). - posts_url: &'a Url, + posts_url: &'a url::Url, /// `posts_directory` is the directory in which post pages will be /// rendered. @@ -127,8 +139,8 @@ impl<'a> Parser<'a> { /// Constructs a new parser. See fields on [`Parser`] for argument /// descriptions. pub fn new( - index_url: &'a Url, - posts_url: &'a Url, + index_url: &'a url::Url, + posts_url: &'a url::Url, posts_directory: &'a Path, ) -> Parser<'a> { Parser { @@ -142,7 +154,9 @@ impl<'a> Parser<'a> { /// the path of the file relative to the `posts_source_directory` less the /// extension (e.g., the ID for a post whose source file is /// `{posts_source_directory}/foo/bar.md` is `foo/bar`). - fn parse_post(&self, id: &str, input: &str) -> Result { + fn parse_post(&self, post_path: &str, id: &str, file: &mut File) -> Result { + use std::io::Read; + fn frontmatter_indices(input: &str) -> Result<(usize, usize, usize)> { const FENCE: &str = "---"; if !input.starts_with(FENCE) { @@ -158,18 +172,29 @@ impl<'a> Parser<'a> { } } - let (yaml_start, yaml_stop, body_start) = frontmatter_indices(input)?; + let mut contents = String::new(); + file.read_to_string(&mut contents)?; + + let (yaml_start, yaml_stop, body_start) = + frontmatter_indices(&contents)?; let mut post: Post = - serde_yaml::from_str(&input[yaml_start..yaml_stop])?; + serde_yaml::from_str(&contents[yaml_start..yaml_stop])?; let file_name = format!("{}.html", id); - post.url = self.posts_url.join(&file_name); + post.url = UrlBuf::from(self.posts_url.join(&file_name)?.as_str()); post.file_path = self.posts_directory.join(&file_name); post.tags = post .tags .iter() .map(|t| Tag { name: t.name.clone(), - url: self.index_url.join(&t.name).join("index.html"), + url: UrlBuf::from( + self.index_url + .join(&t.name) + .unwrap() + .join("index.html") + .unwrap() + .to_string(), + ), }) .collect(); let mut options = Options::empty(); @@ -178,24 +203,81 @@ impl<'a> Parser<'a> { options.insert(Options::ENABLE_STRIKETHROUGH); options.insert(Options::ENABLE_TABLES); options.insert(Options::ENABLE_TASKLISTS); - let parser = - pulldown_cmark::Parser::new_ext(&input[body_start..], options); - - // The headings in the post itself need to be deprecated twice to be - // subordinate to both the site title (h1) and the post title (h2). So - // `#` becomes h3 instead of h1. We do this by intercepting heading - // tags and returning the tag size + 2. - let fixed_subheading_sizes = parser.map(|ev| match ev { - Event::Start(tag) => Event::Start(match tag { - pulldown_cmark::Tag::Heading(s) => { - pulldown_cmark::Tag::Heading(s + 2) - } - _ => tag, - }), - _ => ev, - }); - push_html(&mut post.body, fixed_subheading_sizes, post.url.as_str())?; + //TODO: look into storing self.posts_url as a `url::Url` + let posts_url = url::Url::parse(self.posts_url.as_str())?; + let events = + pulldown_cmark::Parser::new_ext(&contents[body_start..], options) + // The headings in the post itself need to be deprecated twice to + // be subordinate to both the site title (h1) and the post title + // (h2). So `#` becomes h3 instead of h1. We do this by + // intercepting heading tags and returning the tag size + 2. + .map(|ev| match ev { + Event::Start(tag) => Event::Start(match tag { + pulldown_cmark::Tag::Heading(s) => { + pulldown_cmark::Tag::Heading(s + 2) + } + _ => tag, + }), + _ => ev, + }) + .map(|ev| { + Result::::Ok(match ev { + Event::Start(tag) => Event::Start(match tag { + pulldown_cmark::Tag::Link( + LinkType::Inline, + url, + title, + ) => pulldown_cmark::Tag::Link( + LinkType::Inline, + CowStr::Boxed( + normalize_url::convert( + &posts_url, + post_path, + &url, + )? + .into_boxed_str(), + ), + title, + ), + _ => tag, + }), + _ => ev, + }) + }); + + let mut renderer = HtmlRenderer::new(); + for ev in events { + renderer.on_event(&mut post.body, ev?)?; + } + Ok(post) + } + + fn parse_post_directory( + &self, + id: &str, + dir: &Path, + static_files: &mut Vec, + ) -> Result { + let posts_relative_path: PathBuf = PathBuf::from(dir.file_name().unwrap()).join("index.md"); + let post = self.parse_post( + &posts_relative_path.to_string_lossy(), + id, + &mut File::open(&dir.join("index.md"))?, + )?; + for result in read_dir(dir)? { + let entry = result?; + let file_name = entry.file_name(); + if file_name != "index.md" { + static_files.push(StaticFile { + source: entry.path(), + destination: self + .posts_directory + .join(id) + .join(&file_name), + }); + } + } Ok(post) } @@ -220,28 +302,47 @@ impl<'a> Parser<'a> { /// /// World /// ``` - pub fn parse_posts(&self, source_directory: &Path) -> Result> { - use std::io::Read; - const MARKDOWN_EXTENSION: &str = ".md"; - + pub fn parse_posts( + &self, + source_directory: &Path, + ) -> Result<(Vec, Vec)> { let mut posts = Vec::new(); + let mut static_files = Vec::new(); for result in read_dir(source_directory)? { let entry = result?; let os_file_name = entry.file_name(); let file_name = os_file_name.to_string_lossy(); + let id = file_name.trim_end_matches(MARKDOWN_EXTENSION); + if file_name.ends_with(MARKDOWN_EXTENSION) { - let base_name = file_name.trim_end_matches(MARKDOWN_EXTENSION); - let mut contents = String::new(); - File::open(entry.path())?.read_to_string(&mut contents)?; - posts.push(self.parse_post(base_name, &contents)?); + posts.push( + self.parse_post( + &file_name, + id, + &mut File::open(entry.path())?, + )?, + ); + } else if entry.file_type()?.is_dir() { + // if the entry is a directory containing an index.md file, + // parse a post from the directory + if entry.path().join("index.md").is_file() { + posts.push(self.parse_post_directory( + id, + &entry.path(), + &mut static_files, + )?); + } } } posts.sort_by(|a, b| b.date.cmp(&a.date)); - Ok(posts) + Ok((posts, static_files)) } } +pub const MARKDOWN_EXTENSION: &str = ".md"; +pub const HTML_EXTENSION: &str = ".html"; + /// Represents the result of a [`Post`]-parse operation. pub type Result = std::result::Result; @@ -260,6 +361,9 @@ pub enum Error { /// Returned when there was an error parsing the frontmatter as YAML. DeserializeYaml(serde_yaml::Error), + /// Returned when there was an error parsing or joining URLs. + UrlParse(url::ParseError), + /// Returned for other I/O errors. Io(std::io::Error), } @@ -275,6 +379,7 @@ impl fmt::Display for Error { write!(f, "Missing clossing `---`") } Error::DeserializeYaml(err) => err.fmt(f), + Error::UrlParse(err) => err.fmt(f), Error::Io(err) => err.fmt(f), } } @@ -287,11 +392,20 @@ impl std::error::Error for Error { Error::FrontmatterMissingStartFence => None, Error::FrontmatterMissingEndFence => None, Error::DeserializeYaml(err) => Some(err), + Error::UrlParse(err) => Some(err), Error::Io(err) => Some(err), } } } +impl From for Error { + /// Converts a [`url::ParseError`] into an [`Error`]. It allows us to use + /// the `?` operator for [`url`] parse and join functions. + fn from(err: url::ParseError) -> Error { + Error::UrlParse(err) + } +} + impl From for Error { /// Converts a [`serde_yaml::Error`] into an [`Error`]. It allows us to use /// the `?` operator for [`serde_yaml`] deserialization functions. diff --git a/src/url.rs b/src/url.rs index 0794d08..c3d9d60 100644 --- a/src/url.rs +++ b/src/url.rs @@ -38,6 +38,16 @@ impl Url { pub fn as_str(&self) -> &str { &self.0 } + + pub fn from_url(url: &url::Url) -> &Url { + Url::new(url.as_str()) + } +} + +impl From<&url::Url> for UrlBuf { + fn from(url: &url::Url) -> UrlBuf { + UrlBuf::from(url.to_string()) + } } impl ToOwned for Url { diff --git a/src/write.rs b/src/write.rs index 3f7196c..4654bec 100644 --- a/src/write.rs +++ b/src/write.rs @@ -4,6 +4,7 @@ use crate::post::*; use crate::url::{Url, UrlBuf}; use gtmpl::{Template, Value}; +use std::collections::HashSet; use std::fmt; use std::io; use std::path::{Path, PathBuf}; @@ -78,8 +79,7 @@ impl Writer<'_> { /// Takes a slice of [`Post`], indexes it by tag, and writes post and index /// pages to disk. pub fn write_posts(&self, posts: &[Post]) -> Result<()> { - use std::collections::HashSet; - let mut seen_dirs: HashSet = HashSet::new(); + let mut seen_dirs = HashSet::new(); pages( posts, self.index_base_url, @@ -89,13 +89,33 @@ impl Writer<'_> { self.index_template, ) .try_for_each(|page| { - let dir = page.file_path.parent().unwrap(); // there should always be a dir - if seen_dirs.insert(dir.to_owned()) { - std::fs::create_dir_all(dir)?; - } + make_parent_dir(&mut seen_dirs, &page.file_path)?; self.write_page(&page) }) } + + pub fn write_static_files( + &self, + static_files: &[StaticFile], + ) -> Result<()> { + let mut seen_dirs = HashSet::new(); + for static_file in static_files { + make_parent_dir(&mut seen_dirs, &static_file.destination)?; + std::fs::hard_link(&static_file.source, &static_file.destination)?; + } + Ok(()) + } +} + +fn make_parent_dir<'a>( + seen_dirs: &mut HashSet, + path: &Path, +) -> Result<()> { + let parent = path.parent().unwrap(); + if seen_dirs.insert(parent.to_owned()) { + std::fs::create_dir_all(&parent)?; + } + Ok(()) } /// An object representing an output HTML file. A [`Page`] can be converted to