use crate::markup5ever::tendril::TendrilSink; use actix_files::NamedFile; use actix_web::error::ErrorInternalServerError; use actix_web::{error, get, http, middleware, web, App, Error, HttpResponse, HttpServer}; use anyhow::anyhow; use chrono::{DateTime, Local}; use clap::{App as ClapApp, Arg}; use html5ever::serialize::{serialize, SerializeOpts}; use kuchiki::{parse_fragment, Attribute, ExpandedName, NodeRef}; use linkify::LinkFinder; use log::{info, trace}; use markup5ever::QualName; use percent_encoding::{percent_decode_str, utf8_percent_encode}; use pulldown_cmark::{html, Event, Options, Parser, Tag}; use regex::{Captures, Regex}; use slug::slugify; use std::cmp::Reverse; use std::collections::HashMap; use std::fs::File; use std::io::Read; use std::net::SocketAddr; use std::path::{Path, PathBuf}; use std::sync::Mutex; use std::time::{Duration, SystemTime}; use std::{env, fs}; use tera::{Context, Tera}; #[macro_use] extern crate markup5ever; #[derive(Clone)] struct State { garden_dir: PathBuf, index_file: Option, title: Option, server_name: String, tera: Tera, } struct MutableState { garden_cache: Mutex, } #[derive(Clone, Debug)] struct GardenCache { pages: HashMap, files: Vec, tags: HashMap, } impl Default for GardenCache { fn default() -> Self { GardenCache { pages: HashMap::new(), files: vec![], tags: HashMap::new(), } } } #[derive(Clone, Debug)] struct ParsedPage { timestamp: Option, title: String, html: String, links: Vec, } const VERSION: &str = env!("CARGO_PKG_VERSION"); fn main() -> anyhow::Result<()> { let env = env_logger::Env::default().filter_or(env_logger::DEFAULT_FILTER_ENV, "info"); env_logger::init_from_env(env); let app = ClapApp::new("gardenserver") .version(VERSION) .author("Tomáš Mládek ") .arg(Arg::with_name("DIRECTORY").required(true).index(1)) .arg( Arg::with_name("BIND") .long("bind") .default_value("127.0.0.1:8642") .help("address and port to bind the Web interface on") .required(true), ) .arg( Arg::with_name("INDEX_FILE") .takes_value(true) .short("i") .long("index") .help("File to be served at the root."), ) .arg( Arg::with_name("TITLE") .takes_value(true) .short("t") .long("title") .help("Title of this digital garden."), ) .arg( Arg::with_name("SERVER_NAME") .takes_value(true) .short("u") .long("garden-url") .help("Hostname of the server of this digital garden (for metadata)."), ); let matches = app.get_matches(); let directory = Path::new(matches.value_of("DIRECTORY").unwrap()); info!( "Starting GardenServer {} of {}...", VERSION, directory.display() ); let tera = Tera::new("templates/**/*.html")?; let sys = actix::System::new("gardenserver"); let bind: SocketAddr = matches .value_of("BIND") .unwrap() .parse() .expect("Incorrect bind format."); info!("Starting server at: http://{}", &bind); let mutable_state = web::Data::new(MutableState { garden_cache: Mutex::new(update_garden(directory, GardenCache::default())?), }); let state = State { garden_dir: directory.to_path_buf(), index_file: matches.value_of("INDEX_FILE").map(|s| s.to_string()), title: matches.value_of("TITLE").map(|s| s.to_string()), server_name: matches .value_of("SERVER_NAME") .map_or(matches.value_of("BIND").unwrap().to_string(), |s| { s.to_string() }), tera, }; // Start HTTP server HttpServer::new(move || { App::new() .wrap(middleware::Logger::default()) .data(state.clone()) .app_data(mutable_state.clone()) .service(actix_files::Files::new("/static", "templates")) .service(render) }) .bind(&bind)? .run(); Ok(sys.run()?) } #[get("{path:.*}")] async fn render( request: web::HttpRequest, data: web::Data, state: web::Data, path: web::Path, ) -> Result { let mut cache = state.garden_cache.lock().unwrap(); *cache = update_garden(&data.garden_dir, (*cache).clone()) .map_err(error::ErrorInternalServerError)?; // Redirect to index if path is empty. if path.is_empty() { let location = match data.index_file.as_ref() { Some(index_file) => index_file.clone(), None => cache .files .iter() .filter(|f| f.to_str().unwrap().ends_with(".md")) .collect::>() .first() .unwrap_or(&cache.files.first().unwrap()) .display() .to_string(), }; return Ok(HttpResponse::Found() .header(http::header::LOCATION, location.as_str()) .finish()); } let full_path = data.garden_dir.join(path.as_str()); // Redirect to ".md" version if requested path matches a .md file without the extension if !full_path.exists() && Path::new(&format!("{}.md", full_path.to_str().unwrap())).exists() { return Ok(HttpResponse::Found() .header(http::header::LOCATION, format!("{}.md", path.to_string())) .finish()); } // If the path is not a markdown file (e.g. photos), just return it as it is. if full_path.exists() && !path.ends_with(".md") { return NamedFile::open(full_path)?.into_response(&request); } // Otherwise, retrieve it and check backlinks let filename = full_path .components() .last() .unwrap() .as_os_str() .to_str() .unwrap(); let normalized_name = normalize_name(filename); let mut backlinks: Vec = vec![]; for (path, page) in cache.pages.iter() { if page .links .iter() .any(|link| normalize_name(link.as_str()) == normalized_name) { backlinks.push(normalize_name(path)); } } // Special case - graph view let mut graph_page = ParsedPage { timestamp: None, title: "".to_string(), html: "".to_string(), links: vec![], }; let page = if path.as_str() != "!graph" { cache.pages.get(path.as_ref()) } else { let mut context = Context::new(); let mut nodes: Vec> = vec![]; let mut links: Vec> = vec![]; let page_ids: Vec = cache.pages.keys().map(|n| normalize_name(n)).collect(); cache.pages.iter().for_each(|(path, page)| { let normalized_path = normalize_name(path); nodes.push( [("id".to_string(), normalized_path.clone())] .iter() .cloned() .collect(), ); page.links .iter() .map(|l| normalize_name(l)) .filter(|link| page_ids.contains(link)) .for_each(|link| { links.push( [ ("source".to_string(), normalized_path.clone()), ("target".to_string(), link), ] .iter() .cloned() .collect(), ) }) }); context.insert("nodes", &nodes); context.insert("links", &links); graph_page.title = "Graph View".to_string(); graph_page.html = data .tera .render("graph.html", &context) .map_err(ErrorInternalServerError)?; Some(&graph_page) }; // Recently changed let mut recently_changed = cache .pages .clone() .into_iter() .filter_map(|(path, page)| { page.timestamp .map(|ts| (path, SystemTime::now().duration_since(ts).unwrap())) }) .collect::>(); recently_changed.sort_by_key(|i| i.1); let timeago = timeago::Formatter::new(); // Render context generation let mut context = Context::new(); context.insert("version", VERSION); context.insert( "garden_title", data.title.as_ref().unwrap_or(&"Digital Garden".to_string()), ); context.insert("files", &cache.files); let mut tags: Vec<(&String, &u32)> = cache.tags.iter().collect(); tags.sort_by_key(|(t, _)| *t); tags.sort_by_key(|(_, n)| Reverse(*n)); context.insert("tags", &tags); context.insert( "recently_changed", &recently_changed .into_iter() .map(|(path, duration)| (path, timeago.convert(duration))) .collect::>(), ); context.insert( "page_title", &match page { Some(page) => page.title.clone(), None => filename.to_string(), }, ); context.insert( "content", &match page { Some(page) => page.html.clone(), None => data .tera .render("_not_found.html", &Context::new()) .map_err(ErrorInternalServerError)?, }, ); context.insert("backlinks", &backlinks); context.insert( "mtime", &match page { Some(page) => { if let Some(timestamp) = page.timestamp { let mtime: DateTime = timestamp.into(); Some(mtime.format("%c").to_string()) } else { None } } None => None, }, ); context.insert("server_name", &data.server_name); context.insert("path", &path.to_string()); Ok(HttpResponse::Ok().body( data.tera .render("main.html", &context) .map_err(ErrorInternalServerError)?, )) } fn update_garden>( garden_path: P, current: GardenCache, ) -> anyhow::Result { let garden_path = garden_path.as_ref(); let mut files: Vec = fs::read_dir(&garden_path)? .filter_map(|entry| { if let Ok(entry) = entry { let path = entry.path(); if path.is_file() { let stripped_path = path.strip_prefix(&garden_path).unwrap().to_path_buf(); if !stripped_path.to_str().unwrap().starts_with('.') { return Some(stripped_path); } } } None }) .collect(); files.sort_by(move |a, b| { let a_sort = a.file_stem().unwrap_or_else(|| a.as_os_str()); let b_sort = b.file_stem().unwrap_or_else(|| b.as_os_str()); a_sort.cmp(b_sort) }); files.sort_by_key(|p| match p.extension() { None => -1, Some(ext) => { if ext == "md" { 0 } else { 1 } } }); if files.is_empty() { return Err(anyhow!("Garden is empty.")); } let mut pages = current.pages; let mut tags = current.tags; let markdown_paths = files .iter() .filter(|p| p.to_str().unwrap_or("").ends_with(".md")); for path in markdown_paths { trace!("Loading {} into cache...", path.display()); let full_path = garden_path.join(path); let mtime = full_path.metadata().unwrap().modified().ok(); if let Some(page) = pages.get(path.to_str().unwrap()) { if let (Some(fs_time), Some(last_time)) = (mtime, page.timestamp) { if fs_time == last_time { continue; } } } let mut file = File::open(&full_path)?; let mut file_string = String::new(); file.read_to_string(&mut file_string)?; let markdown_source = preprocess_markdown(file_string); let result = parse_garden(&markdown_source)?; pages.insert( String::from(path.to_str().unwrap()), ParsedPage { timestamp: mtime, html: result.html, links: result.links, title: match result.title { Some(title) => title, _ => String::from( path.components() .last() .unwrap() .as_os_str() .to_str() .unwrap_or("???"), ), }, }, ); result.tags.into_iter().for_each(|tag| { *tags.entry(tag).or_insert(0) += 1; }); } let result = GardenCache { pages, files, tags }; trace!("{:#?}", result); Ok(result) } struct ParseResult { html: String, title: Option, links: Vec, tags: Vec, } fn parse_garden>(text: S) -> anyhow::Result { let mut current_top_heading = 999; let mut top_heading_text: Option = None; let mut last_nontext_event: Option = None; let mut links: Vec = vec![]; let mut tags: Vec = vec![]; let parser = Parser::new_ext(text.as_ref(), Options::all()).map(|event| { if let Event::Start(Tag::Link(_, dest, _)) = &event { links.push(dest.to_string()); } if let Some(Event::Start(Tag::Link(_, _, _))) = &last_nontext_event { if let Event::Text(str) = &event { if str.starts_with('#') { tags.push(str[1..].to_string()); } } } if let Some(Event::Start(Tag::Heading(hl))) = last_nontext_event { if hl < current_top_heading { current_top_heading = hl; if let Event::Text(str) = &event { top_heading_text = Some(str.clone().into_string()); } } } last_nontext_event = Some(event.clone()); event }); let mut html = String::new(); html::push_html(&mut html, parser); html = postprocess_html(html)?; Ok(ParseResult { html, title: top_heading_text, links, tags, }) } fn preprocess_markdown(string: String) -> String { let double_brackets = Regex::new(r"\[\[(?P[\w\- .]+)\]\]").unwrap(); let finder = LinkFinder::new(); let result = double_brackets .replace_all(&string, |caps: &Captures| { format!( "[{}]({})", &caps[1], utf8_percent_encode(&caps[1], percent_encoding::NON_ALPHANUMERIC) ) }) .to_string(); let tags = Regex::new(r"#([\w]+)").unwrap(); let result = tags .replace_all(&result, |caps: &Captures| { format!( "[{}]({})", &caps[0], utf8_percent_encode(&caps[1], percent_encoding::NON_ALPHANUMERIC) ) }) .to_string(); let result_vec = Vec::from(result.as_str()); let start_delims = vec![b'(', b'<']; let end_delims = vec![b')', b'>']; // link.end() is the first char AFTER the link! let links = finder.links(result.as_str()).filter(|link| { link.start() == 0 || link.end() == result.len() || !start_delims.contains(&result_vec[link.start() - 1]) || !end_delims.contains(&result_vec[link.end()]) }); let mut offset = 0; let mut result_string = result.to_string(); for link in links { let orig = link.as_str(); let new = format!("<{}>", orig); result_string.replace_range((link.start() + offset)..(link.end() + offset), new.as_str()); offset += new.len() - orig.len(); } result_string } fn postprocess_html>(document: T) -> anyhow::Result { let document_bytes = String::from(document.as_ref()); let frag = parse_fragment(QualName::new(None, ns!(html), local_name!("body")), vec![]) .from_utf8() .read_from(&mut document_bytes.as_bytes()) .unwrap(); frag.select("h1,h2,h3,h4,h5") .unwrap() .into_iter() .for_each(|el| { let id = slugify(el.text_contents()); el.attributes.borrow_mut().insert("id", id.clone()); el.as_node().prepend(NodeRef::new_element( QualName::new(None, ns!(html), local_name!("a")), vec![ ( ExpandedName::new(ns!(), local_name!("class")), Attribute { prefix: None, value: "anchor".to_string(), }, ), ( ExpandedName::new(ns!(), local_name!("href")), Attribute { prefix: None, value: format!("#{}", id), }, ), ], )); }); let mut bytes = vec![]; serialize(&mut bytes, &frag, SerializeOpts::default())?; Ok(String::from_utf8(bytes)?) } fn normalize_name(filename: &str) -> String { let decoded = percent_decode_str(filename).decode_utf8_lossy(); let result = decoded.strip_suffix(".md"); String::from(result.unwrap_or_else(|| decoded.as_ref())) }