gardenserver/src/main.rs

#[macro_use]
extern crate markup5ever;

use actix_files::NamedFile;
use actix_web::error::ErrorInternalServerError;
use actix_web::{error, get, http, middleware, web, App, Error, HttpResponse, HttpServer};
use anyhow::anyhow;
use chrono::{DateTime, Local};
use clap::{App as ClapApp, Arg};
use html5ever::serialize::{serialize, SerializeOpts};
use kuchiki::{parse_fragment, Attribute, ExpandedName, NodeRef};
use linkify::LinkFinder;
use log::{info, trace};
use markup5ever::tendril::TendrilSink;
use markup5ever::QualName;
use percent_encoding::{percent_decode_str, utf8_percent_encode};
use pulldown_cmark::{html, Event, Options, Parser, Tag};
use regex::{Captures, Regex};
use slug::slugify;
use std::cmp::Reverse;
use std::collections::HashMap;
use std::fs;
use std::fs::File;
use std::io::Read;
use std::net::SocketAddr;
use std::path::{Path, PathBuf};
use std::sync::Mutex;
use std::time::{Duration, SystemTime};
use tera::{Context, Tera};
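
/// Immutable application configuration and templates, cloned into each
/// server worker.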
#[derive(Clone)]
struct State {
    garden_dir: PathBuf,
    index_file: Option<String>,
    title: Option<String>,
    server_name: String,
    tera: Tera,
}
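
/// Shared mutable state: the garden cache, guarded by a mutex.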
struct MutableState {
    garden_cache: Mutex<GardenCache>,
}
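
/// Everything known about the garden: parsed pages keyed by relative path,
/// the sorted file listing, and per-tag usage counts.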
#[derive(Clone, Debug, Default)]
struct GardenCache {
    pages: HashMap<String, ParsedPage>,
    files: Vec<PathBuf>,
    tags: HashMap<String, u32>,
}
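
/// A single Markdown page, parsed to HTML together with its metadata.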
#[derive(Clone, Debug)]
struct ParsedPage {
    timestamp: Option<SystemTime>,
    title: String,
    html: String,
    links: Vec<String>,
}

const VERSION: &str = env!("CARGO_PKG_VERSION");
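
/// Parses the command line, builds the initial garden cache and runs the
/// HTTP server until it is stopped.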
fn main() -> anyhow::Result<()> {
    let env = env_logger::Env::default().filter_or(env_logger::DEFAULT_FILTER_ENV, "info");
    env_logger::init_from_env(env);

    let app = ClapApp::new("gardenserver")
        .version(VERSION)
        .author("Tomáš Mládek <t@mldk.cz>")
        .arg(Arg::with_name("DIRECTORY").required(true).index(1))
        .arg(
            Arg::with_name("BIND")
                .long("bind")
                .default_value("127.0.0.1:8642")
                .help("Address and port to bind the Web interface on."),
        )
        .arg(
            Arg::with_name("INDEX_FILE")
                .takes_value(true)
                .short("i")
                .long("index")
                .help("File to be served at the root."),
        )
        .arg(
            Arg::with_name("TITLE")
                .takes_value(true)
                .short("t")
                .long("title")
                .help("Title of this digital garden."),
        )
        .arg(
            Arg::with_name("SERVER_NAME")
                .takes_value(true)
                .short("u")
                .long("garden-url")
                .help("Hostname of the server of this digital garden (for metadata)."),
        );
    let matches = app.get_matches();

    let directory = Path::new(matches.value_of("DIRECTORY").unwrap());
    info!(
        "Starting GardenServer {} of {}...",
        VERSION,
        directory.display()
    );

    let tera = Tera::new("templates/**/*.html")?;
    let sys = actix::System::new("gardenserver");

    let bind: SocketAddr = matches
        .value_of("BIND")
        .unwrap()
        .parse()
        .expect("Incorrect bind format.");
    info!("Starting server at: http://{}", &bind);

    let mutable_state = web::Data::new(MutableState {
        garden_cache: Mutex::new(update_garden(directory, GardenCache::default())?),
    });
    let state = State {
        garden_dir: directory.to_path_buf(),
        index_file: matches.value_of("INDEX_FILE").map(|s| s.to_string()),
        title: matches.value_of("TITLE").map(|s| s.to_string()),
        server_name: matches
            .value_of("SERVER_NAME")
            .map_or(matches.value_of("BIND").unwrap().to_string(), |s| {
                s.to_string()
            }),
        tera,
    };

    // Start the HTTP server.
    HttpServer::new(move || {
        App::new()
            .wrap(middleware::Logger::default())
            .data(state.clone())
            .app_data(mutable_state.clone())
            .service(actix_files::Files::new("/static", "templates"))
            .service(render)
    })
    .bind(&bind)?
    .run();

    Ok(sys.run()?)
}
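
/// The catch-all GET handler: refreshes the cache, then serves the requested
/// Markdown page (or static file), the special "!graph" view, or a
/// "not found" page.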
#[get("{path:.*}")]
async fn render(
    request: web::HttpRequest,
    data: web::Data<State>,
    state: web::Data<MutableState>,
    path: web::Path<String>,
) -> Result<HttpResponse, Error> {
    let mut cache = state.garden_cache.lock().unwrap();
    *cache = update_garden(&data.garden_dir, (*cache).clone())
        .map_err(error::ErrorInternalServerError)?;

    // Redirect to the index if the path is empty.
    if path.is_empty() {
        let location = match data.index_file.as_ref() {
            Some(index_file) => index_file.clone(),
            // Fall back to the first Markdown file, or failing that, the first
            // file in the garden (update_garden guarantees there is one).
            None => cache
                .files
                .iter()
                .find(|f| f.to_str().unwrap().ends_with(".md"))
                .unwrap_or_else(|| cache.files.first().unwrap())
                .display()
                .to_string(),
        };
        return Ok(HttpResponse::Found()
            .header(http::header::LOCATION, location.as_str())
            .finish());
    }

    let full_path = data.garden_dir.join(path.as_str());

    // Redirect to the ".md" version if the requested path matches a .md file
    // without the extension.
    if !full_path.exists() && Path::new(&format!("{}.md", full_path.to_str().unwrap())).exists() {
        return Ok(HttpResponse::Found()
            .header(http::header::LOCATION, format!("{}.md", path.as_str()))
            .finish());
    }

    // If the path is not a Markdown file (e.g. a photo), just serve it as-is.
    if full_path.exists() && !path.ends_with(".md") {
        return NamedFile::open(full_path)?.into_response(&request);
    }

    // Otherwise, look the page up in the cache and collect its backlinks.
    let filename = full_path
        .components()
        .last()
        .unwrap()
        .as_os_str()
        .to_str()
        .unwrap();
    let normalized_name = normalize_name(filename);
    let mut backlinks: Vec<String> = vec![];
    for (path, page) in cache.pages.iter() {
        if page
            .links
            .iter()
            .any(|link| normalize_name(link.as_str()) == normalized_name)
        {
            backlinks.push(normalize_name(path));
        }
    }

    // Special case - the "!graph" view.
    let mut graph_page = ParsedPage {
        timestamp: None,
        title: "".to_string(),
        html: "".to_string(),
        links: vec![],
    };
    let page = if path.as_str() != "!graph" {
        cache.pages.get(path.as_ref())
    } else {
        let mut context = Context::new();
        let mut nodes: Vec<HashMap<String, String>> = vec![];
        let mut links: Vec<HashMap<String, String>> = vec![];
        let page_ids: Vec<String> = cache.pages.keys().map(|n| normalize_name(n)).collect();
        cache.pages.iter().for_each(|(path, page)| {
            let normalized_path = normalize_name(path);
            nodes.push(
                [("id".to_string(), normalized_path.clone())]
                    .iter()
                    .cloned()
                    .collect(),
            );
            // Only emit edges whose target is an existing page.
            page.links
                .iter()
                .map(|l| normalize_name(l))
                .filter(|link| page_ids.contains(link))
                .for_each(|link| {
                    links.push(
                        [
                            ("source".to_string(), normalized_path.clone()),
                            ("target".to_string(), link),
                        ]
                        .iter()
                        .cloned()
                        .collect(),
                    )
                })
        });
        context.insert("nodes", &nodes);
        context.insert("links", &links);
        graph_page.title = "Graph View".to_string();
        graph_page.html = data
            .tera
            .render("graph.html", &context)
            .map_err(ErrorInternalServerError)?;
        Some(&graph_page)
    };

    // Recently changed pages, most recently modified first.
    let mut recently_changed = cache
        .pages
        .clone()
        .into_iter()
        .filter_map(|(path, page)| {
            // Treat modification times in the future as "just now" instead of
            // panicking on duration_since().
            page.timestamp
                .map(|ts| (path, SystemTime::now().duration_since(ts).unwrap_or_default()))
        })
        .collect::<Vec<(String, Duration)>>();
    recently_changed.sort_by_key(|i| i.1);
    let timeago = timeago::Formatter::new();

    // Render context generation.
    let mut context = Context::new();
    context.insert("version", VERSION);
    context.insert(
        "garden_title",
        data.title.as_ref().unwrap_or(&"Digital Garden".to_string()),
    );
    context.insert("files", &cache.files);

    // Sort tags by usage count (descending), breaking ties alphabetically;
    // the stable sorts make the second key the primary one.
    let mut tags: Vec<(&String, &u32)> = cache.tags.iter().collect();
    tags.sort_by_key(|(t, _)| *t);
    tags.sort_by_key(|(_, n)| Reverse(*n));
    context.insert("tags", &tags);

    context.insert(
        "recently_changed",
        &recently_changed
            .into_iter()
            .map(|(path, duration)| (path, timeago.convert(duration)))
            .collect::<Vec<(String, String)>>(),
    );
    context.insert(
        "page_title",
        &match page {
            Some(page) => page.title.clone(),
            None => filename.to_string(),
        },
    );
    context.insert(
        "content",
        &match page {
            Some(page) => page.html.clone(),
            None => data
                .tera
                .render("_not_found.html", &Context::new())
                .map_err(ErrorInternalServerError)?,
        },
    );
    context.insert("backlinks", &backlinks);
    context.insert(
        "mtime",
        &match page {
            Some(page) => {
                if let Some(timestamp) = page.timestamp {
                    let mtime: DateTime<Local> = timestamp.into();
                    Some(mtime.format("%c").to_string())
                } else {
                    None
                }
            }
            None => None,
        },
    );
    context.insert("server_name", &data.server_name);
    context.insert("path", &path.to_string());

    Ok(HttpResponse::Ok().body(
        data.tera
            .render("main.html", &context)
            .map_err(ErrorInternalServerError)?,
    ))
}
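
/// Scans `garden_path` and returns an updated copy of `current`, re-parsing
/// only those Markdown files whose modification time has changed since they
/// were last cached.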
fn update_garden<P: AsRef<Path>>(
    garden_path: P,
    current: GardenCache,
) -> anyhow::Result<GardenCache> {
    let garden_path = garden_path.as_ref();

    // List all visible (non-dotfile) files in the garden directory.
    let mut files: Vec<PathBuf> = fs::read_dir(&garden_path)?
        .filter_map(|entry| {
            if let Ok(entry) = entry {
                let path = entry.path();
                if path.is_file() {
                    let stripped_path = path.strip_prefix(&garden_path).unwrap().to_path_buf();
                    if !stripped_path.to_str().unwrap().starts_with('.') {
                        return Some(stripped_path);
                    }
                }
            }
            None
        })
        .collect();

    // Sort alphabetically by file stem, then group: extensionless files
    // first, Markdown files second, everything else last.
    files.sort_by(move |a, b| {
        let a_sort = a.file_stem().unwrap_or_else(|| a.as_os_str());
        let b_sort = b.file_stem().unwrap_or_else(|| b.as_os_str());
        a_sort.cmp(b_sort)
    });
    files.sort_by_key(|p| match p.extension() {
        None => -1,
        Some(ext) => {
            if ext == "md" {
                0
            } else {
                1
            }
        }
    });
    if files.is_empty() {
        return Err(anyhow!("Garden is empty."));
    }

    let mut pages = current.pages;
    let mut tags = current.tags;
    let markdown_paths = files
        .iter()
        .filter(|p| p.to_str().unwrap_or("").ends_with(".md"));
    for path in markdown_paths {
        trace!("Loading {} into cache...", path.display());
        let full_path = garden_path.join(path);
        let mtime = full_path.metadata()?.modified().ok();

        // Skip files whose mtime hasn't changed since they were last parsed.
        if let Some(page) = pages.get(path.to_str().unwrap()) {
            if let (Some(fs_time), Some(last_time)) = (mtime, page.timestamp) {
                if fs_time == last_time {
                    continue;
                }
            }
        }

        let mut file = File::open(&full_path)?;
        let mut file_string = String::new();
        file.read_to_string(&mut file_string)?;
        let markdown_source = preprocess_markdown(file_string);
        let result = parse_garden(&markdown_source)?;
        pages.insert(
            String::from(path.to_str().unwrap()),
            ParsedPage {
                timestamp: mtime,
                html: result.html,
                links: result.links,
                // Fall back to the file name if the page has no heading.
                title: match result.title {
                    Some(title) => title,
                    _ => String::from(
                        path.components()
                            .last()
                            .unwrap()
                            .as_os_str()
                            .to_str()
                            .unwrap_or("???"),
                    ),
                },
            },
        );
        result.tags.into_iter().for_each(|tag| {
            *tags.entry(tag).or_insert(0) += 1;
        });
    }

    let result = GardenCache { pages, files, tags };
    trace!("{:#?}", result);
    Ok(result)
}
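
/// The outcome of parsing a single Markdown source.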
struct ParseResult {
    html: String,
    title: Option<String>,
    links: Vec<String>,
    tags: Vec<String>,
}
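
/// Renders preprocessed Markdown to HTML, collecting outgoing link
/// destinations, tags (links whose text starts with '#'), and the text of the
/// highest-level heading, which becomes the page title.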
fn parse_garden<S: AsRef<str>>(text: S) -> anyhow::Result<ParseResult> {
    let mut current_top_heading = 999;
    let mut top_heading_text: Option<String> = None;
    let mut last_nontext_event: Option<Event> = None;
    let mut links: Vec<String> = vec![];
    let mut tags: Vec<String> = vec![];
    let parser = Parser::new_ext(text.as_ref(), Options::all()).map(|event| {
        // Collect every link destination for backlink resolution.
        if let Event::Start(Tag::Link(_, dest, _)) = &event {
            links.push(dest.to_string());
        }
        // Link text beginning with '#' marks a tag (see preprocess_markdown).
        if let Some(Event::Start(Tag::Link(_, _, _))) = &last_nontext_event {
            if let Event::Text(str) = &event {
                if str.starts_with('#') {
                    tags.push(str[1..].to_string());
                }
            }
        }
        // Remember the text of the highest-level heading seen so far.
        if let Some(Event::Start(Tag::Heading(hl))) = last_nontext_event {
            if hl < current_top_heading {
                current_top_heading = hl;
                if let Event::Text(str) = &event {
                    top_heading_text = Some(str.clone().into_string());
                }
            }
        }
        last_nontext_event = Some(event.clone());
        event
    });

    let mut html = String::new();
    html::push_html(&mut html, parser);
    html = postprocess_html(html)?;

    Ok(ParseResult {
        html,
        title: top_heading_text,
        links,
        tags,
    })
}
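
/// Rewrites garden-flavored syntax into plain Markdown, e.g.:
///
/// - `[[My Page]]` becomes `[My Page](My%20Page)`,
/// - `#garden` becomes `[#garden](garden)`,
/// - a bare `https://example.com` is wrapped as `<https://example.com>`
///   (unless it is already delimited by parentheses or angle brackets).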
fn preprocess_markdown(string: String) -> String {
    // Turn "[[Wiki Links]]" into regular, percent-encoded Markdown links.
    let double_brackets = Regex::new(r"\[\[(?P<inner>[\w\- .]+)\]\]").unwrap();
    let result = double_brackets
        .replace_all(&string, |caps: &Captures| {
            format!(
                "[{}]({})",
                &caps["inner"],
                utf8_percent_encode(&caps["inner"], percent_encoding::NON_ALPHANUMERIC)
            )
        })
        .to_string();

    // Turn "#tags" into links to the bare tag name.
    let tags = Regex::new(r"#(\w+)").unwrap();
    let result = tags
        .replace_all(&result, |caps: &Captures| {
            format!(
                "[{}]({})",
                &caps[0],
                utf8_percent_encode(&caps[1], percent_encoding::NON_ALPHANUMERIC)
            )
        })
        .to_string();

    // Wrap bare URLs in angle brackets so they are parsed as links, skipping
    // URLs that are already delimited.
    let finder = LinkFinder::new();
    let result_vec = Vec::from(result.as_str());
    let start_delims = vec![b'(', b'<'];
    let end_delims = vec![b')', b'>'];
    // NB: link.end() is the first byte AFTER the link!
    let links = finder.links(result.as_str()).filter(|link| {
        link.start() == 0
            || link.end() == result.len()
            || !start_delims.contains(&result_vec[link.start() - 1])
            || !end_delims.contains(&result_vec[link.end()])
    });
    let mut offset = 0;
    let mut result_string = result.to_string();
    for link in links {
        let orig = link.as_str();
        let new = format!("<{}>", orig);
        result_string.replace_range((link.start() + offset)..(link.end() + offset), new.as_str());
        offset += new.len() - orig.len();
    }
    result_string
}
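
/// Post-processes rendered HTML: every `h1`-`h5` heading gets a slugified
/// `id` (e.g. "My Heading" becomes `id="my-heading"`) and a prepended
/// `<a class="anchor">` link pointing at it.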
fn postprocess_html<T: AsRef<str>>(document: T) -> anyhow::Result<String> {
    let document_string = String::from(document.as_ref());
    let frag = parse_fragment(QualName::new(None, ns!(html), local_name!("body")), vec![])
        .from_utf8()
        .read_from(&mut document_string.as_bytes())?;

    frag.select("h1,h2,h3,h4,h5")
        .unwrap()
        .into_iter()
        .for_each(|el| {
            let id = slugify(el.text_contents());
            el.attributes.borrow_mut().insert("id", id.clone());
            el.as_node().prepend(NodeRef::new_element(
                QualName::new(None, ns!(html), local_name!("a")),
                vec![
                    (
                        ExpandedName::new(ns!(), local_name!("class")),
                        Attribute {
                            prefix: None,
                            value: "anchor".to_string(),
                        },
                    ),
                    (
                        ExpandedName::new(ns!(), local_name!("href")),
                        Attribute {
                            prefix: None,
                            value: format!("#{}", id),
                        },
                    ),
                ],
            ));
        });

    let mut bytes = vec![];
    serialize(&mut bytes, &frag, SerializeOpts::default())?;
    Ok(String::from_utf8(bytes)?)
}
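
/// Percent-decodes a file name and strips a trailing ".md", so that e.g.
/// "My%20Page.md", "My Page.md" and "My Page" all normalize to "My Page".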
fn normalize_name(filename: &str) -> String {
    let decoded = percent_decode_str(filename).decode_utf8_lossy();
    let result = decoded.strip_suffix(".md");
    String::from(result.unwrap_or_else(|| decoded.as_ref()))
}