add cache, prepare for backlinks, improve 404s

This commit is contained in:
Tomáš Mládek 2020-10-25 15:55:53 +01:00
parent db941f02e0
commit 38e560aa51
4 changed files with 197 additions and 79 deletions

View file

@ -1,19 +1,22 @@
use actix_files::NamedFile;
use actix_web::error::ErrorInternalServerError;
use actix_web::{error, get, http, middleware, web, App, Error, HttpResponse, HttpServer};
use anyhow::anyhow;
use chrono::{DateTime, Local};
use clap::{App as ClapApp, Arg};
use linkify::LinkFinder;
use log::{info, trace};
use percent_encoding::utf8_percent_encode;
use pulldown_cmark::{html, Options, Parser};
use regex::{Captures, Regex};
use std::collections::HashMap;
use std::fs::File; use std::fs::File;
use std::io::Read; use std::io::Read;
use std::net::SocketAddr; use std::net::SocketAddr;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::Mutex;
use std::time::SystemTime;
use std::{env, fs}; use std::{env, fs};
use actix_files::NamedFile;
use actix_web::error::ErrorInternalServerError;
use actix_web::{error, get, http, middleware, web, App, Error, HttpResponse, HttpServer};
use chrono::{DateTime, Local};
use clap::{App as ClapApp, Arg};
use linkify::LinkFinder;
use log::info;
use percent_encoding::utf8_percent_encode;
use pulldown_cmark::{html, Options, Parser};
use regex::{Captures, Regex};
use tera::{Context, Tera}; use tera::{Context, Tera};
#[derive(Clone)] #[derive(Clone)]
@ -24,6 +27,33 @@ struct State {
tera: Tera, tera: Tera,
} }
/// Server state that request handlers are allowed to modify.
///
/// It is created once in `main`, wrapped in `web::Data`, and registered with
/// the application via `app_data`.
struct MutableState {
    /// The garden cache. `render` locks this mutex and replaces the cache
    /// with a refreshed copy on each request.
    garden_cache: Mutex<GardenCache>,
}
/// Snapshot of the garden directory: the file listing plus every parsed page.
///
/// `Default` yields an empty cache (no pages, no files), which is what `main`
/// passes to the first `update_garden` call.
#[derive(Clone, Debug, Default)]
struct GardenCache {
    /// Parsed markdown pages. `update_garden` keys each entry by the page's
    /// path joined onto the garden directory, and `render` looks pages up
    /// with the same joined path.
    pages: HashMap<PathBuf, ParsedPage>,
    /// Sorted paths of the garden's files, with the garden directory prefix
    /// stripped and names starting with "." left out.
    files: Vec<PathBuf>,
}
/// One markdown page after it has been rendered, as stored in the cache.
#[derive(Clone, Debug)]
struct ParsedPage {
    /// Modification time of the source file when the page was parsed, or
    /// `None` if it could not be read. `update_garden` skips re-parsing a
    /// page whose on-disk modification time equals this value.
    timestamp: Option<SystemTime>,
    /// Title shown for the page.
    title: String,
    /// HTML rendered from the page's markdown source.
    html: String,
    /// Currently always stored empty by `update_garden` (marked as a todo
    /// there) -- presumably reserved for the page's outgoing links.
    links: Vec<String>,
}
const VERSION: &str = env!("CARGO_PKG_VERSION"); const VERSION: &str = env!("CARGO_PKG_VERSION");
fn main() -> anyhow::Result<()> { fn main() -> anyhow::Result<()> {
@ -77,6 +107,10 @@ fn main() -> anyhow::Result<()> {
.expect("Incorrect bind format."); .expect("Incorrect bind format.");
info!("Starting server at: {}", &bind); info!("Starting server at: {}", &bind);
let mutable_state = web::Data::new(MutableState {
garden_cache: Mutex::new(update_garden(directory, GardenCache::default())?),
});
let state = State { let state = State {
garden_dir: directory.to_path_buf(), garden_dir: directory.to_path_buf(),
index_file: matches.value_of("INDEX_FILE").map(|s| s.to_string()), index_file: matches.value_of("INDEX_FILE").map(|s| s.to_string()),
@ -89,6 +123,7 @@ fn main() -> anyhow::Result<()> {
App::new() App::new()
.wrap(middleware::Logger::default()) .wrap(middleware::Logger::default())
.data(state.clone()) .data(state.clone())
.app_data(mutable_state.clone())
.service(actix_files::Files::new("/static", "templates")) .service(actix_files::Files::new("/static", "templates"))
.service(render) .service(render)
}) })
@ -101,15 +136,117 @@ fn main() -> anyhow::Result<()> {
#[get("{path:.*}")] #[get("{path:.*}")]
async fn render( async fn render(
request: web::HttpRequest, request: web::HttpRequest,
state: web::Data<State>, data: web::Data<State>,
state: web::Data<MutableState>,
path: web::Path<String>, path: web::Path<String>,
) -> Result<HttpResponse, Error> { ) -> Result<HttpResponse, Error> {
let mut files: Vec<PathBuf> = fs::read_dir(&state.garden_dir)? let mut cache = state.garden_cache.lock().unwrap();
*cache = update_garden(&data.garden_dir, (*cache).clone())
.map_err(error::ErrorInternalServerError)?;
// Redirect to index if path is empty.
if path.is_empty() {
let location = match data.index_file.as_ref() {
Some(index_file) => index_file.clone(),
None => cache
.files
.iter()
.filter(|f| f.to_str().unwrap().ends_with(".md"))
.collect::<Vec<&PathBuf>>()
.first()
.unwrap_or(&cache.files.first().unwrap())
.display()
.to_string(),
};
return Ok(HttpResponse::Found()
.header(http::header::LOCATION, location.as_str())
.finish());
}
let full_path = data.garden_dir.join(path.as_str());
// Redirect to ".md" version if requested path matches a .md file without the extension
if !full_path.exists() && full_path.extension().is_none() {
let md_path = format!("{}.md", path.to_string());
if Path::new(&md_path).exists() {
return Ok(HttpResponse::Found()
.header(http::header::LOCATION, md_path)
.finish());
}
}
if full_path.exists() && !path.ends_with(".md") {
return Ok(NamedFile::open(full_path)?.into_response(&request)?);
}
let page = cache.pages.get(&full_path);
let mut context = Context::new();
context.insert("version", VERSION);
context.insert(
"garden_title",
data.title.as_ref().unwrap_or(&"Digital Garden".to_string()),
);
context.insert("files", &cache.files);
context.insert(
"page_title",
&match page {
Some(page) => page.title.clone(),
None => full_path
.components()
.last()
.unwrap()
.as_os_str()
.to_str()
.unwrap()
.to_string(),
},
);
context.insert(
"content",
&match page {
Some(page) => page.html.clone(),
None => data
.tera
.render("_not_found.html", &Context::new())
.map_err(ErrorInternalServerError)?,
},
);
context.insert(
"mtime",
&match page {
Some(page) => {
if let Some(timestamp) = page.timestamp {
let mtime: DateTime<Local> = timestamp.into();
Some(mtime.format("%c").to_string())
} else {
None
}
}
None => None,
},
);
Ok(HttpResponse::Ok().body(
data.tera
.render("main.html", &context)
.map_err(ErrorInternalServerError)?,
))
}
fn update_garden<P: AsRef<Path>>(
garden_path: P,
current: GardenCache,
) -> anyhow::Result<GardenCache> {
let garden_path = garden_path.as_ref().clone();
let mut files: Vec<PathBuf> = fs::read_dir(&garden_path)?
.filter_map(|entry| { .filter_map(|entry| {
if let Ok(entry) = entry { if let Ok(entry) = entry {
let path = entry.path(); let path = entry.path();
if path.is_file() { if path.is_file() {
let stripped_path = path.strip_prefix(&state.garden_dir).unwrap().to_path_buf(); let stripped_path = path.strip_prefix(&garden_path).unwrap().to_path_buf();
if !stripped_path.to_str().unwrap().starts_with(".") { if !stripped_path.to_str().unwrap().starts_with(".") {
return Some(stripped_path); return Some(stripped_path);
} }
@ -121,54 +258,42 @@ async fn render(
files.sort(); files.sort();
if files.is_empty() { if files.is_empty() {
return Err(error::ErrorNotFound("Garden is empty.")); return Err(anyhow!("Garden is empty."));
} }
if path.is_empty() { let mut pages = current.pages.clone();
let location = match state.index_file.as_ref() {
Some(index_file) => index_file.clone(),
None => files
.iter()
.filter(|f| f.to_str().unwrap().ends_with(".md"))
.collect::<Vec<&PathBuf>>()
.first()
.unwrap_or(&files.first().unwrap())
.display()
.to_string(),
};
return Ok(HttpResponse::Found() let markdown_paths = files
.header(http::header::LOCATION, location.as_str()) .iter()
.finish()); .filter(|p| p.to_str().unwrap_or("").ends_with(".md"))
} .map(|p| garden_path.join(p));
for path in markdown_paths {
let full_path = state.garden_dir.join(path.as_str()); trace!("Loading {} into cache...", path.display());
match (full_path.exists(), full_path.extension()) { let mtime = path.metadata().unwrap().modified().ok();
(false, None) => { if let Some(page) = pages.get(&path) {
return Ok(HttpResponse::Found() match (mtime, page.timestamp) {
.header(http::header::LOCATION, format!("{}.md", path.to_string())) (Some(fs_time), Some(last_time)) => {
.finish()); if fs_time == last_time {
continue;
}
}
_ => {}
}
} }
(false, Some(_)) => return Err(error::ErrorNotFound("File not found.")),
_ => {}
}
if !path.ends_with(".md") { let mut file = File::open(&path)?;
Ok(NamedFile::open(full_path)?.into_response(&request)?)
} else {
let mut file = File::open(full_path.clone())?;
let mut file_string = String::new(); let mut file_string = String::new();
file.read_to_string(&mut file_string)?; file.read_to_string(&mut file_string)?;
let markdown_source = preprocess(file_string); let markdown_source = preprocess_markdown(file_string);
let parser = Parser::new_ext(markdown_source.as_str(), Options::all()); let parser = Parser::new_ext(markdown_source.as_str(), Options::all());
let mut html_output = String::new(); let mut html_output = String::new();
html::push_html(&mut html_output, parser); html::push_html(&mut html_output, parser);
// TODO! // TODO!
let h1_regex = Regex::new(r"<h1>([^>]+)</h1>").unwrap(); let h1_regex = Regex::new(r"<h1>([^>]+)</h1>").unwrap();
let page_title = match h1_regex.captures(&html_output) { let title = match h1_regex.captures(&html_output) {
Some(h1_match) => h1_match.get(1).unwrap().as_str(), Some(h1_match) => h1_match.get(1).unwrap().as_str(),
_ => full_path _ => &path
.components() .components()
.last() .last()
.unwrap() .unwrap()
@ -177,38 +302,23 @@ async fn render(
.unwrap_or("???"), .unwrap_or("???"),
}; };
let mtime: Option<DateTime<Local>> = match file.metadata() { pages.insert(
Ok(metadata) => metadata.modified().ok().map(|mtime| mtime.into()), path.clone(),
_ => None, ParsedPage {
}; timestamp: mtime,
html: html_output.clone(),
let mut context = Context::new(); title: String::from(title),
context.insert( links: vec![], // todo!,
"garden_title", },
state
.title
.as_ref()
.unwrap_or(&"Digital Garden".to_string()),
); );
context.insert("page_title", page_title);
context.insert("files", &files);
context.insert("content", &html_output);
context.insert(
"mtime",
&mtime.map_or("???".to_string(), |t| t.format("%c").to_string()),
);
context.insert("version", VERSION);
Ok(HttpResponse::Ok().body(
state
.tera
.render("main.html", &context)
.map_err(ErrorInternalServerError)?,
))
} }
let result = GardenCache { files, pages };
trace!("{:#?}", result);
Ok(result)
} }
fn preprocess(string: String) -> String { fn preprocess_markdown(string: String) -> String {
let double_brackets = Regex::new(r"\[\[(?P<inner>[\w .]+)\]\]").unwrap(); let double_brackets = Regex::new(r"\[\[(?P<inner>[\w .]+)\]\]").unwrap();
let finder = LinkFinder::new(); let finder = LinkFinder::new();

View file

@ -0,0 +1 @@
<div class="message">File does not (yet?) exist!</div>

View file

@ -110,4 +110,9 @@ pre {
padding: 1em; padding: 1em;
overflow-x: auto; overflow-x: auto;
word-wrap: normal; word-wrap: normal;
}
.message {
text-align: center;
padding: 2em;
} }

View file

@ -28,9 +28,11 @@
<main> <main>
{{content | safe}} {{content | safe}}
<footer> {% if mtime %}
Last modified at {{mtime}} <footer>
</footer> Last modified at {{mtime}}
</footer>
{% endif %}
</main> </main>
</body> </body>