diff --git a/src/lib.rs b/src/lib.rs
index e55ca09..9bbd05f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,6 +1,7 @@
-use std::time::Duration;
+use std::{collections::HashMap, time::Duration};
 use feed_rs::parser::parse;
+use html2md::{TagHandler, TagHandlerFactory, dummy::DummyHandler};
 use reqwest::{Client, Response, StatusCode};
 use serde::{Deserialize, Serialize};
 use server::ServerState;
@@ -17,7 +18,7 @@
 mod db;
 pub mod server;
 
 const ZULIP_INTERVAL: Duration = Duration::from_millis(250);
-const ZULIP_MESSAGE_CUTOFF: usize = 700;
+const ZULIP_MESSAGE_CUTOFF: usize = 500;
 
 const LAST_FETCHED: DateTime<Utc> = DateTime::from_timestamp_nanos(0);
@@ -187,7 +188,7 @@ impl BlogdorTheAggregator {
             let url = feed.url;
             let last_run = feed.last_run;
             let last = last_run.posted.unwrap_or(LAST_FETCHED);
-            handles.spawn(check_feed(self.client.clone(), id, url, last, feed.owner));
+            handles.spawn(check_feed(self.client(), id, url, last, feed.owner));
         }
 
         let mut feed_results = Vec::new();
@@ -562,6 +563,12 @@ async fn check_feed(
     let now = Utc::now();
     let mut feed = fetch_and_parse_feed(&url, &client).await?;
     let mut entries = Vec::new();
+    let mut handlers = HashMap::new();
+    for tag in ["a", "sub", "super", "sup", "pre"].iter() {
+        let dummy: Box<dyn TagHandlerFactory> = Box::new(Dummy);
+        handlers.insert(tag.to_string(), dummy);
+    }
+    feed.entries.sort_by_key(|e| std::cmp::Reverse(e.posted()));
 
     for post in feed.entries.into_iter().take(5) {
         if post.posted().unwrap_or(LAST_FETCHED) > last_fetched {
@@ -585,7 +592,7 @@ async fn check_feed(
                     feed_description: feed.description.to_owned().map(|d| d.content),
                     body: post.content.and_then(|c| {
                         c.body.map(|f| {
-                            let s = html2md::parse_html(&f)
+                            let s = html2md::parse_html_custom(&f, &handlers)
                                 .graphemes(false)
                                 .take(ZULIP_MESSAGE_CUTOFF)
                                 .collect::<String>();
@@ -618,5 +625,13 @@ async fn fetch_and_parse_feed(url: &str, client: &Client) -> Result<Feed> {
     Ok(parse(&bytes[..])?)
 }
+
+struct Dummy;
+
+impl TagHandlerFactory for Dummy {
+    fn instantiate(&self) -> Box<dyn TagHandler> {
+        Box::new(DummyHandler)
+    }
+}
 
 #[cfg(test)]
 mod test;