don't try to render troublesome tags
This commit is contained in:
parent
837aeb0e22
commit
60b2744a1e
1 changed file with 19 additions and 4 deletions
23
src/lib.rs
23
src/lib.rs
|
|
@ -1,6 +1,7 @@
|
||||||
use std::time::Duration;
|
use std::{collections::HashMap, time::Duration};
|
||||||
|
|
||||||
use feed_rs::parser::parse;
|
use feed_rs::parser::parse;
|
||||||
|
use html2md::{TagHandler, TagHandlerFactory, dummy::DummyHandler};
|
||||||
use reqwest::{Client, Response, StatusCode};
|
use reqwest::{Client, Response, StatusCode};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use server::ServerState;
|
use server::ServerState;
|
||||||
|
|
@ -17,7 +18,7 @@ mod db;
|
||||||
pub mod server;
|
pub mod server;
|
||||||
|
|
||||||
const ZULIP_INTERVAL: Duration = Duration::from_millis(250);
|
const ZULIP_INTERVAL: Duration = Duration::from_millis(250);
|
||||||
const ZULIP_MESSAGE_CUTOFF: usize = 700;
|
const ZULIP_MESSAGE_CUTOFF: usize = 500;
|
||||||
|
|
||||||
const LAST_FETCHED: DateTime<Utc> = DateTime::from_timestamp_nanos(0);
|
const LAST_FETCHED: DateTime<Utc> = DateTime::from_timestamp_nanos(0);
|
||||||
|
|
||||||
|
|
@ -187,7 +188,7 @@ impl BlogdorTheAggregator {
|
||||||
let url = feed.url;
|
let url = feed.url;
|
||||||
let last_run = feed.last_run;
|
let last_run = feed.last_run;
|
||||||
let last = last_run.posted.unwrap_or(LAST_FETCHED);
|
let last = last_run.posted.unwrap_or(LAST_FETCHED);
|
||||||
handles.spawn(check_feed(self.client.clone(), id, url, last, feed.owner));
|
handles.spawn(check_feed(self.client(), id, url, last, feed.owner));
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut feed_results = Vec::new();
|
let mut feed_results = Vec::new();
|
||||||
|
|
@ -562,6 +563,12 @@ async fn check_feed(
|
||||||
let now = Utc::now();
|
let now = Utc::now();
|
||||||
let mut feed = fetch_and_parse_feed(&url, &client).await?;
|
let mut feed = fetch_and_parse_feed(&url, &client).await?;
|
||||||
let mut entries = Vec::new();
|
let mut entries = Vec::new();
|
||||||
|
let mut handlers = HashMap::new();
|
||||||
|
for tag in ["a", "sub", "super", "sup", "pre"].iter() {
|
||||||
|
let dummy: Box<dyn TagHandlerFactory> = Box::new(Dummy);
|
||||||
|
handlers.insert(tag.to_string(), dummy);
|
||||||
|
}
|
||||||
|
|
||||||
feed.entries.sort_by_key(|e| std::cmp::Reverse(e.posted()));
|
feed.entries.sort_by_key(|e| std::cmp::Reverse(e.posted()));
|
||||||
for post in feed.entries.into_iter().take(5) {
|
for post in feed.entries.into_iter().take(5) {
|
||||||
if post.posted().unwrap_or(LAST_FETCHED) > last_fetched {
|
if post.posted().unwrap_or(LAST_FETCHED) > last_fetched {
|
||||||
|
|
@ -585,7 +592,7 @@ async fn check_feed(
|
||||||
feed_description: feed.description.to_owned().map(|d| d.content),
|
feed_description: feed.description.to_owned().map(|d| d.content),
|
||||||
body: post.content.and_then(|c| {
|
body: post.content.and_then(|c| {
|
||||||
c.body.map(|f| {
|
c.body.map(|f| {
|
||||||
let s = html2md::parse_html(&f)
|
let s = html2md::parse_html_custom(&f, &handlers)
|
||||||
.graphemes(false)
|
.graphemes(false)
|
||||||
.take(ZULIP_MESSAGE_CUTOFF)
|
.take(ZULIP_MESSAGE_CUTOFF)
|
||||||
.collect::<String>();
|
.collect::<String>();
|
||||||
|
|
@ -618,5 +625,13 @@ async fn fetch_and_parse_feed(url: &str, client: &Client) -> Result<feed_rs::mod
|
||||||
parse(feed.reader()).map_err(|e| format!("could not parse feed from {url}, got {e}"))
|
parse(feed.reader()).map_err(|e| format!("could not parse feed from {url}, got {e}"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// So Zulip doesn't choke on weird tags: `Dummy` is the factory that `check_feed`
// registers for each of "a", "sub", "super", "sup" and "pre" in the handler map it
// passes to `html2md::parse_html_custom`.
// NOTE(review): `DummyHandler` presumably emits no markdown for the tag itself —
// confirm against the html2md documentation.
|
||||||
|
struct Dummy;
|
||||||
|
impl TagHandlerFactory for Dummy {
|
||||||
|
// Returns a newly boxed `DummyHandler` on every call; `Dummy` itself holds no state.
fn instantiate(&self) -> Box<dyn TagHandler> {
|
||||||
|
Box::new(DummyHandler)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test;
|
mod test;
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue