fetch feeds, post to zulip

This commit is contained in:
joe 2025-12-13 12:48:51 -08:00
parent d3aa796412
commit 9f8b6cb177
4 changed files with 266 additions and 19 deletions

1
.gitignore vendored
View file

@ -1,2 +1,3 @@
/target
blogdor.db
secrets

175
Cargo.lock generated
View file

@ -24,7 +24,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17e913097e1a2124b46746c980134e8c954bc17a6a59bb3fde96f088d126dde6"
dependencies = [
"cssparser",
"html5ever",
"html5ever 0.35.0",
"maplit",
"tendril",
"url",
@ -207,15 +207,19 @@ dependencies = [
"axum",
"clap",
"feed-rs",
"html2md",
"justerror",
"rand 0.9.2",
"reqwest",
"serde",
"serde_urlencoded",
"sqlx",
"thiserror",
"thiserror 2.0.17",
"tokio",
"tokio-util",
"tracing",
"tracing-subscriber",
"unicode-segmentation",
]
[[package]]
@ -246,6 +250,12 @@ dependencies = [
"shlex",
]
[[package]]
name = "cesu8"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
[[package]]
name = "cfg-if"
version = "1.0.4"
@ -312,6 +322,16 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
[[package]]
name = "combine"
version = "4.6.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd"
dependencies = [
"bytes",
"memchr",
]
[[package]]
name = "concurrent-queue"
version = "2.5.0"
@ -802,6 +822,34 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "html2md"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cff9891f2e0d9048927fbdfc28b11bf378f6a93c7ba70b23d0fbee9af6071b4"
dependencies = [
"html5ever 0.27.0",
"jni",
"lazy_static",
"markup5ever_rcdom",
"percent-encoding",
"regex",
]
[[package]]
name = "html5ever"
version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4"
dependencies = [
"log",
"mac",
"markup5ever 0.12.1",
"proc-macro2",
"quote",
"syn 2.0.111",
]
[[package]]
name = "html5ever"
version = "0.35.0"
@ -809,7 +857,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55d958c2f74b664487a2035fe1dadb032c48718a03b63f3ab0b8537db8549ed4"
dependencies = [
"log",
"markup5ever",
"markup5ever 0.35.0",
"match_token",
]
@ -1103,6 +1151,26 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "jni"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6df18c2e3db7e453d3c6ac5b3e9d5182664d28788126d39b91f2d1e22b017ec"
dependencies = [
"cesu8",
"combine",
"jni-sys",
"log",
"thiserror 1.0.69",
"walkdir",
]
[[package]]
name = "jni-sys"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
[[package]]
name = "js-sys"
version = "0.3.83"
@ -1206,6 +1274,20 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]]
name = "markup5ever"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45"
dependencies = [
"log",
"phf",
"phf_codegen",
"string_cache",
"string_cache_codegen",
"tendril",
]
[[package]]
name = "markup5ever"
version = "0.35.0"
@ -1217,6 +1299,18 @@ dependencies = [
"web_atoms",
]
[[package]]
name = "markup5ever_rcdom"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edaa21ab3701bfee5099ade5f7e1f84553fd19228cf332f13cd6e964bf59be18"
dependencies = [
"html5ever 0.27.0",
"markup5ever 0.12.1",
"tendril",
"xml5ever",
]
[[package]]
name = "match_token"
version = "0.35.0"
@ -1841,6 +1935,15 @@ version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "schannel"
version = "0.1.28"
@ -2092,7 +2195,7 @@ dependencies = [
"serde_json",
"sha2",
"smallvec",
"thiserror",
"thiserror 2.0.17",
"tokio",
"tokio-stream",
"tracing",
@ -2175,7 +2278,7 @@ dependencies = [
"smallvec",
"sqlx-core",
"stringprep",
"thiserror",
"thiserror 2.0.17",
"tracing",
"whoami",
]
@ -2213,7 +2316,7 @@ dependencies = [
"smallvec",
"sqlx-core",
"stringprep",
"thiserror",
"thiserror 2.0.17",
"tracing",
"whoami",
]
@ -2238,7 +2341,7 @@ dependencies = [
"serde",
"serde_urlencoded",
"sqlx-core",
"thiserror",
"thiserror 2.0.17",
"tracing",
"url",
]
@ -2384,13 +2487,33 @@ dependencies = [
"utf-8",
]
[[package]]
name = "thiserror"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
dependencies = [
"thiserror-impl 1.0.69",
]
[[package]]
name = "thiserror"
version = "2.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8"
dependencies = [
"thiserror-impl",
"thiserror-impl 2.0.17",
]
[[package]]
name = "thiserror-impl"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.111",
]
[[package]]
@ -2656,6 +2779,12 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d"
[[package]]
name = "unicode-segmentation"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]]
name = "untrusted"
version = "0.9.0"
@ -2721,6 +2850,16 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "want"
version = "0.3.1"
@ -2841,6 +2980,15 @@ dependencies = [
"wasite",
]
[[package]]
name = "winapi-util"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "windows-core"
version = "0.62.2"
@ -3169,6 +3317,17 @@ version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9"
[[package]]
name = "xml5ever"
version = "0.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9bbb26405d8e919bc1547a5aa9abc95cbfa438f04844f5fdd9dc7596b748bf69"
dependencies = [
"log",
"mac",
"markup5ever 0.12.1",
]
[[package]]
name = "yoke"
version = "0.8.1"

View file

@ -8,14 +8,18 @@ edition = "2024"
axum = { version = "0.8.7", default-features = false, features = ["http1", "http2", "json", "macros", "tokio"] }
clap = { version = "4.5.53", features = ["derive"] }
feed-rs = { version = "2.3.1", features = ["sanitize"] }
html2md = "0.2.15"
justerror = "1.1.0"
reqwest = "0.12.24"
serde = { version = "1.0.228", features = ["derive"] }
serde_urlencoded = "0.7.1"
sqlx = { version = "0.8.6", default-features = false, features = ["chrono", "derive", "macros", "migrate", "runtime-tokio", "sqlite", "tls-none"] }
thiserror = "2.0.17"
tokio = { version = "1.48.0", features = ["full"] }
tokio-util = "0.7.17"
tracing = "0.1.43"
tracing-subscriber = { version = "0.3.22", features = ["env-filter"] }
unicode-segmentation = "1.12.0"
[dev-dependencies]
rand = "0.9.2"

View file

@ -1,10 +1,7 @@
use std::time::Duration;
use feed_rs::parser::parse;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
pub mod server;
use reqwest::StatusCode;
use sqlx::{
SqlitePool,
sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions},
@ -12,6 +9,10 @@ use sqlx::{
};
use tokio::task::{JoinHandle, JoinSet};
use tokio_util::{bytes::Buf, sync::CancellationToken};
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
use unicode_segmentation::UnicodeSegmentation;
pub mod server;
const MAX_CONNS: u32 = 200;
const MIN_CONNS: u32 = 5;
@ -37,6 +38,16 @@ pub struct FeedEntry {
body: Option<String>,
}
/// Form payload for a single message posted to Zulip.
///
/// `check_feeds` builds one of these per feed entry, encodes it with
/// `serde_urlencoded`, and sends it as the body of an
/// `application/x-www-form-urlencoded` POST to the `ZULIP_URL` endpoint.
/// The `'s` lifetime lets `typ` and `topic` borrow from the caller instead
/// of allocating.
#[derive(Debug, Default, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
struct ZulipMessage<'s> {
/// Numeric destination id; the caller fills this from `ZULIP_CHANNEL`.
to: u32,
/// Message kind, serialized under the key `type` (`type` is a reserved
/// word in Rust, hence the rename). The caller passes `"stream"`.
#[serde(rename = "type")]
typ: &'s str,
/// Text of the message as it should appear in Zulip.
content: String,
/// Topic to post under; left out of the encoded form entirely when `None`.
/// The caller passes the feed entry's title.
#[serde(skip_serializing_if = "Option::is_none")]
topic: Option<&'s str>,
}
impl BlogdorTheAggregator {
pub async fn new() -> Self {
tracing_subscriber::registry()
@ -96,6 +107,15 @@ async fn check_feeds(db: &SqlitePool, client: &reqwest::Client) {
}
};
let endpoint = std::env::var("ZULIP_URL").expect("ZULIP_URL must be set");
let channel_id: u32 = std::env::var("ZULIP_CHANNEL")
.expect("ZULIP_CHANNEL must be set")
.parse()
.expect("ZULIP_CHANNEL must be an integer");
let email = std::env::var("BLOGDOR_EMAIL").expect("BLOGDOR_EMAIL must be set");
let password = std::env::var("ZULIP_TOKEN").expect("ZULIP_TOKEN must be set");
let mut handles = JoinSet::new();
for feed in feeds {
handles.spawn(check_feed(db.clone(), feed.id, client.clone(), feed.url));
@ -110,10 +130,61 @@ async fn check_feeds(db: &SqlitePool, client: &reqwest::Client) {
Err(s) => {
tracing::warn!("could not fetch feed: {s}")
}
Ok(posts) => {
// send to zulip
for post in posts {
tracing::debug!("{post:?}");
Ok(None) => {}
Ok(Some(posts)) => {
let FeedEntry { feed_id, .. } = posts.last().unwrap();
let mut success = true;
for post in posts.iter() {
let body = post
.body
.iter()
.next()
.cloned()
.unwrap_or("Blogdor Says: NO BODY!".to_string());
let content = format!(
"{body}\n\n---\noriginally posted to {}, on {}",
post.url, post.published
);
let msg = ZulipMessage {
to: channel_id,
typ: "stream",
content,
topic: Some(&post.title),
};
let msg = serde_urlencoded::to_string(msg).expect("serialize msg");
dbg!(&msg);
match client
.post(&endpoint)
.basic_auth(&email, Some(&password))
.body(msg)
.header("Content-Type", "application/x-www-form-urlencoded")
.send()
.await
{
Err(e) => {
tracing::error!("got error sending to zulip: {e}");
success = false;
}
Ok(r) => {
if r.status() == StatusCode::OK {
success &= true;
} else {
success = false;
}
}
}
tokio::time::sleep(Duration::from_millis(200)).await;
}
if success
&& let Err(e) = sqlx::query!(
"insert into runs (feed, succeeded) values (?, true)",
feed_id
)
.execute(db)
.await
{
tracing::error!("could not insert run for {feed_id}, got {e}");
}
}
}
@ -125,7 +196,7 @@ async fn check_feed(
feed_id: i64,
client: reqwest::Client,
url: String,
) -> Result<Vec<FeedEntry>, String> {
) -> Result<Option<Vec<FeedEntry>>, String> {
let rec = sqlx::query!(
"select date_time from runs where succeeded = true and feed = ? order by id desc limit 1",
feed_id
@ -165,13 +236,25 @@ async fn check_feed(
published: post.published.unwrap_or(now),
received: now,
feed_description: feed.description.to_owned().map(|d| d.content),
body: post.content.and_then(|c| c.body),
body: post.content.and_then(|c| {
c.body.map(|f| {
let s = html2md::parse_html(&f)
.graphemes(false)
.take(500)
.collect::<String>();
s.to_string()
})
}),
};
out.push(entry);
}
}
Ok(out)
if out.is_empty() {
Ok(None)
} else {
Ok(Some(out))
}
}
async fn get_db_pool() -> SqlitePool {