From 9f8b6cb1774264df8a6fc3a31ef403f979db4eba Mon Sep 17 00:00:00 2001 From: joe Date: Sat, 13 Dec 2025 12:48:51 -0800 Subject: [PATCH] fetch feeds, post to zulip --- .gitignore | 1 + Cargo.lock | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++--- Cargo.toml | 4 ++ src/lib.rs | 105 ++++++++++++++++++++++++++++---- 4 files changed, 266 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index bcbd52a..789dd83 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target blogdor.db +secrets diff --git a/Cargo.lock b/Cargo.lock index 394f9a9..228e5c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,7 +24,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17e913097e1a2124b46746c980134e8c954bc17a6a59bb3fde96f088d126dde6" dependencies = [ "cssparser", - "html5ever", + "html5ever 0.35.0", "maplit", "tendril", "url", @@ -207,15 +207,19 @@ dependencies = [ "axum", "clap", "feed-rs", + "html2md", "justerror", "rand 0.9.2", "reqwest", + "serde", + "serde_urlencoded", "sqlx", - "thiserror", + "thiserror 2.0.17", "tokio", "tokio-util", "tracing", "tracing-subscriber", + "unicode-segmentation", ] [[package]] @@ -246,6 +250,12 @@ dependencies = [ "shlex", ] +[[package]] +name = "cesu8" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" + [[package]] name = "cfg-if" version = "1.0.4" @@ -312,6 +322,16 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "combine" +version = "4.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "memchr", +] + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -802,6 +822,34 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "html2md" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cff9891f2e0d9048927fbdfc28b11bf378f6a93c7ba70b23d0fbee9af6071b4" +dependencies = [ + "html5ever 0.27.0", + "jni", + "lazy_static", + "markup5ever_rcdom", + "percent-encoding", + "regex", +] + +[[package]] +name = "html5ever" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4" +dependencies = [ + "log", + "mac", + "markup5ever 0.12.1", + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "html5ever" version = "0.35.0" @@ -809,7 +857,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55d958c2f74b664487a2035fe1dadb032c48718a03b63f3ab0b8537db8549ed4" dependencies = [ "log", - "markup5ever", + "markup5ever 0.35.0", "match_token", ] @@ -1103,6 +1151,26 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jni" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6df18c2e3db7e453d3c6ac5b3e9d5182664d28788126d39b91f2d1e22b017ec" +dependencies = [ + "cesu8", + "combine", + "jni-sys", + "log", + "thiserror 1.0.69", + "walkdir", +] + +[[package]] +name = "jni-sys" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" + [[package]] name = "js-sys" version = "0.3.83" @@ -1206,6 +1274,20 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" +[[package]] +name = "markup5ever" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45" +dependencies = [ + "log", + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + [[package]] name = "markup5ever" version = "0.35.0" @@ -1217,6 +1299,18 @@ dependencies = [ "web_atoms", ] +[[package]] +name = "markup5ever_rcdom" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edaa21ab3701bfee5099ade5f7e1f84553fd19228cf332f13cd6e964bf59be18" +dependencies = [ + "html5ever 0.27.0", + "markup5ever 0.12.1", + "tendril", + "xml5ever", +] + [[package]] name = "match_token" version = "0.35.0" @@ -1841,6 +1935,15 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.28" @@ -2092,7 +2195,7 @@ dependencies = [ "serde_json", "sha2", "smallvec", - "thiserror", + "thiserror 2.0.17", "tokio", "tokio-stream", "tracing", @@ -2175,7 +2278,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror", + "thiserror 2.0.17", "tracing", "whoami", ] @@ -2213,7 +2316,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror", + "thiserror 2.0.17", "tracing", "whoami", ] @@ -2238,7 +2341,7 @@ dependencies = [ "serde", "serde_urlencoded", "sqlx-core", - "thiserror", + "thiserror 2.0.17", "tracing", "url", ] @@ -2384,13 +2487,33 @@ dependencies = [ "utf-8", ] +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + [[package]] name = "thiserror" version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" dependencies = [ - "thiserror-impl", + "thiserror-impl 2.0.17", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", ] [[package]] @@ -2656,6 +2779,12 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "untrusted" version = "0.9.0" @@ -2721,6 +2850,16 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -2841,6 +2980,15 @@ dependencies = [ "wasite", ] +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "windows-core" version = "0.62.2" @@ -3169,6 +3317,17 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "xml5ever" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bbb26405d8e919bc1547a5aa9abc95cbfa438f04844f5fdd9dc7596b748bf69" +dependencies = [ + "log", + "mac", + "markup5ever 0.12.1", +] + [[package]] name = "yoke" version = "0.8.1" diff --git a/Cargo.toml b/Cargo.toml index 7edc091..8d626df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,14 +8,18 @@ edition = "2024" axum = { version = "0.8.7", default-features = false, features = ["http1", "http2", "json", "macros", "tokio"] } clap = { version = "4.5.53", features = ["derive"] } feed-rs = { version = "2.3.1", features = ["sanitize"] } +html2md = "0.2.15" justerror = "1.1.0" reqwest = "0.12.24" +serde = { version = "1.0.228", features = ["derive"] } +serde_urlencoded = "0.7.1" sqlx = { version = "0.8.6", default-features = false, features = ["chrono", "derive", "macros", "migrate", "runtime-tokio", "sqlite", "tls-none"] } thiserror = "2.0.17" tokio = { version = "1.48.0", features = ["full"] } tokio-util = "0.7.17" tracing = "0.1.43" tracing-subscriber = { version = "0.3.22", features = ["env-filter"] } +unicode-segmentation = "1.12.0" [dev-dependencies] rand = "0.9.2" diff --git a/src/lib.rs b/src/lib.rs index 17841df..159334a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,10 +1,7 @@ use std::time::Duration; use feed_rs::parser::parse; -use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; - -pub mod server; - +use reqwest::StatusCode; use sqlx::{ SqlitePool, sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions}, @@ -12,6 +9,10 @@ use sqlx::{ }; use tokio::task::{JoinHandle, JoinSet}; use tokio_util::{bytes::Buf, sync::CancellationToken}; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; +use unicode_segmentation::UnicodeSegmentation; + +pub mod server; const MAX_CONNS: u32 = 200; const MIN_CONNS: u32 = 5; @@ -37,6 +38,16 @@ pub struct FeedEntry { body: Option, } +#[derive(Debug, Default, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +struct ZulipMessage<'s> { + to: u32, + #[serde(rename = "type")] + typ: &'s str, + content: String, + #[serde(skip_serializing_if = "Option::is_none")] + topic: Option<&'s str>, +} + impl BlogdorTheAggregator { pub async fn new() -> Self { tracing_subscriber::registry() @@ -96,6 +107,15 @@ async fn check_feeds(db: &SqlitePool, client: &reqwest::Client) { } }; + let endpoint = std::env::var("ZULIP_URL").expect("ZULIP_URL must be set"); + let channel_id: u32 = std::env::var("ZULIP_CHANNEL") + .expect("ZULIP_CHANNEL must be set") + .parse() + .expect("ZULIP_CHANNEL must be an integer"); + + let email = std::env::var("BLOGDOR_EMAIL").expect("BLOGDOR_EMAIL must be set"); + let password = std::env::var("ZULIP_TOKEN").expect("ZULIP_TOKEN must be set"); + let mut handles = JoinSet::new(); for feed in feeds { handles.spawn(check_feed(db.clone(), feed.id, client.clone(), feed.url)); @@ -110,10 +130,61 @@ async fn check_feeds(db: &SqlitePool, client: &reqwest::Client) { Err(s) => { tracing::warn!("could not fetch feed: {s}") } - Ok(posts) => { - // send to zulip - for post in posts { - tracing::debug!("{post:?}"); + Ok(None) => {} + Ok(Some(posts)) => { + let FeedEntry { feed_id, .. } = posts.last().unwrap(); + let mut success = true; + for post in posts.iter() { + let body = post + .body + .iter() + .next() + .cloned() + .unwrap_or("Blogdor Says: NO BODY!".to_string()); + let content = format!( + "{body}\n\n---\noriginally posted to {}, on {}", + post.url, post.published + ); + let msg = ZulipMessage { + to: channel_id, + typ: "stream", + content, + topic: Some(&post.title), + }; + let msg = serde_urlencoded::to_string(msg).expect("serialize msg"); + dbg!(&msg); + + match client + .post(&endpoint) + .basic_auth(&email, Some(&password)) + .body(msg) + .header("Content-Type", "application/x-www-form-urlencoded") + .send() + .await + { + Err(e) => { + tracing::error!("got error sending to zulip: {e}"); + success = false; + } + Ok(r) => { + if r.status() == StatusCode::OK { + success &= true; + } else { + success = false; + } + } + } + tokio::time::sleep(Duration::from_millis(200)).await; + } + if success + && let Err(e) = sqlx::query!( + "insert into runs (feed, succeeded) values (?, true)", + feed_id + ) + .execute(db) + .await + { + tracing::error!("could not insert run for {feed_id}, got {e}"); } } } @@ -125,7 +196,7 @@ async fn check_feed( feed_id: i64, client: reqwest::Client, url: String, -) -> Result, String> { +) -> Result>, String> { let rec = sqlx::query!( "select date_time from runs where succeeded = true and feed = ? order by id desc limit 1", feed_id @@ -165,13 +236,25 @@ async fn check_feed( published: post.published.unwrap_or(now), received: now, feed_description: feed.description.to_owned().map(|d| d.content), - body: post.content.and_then(|c| c.body), + body: post.content.and_then(|c| { + c.body.map(|f| { + let s = html2md::parse_html(&f) + .graphemes(false) + .take(500) + .collect::(); + s.to_string() + }) + }), }; out.push(entry); } } - Ok(out) + if out.is_empty() { + Ok(None) + } else { + Ok(Some(out)) + } } async fn get_db_pool() -> SqlitePool {