From c1bea8284c1ede77547ba366507428f192a5c168 Mon Sep 17 00:00:00 2001 From: Joe Ardent Date: Sun, 4 Feb 2024 13:09:26 -0800 Subject: [PATCH] make import more robust and with fewer duplicates for stars. --- src/imdb_utils.rs | 46 ++++++++++++++++++++++++++------------------- src/import_utils.rs | 13 +++++++++++++ 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/src/imdb_utils.rs b/src/imdb_utils.rs index a959cff..8590146 100644 --- a/src/imdb_utils.rs +++ b/src/imdb_utils.rs @@ -82,7 +82,7 @@ pub async fn import_imdb_data(w2w_db: &SqlitePool, imdb: &SqlitePool, ids: &mut .await .unwrap(); - for batch in iwatches.chunks(2_000) { + for batch in iwatches.chunks(5_000) { let mut tx = w2w_db.acquire().await.unwrap(); let mut tx = tx.begin().await.unwrap(); for iwatch in batch { @@ -114,24 +114,32 @@ async fn add_imdb_stars( for row in principals { let (name_id, cat) = row; - let name_query = - "select nconst, primaryName, birthYear, deathYear from names where nconst = ?"; - let istar: Option = sqlx::query_as(name_query) - .bind(&name_id) - .fetch_optional(imdb) - .await - .unwrap(); - if let Some(star) = istar { - let star = (&star).into(); - let star_id = insert_star(&star, w2w_db).await; - ids.insert(name_id, star_id); - let credit = Credit { - star: star_id, - watch, - credit: Some(cat.to_string()), - }; - insert_credit(&credit, w2w_db).await; - } + let star = if let Some(id) = ids.get(&name_id) { + *id + } else { + let name_query = + "select nconst, primaryName, birthYear, deathYear from names where nconst = ?"; + let istar: Option = sqlx::query_as(name_query) + .bind(&name_id) + .fetch_optional(imdb) + .await + .unwrap(); + if let Some(star) = istar { + let star = (&star).into(); + let star_id = insert_star(&star, w2w_db).await; + ids.insert(name_id, star_id); + star_id + } else { + continue; + } + }; + + let credit = Credit { + star, + watch, + credit: Some(cat.to_string()), + }; + insert_credit(&credit, w2w_db).await; } } diff --git a/src/import_utils.rs b/src/import_utils.rs index c214631..920fc7d 100644 --- a/src/import_utils.rs +++ b/src/import_utils.rs @@ -47,6 +47,19 @@ pub async fn insert_credit(credit: &Credit, db: &mut SqliteConnection) { .bind(credit.credit.as_deref()) .execute(db) .await + .map(|_| ()) + .or_else(|e| match e { + sqlx::Error::Database(ref db) => { + let exit = db.code().unwrap_or_default().parse().unwrap_or(0u32); + // https://www.sqlite.org/rescode.html codes for unique constraint violations: + if exit == 2067 || exit == 1555 { + Ok(()) + } else { + Err(e) + } + } + _ => Err(e), + }) .unwrap(); }