Make the import more robust and create fewer duplicate stars.

This commit is contained in:
Joe Ardent 2024-02-04 13:09:26 -08:00
parent a46a2e8847
commit c1bea8284c
2 changed files with 40 additions and 19 deletions

View file

@ -82,7 +82,7 @@ pub async fn import_imdb_data(w2w_db: &SqlitePool, imdb: &SqlitePool, ids: &mut
.await
.unwrap();
for batch in iwatches.chunks(2_000) {
for batch in iwatches.chunks(5_000) {
let mut tx = w2w_db.acquire().await.unwrap();
let mut tx = tx.begin().await.unwrap();
for iwatch in batch {
@ -114,24 +114,32 @@ async fn add_imdb_stars(
for row in principals {
let (name_id, cat) = row;
let name_query =
"select nconst, primaryName, birthYear, deathYear from names where nconst = ?";
let istar: Option<ImdbStar> = sqlx::query_as(name_query)
.bind(&name_id)
.fetch_optional(imdb)
.await
.unwrap();
if let Some(star) = istar {
let star = (&star).into();
let star_id = insert_star(&star, w2w_db).await;
ids.insert(name_id, star_id);
let credit = Credit {
star: star_id,
watch,
credit: Some(cat.to_string()),
};
insert_credit(&credit, w2w_db).await;
}
let star = if let Some(id) = ids.get(&name_id) {
*id
} else {
let name_query =
"select nconst, primaryName, birthYear, deathYear from names where nconst = ?";
let istar: Option<ImdbStar> = sqlx::query_as(name_query)
.bind(&name_id)
.fetch_optional(imdb)
.await
.unwrap();
if let Some(star) = istar {
let star = (&star).into();
let star_id = insert_star(&star, w2w_db).await;
ids.insert(name_id, star_id);
star_id
} else {
continue;
}
};
let credit = Credit {
star,
watch,
credit: Some(cat.to_string()),
};
insert_credit(&credit, w2w_db).await;
}
}

View file

@ -47,6 +47,19 @@ pub async fn insert_credit(credit: &Credit, db: &mut SqliteConnection) {
.bind(credit.credit.as_deref())
.execute(db)
.await
.map(|_| ())
.or_else(|e| match e {
sqlx::Error::Database(ref db) => {
let exit = db.code().unwrap_or_default().parse().unwrap_or(0u32);
// https://www.sqlite.org/rescode.html codes for unique constraint violations:
if exit == 2067 || exit == 1555 {
Ok(())
} else {
Err(e)
}
}
_ => Err(e),
})
.unwrap();
}