pique/_experiments/2024-03-02-database-benchmark/benches/db.rs
Nicole Tietz-Sokolskaya 77d4ebb371 Add ADR for our choice of SQLite as our primary database,
backed up by experiments demonstrating that SQLite will meet all of our
requirements.

This also introduces ADRs in the repo, and adds a README in preparation
for making the repository public.
2024-03-16 11:12:46 -04:00


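//! Criterion benchmarks comparing random point-read performance for SQLite
//! and Postgres over a roughly 3 GiB data set of randomly generated pages.
//!
//! Presumably run via `cargo bench`, assuming the standard Criterion harness
//! configuration in this crate's Cargo.toml.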
use std::sync::Arc;
use std::time::Duration;

use criterion::async_executor::{AsyncExecutor, FuturesExecutor};
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use rand::distributions::{Distribution, Uniform};
use sea_orm::{prelude::*, Condition, ConnectOptions, Database};

use bench::data::random_entities;
use entity::prelude::*;
use migration::{Migrator, MigratorTrait};
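
/// Look up five pages by random id in `[0, count)`, exercising point reads
/// against whichever backend `db` is connected to.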
async fn load_row(db: &DatabaseConnection, count: &i32) {
    let mut rng = rand::thread_rng();
    let ids: Vec<i32> = Uniform::new(0, *count)
        .sample_iter(&mut rng)
        .take(5)
        .collect();
    let _ = Page::find()
        .filter(Condition::all().add(entity::page::Column::Id.is_in(ids)))
        .all(db)
        .await
        .unwrap();
    //let _ = Page::find_by_id(id).one(db).await.unwrap().unwrap();
}
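
/// Connect to `db_url`, rebuild the schema from scratch via the migrations,
/// and bulk-insert `dcount` randomly generated pages of `dsize` bytes each.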
async fn setup_db(
    db_url: &str,
    dsize: usize,
    dcount: usize,
) -> anyhow::Result<Arc<DatabaseConnection>> {
    let mut opts = ConnectOptions::new(db_url);
    opts.connect_timeout(Duration::from_secs(2));
    opts.max_connections(50);
    let db = Database::connect(opts).await?;
    Migrator::reset(&db).await?;
    Migrator::refresh(&db).await?;
    match db.get_database_backend() {
        sea_orm::DatabaseBackend::MySql => {
            // MySQL rejects large inserts unless the packet limit is raised.
            let _ = db
                .execute(sea_orm::Statement::from_string(
                    db.get_database_backend(),
                    "SET GLOBAL max_allowed_packet=1073741824;",
                ))
                .await?;
        }
        sea_orm::DatabaseBackend::Postgres => {}
        sea_orm::DatabaseBackend::Sqlite => {}
    };
    // Insert in chunks so no single insert statement carries much more than
    // 32 MiB of document data, capped at 5000 rows per statement. The max(1)
    // guards against a zero chunk count when the data set fits in one chunk.
    let max_per_chunk = 32 * MB;
    let num_chunks = std::cmp::max((dsize * dcount) / max_per_chunk, 1);
    let pages_per_chunk = std::cmp::min(dcount / num_chunks, 5000);
    let pages = random_entities(dcount, dsize);
    for chunk in pages.chunks(pages_per_chunk) {
        let _ = Page::insert_many(chunk.to_vec()).exec(&db).await?;
    }
    Ok(Arc::new(db))
}
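
// Connection strings for the two backends under test, and the byte-size
// units used to scale the generated documents.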
const SQLITE_URL: &str = "sqlite:./database.db?mode=rwc";
const POSTGRES_URL: &str = "postgresql://postgres:password@localhost/postgres";
static KB: usize = 1024;
static MB: usize = 1024 * KB;
static GB: usize = 1024 * MB;
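
/// Benchmark random point reads against SQLite across the selected document
/// sizes, keeping the total data set at roughly 3 GiB.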
fn load_from_sqlite(c: &mut Criterion) {
    let mut group = c.benchmark_group("sqlite");
    //for document_size in [KB, 8 * KB, 64 * KB, 512 * KB, 4 * MB, 32 * MB] {
    for document_size in [8 * KB, 64 * KB] {
        let document_count = 3 * GB / document_size;
        println!(
            "attempting {} documents of size {}",
            document_count, document_size
        );
        let db = FuturesExecutor
            .block_on(setup_db(SQLITE_URL, document_size, document_count))
            .unwrap();
        println!("db setup, about to abuse it");
        // Hard-limit SQLite's heap usage to 1 GiB for the benchmark run, so
        // it cannot simply hold the whole data set in memory.
        FuturesExecutor.block_on(async {
            let res = db
                .execute_unprepared("PRAGMA hard_heap_limit = 1073741824")
                .await
                .unwrap();
            println!("{:?}", res);
        });
        group.throughput(Throughput::Bytes(document_size as u64));
        group.bench_with_input(
            BenchmarkId::from_parameter(document_size),
            &(db, document_size, document_count as i32),
            |b, (db, _size, count)| {
                b.to_async(FuturesExecutor).iter(|| async {
                    load_row(db, count).await;
                });
            },
        );
    }
    group.finish();
}
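
/// The same random-read benchmark, run against a local Postgres instance.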
fn load_from_postgres(c: &mut Criterion) {
    let mut group = c.benchmark_group("postgres");
    //for document_size in [KB, 8 * KB, 64 * KB, 512 * KB, 4 * MB, 32 * MB] {
    for document_size in [8 * KB, 64 * KB] {
        let document_count = 3 * GB / document_size;
        let db = FuturesExecutor
            .block_on(setup_db(POSTGRES_URL, document_size, document_count))
            .unwrap();
        group.throughput(Throughput::Bytes(document_size as u64));
        group.bench_with_input(
            BenchmarkId::from_parameter(document_size),
            &(db, document_size, document_count as i32),
            |b, (db, _size, count)| {
                b.to_async(FuturesExecutor).iter(|| async {
                    load_row(db, count).await;
                });
            },
        );
    }
    group.finish();
}
criterion_group!(benches, load_from_postgres, load_from_sqlite,);
criterion_main!(benches);