Merge pull request #3122 from stacks-network/fix/vacuum-on-migrate
fix: incrementally vacuum the MARF DB when migrating

@@ -7,8 +7,23 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE
 
 ## [2.05.0.2.0]
 
+### IMPORTANT! READ THIS FIRST
+
+Please read the following **WARNINGs** in their entirety before upgrading.
+
 WARNING: Please be aware that using this node on chainstate prior to this release will cause
-the node to spend up to 30 minutes migrating the data to a new schema.
+the node to spend **up to 30 minutes** migrating the data to a new schema.
+Depending on the storage medium, this may take even longer.
+
+WARNING: This migration process cannot be interrupted. If it is, the chainstate
+will be **irrecoverably corrupted and require a sync from genesis.**
+
+WARNING: You will need **at least 2x the disk space** for the migration to work.
+This is because a copy of the chainstate will be made in the same directory in
+order to apply the new schema.
+
+It is highly recommended that you **back up your chainstate** before running
+this version of the software on it.
 
 ### Changed
 - The MARF implementation will now defer calculating the root hash of a new trie
@@ -22,9 +37,7 @@ the node to spend up to 30 minutes migrating the data to a new schema.
 - The MARF implementation may now cache trie nodes in RAM if directed to do so
 by an environment variable (#3042).
 - Sortition processing performance has been improved by about an order of
-magnitude, by avoiding a slew of expensive database reads (#3045). WARNING:
-applying this change to an existing chainstate directory will take a few
-minutes when the node starts up.
+magnitude, by avoiding a slew of expensive database reads (#3045).
 - Updated chains coordinator so that before a Stacks block or a burn block is processed,
 an event is sent through the event dispatcher. This fixes #3015.
 - Expose a node's public key and public key hash160 (i.e. what appears in
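
On the backup recommendation above: the migration rewrites the MARF database in place, so the only rollback is a copy of the chainstate working directory taken while the node is stopped. A minimal sketch of such a copy (the `copy_dir_all` helper and both paths are assumptions for illustration; `cp -a` on the stopped node's directory is equivalent):

```rust
use std::fs;
use std::io;
use std::path::Path;

/// Recursively copy `src` into `dst`. Hypothetical helper, not part of the node;
/// symlinks are not handled specially.
fn copy_dir_all(src: &Path, dst: &Path) -> io::Result<()> {
    fs::create_dir_all(dst)?;
    for entry in fs::read_dir(src)? {
        let entry = entry?;
        let target = dst.join(entry.file_name());
        if entry.file_type()?.is_dir() {
            copy_dir_all(&entry.path(), &target)?;
        } else {
            fs::copy(entry.path(), &target)?;
        }
    }
    Ok(())
}

fn main() -> io::Result<()> {
    // Assumed paths -- substitute the node's actual working directory.
    copy_dir_all(
        Path::new("/stacks-node/mainnet"),
        Path::new("/stacks-node/mainnet.bak"),
    )
}
```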

@@ -54,6 +54,7 @@ use crate::chainstate::stacks::index::TrieLeaf;
 use crate::chainstate::stacks::index::{trie_sql, ClarityMarfTrieId, MarfTrieId};
 
 use crate::util_lib::db::sql_pragma;
+use crate::util_lib::db::sql_vacuum;
 use crate::util_lib::db::sqlite_open;
 use crate::util_lib::db::tx_begin_immediate;
 use crate::util_lib::db::tx_busy_handler;

@@ -191,14 +192,92 @@ impl TrieFile {
         Ok(buf)
     }
 
-    /// Copy the trie blobs out of a sqlite3 DB into their own file
-    pub fn export_trie_blobs<T: MarfTrieId>(&mut self, db: &Connection) -> Result<(), Error> {
+    /// Vacuum the database and report the size before and after.
+    ///
+    /// Returns database errors. Filesystem errors from reporting the file size change are masked.
+    fn inner_post_migrate_vacuum(db: &Connection, db_path: &str) -> Result<(), Error> {
+        // for fun, report the shrinkage
+        let size_before_opt = fs::metadata(db_path)
+            .map(|stat| Some(stat.len()))
+            .unwrap_or(None);
+
+        info!("Preemptively vacuuming the database file to free up space after copying trie blobs to a separate file");
+        sql_vacuum(db)?;
+
+        let size_after_opt = fs::metadata(db_path)
+            .map(|stat| Some(stat.len()))
+            .unwrap_or(None);
+
+        match (size_before_opt, size_after_opt) {
+            (Some(sz_before), Some(sz_after)) => {
+                debug!("Shrank DB from {} to {} bytes", sz_before, sz_after);
+            }
+            _ => {}
+        }
+
+        Ok(())
+    }
+
+    /// Vacuum the database, and set up and tear down the necessary environment variables to
+    /// use the same parent directory for scratch space.
+    ///
+    /// Infallible -- any vacuum errors are masked.
+    fn post_migrate_vacuum(db: &Connection, db_path: &str) {
+        // set SQLITE_TMPDIR if it isn't set already
+        let mut set_sqlite_tmpdir = false;
+        let mut old_tmpdir_opt = None;
+        if let Some(parent_path) = Path::new(db_path).parent() {
+            if let Err(_) = env::var("SQLITE_TMPDIR") {
+                debug!(
+                    "Sqlite will store temporary migration state in '{}'",
+                    parent_path.display()
+                );
+                env::set_var("SQLITE_TMPDIR", parent_path);
+                set_sqlite_tmpdir = true;
+            }
+
+            // also set TMPDIR
+            old_tmpdir_opt = env::var("TMPDIR").ok();
+            env::set_var("TMPDIR", parent_path);
+        }
+
+        // don't materialize the error; just warn
+        let res = TrieFile::inner_post_migrate_vacuum(db, db_path);
+        if let Err(e) = res {
+            warn!("Failed to VACUUM the MARF DB post-migration: {:?}", &e);
+        }
+
+        if set_sqlite_tmpdir {
+            debug!("Unset SQLITE_TMPDIR");
+            env::remove_var("SQLITE_TMPDIR");
+        }
+        if let Some(old_tmpdir) = old_tmpdir_opt {
+            debug!("Restore TMPDIR to '{}'", &old_tmpdir);
+            env::set_var("TMPDIR", old_tmpdir);
+        } else {
+            debug!("Unset TMPDIR");
+            env::remove_var("TMPDIR");
+        }
+    }
+
+    /// Copy the trie blobs out of a sqlite3 DB into their own file.
+    /// NOTE: this is *not* thread-safe. Do not call while the DB is being used by another thread.
+    pub fn export_trie_blobs<T: MarfTrieId>(
+        &mut self,
+        db: &Connection,
+        db_path: &str,
+    ) -> Result<(), Error> {
+        if trie_sql::detect_partial_migration(db)? {
+            panic!("PARTIAL MIGRATION DETECTED! This is an irrecoverable error. You will need to restart your node from genesis.");
+        }
+
         let max_block = trie_sql::count_blocks(db)?;
         info!(
             "Migrate {} blocks to external blob storage at {}",
             max_block,
             &self.get_path()
         );
 
         for block_id in 0..(max_block + 1) {
             match trie_sql::is_unconfirmed_block(db, block_id) {
                 Ok(true) => {
@@ -249,6 +328,11 @@ impl TrieFile {
                 }
             }
         }
+
+        TrieFile::post_migrate_vacuum(db, db_path);
+
+        debug!("Mark MARF trie migration of '{}' as finished", db_path);
+        trie_sql::set_migrated(db).expect("FATAL: failed to mark DB as migrated");
         Ok(())
     }
 }
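
`post_migrate_vacuum` pins `SQLITE_TMPDIR` (and `TMPDIR`) to the database's parent directory so that the scratch copy SQLite builds during `VACUUM` lands on the same volume as the DB; this is also where the changelog's 2x-disk-space warning comes from. The PR restores the old values with explicit flags; the same pattern can be phrased as an RAII guard so the restore also runs on early return or panic. A sketch of that alternative (the `EnvGuard` helper is hypothetical, not part of the PR):

```rust
use std::env;

/// Sets an environment variable for the lifetime of the guard, restoring the
/// previous value (or unsetting it) on drop. Hypothetical illustration of the
/// save/restore that post_migrate_vacuum does by hand.
struct EnvGuard {
    key: String,
    old: Option<String>,
}

impl EnvGuard {
    fn set(key: &str, value: &str) -> EnvGuard {
        let old = env::var(key).ok();
        env::set_var(key, value);
        EnvGuard {
            key: key.to_string(),
            old,
        }
    }
}

impl Drop for EnvGuard {
    fn drop(&mut self) {
        match self.old.take() {
            Some(v) => env::set_var(&self.key, v),
            None => env::remove_var(&self.key),
        }
    }
}

fn main() {
    {
        let _tmp = EnvGuard::set("TMPDIR", "/var/db/marf");
        // ... the VACUUM would run here, with scratch space on the DB's volume ...
    }
    // TMPDIR is restored (or unset) once the guard is dropped.
}
```

The flag-based version in the PR behaves the same on the happy path; the guard only adds restoration on panics and early returns.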

@@ -1439,10 +1439,13 @@ impl<T: MarfTrieId> TrieFileStorage<T> {
             if let Some(blobs) = blobs.as_mut() {
                 if TrieFile::exists(&db_path)? {
                     // migrate blobs out of the old DB
-                    blobs.export_trie_blobs::<T>(&db)?;
+                    blobs.export_trie_blobs::<T>(&db, &db_path)?;
                 }
             }
         }
+        if trie_sql::detect_partial_migration(&db)? {
+            panic!("PARTIAL MIGRATION DETECTED! This is an irrecoverable error. You will need to restart your node from genesis.");
+        }
 
         debug!(
             "Opened TrieFileStorage {}; external blobs: {}",
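
Note that the partial-migration check now runs in two places: `export_trie_blobs` refuses to start on top of a half-finished migration, and the open path above re-checks even when no migration was attempted in this process. A node that was killed mid-migration therefore fails loudly on its next start instead of silently serving a corrupted MARF.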
@@ -52,7 +52,9 @@ use crate::chainstate::stacks::index::node::{
 use crate::chainstate::stacks::index::storage::{TrieFileStorage, TrieStorageConnection};
 use crate::chainstate::stacks::index::Error;
 use crate::chainstate::stacks::index::{trie_sql, BlockMap, MarfTrieId};
+use crate::util_lib::db::query_count;
 use crate::util_lib::db::query_row;
 use crate::util_lib::db::query_rows;
 use crate::util_lib::db::sql_pragma;
 use crate::util_lib::db::tx_begin_immediate;
+use crate::util_lib::db::u64_to_sql;
|
||||

@@ -96,11 +98,15 @@ static SQL_MARF_DATA_TABLE_SCHEMA_2: &str = "
 CREATE TABLE IF NOT EXISTS schema_version (
     version INTEGER DEFAULT 1 NOT NULL
 );
+CREATE TABLE IF NOT EXISTS migrated_version (
+    version INTEGER DEFAULT 1 NOT NULL
+);
 ALTER TABLE marf_data ADD COLUMN external_offset INTEGER DEFAULT 0 NOT NULL;
 ALTER TABLE marf_data ADD COLUMN external_length INTEGER DEFAULT 0 NOT NULL;
 CREATE INDEX IF NOT EXISTS index_external_offset ON marf_data(external_offset);
 
 INSERT OR REPLACE INTO schema_version (version) VALUES (2);
+INSERT OR REPLACE INTO migrated_version (version) VALUES (1);
 ";
 
 pub static SQL_MARF_SCHEMA_VERSION: u64 = 2;
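
The two version tables make the migration state observable. Applying the schema above to a scratch database shows the intermediate state a node is in right after the DDL runs but before the blob export is marked complete (a sketch using the `rusqlite` crate, matching the `NO_PARAMS`-era API the source uses; the trimmed stand-in for the schema-1 `marf_data` table is an assumption, the real table has more columns):

```rust
use rusqlite::{Connection, NO_PARAMS};

fn main() -> rusqlite::Result<()> {
    let conn = Connection::open_in_memory()?;
    // Stand-in for the schema-1 marf_data table.
    conn.execute_batch("CREATE TABLE marf_data (block_id INTEGER PRIMARY KEY, unconfirmed INTEGER NOT NULL DEFAULT 0);")?;

    // Apply the schema-2 DDL from SQL_MARF_DATA_TABLE_SCHEMA_2 above.
    conn.execute_batch(
        "CREATE TABLE IF NOT EXISTS schema_version (version INTEGER DEFAULT 1 NOT NULL);
         CREATE TABLE IF NOT EXISTS migrated_version (version INTEGER DEFAULT 1 NOT NULL);
         ALTER TABLE marf_data ADD COLUMN external_offset INTEGER DEFAULT 0 NOT NULL;
         ALTER TABLE marf_data ADD COLUMN external_length INTEGER DEFAULT 0 NOT NULL;
         CREATE INDEX IF NOT EXISTS index_external_offset ON marf_data(external_offset);
         INSERT OR REPLACE INTO schema_version (version) VALUES (2);
         INSERT OR REPLACE INTO migrated_version (version) VALUES (1);",
    )?;

    let schema: i64 = conn.query_row("SELECT version FROM schema_version", NO_PARAMS, |r| r.get(0))?;
    let migrated: i64 = conn.query_row("SELECT version FROM migrated_version", NO_PARAMS, |r| r.get(0))?;
    // Prints "schema=2 migrated=1": the schema is upgraded, but the blob
    // migration itself has not yet been marked complete.
    println!("schema={} migrated={}", schema, migrated);
    Ok(())
}
```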

@@ -127,6 +133,19 @@ fn get_schema_version(conn: &Connection) -> u64 {
     }
 }
 
+/// Get the last schema version before the last attempted migration
+fn get_migrated_version(conn: &Connection) -> u64 {
+    // if the table doesn't exist, then the version is 1.
+    let sql = "SELECT version FROM migrated_version";
+    match conn.query_row(sql, NO_PARAMS, |row| row.get::<_, i64>("version")) {
+        Ok(x) => x as u64,
+        Err(e) => {
+            debug!("Failed to get migrated version: {:?}", &e);
+            1u64
+        }
+    }
+}
+
 /// Migrate the MARF database to the currently-supported schema.
 /// Returns the version of the DB prior to the migration.
 pub fn migrate_tables_if_needed<T: MarfTrieId>(conn: &mut Connection) -> Result<u64, Error> {

@@ -157,6 +176,14 @@ pub fn migrate_tables_if_needed<T: MarfTrieId>(conn: &mut Connection) -> Result<
             }
         }
     }
+    if first_version == SQL_MARF_SCHEMA_VERSION
+        && get_migrated_version(conn) != SQL_MARF_SCHEMA_VERSION
+        && !trie_sql::detect_partial_migration(conn)?
+    {
+        // no migration will need to happen, so stop checking
+        debug!("Marking MARF data as fully-migrated");
+        set_migrated(conn)?;
+    }
     Ok(first_version)
 }
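
Read together with `migrate_tables_if_needed` above, the two version tables form a small state machine; `detect_partial_migration` (below) only falls back to counting rows when the versions disagree. Roughly:

| `schema_version` | `migrated_version` | `marf_data` rows | State |
|------------------|--------------------|------------------|-------|
| 2 | 2 | any | fully migrated; checks short-circuit |
| 2 | 1 | all still in-DB, or all exported | migration pending, in progress, or done but unmarked |
| 2 | 1 | mixture of in-DB and exported | partial migration; the node panics and a genesis sync is required |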

@@ -530,6 +557,39 @@ pub fn get_external_blobs_length(conn: &Connection) -> Result<u64, Error> {
     Ok(max_len)
 }
 
+/// Do we have a partially-migrated database?
+/// Either all tries have offset and length 0, or none do. If we have a mixture, then we're
+/// corrupted.
+pub fn detect_partial_migration(conn: &Connection) -> Result<bool, Error> {
+    let migrated_version = get_migrated_version(conn);
+    let schema_version = get_schema_version(conn);
+    if migrated_version == schema_version {
+        return Ok(false);
+    }
+
+    let num_migrated = query_count(
+        conn,
+        "SELECT COUNT(*) FROM marf_data WHERE external_offset = 0 AND external_length = 0 AND unconfirmed = 0",
+        NO_PARAMS,
+    )?;
+    let num_not_migrated = query_count(
+        conn,
+        "SELECT COUNT(*) FROM marf_data WHERE external_offset != 0 AND external_length != 0 AND unconfirmed = 0",
+        NO_PARAMS,
+    )?;
+    Ok(num_migrated > 0 && num_not_migrated > 0)
+}
+
+/// Mark a migration as completed
+pub fn set_migrated(conn: &Connection) -> Result<(), Error> {
+    conn.execute(
+        "UPDATE migrated_version SET version = ?1",
+        &[&u64_to_sql(SQL_MARF_SCHEMA_VERSION)?],
+    )
+    .map_err(|e| e.into())
+    .and_then(|_| Ok(()))
+}
+
 pub fn get_node_hash_bytes(
     conn: &Connection,
     block_id: u32,
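
The mixture test is easy to exercise in isolation: seed a scratch `marf_data` with one unexported row (offset and length both 0) and one exported row, and both counts come back non-zero. A sketch under the same `rusqlite`-era assumptions as above, with a trimmed hypothetical table:

```rust
use rusqlite::{Connection, NO_PARAMS};

fn main() -> rusqlite::Result<()> {
    let conn = Connection::open_in_memory()?;
    conn.execute_batch(
        "CREATE TABLE marf_data (
             block_id INTEGER PRIMARY KEY,
             unconfirmed INTEGER NOT NULL DEFAULT 0,
             external_offset INTEGER DEFAULT 0 NOT NULL,
             external_length INTEGER DEFAULT 0 NOT NULL);
         INSERT INTO marf_data VALUES (0, 0, 0, 0);      -- trie still inside the DB
         INSERT INTO marf_data VALUES (1, 0, 4096, 512); -- trie already exported to the blob file",
    )?;

    let in_db: i64 = conn.query_row(
        "SELECT COUNT(*) FROM marf_data WHERE external_offset = 0 AND external_length = 0 AND unconfirmed = 0",
        NO_PARAMS, |r| r.get(0))?;
    let exported: i64 = conn.query_row(
        "SELECT COUNT(*) FROM marf_data WHERE external_offset != 0 AND external_length != 0 AND unconfirmed = 0",
        NO_PARAMS, |r| r.get(0))?;
    // Both counts are non-zero, so this DB would be flagged as partially migrated.
    assert!(in_db > 0 && exported > 0);
    Ok(())
}
```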

@@ -523,6 +523,13 @@ fn inner_sql_pragma(
     conn.pragma_update(None, pragma_name, pragma_value)
 }
 
+/// Run a VACUUM command
+pub fn sql_vacuum(conn: &Connection) -> Result<(), Error> {
+    conn.execute("VACUUM", NO_PARAMS)
+        .map_err(Error::SqliteError)
+        .and_then(|_| Ok(()))
+}
+
 /// Returns true if the database table `table_name` exists in the active
 /// database of the provided SQLite connection.
 pub fn table_exists(conn: &Connection, table_name: &str) -> Result<bool, sqlite_error> {
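
`sql_vacuum` is deliberately a thin wrapper: `VACUUM` takes no parameters, and its effect is easiest to see on a throwaway file-backed database, where deleting rows leaves the file large until the vacuum rewrites it (a sketch; the scratch path is an assumption):

```rust
use rusqlite::{Connection, NO_PARAMS};
use std::fs;

fn main() -> rusqlite::Result<()> {
    let path = "/tmp/vacuum_demo.sqlite"; // assumed scratch location
    let _ = fs::remove_file(path);
    let conn = Connection::open(path)?;
    conn.execute_batch("CREATE TABLE t (v BLOB);")?;
    let blob = vec![0u8; 1024];
    for _ in 0..1000 {
        conn.execute("INSERT INTO t (v) VALUES (?1)", &[&blob])?;
    }
    conn.execute("DELETE FROM t", NO_PARAMS)?; // frees pages, but the file stays large
    let before = fs::metadata(path).map(|m| m.len()).unwrap_or(0);
    conn.execute("VACUUM", NO_PARAMS)?; // rewrites the file, returning the space
    let after = fs::metadata(path).map(|m| m.len()).unwrap_or(0);
    println!("{} bytes -> {} bytes", before, after);
    Ok(())
}
```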