From 8fb44491f9a0e67203550b70dc1643562bc0145a Mon Sep 17 00:00:00 2001 From: Qinmao Zhang Date: Fri, 9 Nov 2018 16:31:45 -0800 Subject: [PATCH] Fredzqm/long delete cli (#977) * make database-remove.js do recursively chunked delets * make concurrent delete requests * fixed exceptions, add pending and in progress to status * limit max concurrency * add print=silent * refactor into database/remove.js * add logging for prefetching * use waiting map * clean up map and handle known bug * use queue * remove feature flag and hardcode concurrency and retry * revert to use utils.reject * remove TODO for bug, since it is fixed * move Helper class into DatabaseRemove file * add tests for Remote * rebase and fix import * convert to remove.spec.ts and remove.ts * handle comments * extract an interface for RemoveRemote --- package.json | 2 + src/commands/database-remove.js | 34 ++--- src/database/remove.ts | 99 +++++++++++++ src/database/removeRemote.ts | 154 ++++++++++++++++++++ src/test/database/remove.spec.ts | 191 +++++++++++++++++++++++++ src/test/database/removeRemote.spec.ts | 74 ++++++++++ 6 files changed, 528 insertions(+), 26 deletions(-) create mode 100644 src/database/remove.ts create mode 100644 src/database/removeRemote.ts create mode 100644 src/test/database/remove.spec.ts create mode 100644 src/test/database/removeRemote.spec.ts diff --git a/package.json b/package.json index 3cd84699..dff75e58 100644 --- a/package.json +++ b/package.json @@ -108,7 +108,9 @@ "@types/glob": "^7.1.1", "@types/lodash": "^4.14.118", "@types/mocha": "^5.2.5", + "@types/nock": "^9.3.0", "@types/node": "^10.12.0", + "@types/request": "^2.48.1", "@types/sinon": "^5.0.5", "chai": "^4.2.0", "chai-as-promised": "^7.1.1", diff --git a/src/commands/database-remove.js b/src/commands/database-remove.js index dd788e99..6f5a3dad 100644 --- a/src/commands/database-remove.js +++ b/src/commands/database-remove.js @@ -3,10 +3,8 @@ var Command = require("../command"); var requireInstance = require("../requireInstance"); var requirePermissions = require("../requirePermissions"); -var request = require("request"); +var DatabaseRemove = require("../database/remove").default; var api = require("../api"); -var responseToError = require("../responseToError"); -var FirebaseError = require("../error"); var utils = require("../utils"); var prompt = require("../prompt"); @@ -41,29 +39,13 @@ module.exports = new Command("database:remove ") if (!options.confirm) { return utils.reject("Command aborted.", { exit: 1 }); } - var url = utils.addSubdomain(api.realtimeOrigin, options.instance) + path + ".json?"; - var reqOptions = { - url: url, - json: true, - }; - - return api.addRequestHeaders(reqOptions).then(function(reqOptionsWithToken) { - return new Promise(function(resolve, reject) { - request.del(reqOptionsWithToken, function(err, res, body) { - if (err) { - return reject( - new FirebaseError("Unexpected error while removing data", { - exit: 2, - }) - ); - } else if (res.statusCode >= 400) { - return reject(responseToError(res, body)); - } - - utils.logSuccess("Data removed successfully"); - return resolve(); - }); - }); + var removeOps = new DatabaseRemove(path, { + concurrency: 20, + retries: 5, + instance: options.instance, + }); + return removeOps.execute().then(function() { + utils.logSuccess("Data removed successfully"); }); }); }); diff --git a/src/database/remove.ts b/src/database/remove.ts new file mode 100644 index 00000000..fa37e1b2 --- /dev/null +++ b/src/database/remove.ts @@ -0,0 +1,99 @@ +import * as pathLib from "path"; +import * as FirebaseError from "../error"; +import * as logger from "../logger"; + +import { NodeSize, RemoveRemote, RTDBRemoveRemote } from "./removeRemote"; +import { Queue } from "../queue"; + +export interface DatabaseRemoveOptions { + // RTBD instance ID. + instance: string; + // Number of concurrent chunk deletes to allow. + concurrency: number; + // Number of retries for each chunk delete. + retries: number; +} + +export default class DatabaseRemove { + public path: string; + public concurrency: number; + public retries: number; + public remote: RemoveRemote; + private jobQueue: Queue; + private waitingPath: Map; + + /** + * Construct a new RTDB delete operation. + * + * @constructor + * @param path path to delete. + * @param options + */ + constructor(path: string, options: DatabaseRemoveOptions) { + this.path = path; + this.concurrency = options.concurrency; + this.retries = options.retries; + this.remote = new RTDBRemoveRemote(options.instance); + this.waitingPath = new Map(); + this.jobQueue = new Queue({ + name: "long delete queue", + concurrency: this.concurrency, + handler: this.chunkedDelete.bind(this), + retries: this.retries, + }); + } + + public execute(): Promise { + const prom: Promise = this.jobQueue.wait(); + this.jobQueue.add(this.path); + return prom; + } + + private chunkedDelete(path: string): Promise { + return this.remote + .prefetchTest(path) + .then((test: NodeSize) => { + switch (test) { + case NodeSize.SMALL: + return this.remote.deletePath(path); + case NodeSize.LARGE: + return this.remote.listPath(path).then((pathList: string[]) => { + if (pathList) { + for (const p of pathList) { + this.jobQueue.add(pathLib.join(path, p)); + } + this.waitingPath.set(path, pathList.length); + } + return false; + }); + case NodeSize.EMPTY: + return true; + default: + throw new FirebaseError("Unexpected prefetch test result: " + test, { exit: 3 }); + } + }) + .then((deleted: boolean) => { + if (!deleted) { + return; + } + if (path === this.path) { + this.jobQueue.close(); + logger.debug("[database][long delete queue][FINAL]", this.jobQueue.stats()); + } else { + const parentPath = pathLib.dirname(path); + const prevParentPathReference = this.waitingPath.get(parentPath); + if (!prevParentPathReference) { + throw new FirebaseError( + `Unexpected error: parent path reference is zero for path=${path}`, + { exit: 3 } + ); + } + this.waitingPath.set(parentPath, prevParentPathReference - 1); + if (this.waitingPath.get(parentPath) === 0) { + this.jobQueue.add(parentPath); + this.waitingPath.delete(parentPath); + } + } + }); + } +} diff --git a/src/database/removeRemote.ts b/src/database/removeRemote.ts new file mode 100644 index 00000000..2455d0a7 --- /dev/null +++ b/src/database/removeRemote.ts @@ -0,0 +1,154 @@ +import { Response } from "request"; +import * as request from "request"; +import * as responseToError from "../responseToError"; +import * as utils from "../utils"; +import * as FirebaseError from "../error"; +import * as logger from "../logger"; +import * as api from "../api"; + +export enum NodeSize { + SMALL = "small", + LARGE = "large", + EMPTY = "empty", +} + +export interface RemoveRemote { + /** + * + * @param {string} path + * @return {Promise} true if the deletion is sucessful. + */ + deletePath(path: string): Promise; + + /** + * + * Run a prefetch test on a path before issuing a delete to detect + * large subtrees and issue recursive chunked deletes instead. + * + * @param {string} path + * @return {Promise}j + */ + prefetchTest(path: string): Promise; + + /** + * + * @param {string} path + * @return {Promise} the list of sub pathes found. + */ + listPath(path: string): Promise; +} + +export class RTDBRemoveRemote implements RemoveRemote { + private instance: string; + + constructor(instance: string) { + this.instance = instance; + } + + public deletePath(path: string): Promise { + return new Promise((resolve, reject) => { + const url = + utils.addSubdomain(api.realtimeOrigin, this.instance) + path + ".json?print=silent"; + const reqOptions = { + url, + json: true, + }; + return api.addRequestHeaders(reqOptions).then((reqOptionsWithToken) => { + request.del(reqOptionsWithToken, (err: Error, res: Response, body: any) => { + if (err) { + return reject( + new FirebaseError(`Unexpected error while removing data at ${path}`, { + exit: 2, + original: err, + }) + ); + } else if (res.statusCode >= 400) { + return reject(responseToError(res, body)); + } + logger.debug(`[database] Sucessfully removed data at ${path}`); + return resolve(true); + }); + }); + }); + } + + public prefetchTest(path: string): Promise { + const url = + utils.addSubdomain(api.realtimeOrigin, this.instance) + path + ".json?timeout=100ms"; + const reqOptions = { + url, + }; + return api.addRequestHeaders(reqOptions).then((reqOptionsWithToken) => { + return new Promise((resolve, reject) => { + logger.debug(`[database] Prefetching test at ${path}`); + request.get(reqOptionsWithToken, (err: Error, res: Response, body: any) => { + if (err) { + return reject( + new FirebaseError(`Unexpected error while prefetching data to delete ${path}`, { + exit: 2, + }) + ); + } + switch (res.statusCode) { + case 200: + if (body) { + return resolve(NodeSize.SMALL); + } else { + return resolve(NodeSize.EMPTY); + } + case 400: + // timeout. large subtree, recursive delete for each subtree + return resolve(NodeSize.LARGE); + case 413: + // payload too large. large subtree, recursive delete for each subtree + return resolve(NodeSize.LARGE); + default: + return reject(responseToError(res, body)); + } + }); + }); + }); + } + + public listPath(path: string): Promise { + const url = + utils.addSubdomain(api.realtimeOrigin, this.instance) + + path + + ".json?shallow=true&limitToFirst=50000"; + const reqOptions = { + url, + }; + return api.addRequestHeaders(reqOptions).then((reqOptionsWithToken) => { + return new Promise((resolve, reject) => { + request.get(reqOptionsWithToken, (err: Error, res: Response, body: any) => { + if (err) { + return reject( + new FirebaseError("Unexpected error while listing subtrees", { + exit: 2, + original: err, + }) + ); + } else if (res.statusCode >= 400) { + return reject(responseToError(res, body)); + } + let data = {}; + try { + data = JSON.parse(body); + } catch (e) { + return reject( + new FirebaseError("Malformed JSON response in shallow get ", { + exit: 2, + original: e, + }) + ); + } + if (data) { + const keyList = Object.keys(data); + return resolve(keyList); + } + resolve([]); + }); + }); + }); + } +} diff --git a/src/test/database/remove.spec.ts b/src/test/database/remove.spec.ts new file mode 100644 index 00000000..971043e3 --- /dev/null +++ b/src/test/database/remove.spec.ts @@ -0,0 +1,191 @@ +import { expect } from "chai"; +import * as pathLib from "path"; + +import DatabaseRemove from "../../database/remove"; +import { NodeSize, RemoveRemote } from "../../database/removeRemote"; + +class TestRemoveRemote implements RemoveRemote { + public data: any; + + constructor(data: any) { + this.data = data; + } + + public deletePath(path: string): Promise { + if (path === "/") { + this.data = null; + return Promise.resolve(true); + } + const parentDir = pathLib.dirname(path); + const basename = pathLib.basename(path); + delete this._dataAtpath(parentDir)[basename]; + if (Object.keys(this._dataAtpath(parentDir)).length === 0) { + return this.deletePath(parentDir); + } + return Promise.resolve(true); + } + + public prefetchTest(path: string): Promise { + const d = this._dataAtpath(path); + if (!d) { + return Promise.resolve(NodeSize.EMPTY); + } + if ("string" === typeof d) { + return Promise.resolve(NodeSize.SMALL); + } else if (Object.keys(d).length === 0) { + return Promise.resolve(NodeSize.EMPTY); + } else { + return Promise.resolve(NodeSize.LARGE); + } + } + + public listPath(path: string): Promise { + const d = this._dataAtpath(path); + if (d) { + return Promise.resolve(Object.keys(d)); + } + return Promise.resolve([]); + } + + private _dataAtpath(path: string): any { + const splitedPath = path.slice(1).split("/"); + let d = this.data; + for (const p of splitedPath) { + if (d && p !== "") { + if (typeof d === "string") { + d = null; + } else { + d = d[p]; + } + } + } + return d; + } +} + +describe("TestRemoveRemote", () => { + const fakeDb = new TestRemoveRemote({ + a: { + b: "1", + c: "2", + }, + d: { + e: "3", + }, + f: null, + }); + + it("listPath should work", () => { + return expect(fakeDb.listPath("/")).to.eventually.eql(["a", "d", "f"]); + }); + + it("prefetchTest should return empty", () => { + return expect(fakeDb.prefetchTest("/f")).to.eventually.eql(NodeSize.EMPTY); + }); + + it("prefetchTest should return large", () => { + return expect(fakeDb.prefetchTest("/")).to.eventually.eql(NodeSize.LARGE); + }); + + it("prefetchTest should return small", () => { + return expect(fakeDb.prefetchTest("/d/e")).to.eventually.eql(NodeSize.SMALL); + }); + + it("deletePath should work", () => { + return fakeDb.deletePath("/a/b").then(() => { + return expect(fakeDb.listPath("/a")).to.eventually.eql(["c"]); + }); + }); +}); + +describe("DatabaseRemove", () => { + it("DatabaseRemove should remove fakeDb at / 1", () => { + const fakeDb = new TestRemoveRemote({ + c: "2", + }); + const removeOps = new DatabaseRemove("/", { + instance: "test-remover", + concurrency: 200, + retries: 5, + }); + removeOps.remote = fakeDb; + return removeOps.execute().then(() => { + expect(fakeDb.data).to.eql(null); + }); + }); + + it("DatabaseRemove should remove fakeDb at / 2", () => { + const fakeDb = new TestRemoveRemote({ + a: { + b: { x: { y: "1" } }, + c: "2", + }, + d: { + e: "3", + }, + }); + const removeOps = new DatabaseRemove("/", { + instance: "test-remover", + concurrency: 200, + retries: 5, + }); + removeOps.remote = fakeDb; + return removeOps.execute().then(() => { + expect(fakeDb.data).to.eql(null); + }); + }); + + it("DatabaseRemove should remove fakeDb at /a/b", () => { + const fakeDb = new TestRemoveRemote({ + a: { + b: { x: { y: "1" } }, + c: "2", + }, + d: { + e: "3", + }, + }); + + const removeOps = new DatabaseRemove("/a/b", { + instance: "test-remover", + concurrency: 200, + retries: 5, + }); + removeOps.remote = fakeDb; + return removeOps.execute().then(() => { + expect(fakeDb.data).to.eql({ + a: { + c: "2", + }, + d: { + e: "3", + }, + }); + }); + }); + + it("DatabaseRemove should remove fakeDb at /a", () => { + const fakeDb = new TestRemoveRemote({ + a: { + b: { x: { y: "1" } }, + c: "2", + }, + d: { + e: "3", + }, + }); + const removeOps = new DatabaseRemove("/a", { + instance: "test-remover", + concurrency: 200, + retries: 5, + }); + removeOps.remote = fakeDb; + return removeOps.execute().then(() => { + expect(fakeDb.data).to.eql({ + d: { + e: "3", + }, + }); + }); + }); +}); diff --git a/src/test/database/removeRemote.spec.ts b/src/test/database/removeRemote.spec.ts new file mode 100644 index 00000000..967e4ce1 --- /dev/null +++ b/src/test/database/removeRemote.spec.ts @@ -0,0 +1,74 @@ +import { expect } from "chai"; +import * as sinon from "sinon"; +import * as nock from "nock"; +import * as utils from "../../utils"; +import * as api from "../../api"; + +import * as helpers from "../helpers"; +import { NodeSize, RTDBRemoveRemote } from "../../database/removeRemote"; + +describe("RemoveRemote", () => { + const instance = "fake-db"; + const remote = new RTDBRemoveRemote(instance); + const serverUrl = utils.addSubdomain(api.realtimeOrigin, instance); + let sandbox: sinon.SinonSandbox; + + beforeEach(() => { + sandbox = sinon.createSandbox(); + helpers.mockAuth(sandbox); + }); + + afterEach(() => { + sandbox.restore(); + nock.cleanAll(); + }); + + it("listPath should work", () => { + nock(serverUrl) + .get("/.json") + .query({ shallow: true, limitToFirst: "50000" }) + .reply(200, { + a: true, + x: true, + f: true, + }); + return expect(remote.listPath("/")).to.eventually.eql(["a", "x", "f"]); + }); + + it("prefetchTest should return empty", () => { + nock(serverUrl) + .get("/empty/path.json") + .query({ timeout: "100ms" }) + .reply(200, null); + return expect(remote.prefetchTest("/empty/path")).to.eventually.eql(NodeSize.EMPTY); + }); + + it("prefetchTest should return large", () => { + nock(serverUrl) + .get("/large/path.json") + .query({ timeout: "100ms" }) + .reply(400, { + error: + "Data requested exceeds the maximum size that can be accessed with a single request.", + }); + return expect(remote.prefetchTest("/large/path")).to.eventually.eql(NodeSize.LARGE); + }); + + it("prefetchTest should return small", () => { + nock(serverUrl) + .get("/small/path.json") + .query({ timeout: "100ms" }) + .reply(200, { + x: "some data", + }); + return expect(remote.prefetchTest("/small/path")).to.eventually.eql(NodeSize.SMALL); + }); + + it("deletePath should work", () => { + nock(serverUrl) + .delete("/a/b.json") + .query({ print: "silent" }) + .reply(200, {}); + return remote.deletePath("/a/b"); + }); +});