SYNC: Merge pull request #3 from dbgate/feature/zip

Jan Prochazka authored 2025-04-23 13:17:54 +02:00, committed by Diflow
parent 54c53f0b56
commit 8f4118a6b8
82 changed files with 3981 additions and 2814 deletions

View File

@@ -3,7 +3,9 @@ const { archivedir, resolveArchiveFolder } = require('../utility/directories');
const jsonLinesReader = require('./jsonLinesReader');
function archiveReader({ folderName, fileName, ...other }) {
const jsonlFile = path.join(resolveArchiveFolder(folderName), `${fileName}.jsonl`);
const jsonlFile = folderName.endsWith('.zip')
? `zip://archive:${folderName}//${fileName}.jsonl`
: path.join(resolveArchiveFolder(folderName), `${fileName}.jsonl`);
const res = jsonLinesReader({ fileName: jsonlFile, ...other });
return res;
}
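
For context, a brief illustration of what the new branch builds (folder and file names below are made up, not from the commit): a .zip archive folder is no longer resolved to a path on disk but encoded as a zip:// URL that downstream readers resolve later.

// archiveReader({ folderName: 'backup.zip', fileName: 'customers' }) now produces:
//   'zip://archive:backup.zip//customers.jsonl'
// whereas a plain archive folder still resolves to a filesystem path:
//   path.join(resolveArchiveFolder('backup'), 'customers.jsonl')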

View File

@@ -15,9 +15,9 @@ class CollectorWriterStream extends stream.Writable {
_final(callback) {
process.send({
msgtype: 'freeData',
msgtype: 'dataResult',
runid: this.runid,
freeData: { rows: this.rows, structure: this.structure },
dataResult: { rows: this.rows, structure: this.structure },
});
callback();
}
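
A hedged sketch of the consumer side of this message rename (not part of the diff; the handler shape is assumed):

// parent process listening to the forked worker
child.on('message', msg => {
  if (msg.msgtype === 'dataResult') {
    const { rows, structure } = msg.dataResult; // was msg.freeData before this commit
    handleResult(msg.runid, rows, structure);   // handleResult is a hypothetical callback
  }
});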

View File

@@ -1,61 +0,0 @@
const stream = require('stream');
const path = require('path');
const { quoteFullName, fullNameToString, getLogger } = require('dbgate-tools');
const requireEngineDriver = require('../utility/requireEngineDriver');
const { connectUtility } = require('../utility/connectUtility');
const logger = getLogger('dataDuplicator');
const { DataDuplicator } = require('dbgate-datalib');
const copyStream = require('./copyStream');
const jsonLinesReader = require('./jsonLinesReader');
const { resolveArchiveFolder } = require('../utility/directories');
async function dataDuplicator({
connection,
archive,
folder,
items,
options,
analysedStructure = null,
driver,
systemConnection,
}) {
if (!driver) driver = requireEngineDriver(connection);
const dbhan = systemConnection || (await connectUtility(driver, connection, 'write'));
try {
if (!analysedStructure) {
analysedStructure = await driver.analyseFull(dbhan);
}
const sourceDir = archive
? resolveArchiveFolder(archive)
: folder?.startsWith('archive:')
? resolveArchiveFolder(folder.substring('archive:'.length))
: folder;
const dupl = new DataDuplicator(
dbhan,
driver,
analysedStructure,
items.map(item => ({
name: item.name,
operation: item.operation,
matchColumns: item.matchColumns,
openStream:
item.openStream || (() => jsonLinesReader({ fileName: path.join(sourceDir, `${item.name}.jsonl`) })),
})),
stream,
copyStream,
options
);
await dupl.run();
} finally {
if (!systemConnection) {
await driver.close(dbhan);
}
}
}
module.exports = dataDuplicator;

View File

@@ -0,0 +1,96 @@
const stream = require('stream');
const path = require('path');
const { quoteFullName, fullNameToString, getLogger } = require('dbgate-tools');
const requireEngineDriver = require('../utility/requireEngineDriver');
const { connectUtility } = require('../utility/connectUtility');
const logger = getLogger('datareplicator');
const { DataReplicator } = require('dbgate-datalib');
const { compileCompoudEvalCondition } = require('dbgate-filterparser');
const copyStream = require('./copyStream');
const jsonLinesReader = require('./jsonLinesReader');
const { resolveArchiveFolder } = require('../utility/directories');
const { evaluateCondition } = require('dbgate-sqltree');
function compileOperationFunction(enabled, condition) {
if (!enabled) return _row => false;
const conditionCompiled = compileCompoudEvalCondition(condition);
if (condition) {
return row => evaluateCondition(conditionCompiled, row);
}
return _row => true;
}
async function dataReplicator({
connection,
archive,
folder,
items,
options,
analysedStructure = null,
driver,
systemConnection,
}) {
if (!driver) driver = requireEngineDriver(connection);
const dbhan = systemConnection || (await connectUtility(driver, connection, 'write'));
try {
if (!analysedStructure) {
analysedStructure = await driver.analyseFull(dbhan);
}
let joinPath;
if (archive?.endsWith('.zip')) {
joinPath = file => `zip://archive:${archive}//${file}`;
} else {
const sourceDir = archive
? resolveArchiveFolder(archive)
: folder?.startsWith('archive:')
? resolveArchiveFolder(folder.substring('archive:'.length))
: folder;
joinPath = file => path.join(sourceDir, file);
}
const repl = new DataReplicator(
dbhan,
driver,
analysedStructure,
items.map(item => {
return {
name: item.name,
matchColumns: item.matchColumns,
findExisting: compileOperationFunction(item.findExisting, item.findCondition),
createNew: compileOperationFunction(item.createNew, item.createCondition),
updateExisting: compileOperationFunction(item.updateExisting, item.updateCondition),
deleteMissing: !!item.deleteMissing,
deleteRestrictionColumns: item.deleteRestrictionColumns ?? [],
openStream: item.openStream
? item.openStream
: item.jsonArray
? () => stream.Readable.from(item.jsonArray)
: () => jsonLinesReader({ fileName: joinPath(`${item.name}.jsonl`) }),
};
}),
stream,
copyStream,
options
);
await repl.run();
if (options?.runid) {
process.send({
msgtype: 'dataResult',
runid: options?.runid,
dataResult: repl.result,
});
}
return repl.result;
} finally {
if (!systemConnection) {
await driver.close(dbhan);
}
}
}
module.exports = dataReplicator;
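
A minimal usage sketch of the new dataReplicator entry point, assuming the dbgateApi export added later in this diff; connection details and table names are illustrative:

await dbgateApi.dataReplicator({
  connection: { engine: 'postgres@dbgate-plugin-postgres', server: 'localhost', database: 'mydb' },
  archive: 'backup.zip', // ends with .zip, so items are read through the zip:// scheme
  items: [
    {
      name: 'customers',     // reads customers.jsonl from the archive
      matchColumns: ['id'],
      findExisting: true,
      createNew: true,
      updateExisting: false,
      deleteMissing: false,
    },
  ],
  options: {},
});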

View File

@@ -1,14 +1,30 @@
const crypto = require('crypto');
const path = require('path');
const { uploadsdir } = require('../utility/directories');
const { uploadsdir, archivedir } = require('../utility/directories');
const { downloadFile } = require('../utility/downloader');
const extractSingleFileFromZip = require('../utility/extractSingleFileFromZip');
async function download(url) {
if (url && url.match(/(^http:\/\/)|(^https:\/\/)/)) {
const tmpFile = path.join(uploadsdir(), crypto.randomUUID());
await downloadFile(url, tmpFile);
return tmpFile;
async function download(url, options = {}) {
const { targetFile } = options || {};
if (url) {
if (url.match(/(^http:\/\/)|(^https:\/\/)/)) {
const destFile = targetFile || path.join(uploadsdir(), crypto.randomUUID());
await downloadFile(url, destFile);
return destFile;
}
const zipMatch = url.match(/^zip\:\/\/(.*)\/\/(.*)$/);
if (zipMatch) {
const destFile = targetFile || path.join(uploadsdir(), crypto.randomUUID());
let zipFile = zipMatch[1];
if (zipFile.startsWith('archive:')) {
zipFile = path.join(archivedir(), zipFile.substring('archive:'.length));
}
await extractSingleFileFromZip(zipFile, zipMatch[2], destFile);
return destFile;
}
}
return url;
}
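
A short illustration of the new URL form handled above (archive and member names are made up):

// Extracts customers.jsonl from <archivedir()>/backup.zip into a temp file and returns its path:
const localPath = await download('zip://archive:backup.zip//customers.jsonl');
// Plain http(s) URLs and local paths behave as before.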

View File

@@ -25,7 +25,7 @@ const importDatabase = require('./importDatabase');
const loadDatabase = require('./loadDatabase');
const generateModelSql = require('./generateModelSql');
const modifyJsonLinesReader = require('./modifyJsonLinesReader');
const dataDuplicator = require('./dataDuplicator');
const dataReplicator = require('./dataReplicator');
const dbModelToJson = require('./dbModelToJson');
const jsonToDbModel = require('./jsonToDbModel');
const jsonReader = require('./jsonReader');
@@ -35,6 +35,11 @@ const autoIndexForeignKeysTransform = require('./autoIndexForeignKeysTransform')
const generateDeploySql = require('./generateDeploySql');
const dropAllDbObjects = require('./dropAllDbObjects');
const importDbFromFolder = require('./importDbFromFolder');
const zipDirectory = require('./zipDirectory');
const unzipDirectory = require('./unzipDirectory');
const zipJsonLinesData = require('./zipJsonLinesData');
const unzipJsonLinesData = require('./unzipJsonLinesData');
const unzipJsonLinesFile = require('./unzipJsonLinesFile');
const dbgateApi = {
queryReader,
@@ -64,7 +69,7 @@ const dbgateApi = {
loadDatabase,
generateModelSql,
modifyJsonLinesReader,
dataDuplicator,
dataReplicator,
dbModelToJson,
jsonToDbModel,
dataTypeMapperTransform,
@@ -73,6 +78,11 @@ const dbgateApi = {
generateDeploySql,
dropAllDbObjects,
importDbFromFolder,
zipDirectory,
unzipDirectory,
zipJsonLinesData,
unzipJsonLinesData,
unzipJsonLinesFile,
};
requirePlugin.initializeDbgateApi(dbgateApi);
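
A hedged sketch of how the newly exported zip helpers compose (all paths are illustrative):

await dbgateApi.zipDirectory('/tmp/export-folder', 'archive:backup.zip');   // pack a folder into the archive dir
await dbgateApi.unzipDirectory('/tmp/backup.zip', '/tmp/backup-extracted'); // unpack it elsewhere
const tables = await dbgateApi.unzipJsonLinesData('/tmp/backup.zip');       // { tableName: [rows, ...], ... }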

View File

@@ -36,9 +36,10 @@ async function jsonLinesWriter({ fileName, encoding = 'utf-8', header = true })
logger.info(`Writing file ${fileName}`);
const stringify = new StringifyStream({ header });
const fileStream = fs.createWriteStream(fileName, encoding);
stringify.pipe(fileStream);
stringify['finisher'] = fileStream;
return stringify;
return [stringify, fileStream];
// stringify.pipe(fileStream);
// stringify['finisher'] = fileStream;
// return stringify;
}
module.exports = jsonLinesWriter;
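
The writer no longer pipes into the file stream itself; it returns both streams so the caller can wire the pipeline. A hedged sketch of the caller side (sourceStream is a placeholder):

const [stringify, fileStream] = await jsonLinesWriter({ fileName: '/tmp/out.jsonl' });
sourceStream.pipe(stringify).pipe(fileStream);
await new Promise(resolve => fileStream.on('finish', resolve)); // wait until the file is fully written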

View File

@@ -0,0 +1,91 @@
const yauzl = require('yauzl');
const fs = require('fs');
const path = require('path');
const { getLogger, extractErrorLogData } = require('dbgate-tools');
const logger = getLogger('unzipDirectory');
/**
* Extracts an entire ZIP file, preserving its internal directory layout.
*
* @param {string} zipPath Path to the ZIP file on disk.
* @param {string} outputDirectory Folder to create / overwrite with the contents.
* @returns {Promise<boolean>} Resolves `true` on success, rejects on error.
*/
function unzipDirectory(zipPath, outputDirectory) {
return new Promise((resolve, reject) => {
yauzl.open(zipPath, { lazyEntries: true }, (err, zipFile) => {
if (err) return reject(err);
/** Pending per-file extractions; we resolve the main promise after they're all done */
const pending = [];
// kick things off
zipFile.readEntry();
zipFile.on('entry', entry => {
const destPath = path.join(outputDirectory, entry.fileName);
// Handle directories (their names always end with “/” in ZIPs)
if (/\/$/.test(entry.fileName)) {
// Ensure directory exists, then continue to next entry
fs.promises
.mkdir(destPath, { recursive: true })
.then(() => zipFile.readEntry())
.catch(reject);
return;
}
// Handle files
const filePromise = fs.promises
.mkdir(path.dirname(destPath), { recursive: true }) // make sure parent dirs exist
.then(
() =>
new Promise((res, rej) => {
zipFile.openReadStream(entry, (err, readStream) => {
if (err) return rej(err);
const writeStream = fs.createWriteStream(destPath);
readStream.pipe(writeStream);
// proceed to next entry once we've consumed *this* one
readStream.on('end', () => zipFile.readEntry());
writeStream.on('finish', () => {
logger.info(`Extracted "${entry.fileName}" → "${destPath}".`);
res();
});
writeStream.on('error', writeErr => {
logger.error(
extractErrorLogData(writeErr),
`Error extracting "${entry.fileName}" from "${zipPath}".`
);
rej(writeErr);
});
});
})
);
pending.push(filePromise);
});
// Entire archive enumerated; wait for all streams to finish
zipFile.on('end', () => {
Promise.all(pending)
.then(() => {
logger.info(`Archive "${zipPath}" fully extracted to "${outputDirectory}".`);
resolve(true);
})
.catch(reject);
});
zipFile.on('error', err => {
logger.error(extractErrorLogData(err), `ZIP file error in ${zipPath}.`);
reject(err);
});
});
});
}
module.exports = unzipDirectory;
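
A minimal usage sketch (paths are illustrative):

await unzipDirectory('/tmp/backup.zip', '/tmp/backup-extracted');
// directory entries are re-created, each file is streamed out, and the promise resolves with true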

View File

@@ -0,0 +1,60 @@
const yauzl = require('yauzl');
const fs = require('fs');
const { jsonLinesParse } = require('dbgate-tools');
function unzipJsonLinesData(zipPath) {
return new Promise((resolve, reject) => {
// Open the zip file
yauzl.open(zipPath, { lazyEntries: true }, (err, zipfile) => {
if (err) {
return reject(err);
}
const results = {};
// Start reading entries
zipfile.readEntry();
zipfile.on('entry', entry => {
// Only process .jsonl files
if (/\.jsonl$/i.test(entry.fileName)) {
zipfile.openReadStream(entry, (err, readStream) => {
if (err) {
return reject(err);
}
const chunks = [];
readStream.on('data', chunk => chunks.push(chunk));
readStream.on('end', () => {
try {
const fileContent = Buffer.concat(chunks).toString('utf-8');
const parsedJson = jsonLinesParse(fileContent);
results[entry.fileName.replace(/\.jsonl$/, '')] = parsedJson;
} catch (parseError) {
return reject(parseError);
}
// Move to the next entry
zipfile.readEntry();
});
});
} else {
// Not a .jsonl file, skip
zipfile.readEntry();
}
});
// Resolve when no more entries
zipfile.on('end', () => {
resolve(results);
});
// Catch errors from zipfile
zipfile.on('error', zipErr => {
reject(zipErr);
});
});
});
}
module.exports = unzipJsonLinesData;
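
The result is keyed by member name without the .jsonl suffix. A hedged example (archive contents are assumed):

const data = await unzipJsonLinesData('/tmp/backup.zip');
// e.g. { customers: [{ id: 1, name: 'Alice' }, ...], orders: [...] }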

View File

@@ -0,0 +1,59 @@
const yauzl = require('yauzl');
const fs = require('fs');
const { jsonLinesParse } = require('dbgate-tools');
function unzipJsonLinesFile(zipPath, fileInZip) {
return new Promise((resolve, reject) => {
// Open the zip file
yauzl.open(zipPath, { lazyEntries: true }, (err, zipfile) => {
if (err) {
return reject(err);
}
let result = null;
// Start reading entries
zipfile.readEntry();
zipfile.on('entry', entry => {
if (entry.fileName == fileInZip) {
zipfile.openReadStream(entry, (err, readStream) => {
if (err) {
return reject(err);
}
const chunks = [];
readStream.on('data', chunk => chunks.push(chunk));
readStream.on('end', () => {
try {
const fileContent = Buffer.concat(chunks).toString('utf-8');
const parsedJson = jsonLinesParse(fileContent);
result = parsedJson;
} catch (parseError) {
return reject(parseError);
}
// Move to the next entry
zipfile.readEntry();
});
});
} else {
// Not the requested file, skip
zipfile.readEntry();
}
});
// Resolve when no more entries
zipfile.on('end', () => {
resolve(result);
});
// Catch errors from zipfile
zipfile.on('error', zipErr => {
reject(zipErr);
});
});
});
}
module.exports = unzipJsonLinesFile;
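
A minimal sketch of the single-file variant (names are illustrative):

const rows = await unzipJsonLinesFile('/tmp/backup.zip', 'customers.jsonl');
// parsed rows from just that member, or null if the member is not present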

View File

@@ -0,0 +1,49 @@
const fs = require('fs');
const path = require('path');
const archiver = require('archiver');
const { getLogger, extractErrorLogData } = require('dbgate-tools');
const { archivedir } = require('../utility/directories');
const logger = getLogger('compressDirectory');
function zipDirectory(inputDirectory, outputFile) {
if (outputFile.startsWith('archive:')) {
outputFile = path.join(archivedir(), outputFile.substring('archive:'.length));
}
return new Promise((resolve, reject) => {
const output = fs.createWriteStream(outputFile);
const archive = archiver('zip', { zlib: { level: 9 } }); // level: 9 => best compression
// Listen for all archive data to be written
output.on('close', () => {
logger.info(`ZIP file created (${archive.pointer()} total bytes)`);
resolve();
});
archive.on('warning', err => {
logger.warn(extractErrorLogData(err), `Warning while creating ZIP: ${err.message}`);
});
archive.on('error', err => {
logger.error(extractErrorLogData(err), `Error while creating ZIP: ${err.message}`);
reject(err);
});
// Pipe archive data to the file
archive.pipe(output);
// Append files from a folder
archive.directory(inputDirectory, false, entryData => {
if (entryData.name.endsWith('.zip')) {
return false; // returning false means "do not include"
}
// otherwise, include it
return entryData;
});
// Finalize the archive
archive.finalize();
});
}
module.exports = zipDirectory;
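
A short usage sketch; the 'archive:' prefix resolves into the dbgate archive directory, anything else is treated as a plain path (names are illustrative):

await zipDirectory('/tmp/export-folder', 'archive:backup.zip'); // written to path.join(archivedir(), 'backup.zip')
await zipDirectory('/tmp/export-folder', '/tmp/backup.zip');    // written to the given path; nested .zip files are skipped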

View File

@@ -0,0 +1,49 @@
const fs = require('fs');
const _ = require('lodash');
const path = require('path');
const archiver = require('archiver');
const { getLogger, extractErrorLogData, jsonLinesStringify } = require('dbgate-tools');
const { archivedir } = require('../utility/directories');
const logger = getLogger('zipJsonLinesData');
function zipJsonLinesData(jsonDb, outputFile) {
if (outputFile.startsWith('archive:')) {
outputFile = path.join(archivedir(), outputFile.substring('archive:'.length));
}
return new Promise((resolve, reject) => {
const output = fs.createWriteStream(outputFile);
const archive = archiver('zip', { zlib: { level: 9 } }); // level: 9 => best compression
// Listen for all archive data to be written
output.on('close', () => {
logger.info(`ZIP file created (${archive.pointer()} total bytes)`);
resolve();
});
archive.on('warning', err => {
logger.warn(extractErrorLogData(err), `Warning while creating ZIP: ${err.message}`);
});
archive.on('error', err => {
logger.error(extractErrorLogData(err), `Error while creating ZIP: ${err.message}`);
reject(err);
});
// Pipe archive data to the file
archive.pipe(output);
for (const key in jsonDb) {
const data = jsonDb[key];
if (_.isArray(data)) {
const jsonString = jsonLinesStringify(data);
archive.append(jsonString, { name: `${key}.jsonl` });
}
}
// Finalize the archive
archive.finalize();
});
}
module.exports = zipJsonLinesData;
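
A hedged sketch of the expected input shape; only array-valued keys become .jsonl members (data below is made up):

await zipJsonLinesData(
  {
    customers: [{ id: 1, name: 'Alice' }],
    orders: [{ id: 10, customerId: 1 }],
    meta: { note: 'not an array, so it is skipped' },
  },
  'archive:backup.zip'
);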