// Byte sequence of the UTF-8 byte order mark (U+FEFF encoded as UTF-8).
const UTF8_BOM_BYTES = Buffer.from([0xef, 0xbb, 0xbf]);

/**
 * Transform stream that drops a UTF-8 byte order mark (EF BB BF) when it is
 * the very first thing in the stream and forwards all other data unchanged.
 *
 * The decision needs the first three bytes, which may arrive split across
 * several chunks, so input is buffered until three bytes are available or the
 * stream ends. Only the start of the stream is inspected; BOM-like byte
 * sequences appearing later are passed through untouched.
 *
 * In the CSV reader this is placed directly after the file read stream and
 * before the CSV parser.
 */
class StripUtf8BomTransform extends stream.Transform {
  /**
   * @param {object} [options] - Standard `stream.Transform` options, forwarded as-is.
   */
  constructor(options) {
    super(options);
    this._checkedBOM = false;
    // Bytes received so far while the BOM decision is still open.
    this._pending = Buffer.alloc(0);
  }

  /**
   * @param {Buffer|string} chunk - Incoming data. Strings only arrive when the
   *   stream was created with `decodeStrings: false` or in object mode.
   * @param {string} encoding - Encoding of `chunk` when it is a string.
   * @param {Function} callback - Signals that the chunk has been consumed.
   */
  _transform(chunk, encoding, callback) {
    if (this._checkedBOM) {
      // Decision already made: plain pass-through. The encoding is forwarded
      // for string chunks so they are turned into the right bytes.
      if (typeof chunk === 'string') {
        this.push(chunk, encoding);
      } else {
        this.push(chunk);
      }
      return callback();
    }

    // Buffer.concat rejects strings, so turn string chunks into bytes first.
    const bytes =
      typeof chunk === 'string' ? Buffer.from(chunk, Buffer.isEncoding(encoding) ? encoding : 'utf8') : chunk;
    this._pending = Buffer.concat([this._pending, bytes]);

    if (this._pending.length < UTF8_BOM_BYTES.length) {
      // Not enough bytes yet to tell whether the stream starts with a BOM.
      return callback();
    }

    const hasBom = this._pending.subarray(0, UTF8_BOM_BYTES.length).equals(UTF8_BOM_BYTES);
    const payload = hasBom ? this._pending.subarray(UTF8_BOM_BYTES.length) : this._pending;
    if (payload.length > 0) {
      this.push(payload);
    }

    this._pending = Buffer.alloc(0);
    this._checkedBOM = true;
    return callback();
  }

  /**
   * Emits whatever is still buffered when the input ends before three bytes
   * were seen; fewer than three bytes cannot form a UTF-8 BOM.
   *
   * @param {Function} callback - Signals that flushing is complete.
   */
  _flush(callback) {
    if (!this._checkedBOM && this._pending.length > 0) {
      this.push(this._pending);
    }
    this._pending = Buffer.alloc(0);
    callback();
  }
}