mirror of
https://github.com/DeNNiiInc/dbgate.git
synced 2026-04-18 05:36:00 +00:00
handled UTF-8 BOM in CSV input
This commit is contained in:
@@ -6,6 +6,56 @@ const lineReader = require('line-reader');
|
||||
|
||||
let dbgateApi;
|
||||
|
||||
class StripUtf8BomTransform extends stream.Transform {
|
||||
constructor(options) {
|
||||
super(options);
|
||||
this._checkedBOM = false;
|
||||
this._pending = Buffer.alloc(0); // store initial bytes until we know if BOM is present
|
||||
}
|
||||
|
||||
_transform(chunk, encoding, callback) {
|
||||
if (this._checkedBOM) {
|
||||
// We already handled BOM decision, just pass through
|
||||
this.push(chunk);
|
||||
return callback();
|
||||
}
|
||||
|
||||
// Accumulate into pending until we can decide
|
||||
this._pending = Buffer.concat([this._pending, chunk]);
|
||||
|
||||
if (this._pending.length < 3) {
|
||||
// Still don't know if it's BOM or not (need at least 3 bytes)
|
||||
return callback();
|
||||
}
|
||||
|
||||
// Now we can check the first 3 bytes
|
||||
const BOM = [0xef, 0xbb, 0xbf];
|
||||
const hasBom = this._pending[0] === BOM[0] && this._pending[1] === BOM[1] && this._pending[2] === BOM[2];
|
||||
|
||||
if (hasBom) {
|
||||
// Drop the BOM, push the rest
|
||||
this.push(this._pending.slice(3));
|
||||
} else {
|
||||
// No BOM, push everything as-is
|
||||
this.push(this._pending);
|
||||
}
|
||||
|
||||
this._pending = Buffer.alloc(0);
|
||||
this._checkedBOM = true;
|
||||
callback();
|
||||
}
|
||||
|
||||
_flush(callback) {
|
||||
// Stream ended but we never had enough bytes to decide (length < 3)
|
||||
if (!this._checkedBOM && this._pending.length > 0) {
|
||||
// If it's less than 3 bytes, it can't be a UTF-8 BOM, so just pass it through
|
||||
this.push(this._pending);
|
||||
}
|
||||
this._pending = Buffer.alloc(0);
|
||||
callback();
|
||||
}
|
||||
}
|
||||
|
||||
function readFirstLine(file) {
|
||||
return new Promise((resolve, reject) => {
|
||||
lineReader.open(file, (err, reader) => {
|
||||
@@ -95,7 +145,7 @@ async function reader({ fileName, encoding = 'utf-8', header = true, delimiter,
|
||||
});
|
||||
const fileStream = fs.createReadStream(downloadedFile, encoding);
|
||||
const csvPrepare = new CsvPrepareStream({ header });
|
||||
return [fileStream, csvStream, csvPrepare];
|
||||
return [fileStream, new StripUtf8BomTransform(), csvStream, csvPrepare];
|
||||
// fileStream.pipe(csvStream);
|
||||
// csvStream.pipe(csvPrepare);
|
||||
// return csvPrepare;
|
||||
|
||||
Reference in New Issue
Block a user