json lines reader, writer

This commit is contained in:
Jan Prochazka
2020-06-11 20:52:57 +02:00
parent 41ee6e9b91
commit b520501d1f
7 changed files with 105 additions and 29 deletions

View File

@@ -7,6 +7,8 @@ const copyStream = require('./copyStream');
const fakeObjectReader = require('./fakeObjectReader'); const fakeObjectReader = require('./fakeObjectReader');
const consoleObjectWriter = require('./consoleObjectWriter'); const consoleObjectWriter = require('./consoleObjectWriter');
const excelSheetReader = require('./excelSheetReader'); const excelSheetReader = require('./excelSheetReader');
const jsonLinesWriter = require('./jsonLinesWriter');
const jsonLinesReader = require('./jsonLinesReader');
module.exports = { module.exports = {
queryReader, queryReader,
@@ -16,6 +18,8 @@ module.exports = {
tableWriter, tableWriter,
copyStream, copyStream,
excelSheetReader, excelSheetReader,
jsonLinesWriter,
jsonLinesReader,
fakeObjectReader, fakeObjectReader,
consoleObjectWriter, consoleObjectWriter,
}; };

View File

@@ -0,0 +1,32 @@
const fs = require('fs');
const stream = require('stream');
const byline = require('byline');
/**
 * Object-mode transform that parses JSON Lines input: every incoming chunk is
 * treated as one JSON document and the parsed value is emitted downstream.
 *
 * When `header` is falsy, a synthetic `{ columns: [{ columnName }, ...] }`
 * object built from the keys of the first parsed line is emitted before that
 * line. When `header` is truthy, every line (the first one included) is
 * emitted exactly as parsed.
 */
class ParseStream extends stream.Transform {
  /**
   * @param {{ header: boolean }} options - `header` tells whether the input
   *   already starts with a header line.
   */
  constructor({ header }) {
    super({ objectMode: true });
    this.header = header;
    this.wasHeader = false;
  }

  /**
   * Parses one line and pushes the result (preceded by a synthesized header
   * object when required).
   *
   * @param {string|Buffer} chunk - a single line of JSON text
   * @param {string} encoding - unused
   * @param {(err?: Error) => void} done - transform callback
   */
  _transform(chunk, encoding, done) {
    let row;
    let synthesizedHeader = null;
    try {
      row = JSON.parse(chunk);
      if (!this.wasHeader && !this.header) {
        synthesizedHeader = { columns: Object.keys(row).map((columnName) => ({ columnName })) };
      }
    } catch (err) {
      // Report malformed input through the stream instead of throwing out of
      // _transform, so consumers receive it as an 'error' event.
      done(err);
      return;
    }

    if (synthesizedHeader !== null) this.push(synthesizedHeader);
    this.wasHeader = true;
    this.push(row);
    done();
  }
}
/**
 * Opens a JSON Lines file and returns an object-mode stream of its parsed
 * lines. The file is read with `fs.createReadStream`, passed through `byline`
 * and piped into a `ParseStream`.
 *
 * @param {object} options
 * @param {string} options.fileName - path of the file to read
 * @param {string} [options.encoding='utf-8'] - passed to `fs.createReadStream`
 * @param {boolean} [options.header=true] - passed to `ParseStream`
 * @returns {Promise<ParseStream>} the parser stream to consume
 */
async function jsonLinesReader({ fileName, encoding = 'utf-8', header = true }) {
  console.log(`Reading file ${fileName}`);
  const fileStream = fs.createReadStream(fileName, encoding);
  const liner = byline(fileStream);
  const parser = new ParseStream({ header });

  // pipe() does not forward 'error' events, and only the parser is handed to
  // the caller. Surface upstream failures (e.g. a missing file) on the parser
  // so they can be observed instead of being unhandled on an inner stream.
  const forwardError = (err) => parser.destroy(err);
  fileStream.on('error', forwardError);
  liner.on('error', forwardError);

  liner.pipe(parser);
  return parser;
}
module.exports = jsonLinesReader;

View File

@@ -0,0 +1,30 @@
const fs = require('fs');
const stream = require('stream');
/**
 * Object-mode transform that serializes each incoming chunk with
 * `JSON.stringify` and emits it as one newline-terminated line.
 *
 * The first chunk received is treated as the header: it is written only when
 * `header` is truthy and dropped otherwise. Every later chunk is always
 * written.
 */
class StringifyStream extends stream.Transform {
  /**
   * @param {{ header: boolean }} options - `header` tells whether the first
   *   chunk should be written to the output.
   */
  constructor({ header }) {
    super({ objectMode: true });
    this.header = header;
    this.wasHeader = false;
  }

  /**
   * Serializes one chunk, or skips it when it is an unwanted header chunk.
   *
   * @param {*} chunk - value to serialize
   * @param {string} encoding - unused
   * @param {(err?: Error) => void} done - transform callback
   */
  _transform(chunk, encoding, done) {
    const isFirstChunk = !this.wasHeader;
    this.wasHeader = true;

    if (isFirstChunk && !this.header) {
      done();
      return;
    }

    let line;
    try {
      line = `${JSON.stringify(chunk)}\n`;
    } catch (err) {
      // JSON.stringify throws on BigInt values and circular references;
      // report that through the stream rather than throwing out of _transform.
      done(err);
      return;
    }
    this.push(line);
    done();
  }
}
/**
 * Creates a JSON Lines writer: a `StringifyStream` piped into a file write
 * stream for `fileName`.
 *
 * The file write stream is attached to the returned stream as its `finisher`
 * property.
 *
 * @param {object} options
 * @param {string} options.fileName - path of the file to write
 * @param {string} [options.encoding='utf-8'] - passed to `fs.createWriteStream`
 * @param {boolean} [options.header=true] - passed to `StringifyStream`
 * @returns {Promise<StringifyStream>} the stream to write objects into
 */
async function jsonLinesWriter({ fileName, encoding = 'utf-8', header = true }) {
  console.log(`Writing file ${fileName}`);

  const serializer = new StringifyStream({ header });
  const output = fs.createWriteStream(fileName, encoding);

  serializer.finisher = output;
  serializer.pipe(output);

  return serializer;
}
module.exports = jsonLinesWriter;

View File

@@ -31,9 +31,15 @@ async function run() {
// header: false, // header: false,
}); });
const jsonWriter = await dbgateApi.jsonLinesWriter({
fileName: 'test.jsonl',
header: false,
});
const consoleWriter = await dbgateApi.consoleObjectWriter(); const consoleWriter = await dbgateApi.consoleObjectWriter();
await dbgateApi.copyStream(queryReader, csvWriter); // await dbgateApi.copyStream(queryReader, csvWriter);
await dbgateApi.copyStream(queryReader, jsonWriter);
// await dbgateApi.copyStream(queryReader, consoleWriter); // await dbgateApi.copyStream(queryReader, consoleWriter);
} }

View File

@@ -11,6 +11,11 @@ async function run() {
sheetName: 'Events', sheetName: 'Events',
}); });
const jsonReader = await dbgateApi.jsonLinesReader({
fileName: 'test.jsonl',
header: false,
});
const tableWriter = await dbgateApi.tableWriter({ const tableWriter = await dbgateApi.tableWriter({
connection: { connection: {
server: 'localhost', server: 'localhost',
@@ -20,7 +25,7 @@ async function run() {
database: 'Chinook', database: 'Chinook',
}, },
schemaName: 'dbo', schemaName: 'dbo',
pureName: 'Events', pureName: 'Genre3',
createIfNotExists: true, createIfNotExists: true,
truncate: true, truncate: true,
}); });
@@ -40,7 +45,7 @@ async function run() {
const consoleWriter = await dbgateApi.consoleObjectWriter(); const consoleWriter = await dbgateApi.consoleObjectWriter();
// await dbgateApi.copyStream(excelReader, consoleWriter); // await dbgateApi.copyStream(excelReader, consoleWriter);
await dbgateApi.copyStream(excelReader, tableWriter); await dbgateApi.copyStream(jsonReader, tableWriter);
// await dbgateApi.copyStream(csvReader, consoleWriter); // await dbgateApi.copyStream(csvReader, consoleWriter);
// await dbgateApi.copyStream(csvReader, tableWriter); // await dbgateApi.copyStream(csvReader, tableWriter);
} }

View File

@@ -1,26 +0,0 @@
GenreId,Name
1,Rock
2,Jazz
3,Metal
4,Alternative & Punk
5,Rock And Roll
6,Blues
7,Latin
8,Reggae
9,Pop
10,Soundtrack
11,Bossa Nova
12,Easy Listening
13,Heavy Metal
14,R&B/Soul
15,Electronica/Dance
16,World
17,Hip Hop/Rap
18,Science Fiction
19,TV Shows
20,Sci Fi & Fantasy
21,Drama
22,Comedy
23,Alternative
24,Classical
25,Opera
1 GenreId Name
GenreId Name
1 Rock
2 Jazz
3 Metal
4 Alternative & Punk
5 Rock And Roll
6 Blues
7 Latin
8 Reggae
9 Pop
10 Soundtrack
11 Bossa Nova
12 Easy Listening
13 Heavy Metal
14 R&B/Soul
15 Electronica/Dance
16 World
17 Hip Hop/Rap
18 Science Fiction
19 TV Shows
20 Sci Fi & Fantasy
21 Drama
22 Comedy
23 Alternative
24 Classical
25 Opera

25
test/test.jsonl Normal file
View File

@@ -0,0 +1,25 @@
{"GenreId":1,"Name":"Rock"}
{"GenreId":2,"Name":"Jazz"}
{"GenreId":3,"Name":"Metal"}
{"GenreId":4,"Name":"Alternative & Punk"}
{"GenreId":5,"Name":"Rock And Roll"}
{"GenreId":6,"Name":"Blues"}
{"GenreId":7,"Name":"Latin"}
{"GenreId":8,"Name":"Reggae"}
{"GenreId":9,"Name":"Pop"}
{"GenreId":10,"Name":"Soundtrack"}
{"GenreId":11,"Name":"Bossa Nova"}
{"GenreId":12,"Name":"Easy Listening"}
{"GenreId":13,"Name":"Heavy Metal"}
{"GenreId":14,"Name":"R&B/Soul"}
{"GenreId":15,"Name":"Electronica/Dance"}
{"GenreId":16,"Name":"World"}
{"GenreId":17,"Name":"Hip Hop/Rap"}
{"GenreId":18,"Name":"Science Fiction"}
{"GenreId":19,"Name":"TV Shows"}
{"GenreId":20,"Name":"Sci Fi & Fantasy"}
{"GenreId":21,"Name":"Drama"}
{"GenreId":22,"Name":"Comedy"}
{"GenreId":23,"Name":"Alternative"}
{"GenreId":24,"Name":"Classical"}
{"GenreId":25,"Name":"Opera"}