mirror of
https://github.com/DeNNiiInc/dbgate.git
synced 2026-04-19 00:16:00 +00:00
SYNC: Merge pull request #4 from dbgate/feature/charts
This commit is contained in:
374
packages/datalib/src/chartProcessor.ts
Normal file
374
packages/datalib/src/chartProcessor.ts
Normal file
@@ -0,0 +1,374 @@
|
||||
import {
|
||||
ChartAvailableColumn,
|
||||
ChartDateParsed,
|
||||
ChartDefinition,
|
||||
ChartLimits,
|
||||
ProcessedChart,
|
||||
} from './chartDefinitions';
|
||||
import _sortBy from 'lodash/sortBy';
|
||||
import _sum from 'lodash/sum';
|
||||
import {
|
||||
aggregateChartNumericValuesFromSource,
|
||||
autoAggregateCompactTimelineChart,
|
||||
computeChartBucketCardinality,
|
||||
computeChartBucketKey,
|
||||
fillChartTimelineBuckets,
|
||||
tryParseChartDate,
|
||||
} from './chartTools';
|
||||
import { getChartScore, getChartYFieldScore } from './chartScoring';
|
||||
|
||||
/** Bucket key under which small pie slices are merged by `groupPieOtherBuckets`. */
const PIE_OTHER_BUCKET_KEY = 'Other';

/**
 * Creates the empty accumulator state for one chart definition.
 *
 * @param definition Chart definition the accumulator belongs to.
 * @param isGivenDefinition `true` for caller-supplied definitions, `false` for auto-detected ones.
 */
function createProcessedChart(definition: ChartDefinition, isGivenDefinition: boolean): ProcessedChart {
  return {
    definition,
    rowsAdded: 0,
    bucketKeysOrdered: [],
    buckets: {},
    bucketKeyDateParsed: {},
    isGivenDefinition,
    invalidXRows: 0,
    invalidYRows: {},
    availableColumns: [],
    validYRows: {},
    topDistinctValues: {},
  };
}

/**
 * Accumulates data rows into chart buckets.
 *
 * Two modes exist:
 * - definitions passed to the constructor are processed as given;
 * - when no definitions are passed, one line chart per detected date column is
 *   created on the fly (auto-detection), subject to the limits in `ChartLimits`.
 *
 * Typical usage: `addRow`/`addRows` for every row, then `finalize()`, then read `charts`.
 */
export class ChartProcessor {
  /** Charts that are still being accumulated; `finalize` turns them into `charts`. */
  chartsProcessing: ProcessedChart[] = [];
  /** Finished charts, filled by `finalize`. */
  charts: ProcessedChart[] = [];
  /** Every column name seen in any row, keyed by field name. */
  availableColumnsDict: { [field: string]: ChartAvailableColumn } = {};
  /** Values of `availableColumnsDict`, materialized by `finalize`. */
  availableColumns: ChartAvailableColumn[] = [];
  /** `true` when no definitions were given and charts are derived from the data. */
  autoDetectCharts = false;
  /** Number of rows passed to `addRow` so far. */
  rowsAdded = 0;

  /**
   * @param givenDefinitions Chart definitions to process. When empty, auto-detection is enabled.
   */
  constructor(public givenDefinitions: ChartDefinition[] = []) {
    for (const definition of givenDefinitions) {
      this.chartsProcessing.push(createProcessedChart(definition, true));
    }
    this.autoDetectCharts = this.givenDefinitions.length == 0;
  }

  /**
   * Classifies the columns of one row (date / numeric / short string), extends the
   * auto-detected charts if enabled, and feeds the row into every chart being processed.
   *
   * @param row Plain object mapping column names to values.
   */
  addRow(row: any) {
    const dateColumns: { [key: string]: ChartDateParsed } = {};
    const numericColumns: { [key: string]: number } = {};
    const numericColumnsForAutodetect: { [key: string]: number } = {};
    const stringColumns: { [key: string]: string } = {};

    for (const [key, value] of Object.entries(row)) {
      // NOTE: Number('') is 0, so empty strings are classified as the numeric value 0.
      const number: number = typeof value == 'string' ? Number(value) : typeof value == 'number' ? value : NaN;
      this.availableColumnsDict[key] = {
        field: key,
      };

      const keyLower = key.toLowerCase();
      const keyIsId = keyLower.endsWith('_id') || keyLower == 'id' || key.endsWith('Id');

      // Date detection wins over numeric detection.
      const parsedDate = tryParseChartDate(value);
      if (parsedDate) {
        dateColumns[key] = parsedDate;
        continue;
      }

      if (Number.isFinite(number)) {
        numericColumns[key] = number;
        if (!keyIsId) {
          // identifier-like columns are not offered to auto-detected charts
          numericColumnsForAutodetect[key] = number;
        }
        continue;
      }

      if (typeof value === 'string' && isNaN(number) && value.length < 100) {
        stringColumns[key] = value;
      }
    }

    if (this.autoDetectCharts) {
      // create charts from data, if there are no given definitions
      for (const datecol of Object.keys(dateColumns)) {
        let usedChart = this.chartsProcessing.find(
          chart =>
            !chart.isGivenDefinition &&
            chart.definition.xdef.field === datecol &&
            chart.definition.xdef.transformFunction?.startsWith('date:')
        );

        const mayCreateChart =
          this.rowsAdded < ChartLimits.APPLY_LIMIT_AFTER_ROWS ||
          this.chartsProcessing.length < ChartLimits.AUTODETECT_CHART_LIMIT;

        if (!usedChart && mayCreateChart) {
          usedChart = createProcessedChart(
            {
              chartType: 'line',
              xdef: {
                field: datecol,
                transformFunction: 'date:day',
              },
              ydefs: [],
            },
            false
          );
          this.chartsProcessing.push(usedChart);
        }

        if (!usedChart) {
          // No chart exists for this date column and the limits forbid creating one
          // (e.g. it was dropped by applyLimitsOnCharts) - nothing to extend.
          continue;
        }

        for (const [key, value] of Object.entries(row)) {
          if (value == null) continue;
          if (key == datecol) continue; // skip date column itself

          const alreadyMeasured = usedChart.definition.ydefs.some(y => y.field === key);
          const mayAddMeasure =
            this.rowsAdded < ChartLimits.APPLY_LIMIT_AFTER_ROWS ||
            usedChart.definition.ydefs.length < ChartLimits.AUTODETECT_MEASURES_LIMIT;

          if (!alreadyMeasured && mayAddMeasure) {
            usedChart.definition.ydefs.push({
              field: key,
              aggregateFunction: 'sum',
            });
          }
        }
      }
    }

    // feed the row into every chart; each chart gets the parsed date of its own x column (if any)
    for (const chart of this.chartsProcessing) {
      const xField = chart.definition.xdef?.field;
      this.applyRawData(
        chart,
        row,
        xField == null ? undefined : dateColumns[xField],
        chart.isGivenDefinition ? numericColumns : numericColumnsForAutodetect,
        stringColumns
      );
    }

    for (let i = 0; i < this.chartsProcessing.length; i++) {
      this.chartsProcessing[i] = autoAggregateCompactTimelineChart(this.chartsProcessing[i]);
    }

    this.rowsAdded += 1;
    if (this.rowsAdded == ChartLimits.APPLY_LIMIT_AFTER_ROWS) {
      this.applyLimitsOnCharts();
    }
  }

  /**
   * Enforces the auto-detection limits:
   * - keeps at most `ChartLimits.AUTODETECT_CHART_LIMIT` auto-detected charts, the ones
   *   with the highest `getChartScore`;
   * - keeps at most `ChartLimits.AUTODETECT_MEASURES_LIMIT` measures per kept auto-detected
   *   chart, the ones with the highest `getChartYFieldScore`.
   *
   * Charts created from given definitions are never removed or modified.
   */
  applyLimitsOnCharts() {
    let autodetectedCharts = this.chartsProcessing.filter(chart => !chart.isGivenDefinition);

    if (autodetectedCharts.length > ChartLimits.AUTODETECT_CHART_LIMIT) {
      // Sort first, then cut - otherwise the first N charts would survive instead of the best N.
      autodetectedCharts = _sortBy(autodetectedCharts, chart => -getChartScore(chart)).slice(
        0,
        ChartLimits.AUTODETECT_CHART_LIMIT
      );

      this.chartsProcessing = [
        ...this.chartsProcessing.filter(chart => chart.isGivenDefinition),
        ...autodetectedCharts,
      ];
    }

    // The measure limit is independent of how many charts exist.
    for (const chart of autodetectedCharts) {
      chart.definition.ydefs = _sortBy(chart.definition.ydefs, yfield => -getChartYFieldScore(chart, yfield)).slice(
        0,
        ChartLimits.AUTODETECT_MEASURES_LIMIT
      );
    }
  }

  /**
   * Adds several rows, in order, via `addRow`.
   *
   * @param rows Rows to add (variadic).
   */
  addRows(...rows: any[]) {
    for (const row of rows) {
      this.addRow(row);
    }
  }

  /**
   * Finishes processing: applies limits, orders bucket keys according to each chart's
   * `xdef.sortOrder` (default `'ascKeys'`), drops unusable measures from auto-detected
   * charts, groups small pie slices, and appends the results to `charts`.
   *
   * Given-definition charts come first in `charts`; auto-detected ones follow,
   * ordered by descending `getChartScore`. Charts that received no rows are omitted.
   */
  finalize() {
    this.applyLimitsOnCharts();
    this.availableColumns = Object.values(this.availableColumnsDict);

    for (const chart of this.chartsProcessing) {
      if (chart.rowsAdded == 0) {
        continue; // skip empty charts
      }

      let addedChart: ProcessedChart = chart;
      const sortOrder = chart.definition.xdef.sortOrder ?? 'ascKeys';

      // 'natural' keeps the insertion order recorded by applyRawData.
      if (sortOrder == 'ascKeys' || sortOrder == 'descKeys') {
        if (chart.definition.xdef.transformFunction?.startsWith('date:')) {
          addedChart = autoAggregateCompactTimelineChart(addedChart);
          fillChartTimelineBuckets(addedChart);
        }

        addedChart.bucketKeysOrdered = _sortBy(Object.keys(addedChart.buckets));
        if (sortOrder == 'descKeys') {
          addedChart.bucketKeysOrdered.reverse();
        }
      } else if (sortOrder == 'ascValues' || sortOrder == 'descValues') {
        const chartBuckets = addedChart.buckets;
        addedChart.bucketKeysOrdered = _sortBy(Object.keys(chartBuckets), key =>
          computeChartBucketCardinality(chartBuckets[key])
        );
        if (sortOrder == 'descValues') {
          addedChart.bucketKeysOrdered.reverse();
        }
      }

      if (!addedChart.isGivenDefinition) {
        // Auto-detected measures are kept only if no invalid value was recorded and enough
        // rows carried a valid value (a measure without any valid row yields NaN and is dropped).
        const source = addedChart;
        addedChart = {
          ...source,
          definition: {
            ...source.definition,
            ydefs: source.definition.ydefs.filter(
              y =>
                !source.invalidYRows[y.field] &&
                source.validYRows[y.field] / source.rowsAdded >= ChartLimits.VALID_VALUE_RATIO_LIMIT
            ),
          },
        };
      }

      addedChart.availableColumns = this.availableColumns;
      this.charts.push(addedChart);

      this.groupPieOtherBuckets(addedChart);
    }

    this.charts = [
      ...this.charts.filter(x => x.isGivenDefinition),
      ..._sortBy(
        this.charts.filter(x => !x.isGivenDefinition),
        chart => -getChartScore(chart)
      ),
    ];
  }

  /**
   * For pie charts, merges small slices into a single "Other" bucket (in place).
   *
   * A bucket is merged when its share of the total cardinality is below the ratio limit,
   * or when it is not among the `countLimit` largest remaining buckets. Each limit is
   * taken from the chart definition (`pieRatioLimit`, `pieCountLimit`) and falls back to
   * `ChartLimits`; a limit of 0 disables that particular rule. Merged values are summed
   * field by field.
   *
   * @param chart Chart to modify; non-pie charts are left untouched.
   */
  groupPieOtherBuckets(chart: ProcessedChart) {
    if (chart.definition.chartType !== 'pie') {
      return; // only for pie charts
    }

    const ratioLimit = chart.definition.pieRatioLimit ?? ChartLimits.PIE_RATIO_LIMIT;
    const countLimit = chart.definition.pieCountLimit ?? ChartLimits.PIE_COUNT_LIMIT;
    if (ratioLimit == 0 && countLimit == 0) {
      return; // no grouping if limit is 0
    }

    const cardSum = _sum(Object.values(chart.buckets).map(bucket => computeChartBucketCardinality(bucket)));
    if (cardSum == 0) {
      return; // no buckets to process
    }

    const otherBucket: any = {};
    const mergeIntoOther = (bucket: any) => {
      for (const field in bucket) {
        otherBucket[field] = (otherBucket[field] ?? 0) + bucket[field];
      }
    };

    let newBuckets: any = {};
    for (const [bucketKey, bucket] of Object.entries(chart.buckets)) {
      if (computeChartBucketCardinality(bucket) / cardSum < ratioLimit) {
        mergeIntoOther(bucket);
      } else {
        newBuckets[bucketKey] = bucket;
      }
    }

    // countLimit == 0 means "no count limit"; without this guard every bucket would be merged.
    if (countLimit > 0 && Object.keys(newBuckets).length > countLimit) {
      const keysByCardinalityDesc = _sortBy(
        Object.entries(newBuckets),
        ([, bucket]) => -computeChartBucketCardinality(bucket)
      ).map(([key]) => key);

      const largestBuckets: any = {};
      keysByCardinalityDesc.forEach((key, index) => {
        if (index < countLimit) {
          largestBuckets[key] = newBuckets[key];
        } else {
          mergeIntoOther(newBuckets[key]);
        }
      });
      newBuckets = largestBuckets;
    }

    const hasOther = Object.keys(otherBucket).length > 0;
    if (hasOther) {
      if (Object.prototype.hasOwnProperty.call(newBuckets, PIE_OTHER_BUCKET_KEY)) {
        // The data already contains a bucket with the same key - merge it instead of overwriting it.
        mergeIntoOther(newBuckets[PIE_OTHER_BUCKET_KEY]);
      }
      newBuckets[PIE_OTHER_BUCKET_KEY] = otherBucket;
    }

    chart.buckets = newBuckets;

    const keptKeys = chart.bucketKeysOrdered.filter(
      key =>
        Object.prototype.hasOwnProperty.call(newBuckets, key) && !(hasOther && key === PIE_OTHER_BUCKET_KEY)
    );
    // The merged bucket is always listed last, and only once.
    chart.bucketKeysOrdered = hasOther ? [...keptKeys, PIE_OTHER_BUCKET_KEY] : keptKeys;
  }

  /**
   * Adds one row to one chart: resolves the bucket key via `computeChartBucketKey`,
   * tracks `minX`/`maxX`, creates the bucket if needed and delegates the value
   * aggregation to `aggregateChartNumericValuesFromSource`.
   *
   * The row is ignored when the chart has no x definition, the row has no value in the
   * x column, or no bucket key can be computed. For `date:*` transforms a missing
   * `dateParsed` additionally increments `chart.invalidXRows`.
   *
   * @param chart Chart accumulator to update (mutated).
   * @param row Source row.
   * @param dateParsed Parsed date of the chart's x column, or null/undefined when the value is not a date.
   * @param numericColumns Numeric values of the row, keyed by column name.
   * @param stringColumns Short string values of the row, keyed by column name (currently unused here).
   */
  applyRawData(
    chart: ProcessedChart,
    row: any,
    dateParsed: ChartDateParsed,
    numericColumns: { [key: string]: number },
    stringColumns: { [key: string]: string }
  ) {
    const xdef = chart.definition.xdef;
    if (xdef == null) {
      return;
    }

    if (row[xdef.field] == null) {
      return;
    }

    if (dateParsed == null && xdef.transformFunction?.startsWith('date:')) {
      chart.invalidXRows += 1;
      return; // skip if date is invalid
    }

    const [bucketKey, bucketKeyParsed] = computeChartBucketKey(dateParsed, chart, row);

    if (!bucketKey) {
      return; // skip if no bucket key
    }

    if (bucketKeyParsed) {
      chart.bucketKeyDateParsed[bucketKey] = bucketKeyParsed;
    }

    if (chart.minX == null || bucketKey < chart.minX) {
      chart.minX = bucketKey;
    }
    if (chart.maxX == null || bucketKey > chart.maxX) {
      chart.maxX = bucketKey;
    }

    if (!chart.buckets[bucketKey]) {
      chart.buckets[bucketKey] = {};
      if (xdef.sortOrder == 'natural') {
        // natural order = order of first appearance
        chart.bucketKeysOrdered.push(bucketKey);
      }
    }

    aggregateChartNumericValuesFromSource(chart, bucketKey, numericColumns, row);
    chart.rowsAdded += 1;
  }
}
|
||||
Reference in New Issue
Block a user