SYNC: chart autodetection improved

This commit is contained in:
Jan Prochazka
2025-07-02 10:50:07 +02:00
committed by Diflow
parent cd3a1bebff
commit 33b707aa68
3 changed files with 68 additions and 60 deletions

View File

@@ -17,7 +17,8 @@ export const ChartConstDefaults = {
};
export const ChartLimits = {
AUTODETECT_CHART_LIMIT: 10, // limit for auto-detecting charts, to avoid too many charts
AUTODETECT_CHART_LIMIT: 10, // limit for auto-detecting charts, to avoid too many charts (after APPLY_LIMIT_AFTER_ROWS rows)
AUTODETECT_CHART_TOTAL_LIMIT: 32, // limit for auto-detecting charts, to avoid too many charts (for first APPLY_LIMIT_AFTER_ROWS rows)
AUTODETECT_MEASURES_LIMIT: 10, // limit for auto-detecting measures, to avoid too many measures
APPLY_LIMIT_AFTER_ROWS: 100,
MAX_DISTINCT_VALUES: 10, // max number of distinct values to keep in topDistinctValues

View File

@@ -74,73 +74,80 @@ export class ChartProcessor {
// return chart;
// }
runAutoDetectCharts(
row,
dateColumns: { [key: string]: ChartDateParsed },
numericColumnsForAutodetect: { [key: string]: number },
stringColumns: { [key: string]: string }
) {
// create charts from data, if there are no given definitions
for (const datecol in dateColumns) {
for (const groupingField of [undefined, ...Object.keys(stringColumns)]) {
let usedChart = this.chartsProcessing.find(
chart =>
!chart.isGivenDefinition &&
chart.definition.xdef.field === datecol &&
chart.definition.xdef.transformFunction?.startsWith('date:') &&
chart.definition.groupingField == groupingField
);
const processColumnType = (columns, transformTest, chartType, transformFunction) => {
for (const xcol in columns) {
for (const groupingField of [undefined, ...Object.keys(stringColumns)]) {
if (xcol == groupingField) {
continue;
}
if (
!usedChart &&
(this.rowsAdded < ChartLimits.APPLY_LIMIT_AFTER_ROWS ||
this.chartsProcessing.length < ChartLimits.AUTODETECT_CHART_LIMIT)
) {
usedChart = {
definition: {
chartType: 'timeline',
xdef: {
field: datecol,
transformFunction: 'date:day',
},
ydefs: [],
groupingField,
},
rowsAdded: 0,
bucketKeysOrdered: [],
buckets: {},
groups: [],
bucketKeyDateParsed: {},
isGivenDefinition: false,
invalidXRows: 0,
invalidYRows: {},
availableColumns: [],
validYRows: {},
topDistinctValues: {},
groupSet: new Set<string>(),
bucketKeysSet: new Set<string>(),
};
this.chartsProcessing.push(usedChart);
}
let usedChart = this.chartsProcessing.find(
chart =>
!chart.isGivenDefinition &&
chart.definition.xdef.field === xcol &&
transformTest(chart.definition.xdef.transformFunction) &&
chart.definition.groupingField == groupingField
);
for (const [key, value] of Object.entries(numericColumnsForAutodetect)) {
// if (value == null) continue;
// if (key == datecol) continue; // skip date column itself
const existingYDef = usedChart.definition.ydefs.find(y => y.field === key);
if (
!existingYDef &&
!usedChart &&
(this.rowsAdded < ChartLimits.APPLY_LIMIT_AFTER_ROWS ||
usedChart.definition.ydefs.length < ChartLimits.AUTODETECT_MEASURES_LIMIT)
this.chartsProcessing.length < ChartLimits.AUTODETECT_CHART_LIMIT)
) {
const newYDef: ChartYFieldDefinition = {
field: key,
aggregateFunction: 'sum',
usedChart = {
definition: {
chartType,
xdef: {
field: xcol,
transformFunction,
},
ydefs: [],
groupingField,
},
rowsAdded: 0,
bucketKeysOrdered: [],
buckets: {},
groups: [],
bucketKeyDateParsed: {},
isGivenDefinition: false,
invalidXRows: 0,
invalidYRows: {},
availableColumns: [],
validYRows: {},
topDistinctValues: {},
groupSet: new Set<string>(),
bucketKeysSet: new Set<string>(),
};
usedChart.definition.ydefs.push(newYDef);
this.chartsProcessing.push(usedChart);
}
for (const [key, value] of Object.entries(numericColumnsForAutodetect)) {
// if (value == null) continue;
// if (key == datecol) continue; // skip date column itself
const existingYDef = usedChart.definition.ydefs.find(y => y.field === key);
if (
!existingYDef &&
(this.rowsAdded < ChartLimits.APPLY_LIMIT_AFTER_ROWS ||
usedChart.definition.ydefs.length < ChartLimits.AUTODETECT_MEASURES_LIMIT)
) {
const newYDef: ChartYFieldDefinition = {
field: key,
aggregateFunction: 'sum',
};
usedChart.definition.ydefs.push(newYDef);
}
}
}
}
}
};
processColumnType(dateColumns, transform => transform?.startsWith('date:'), 'timeline', 'date:day');
processColumnType(stringColumns, transform => transform == 'identity', 'bar', 'identity');
}
addRow(row: any) {
@@ -203,7 +210,7 @@ export class ChartProcessor {
// const sortedNumericColumnns = Object.keys(numericColumns).sort();
if (this.autoDetectCharts) {
this.runAutoDetectCharts(row, dateColumns, numericColumnsForAutodetect, stringColumns);
this.runAutoDetectCharts(dateColumns, numericColumnsForAutodetect, stringColumns);
}
// apply on all charts with this date column

View File

@@ -116,8 +116,8 @@ describe('Chart processor', () => {
const processor = new ChartProcessor();
processor.addRows(...DS1.slice(0, 3));
processor.finalize();
expect(processor.charts.length).toEqual(2);
const chart = processor.charts.find(x => !x.definition.groupingField);
expect(processor.charts.length).toEqual(3);
const chart = processor.charts.find(x => !x.definition.groupingField && x.definition.xdef.field === 'timestamp');
expect(chart.definition.xdef.transformFunction).toEqual('date:day');
expect(chart.definition.ydefs).toEqual([
expect.objectContaining({
@@ -130,8 +130,8 @@ describe('Chart processor', () => {
const processor = new ChartProcessor();
processor.addRows(...DS1.slice(0, 4));
processor.finalize();
expect(processor.charts.length).toEqual(2);
const chart = processor.charts.find(x => !x.definition.groupingField);
expect(processor.charts.length).toEqual(3);
const chart = processor.charts.find(x => !x.definition.groupingField && x.definition.xdef.field === 'timestamp');
expect(chart.definition.xdef.transformFunction).toEqual('date:month');
expect(chart.bucketKeysOrdered).toEqual([
'2023-10',