SYNC: autodetect - with grouping field

This commit is contained in:
Jan Prochazka
2025-07-02 10:22:41 +02:00
committed by Diflow
parent 794dd5a797
commit cd3a1bebff
5 changed files with 93 additions and 70 deletions

View File

@@ -82,7 +82,7 @@ export interface ProcessedChart {
buckets: { [key: string]: any }; // key is the bucket key, value is aggregated data
bucketKeysOrdered: string[];
bucketKeysSet: Set<string>;
bucketKeyDateParsed: { [key: string]: ChartDateParsed }; // key is the bucket key, value is parsed date
bucketKeyDateParsed: { [key: string]: ChartDateParsed }; // key is the bucket key (without group::), value is parsed date
isGivenDefinition: boolean; // true if the chart was created with a given definition, false if it was created from raw data
invalidXRows: number;
invalidYRows: { [key: string]: number }; // key is the y field, value is the count of invalid rows

View File

@@ -73,61 +73,71 @@ export class ChartProcessor {
// this.chartsBySignature[signature] = chart;
// return chart;
// }
runAutoDetectCharts(row, dateColumns: { [key: string]: ChartDateParsed }) {
runAutoDetectCharts(
row,
dateColumns: { [key: string]: ChartDateParsed },
numericColumnsForAutodetect: { [key: string]: number },
stringColumns: { [key: string]: string }
) {
// create charts from data, if there are no given definitions
for (const datecol in dateColumns) {
let usedChart = this.chartsProcessing.find(
chart =>
!chart.isGivenDefinition &&
chart.definition.xdef.field === datecol &&
chart.definition.xdef.transformFunction?.startsWith('date:')
);
for (const groupingField of [undefined, ...Object.keys(stringColumns)]) {
let usedChart = this.chartsProcessing.find(
chart =>
!chart.isGivenDefinition &&
chart.definition.xdef.field === datecol &&
chart.definition.xdef.transformFunction?.startsWith('date:') &&
chart.definition.groupingField == groupingField
);
if (
!usedChart &&
(this.rowsAdded < ChartLimits.APPLY_LIMIT_AFTER_ROWS ||
this.chartsProcessing.length < ChartLimits.AUTODETECT_CHART_LIMIT)
) {
usedChart = {
definition: {
chartType: 'timeline',
xdef: {
field: datecol,
transformFunction: 'date:day',
},
ydefs: [],
},
rowsAdded: 0,
bucketKeysOrdered: [],
buckets: {},
groups: [],
bucketKeyDateParsed: {},
isGivenDefinition: false,
invalidXRows: 0,
invalidYRows: {},
availableColumns: [],
validYRows: {},
topDistinctValues: {},
groupSet: new Set<string>(),
bucketKeysSet: new Set<string>(),
};
this.chartsProcessing.push(usedChart);
}
for (const [key, value] of Object.entries(row)) {
if (value == null) continue;
if (key == datecol) continue; // skip date column itself
const existingYDef = usedChart.definition.ydefs.find(y => y.field === key);
if (
!existingYDef &&
!usedChart &&
(this.rowsAdded < ChartLimits.APPLY_LIMIT_AFTER_ROWS ||
usedChart.definition.ydefs.length < ChartLimits.AUTODETECT_MEASURES_LIMIT)
this.chartsProcessing.length < ChartLimits.AUTODETECT_CHART_LIMIT)
) {
const newYDef: ChartYFieldDefinition = {
field: key,
aggregateFunction: 'sum',
usedChart = {
definition: {
chartType: 'timeline',
xdef: {
field: datecol,
transformFunction: 'date:day',
},
ydefs: [],
groupingField,
},
rowsAdded: 0,
bucketKeysOrdered: [],
buckets: {},
groups: [],
bucketKeyDateParsed: {},
isGivenDefinition: false,
invalidXRows: 0,
invalidYRows: {},
availableColumns: [],
validYRows: {},
topDistinctValues: {},
groupSet: new Set<string>(),
bucketKeysSet: new Set<string>(),
};
usedChart.definition.ydefs.push(newYDef);
this.chartsProcessing.push(usedChart);
}
for (const [key, value] of Object.entries(numericColumnsForAutodetect)) {
// if (value == null) continue;
// if (key == datecol) continue; // skip date column itself
const existingYDef = usedChart.definition.ydefs.find(y => y.field === key);
if (
!existingYDef &&
(this.rowsAdded < ChartLimits.APPLY_LIMIT_AFTER_ROWS ||
usedChart.definition.ydefs.length < ChartLimits.AUTODETECT_MEASURES_LIMIT)
) {
const newYDef: ChartYFieldDefinition = {
field: key,
aggregateFunction: 'sum',
};
usedChart.definition.ydefs.push(newYDef);
}
}
}
}
@@ -193,7 +203,7 @@ export class ChartProcessor {
// const sortedNumericColumnns = Object.keys(numericColumns).sort();
if (this.autoDetectCharts) {
this.runAutoDetectCharts(row, dateColumns);
this.runAutoDetectCharts(row, dateColumns, numericColumnsForAutodetect, stringColumns);
}
// apply on all charts with this date column
@@ -202,13 +212,7 @@ export class ChartProcessor {
continue; // skip charts with errors
}
this.applyRawData(
chart,
row,
dateColumns[chart.definition.xdef.field],
chart.isGivenDefinition ? numericColumns : numericColumnsForAutodetect,
stringColumns
);
this.applyRawData(chart, row, dateColumns[chart.definition.xdef.field], numericColumns, stringColumns);
if (Object.keys(chart.buckets).length > ChartLimits.CHART_FILL_LIMIT) {
chart.errorMessage = `Chart has too many buckets, limit is ${ChartLimits.CHART_FILL_LIMIT}.`;

View File

@@ -3,12 +3,23 @@ import _sum from 'lodash/sum';
import { ChartLimits, ChartYFieldDefinition, ProcessedChart } from './chartDefinitions';
/**
 * Computes a numeric score for a processed chart.
 *
 * - A chart carrying an error message always scores -1.
 * - Every added row contributes 5 points.
 * - The highest y-field scores are added, at most
 *   `ChartLimits.AUTODETECT_MEASURES_LIMIT` of them.
 * - Having 2 to 6 groups adds 50 points; having exactly one group subtracts 20.
 *
 * @param chart - the processed chart to evaluate
 * @returns the computed score, or -1 when the chart has an error message
 */
export function getChartScore(chart: ProcessedChart): number {
  // negative score for charts with errors
  if (chart.errorMessage) return -1;

  // score each measure, order best-first and keep only the top ones
  const measureScores = chart.definition.ydefs.map(ydef => getChartYFieldScore(chart, ydef));
  const bestFirst = _sortBy(measureScores).reverse();
  const topMeasures = bestFirst.slice(0, ChartLimits.AUTODETECT_MEASURES_LIMIT);

  let score = chart.rowsAdded * 5 + _sum(topMeasures);

  const groupCount = chart.groupSet?.size;
  if (groupCount >= 2 && groupCount <= 6) {
    // bonus for nice grouping
    score += 50;
  } else if (groupCount == 1) {
    // penalty for single group
    score -= 20;
  }

  return score;
}

View File

@@ -3,6 +3,7 @@ import _sumBy from 'lodash/sumBy';
import {
ChartConstDefaults,
ChartDateParsed,
ChartDefinition,
ChartLimits,
ChartXTransformFunction,
ProcessedChart,
@@ -326,6 +327,14 @@ export function compareChartDatesParsed(
}
}
/**
 * Returns the bucket key with its group segment removed.
 *
 * When the definition has no grouping field, the key is returned unchanged.
 * Otherwise the key is split on `::` and the second segment is returned; if
 * that segment is missing or empty, the original key is returned as-is.
 *
 * @param bucketKey - bucket key, possibly carrying a leading group segment
 * @param definition - chart definition; only `groupingField` is read
 * @returns the key without its group segment, or `bucketKey` itself
 */
function extractBucketKeyWithoutGroup(bucketKey: string, definition: ChartDefinition): string {
  if (!definition.groupingField) {
    return bucketKey;
  }

  // only the first two '::'-separated segments are considered
  const segments = bucketKey.split('::', 2);
  const ungroupedKey = segments[1];
  return ungroupedKey ? ungroupedKey : bucketKey;
}
function getParentDateBucketKey(
bucketKey: string,
transform: ChartXTransformFunction,
@@ -424,21 +433,21 @@ function createParentChartAggregation(chart: ProcessedChart): ProcessedChart | n
res.bucketKeysSet.add(getParentDateBucketKey(bucketKey, chart.definition.xdef.transformFunction, false));
}
for (const [bucketKey, bucketValues] of Object.entries(chart.buckets)) {
const parentKey = getParentDateBucketKey(
bucketKey,
for (const [groupedBucketKey, bucketValues] of Object.entries(chart.buckets)) {
const groupedParentKey = getParentDateBucketKey(
groupedBucketKey,
chart.definition.xdef.transformFunction,
!!chart.definition.groupingField
);
if (!parentKey) {
if (!groupedParentKey) {
// skip if the bucket is already a parent
continue;
}
res.bucketKeyDateParsed[parentKey] = getParentKeyParsed(
chart.bucketKeyDateParsed[bucketKey],
res.bucketKeyDateParsed[extractBucketKeyWithoutGroup(groupedParentKey, chart.definition)] = getParentKeyParsed(
chart.bucketKeyDateParsed[extractBucketKeyWithoutGroup(groupedBucketKey, chart.definition)],
chart.definition.xdef.transformFunction
);
aggregateChartNumericValuesFromChild(res, parentKey, bucketValues);
aggregateChartNumericValuesFromChild(res, groupedParentKey, bucketValues);
}
const bucketKeys = Object.keys(res.buckets).sort();

View File

@@ -116,8 +116,8 @@ describe('Chart processor', () => {
const processor = new ChartProcessor();
processor.addRows(...DS1.slice(0, 3));
processor.finalize();
expect(processor.charts.length).toEqual(1);
const chart = processor.charts[0];
expect(processor.charts.length).toEqual(2);
const chart = processor.charts.find(x => !x.definition.groupingField);
expect(chart.definition.xdef.transformFunction).toEqual('date:day');
expect(chart.definition.ydefs).toEqual([
expect.objectContaining({
@@ -130,8 +130,8 @@ describe('Chart processor', () => {
const processor = new ChartProcessor();
processor.addRows(...DS1.slice(0, 4));
processor.finalize();
expect(processor.charts.length).toEqual(1);
const chart = processor.charts[0];
expect(processor.charts.length).toEqual(2);
const chart = processor.charts.find(x => !x.definition.groupingField);
expect(chart.definition.xdef.transformFunction).toEqual('date:month');
expect(chart.bucketKeysOrdered).toEqual([
'2023-10',
@@ -393,8 +393,7 @@ describe('Chart processor', () => {
expect(chart.definition.xdef.transformFunction).toEqual('date:day');
// console.log(getChartDebugPrint(processor.charts[0]));
// expect(chart.definition.xdef.transformFunction).toEqual('date:day');
// expect(chart.definition.ydefs).toEqual([
// expect.objectContaining({