SYNC: split too different ydefs

This commit is contained in:
Jan Prochazka
2025-07-02 13:11:40 +02:00
committed by Diflow
parent b7469062a1
commit 8028aafeff
3 changed files with 111 additions and 14 deletions

View File

@@ -8,12 +8,17 @@ import {
} from './chartDefinitions';
import _sortBy from 'lodash/sortBy';
import _sum from 'lodash/sum';
import _zipObject from 'lodash/zipObject';
import _mapValues from 'lodash/mapValues';
import _pick from 'lodash/pick';
import {
aggregateChartNumericValuesFromSource,
autoAggregateCompactTimelineChart,
chartsHaveSimilarRange,
computeChartBucketCardinality,
computeChartBucketKey,
fillChartTimelineBuckets,
getChartYRange,
runTransformFunction,
tryParseChartDate,
} from './chartTools';
@@ -105,7 +110,12 @@ export class ChartProcessor {
field: xcol,
transformFunction,
},
ydefs: [],
ydefs: [
{
field: '__count',
aggregateFunction: 'count',
},
],
groupingField,
},
rowsAdded: 0,
@@ -125,6 +135,10 @@ export class ChartProcessor {
this.chartsProcessing.push(usedChart);
}
if (!usedChart) {
continue; // chart not created - probably too many charts already
}
for (const [key, value] of Object.entries(numericColumnsForAutodetect)) {
// if (value == null) continue;
// if (key == datecol) continue; // skip date column itself
@@ -270,7 +284,48 @@ export class ChartProcessor {
}
}
/**
 * Breaks every auto-detected chart into several charts so that a single chart only
 * combines y-definitions whose maximum bucket values are considered similar by
 * `chartsHaveSimilarRange`. Charts flagged with `isGivenDefinition` are passed
 * through unchanged. The result replaces `this.chartsProcessing`.
 */
splitChartsByYDefs() {
  const splitCharts: ProcessedChart[] = [];

  for (const chart of this.chartsProcessing) {
    // Explicitly defined charts are never split.
    if (chart.isGivenDefinition) {
      splitCharts.push(chart);
      continue;
    }

    const sourceYDefs = chart.definition.ydefs;

    // Maximum observed value of each y-field, looked up by field name.
    const maxValues = sourceYDefs.map(ydef => getChartYRange(chart, ydef).max);
    const maxByField = _zipObject(
      sourceYDefs.map(ydef => ydef.field),
      maxValues
    );

    // Field names that were not placed into any partial chart yet (original order).
    let pending = sourceYDefs.map(ydef => ydef.field);

    while (pending.length > 0) {
      // The first pending field leads a new group; every remaining field is compared against it only.
      const [leader, ...rest] = pending;
      const companions = rest.filter(candidate => chartsHaveSimilarRange(maxByField[leader], maxByField[candidate]));
      const groupFields = [leader, ...companions];

      // Shallow copy of the source chart, narrowed to the fields of this group.
      const groupChart: ProcessedChart = {
        ...chart,
        definition: {
          ...chart.definition,
          ydefs: groupFields.map(
            name => sourceYDefs.find(yd => yd.field === name) as ChartYFieldDefinition
          ),
        },
        buckets: _mapValues(chart.buckets, bucket => _pick(bucket, groupFields)),
      };
      splitCharts.push(groupChart);

      // Fields grouped with the leader are done; the others wait for the next round.
      pending = rest.filter(name => !companions.includes(name));
    }
  }

  this.chartsProcessing = splitCharts;
}
finalize() {
this.splitChartsByYDefs();
this.applyLimitsOnCharts();
this.availableColumns = Object.values(this.availableColumnsDict);
for (const chart of this.chartsProcessing) {
@@ -294,7 +349,6 @@ export class ChartProcessor {
this.charts.push(addedChart);
continue;
}
('');
addedChart.bucketKeysOrdered = _sortBy([...addedChart.bucketKeysSet]);
if (sortOrder == 'descKeys') {
addedChart.bucketKeysOrdered.reverse();
@@ -347,7 +401,7 @@ export class ChartProcessor {
this.charts = [
...this.charts.filter(x => x.isGivenDefinition),
..._sortBy(
this.charts.filter(x => !x.isGivenDefinition),
this.charts.filter(x => !x.isGivenDefinition && !x.errorMessage && x.definition.ydefs.length > 0),
chart => -getChartScore(chart)
),
];

View File

@@ -6,13 +6,16 @@ import {
ChartDefinition,
ChartLimits,
ChartXTransformFunction,
ChartYFieldDefinition,
ProcessedChart,
} from './chartDefinitions';
import { addMinutes, addHours, addDays, addMonths, addYears } from 'date-fns';
export function getChartDebugPrint(chart: ProcessedChart) {
let res = '';
res += `Chart: ${chart.definition.chartType} (${chart.definition.xdef.transformFunction})\n`;
res += `Chart: ${chart.definition.chartType} (${chart.definition.xdef.transformFunction}): (${chart.definition.ydefs
.map(yd => yd.field)
.join(', ')})\n`;
for (const key of chart.bucketKeysOrdered) {
res += `${key}: ${_toPairs(chart.buckets[key])
.map(([k, v]) => `${k}=${v}`)
@@ -490,7 +493,7 @@ export function aggregateChartNumericValuesFromSource(
row: any
) {
for (const ydef of chart.definition.ydefs) {
if (numericColumns[ydef.field] == null) {
if (numericColumns[ydef.field] == null && ydef.field != '__count') {
if (row[ydef.field]) {
chart.invalidYRows[ydef.field] = (chart.invalidYRows[ydef.field] || 0) + 1; // increment invalid row count if the field is not numeric
}
@@ -639,3 +642,32 @@ export function fillChartTimelineBuckets(chart: ProcessedChart) {
/**
 * Adds up the values stored under all keys of a bucket.
 * A null or undefined bucket is treated as an empty object.
 *
 * @param bucket - object mapping field names to their values
 * @returns the result of lodash `_sumBy` over the bucket's values
 */
export function computeChartBucketCardinality(bucket: { [key: string]: any }): number {
  const safeBucket = bucket ?? {};
  const fieldNames = Object.keys(safeBucket);
  return _sumBy(fieldNames, name => safeBucket[name]);
}
/**
 * Finds the smallest and largest value of one y-field across all buckets of a chart.
 * Buckets where the field is null or undefined are ignored.
 *
 * @param chart - chart whose `buckets` are scanned
 * @param ydef - y-definition; only its `field` name is used
 * @returns `{ min, max }`; both are `null` when no bucket holds a value for the field
 */
export function getChartYRange(chart: ProcessedChart, ydef: ChartYFieldDefinition) {
  let lowest = null;
  let highest = null;

  for (const bucket of Object.values(chart.buckets)) {
    const current = bucket[ydef.field];
    if (current == null) {
      continue; // field not present in this bucket
    }
    lowest = lowest === null || current < lowest ? current : lowest;
    highest = highest === null || current > highest ? current : highest;
  }

  return { min: lowest, max: highest };
}
/**
 * Decides whether two range values are close enough to be shown on one chart.
 * The values must be both negative or both positive, and their difference,
 * measured relative to `range1`, must be below 50 %.
 *
 * @param range1 - reference value; the relative difference is divided by its magnitude
 * @param range2 - value compared against the reference
 * @returns `true` when the values are similar, `false` otherwise (including when either is zero)
 */
export function chartsHaveSimilarRange(range1: number, range2: number) {
  const bothNegative = range1 < 0 && range2 < 0;
  const bothPositive = range1 > 0 && range2 > 0;
  if (!bothNegative && !bothPositive) {
    return false;
  }
  const relativeDifference = Math.abs(range1 - range2) / Math.abs(range1);
  return relativeDifference < 0.5;
}

View File

@@ -55,7 +55,7 @@ const DS2 = [
{
ts1: '2023-10-03T07:10:00Z',
ts2: '2024-10-03T07:10:00Z',
price1: '13',
price1: '22',
price2: '24',
},
{
@@ -112,11 +112,12 @@ const DS4 = [
];
describe('Chart processor', () => {
test.only('Simple by day test, autodetected', () => {
test('Simple by day test, autodetected', () => {
const processor = new ChartProcessor();
processor.addRows(...DS1.slice(0, 3));
processor.finalize();
expect(processor.charts.length).toEqual(3);
// console.log(getChartDebugPrint(processor.charts[0]));
expect(processor.charts.length).toEqual(6);
const chart1 = processor.charts.find(x => !x.definition.groupingField && x.definition.xdef.field === 'timestamp');
expect(chart1.definition.xdef.transformFunction).toEqual('date:day');
expect(chart1.definition.ydefs).toEqual([
@@ -126,7 +127,7 @@ describe('Chart processor', () => {
]);
expect(chart1.bucketKeysOrdered).toEqual(['2023-10-01', '2023-10-02', '2023-10-03']);
const chart2 = processor.charts.find(x => x.definition.groupingField);
const chart2 = processor.charts.find(x => x.definition.groupingField && x.definition.xdef.field === 'timestamp');
expect(chart2.definition.xdef.transformFunction).toEqual('date:day');
expect(chart2.bucketKeysOrdered).toEqual(['2023-10-01', '2023-10-02', '2023-10-03']);
expect(chart2.definition.groupingField).toEqual('category');
@@ -134,13 +135,23 @@ describe('Chart processor', () => {
const chart3 = processor.charts.find(x => x.definition.xdef.field === 'category');
expect(chart3.bucketKeysOrdered).toEqual(['A', 'B']);
expect(chart3.definition.groupingField).toBeUndefined();
const countCharts = processor.charts.filter(
x => x.definition.ydefs.length == 1 && x.definition.ydefs[0].field == '__count'
);
expect(countCharts.length).toEqual(3);
});
test('By month grouped, autedetected', () => {
const processor = new ChartProcessor();
processor.addRows(...DS1.slice(0, 4));
processor.finalize();
expect(processor.charts.length).toEqual(3);
const chart = processor.charts.find(x => !x.definition.groupingField && x.definition.xdef.field === 'timestamp');
expect(processor.charts.length).toEqual(6);
const chart = processor.charts.find(
x =>
!x.definition.groupingField &&
x.definition.xdef.field === 'timestamp' &&
!x.definition.ydefs.find(y => y.field === '__count')
);
expect(chart.definition.xdef.transformFunction).toEqual('date:month');
expect(chart.bucketKeysOrdered).toEqual([
'2023-10',
@@ -210,7 +221,7 @@ describe('Chart processor', () => {
const processor = new ChartProcessor();
processor.addRows(...DS2);
processor.finalize();
expect(processor.charts.length).toEqual(2);
expect(processor.charts.length).toEqual(4);
expect(processor.charts[0].definition).toEqual(
expect.objectContaining({
xdef: expect.objectContaining({
@@ -253,8 +264,8 @@ describe('Chart processor', () => {
const processor = new ChartProcessor();
processor.addRows(...DS3);
processor.finalize();
expect(processor.charts.length).toEqual(1);
const chart = processor.charts[0];
expect(processor.charts.length).toEqual(2);
const chart = processor.charts.find(x => !x.definition.ydefs.find(y => y.field === '__count'));
expect(chart.definition.xdef.transformFunction).toEqual('date:day');
expect(chart.definition.ydefs).toEqual([
expect.objectContaining({