diff --git a/packages/datalib/src/chartDefinitions.ts b/packages/datalib/src/chartDefinitions.ts index c3d4560dd..0dd7d0913 100644 --- a/packages/datalib/src/chartDefinitions.ts +++ b/packages/datalib/src/chartDefinitions.ts @@ -82,7 +82,7 @@ export interface ProcessedChart { buckets: { [key: string]: any }; // key is the bucket key, value is aggregated data bucketKeysOrdered: string[]; bucketKeysSet: Set; - bucketKeyDateParsed: { [key: string]: ChartDateParsed }; // key is the bucket key, value is parsed date + bucketKeyDateParsed: { [key: string]: ChartDateParsed }; // key is the bucket key (without group::), value is parsed date isGivenDefinition: boolean; // true if the chart was created with a given definition, false if it was created from raw data invalidXRows: number; invalidYRows: { [key: string]: number }; // key is the y field, value is the count of invalid rows diff --git a/packages/datalib/src/chartProcessor.ts b/packages/datalib/src/chartProcessor.ts index 6ee51873e..b12e88b8b 100644 --- a/packages/datalib/src/chartProcessor.ts +++ b/packages/datalib/src/chartProcessor.ts @@ -73,61 +73,71 @@ export class ChartProcessor { // this.chartsBySignature[signature] = chart; // return chart; // } - runAutoDetectCharts(row, dateColumns: { [key: string]: ChartDateParsed }) { + runAutoDetectCharts( + row, + dateColumns: { [key: string]: ChartDateParsed }, + numericColumnsForAutodetect: { [key: string]: number }, + stringColumns: { [key: string]: string } + ) { // create charts from data, if there are no given definitions for (const datecol in dateColumns) { - let usedChart = this.chartsProcessing.find( - chart => - !chart.isGivenDefinition && - chart.definition.xdef.field === datecol && - chart.definition.xdef.transformFunction?.startsWith('date:') - ); + for (const groupingField of [undefined, ...Object.keys(stringColumns)]) { + let usedChart = this.chartsProcessing.find( + chart => + !chart.isGivenDefinition && + chart.definition.xdef.field === datecol && + chart.definition.xdef.transformFunction?.startsWith('date:') && + chart.definition.groupingField == groupingField + ); - if ( - !usedChart && - (this.rowsAdded < ChartLimits.APPLY_LIMIT_AFTER_ROWS || - this.chartsProcessing.length < ChartLimits.AUTODETECT_CHART_LIMIT) - ) { - usedChart = { - definition: { - chartType: 'timeline', - xdef: { - field: datecol, - transformFunction: 'date:day', - }, - ydefs: [], - }, - rowsAdded: 0, - bucketKeysOrdered: [], - buckets: {}, - groups: [], - bucketKeyDateParsed: {}, - isGivenDefinition: false, - invalidXRows: 0, - invalidYRows: {}, - availableColumns: [], - validYRows: {}, - topDistinctValues: {}, - groupSet: new Set(), - bucketKeysSet: new Set(), - }; - this.chartsProcessing.push(usedChart); - } - - for (const [key, value] of Object.entries(row)) { - if (value == null) continue; - if (key == datecol) continue; // skip date column itself - const existingYDef = usedChart.definition.ydefs.find(y => y.field === key); if ( - !existingYDef && + !usedChart && (this.rowsAdded < ChartLimits.APPLY_LIMIT_AFTER_ROWS || - usedChart.definition.ydefs.length < ChartLimits.AUTODETECT_MEASURES_LIMIT) + this.chartsProcessing.length < ChartLimits.AUTODETECT_CHART_LIMIT) ) { - const newYDef: ChartYFieldDefinition = { - field: key, - aggregateFunction: 'sum', + usedChart = { + definition: { + chartType: 'timeline', + xdef: { + field: datecol, + transformFunction: 'date:day', + }, + ydefs: [], + groupingField, + }, + rowsAdded: 0, + bucketKeysOrdered: [], + buckets: {}, + groups: [], + bucketKeyDateParsed: {}, + isGivenDefinition: false, + invalidXRows: 0, + invalidYRows: {}, + availableColumns: [], + validYRows: {}, + topDistinctValues: {}, + groupSet: new Set(), + bucketKeysSet: new Set(), }; - usedChart.definition.ydefs.push(newYDef); + this.chartsProcessing.push(usedChart); + } + + for (const [key, value] of Object.entries(numericColumnsForAutodetect)) { + // if (value == null) continue; + // if (key == datecol) continue; // skip date column itself + + const existingYDef = usedChart.definition.ydefs.find(y => y.field === key); + if ( + !existingYDef && + (this.rowsAdded < ChartLimits.APPLY_LIMIT_AFTER_ROWS || + usedChart.definition.ydefs.length < ChartLimits.AUTODETECT_MEASURES_LIMIT) + ) { + const newYDef: ChartYFieldDefinition = { + field: key, + aggregateFunction: 'sum', + }; + usedChart.definition.ydefs.push(newYDef); + } } } } @@ -193,7 +203,7 @@ export class ChartProcessor { // const sortedNumericColumnns = Object.keys(numericColumns).sort(); if (this.autoDetectCharts) { - this.runAutoDetectCharts(row, dateColumns); + this.runAutoDetectCharts(row, dateColumns, numericColumnsForAutodetect, stringColumns); } // apply on all charts with this date column @@ -202,13 +212,7 @@ export class ChartProcessor { continue; // skip charts with errors } - this.applyRawData( - chart, - row, - dateColumns[chart.definition.xdef.field], - chart.isGivenDefinition ? numericColumns : numericColumnsForAutodetect, - stringColumns - ); + this.applyRawData(chart, row, dateColumns[chart.definition.xdef.field], numericColumns, stringColumns); if (Object.keys(chart.buckets).length > ChartLimits.CHART_FILL_LIMIT) { chart.errorMessage = `Chart has too many buckets, limit is ${ChartLimits.CHART_FILL_LIMIT}.`; diff --git a/packages/datalib/src/chartScoring.ts b/packages/datalib/src/chartScoring.ts index b4c10861d..5fffa8f12 100644 --- a/packages/datalib/src/chartScoring.ts +++ b/packages/datalib/src/chartScoring.ts @@ -3,12 +3,23 @@ import _sum from 'lodash/sum'; import { ChartLimits, ChartYFieldDefinition, ProcessedChart } from './chartDefinitions'; export function getChartScore(chart: ProcessedChart): number { + if (chart.errorMessage) { + return -1; // negative score for charts with errors + } let res = 0; res += chart.rowsAdded * 5; const ydefScores = chart.definition.ydefs.map(yField => getChartYFieldScore(chart, yField)); const sorted = _sortBy(ydefScores).reverse(); res += _sum(sorted.slice(0, ChartLimits.AUTODETECT_MEASURES_LIMIT)); + + if (chart.groupSet?.size >= 2 && chart.groupSet?.size <= 6) { + res += 50; // bonus for nice grouping + } + if (chart.groupSet?.size == 1) { + res -= 20; // penalty for single group + } + return res; } diff --git a/packages/datalib/src/chartTools.ts b/packages/datalib/src/chartTools.ts index e9eedef9c..7712fd1e9 100644 --- a/packages/datalib/src/chartTools.ts +++ b/packages/datalib/src/chartTools.ts @@ -3,6 +3,7 @@ import _sumBy from 'lodash/sumBy'; import { ChartConstDefaults, ChartDateParsed, + ChartDefinition, ChartLimits, ChartXTransformFunction, ProcessedChart, @@ -326,6 +327,14 @@ export function compareChartDatesParsed( } } +function extractBucketKeyWithoutGroup(bucketKey: string, definition: ChartDefinition): string { + if (definition.groupingField) { + const [_group, key] = bucketKey.split('::', 2); + return key || bucketKey; + } + return bucketKey; +} + function getParentDateBucketKey( bucketKey: string, transform: ChartXTransformFunction, @@ -424,21 +433,21 @@ function createParentChartAggregation(chart: ProcessedChart): ProcessedChart | n res.bucketKeysSet.add(getParentDateBucketKey(bucketKey, chart.definition.xdef.transformFunction, false)); } - for (const [bucketKey, bucketValues] of Object.entries(chart.buckets)) { - const parentKey = getParentDateBucketKey( - bucketKey, + for (const [groupedBucketKey, bucketValues] of Object.entries(chart.buckets)) { + const groupedParentKey = getParentDateBucketKey( + groupedBucketKey, chart.definition.xdef.transformFunction, !!chart.definition.groupingField ); - if (!parentKey) { + if (!groupedParentKey) { // skip if the bucket is already a parent continue; } - res.bucketKeyDateParsed[parentKey] = getParentKeyParsed( - chart.bucketKeyDateParsed[bucketKey], + res.bucketKeyDateParsed[extractBucketKeyWithoutGroup(groupedParentKey, chart.definition)] = getParentKeyParsed( + chart.bucketKeyDateParsed[extractBucketKeyWithoutGroup(groupedBucketKey, chart.definition)], chart.definition.xdef.transformFunction ); - aggregateChartNumericValuesFromChild(res, parentKey, bucketValues); + aggregateChartNumericValuesFromChild(res, groupedParentKey, bucketValues); } const bucketKeys = Object.keys(res.buckets).sort(); diff --git a/packages/datalib/src/tests/chartProcessor.test.ts b/packages/datalib/src/tests/chartProcessor.test.ts index 8b7968c72..7366a3930 100644 --- a/packages/datalib/src/tests/chartProcessor.test.ts +++ b/packages/datalib/src/tests/chartProcessor.test.ts @@ -116,8 +116,8 @@ describe('Chart processor', () => { const processor = new ChartProcessor(); processor.addRows(...DS1.slice(0, 3)); processor.finalize(); - expect(processor.charts.length).toEqual(1); - const chart = processor.charts[0]; + expect(processor.charts.length).toEqual(2); + const chart = processor.charts.find(x => !x.definition.groupingField); expect(chart.definition.xdef.transformFunction).toEqual('date:day'); expect(chart.definition.ydefs).toEqual([ expect.objectContaining({ @@ -130,8 +130,8 @@ describe('Chart processor', () => { const processor = new ChartProcessor(); processor.addRows(...DS1.slice(0, 4)); processor.finalize(); - expect(processor.charts.length).toEqual(1); - const chart = processor.charts[0]; + expect(processor.charts.length).toEqual(2); + const chart = processor.charts.find(x => !x.definition.groupingField); expect(chart.definition.xdef.transformFunction).toEqual('date:month'); expect(chart.bucketKeysOrdered).toEqual([ '2023-10', @@ -393,8 +393,7 @@ describe('Chart processor', () => { expect(chart.definition.xdef.transformFunction).toEqual('date:day'); // console.log(getChartDebugPrint(processor.charts[0])); - - + // expect(chart.definition.xdef.transformFunction).toEqual('date:day'); // expect(chart.definition.ydefs).toEqual([ // expect.objectContaining({