import {
  ChartAvailableColumn,
  ChartDateParsed,
  ChartDefinition,
  ChartLimits,
  ProcessedChart,
} from './chartDefinitions';
import _sortBy from 'lodash/sortBy';
import _sum from 'lodash/sum';
import {
  aggregateChartNumericValuesFromSource,
  autoAggregateCompactTimelineChart,
  computeChartBucketCardinality,
  computeChartBucketKey,
  fillChartTimelineBuckets,
  tryParseChartDate,
} from './chartTools';
import { getChartScore, getChartYFieldScore } from './chartScoring';

/**
 * Builds chart data incrementally from rows passed to `addRow` / `addRows`.
 *
 * When constructed with chart definitions, only those charts are filled.
 * When constructed without any definition, charts are auto-detected: one line
 * chart per column whose value parses as a date, with the other non-null
 * columns of the row registered as candidate measures.
 *
 * Call `finalize()` once all rows were added; results are then available in
 * `charts` and `availableColumns`.
 */
export class ChartProcessor {
  /** Charts being filled while rows are added. */
  chartsProcessing: ProcessedChart[] = [];
  /** Finished charts, populated by `finalize()`. */
  charts: ProcessedChart[] = [];
  /** Every column name seen in any added row, keyed by field name. */
  availableColumnsDict: { [field: string]: ChartAvailableColumn } = {};
  /** Values of `availableColumnsDict`, populated by `finalize()`. */
  availableColumns: ChartAvailableColumn[] = [];
  /** True when no definitions were given, i.e. charts are derived from the data. */
  autoDetectCharts = false;
  /** Number of rows passed to `addRow` so far. */
  rowsAdded = 0;
  /** Not assigned anywhere in this class. */
  errorMessage?: string;

  /**
   * @param givenDefinitions Chart definitions to fill. When empty, charts are auto-detected.
   */
  constructor(public givenDefinitions: ChartDefinition[] = []) {
    for (const definition of givenDefinitions) {
      this.chartsProcessing.push({
        definition,
        rowsAdded: 0,
        bucketKeysOrdered: [],
        buckets: {},
        bucketKeyDateParsed: {},
        isGivenDefinition: true,
        invalidXRows: 0,
        invalidYRows: {},
        availableColumns: [],
        validYRows: {},
        topDistinctValues: {},
      });
    }
    this.autoDetectCharts = this.givenDefinitions.length == 0;
  }

  // findOrCreateChart(definition: ChartDefinition, isGivenDefinition: boolean): ProcessedChart {
  //   const signatureItems = [
  //     definition.chartType,
  //     definition.xdef.field,
  //     definition.xdef.transformFunction,
  //     definition.ydefs.map(y => y.field).join(','),
  //   ];
  //   const signature = signatureItems.join('::');
  //   if (this.chartsBySignature[signature]) {
  //     return this.chartsBySignature[signature];
  //   }
  //   const chart: ProcessedChart = {
  //     definition,
  //     rowsAdded: 0,
  //     bucketKeysOrdered: [],
  //     buckets: {},
  //     bucketKeyDateParsed: {},
  //     isGivenDefinition,
  //   };
  //   this.chartsBySignature[signature] = chart;
  //   return chart;
  // }

  /**
   * Classifies the columns of one row, extends the auto-detected charts when
   * auto-detection is on, and feeds the row into every chart that has no error.
   *
   * @param row Plain object mapping column names to values.
   */
  addRow(row: any) {
    const dateColumns: { [key: string]: ChartDateParsed } = {};
    const numericColumns: { [key: string]: number } = {};
    const numericColumnsForAutodetect: { [key: string]: number } = {};
    const stringColumns: { [key: string]: string } = {};

    for (const [key, value] of Object.entries(row)) {
      const number: number = typeof value == 'string' ? Number(value) : typeof value == 'number' ? value : NaN;

      this.availableColumnsDict[key] = {
        field: key,
      };

      const keyLower = key.toLowerCase();
      const keyIsId = keyLower.endsWith('_id') || keyLower == 'id' || key.endsWith('Id');

      // a value is classified as date first, then as number, then as short string
      const parsedDate = tryParseChartDate(value);
      if (parsedDate) {
        dateColumns[key] = parsedDate;
        continue;
      }

      if (!isNaN(number) && isFinite(number)) {
        numericColumns[key] = number;
        if (!keyIsId) {
          numericColumnsForAutodetect[key] = number; // for auto-detecting charts
        }
        continue;
      }

      if (typeof value === 'string' && isNaN(number) && value.length < 100) {
        stringColumns[key] = value;
      }
    }

    // const sortedNumericColumnns = Object.keys(numericColumns).sort();

    if (this.autoDetectCharts) {
      // create charts from data, if there are no given definitions
      for (const datecol in dateColumns) {
        let usedChart = this.chartsProcessing.find(
          chart =>
            !chart.isGivenDefinition &&
            chart.definition.xdef.field === datecol &&
            chart.definition.xdef.transformFunction?.startsWith('date:')
        );

        if (
          !usedChart &&
          (this.rowsAdded < ChartLimits.APPLY_LIMIT_AFTER_ROWS ||
            this.chartsProcessing.length < ChartLimits.AUTODETECT_CHART_LIMIT)
        ) {
          usedChart = {
            definition: {
              chartType: 'line',
              xdef: {
                field: datecol,
                transformFunction: 'date:day',
              },
              ydefs: [],
            },
            rowsAdded: 0,
            bucketKeysOrdered: [],
            buckets: {},
            bucketKeyDateParsed: {},
            isGivenDefinition: false,
            invalidXRows: 0,
            invalidYRows: {},
            availableColumns: [],
            validYRows: {},
            topDistinctValues: {},
          };
          this.chartsProcessing.push(usedChart);
        }

        if (!usedChart) {
          // No chart exists for this date column and the limits forbid creating
          // a new one; previously this fell through and dereferenced undefined.
          continue;
        }

        for (const [key, value] of Object.entries(row)) {
          if (value == null) continue;
          if (key == datecol) continue; // skip date column itself

          const existingYDef = usedChart.definition.ydefs.find(y => y.field === key);
          if (
            !existingYDef &&
            (this.rowsAdded < ChartLimits.APPLY_LIMIT_AFTER_ROWS ||
              usedChart.definition.ydefs.length < ChartLimits.AUTODETECT_MEASURES_LIMIT)
          ) {
            usedChart.definition.ydefs.push({
              field: key,
              aggregateFunction: 'sum',
            });
          }
        }
      }
    }

    // apply the row on all charts
    for (const chart of this.chartsProcessing) {
      if (chart.errorMessage) {
        continue; // skip charts with errors
      }
      this.applyRawData(
        chart,
        row,
        dateColumns[chart.definition.xdef.field],
        // ID-like columns are used as measures only for explicitly given definitions
        chart.isGivenDefinition ? numericColumns : numericColumnsForAutodetect,
        stringColumns
      );

      if (Object.keys(chart.buckets).length > ChartLimits.CHART_FILL_LIMIT) {
        chart.errorMessage = `Chart has too many buckets, limit is ${ChartLimits.CHART_FILL_LIMIT}.`;
      }
    }

    for (let i = 0; i < this.chartsProcessing.length; i++) {
      if (this.chartsProcessing[i].errorMessage) {
        continue; // skip charts with errors
      }
      // the helper may return a different chart object, so the slot is replaced
      this.chartsProcessing[i] = autoAggregateCompactTimelineChart(this.chartsProcessing[i]);
    }

    this.rowsAdded += 1;
    if (this.rowsAdded == ChartLimits.APPLY_LIMIT_AFTER_ROWS) {
      this.applyLimitsOnCharts();
    }
  }

  /**
   * Enforces the auto-detection limits. When there are more auto-detected
   * charts than `ChartLimits.AUTODETECT_CHART_LIMIT`, keeps only the
   * best-scoring ones and trims the measures of every auto-detected chart to
   * the best-scoring `ChartLimits.AUTODETECT_MEASURES_LIMIT`.
   * Charts with a given definition are always kept.
   */
  applyLimitsOnCharts() {
    const autodetectProcessingCharts = this.chartsProcessing.filter(chart => !chart.isGivenDefinition);
    if (autodetectProcessingCharts.length <= ChartLimits.AUTODETECT_CHART_LIMIT) {
      return;
    }

    // Sort by score first and cut afterwards. Cutting before sorting (as the
    // previous version did, and on the unfiltered list) kept the first N
    // charts instead of the best N and could duplicate given-definition charts.
    const newAutodetectProcessingCharts = _sortBy(
      autodetectProcessingCharts,
      chart => -getChartScore(chart)
    ).slice(0, ChartLimits.AUTODETECT_CHART_LIMIT);

    for (const chart of autodetectProcessingCharts) {
      chart.definition.ydefs = _sortBy(chart.definition.ydefs, yfield => -getChartYFieldScore(chart, yfield)).slice(
        0,
        ChartLimits.AUTODETECT_MEASURES_LIMIT
      );
    }

    this.chartsProcessing = [
      ...this.chartsProcessing.filter(chart => chart.isGivenDefinition),
      ...newAutodetectProcessingCharts,
    ];
  }

  /**
   * Adds several rows, in order, via `addRow`.
   *
   * @param rows Rows to add.
   */
  addRows(...rows: any[]) {
    for (const row of rows) {
      this.addRow(row);
    }
  }

  /**
   * Turns the charts in progress into final charts stored in `charts`:
   * applies limits, orders bucket keys according to `xdef.sortOrder`
   * (default `'ascKeys'`), drops unusable measures from auto-detected charts
   * and groups small pie slices. Charts with an error are passed through
   * unchanged; charts without any row are dropped. Given-definition charts
   * come first, auto-detected charts follow ordered by descending score.
   */
  finalize() {
    this.applyLimitsOnCharts();
    this.availableColumns = Object.values(this.availableColumnsDict);

    for (const chart of this.chartsProcessing) {
      if (chart.errorMessage) {
        this.charts.push(chart);
        continue;
      }
      if (chart.rowsAdded == 0) {
        continue; // skip empty charts
      }

      let addedChart: ProcessedChart = chart;
      const sortOrder = chart.definition.xdef.sortOrder ?? 'ascKeys';

      if (sortOrder == 'ascKeys' || sortOrder == 'descKeys') {
        // optional chaining: transformFunction may be unset (addRow guards it the same way)
        if (chart.definition.chartType == 'line' && chart.definition.xdef.transformFunction?.startsWith('date:')) {
          addedChart = autoAggregateCompactTimelineChart(addedChart);
          fillChartTimelineBuckets(addedChart);
        }
        if (addedChart.errorMessage) {
          this.charts.push(addedChart);
          continue;
        }
        addedChart.bucketKeysOrdered = _sortBy(Object.keys(addedChart.buckets));
        if (sortOrder == 'descKeys') {
          addedChart.bucketKeysOrdered.reverse();
        }
      } else if (sortOrder == 'ascValues' || sortOrder == 'descValues') {
        const sortedChart = addedChart;
        addedChart.bucketKeysOrdered = _sortBy(Object.keys(sortedChart.buckets), key =>
          computeChartBucketCardinality(sortedChart.buckets[key])
        );
        if (sortOrder == 'descValues') {
          addedChart.bucketKeysOrdered.reverse();
        }
      }
      // sortOrder 'natural' keeps the insertion order collected in applyRawData

      if (!addedChart.isGivenDefinition) {
        // keep only measures without invalid rows and with enough valid values
        const sourceChart = addedChart;
        addedChart = {
          ...sourceChart,
          definition: {
            ...sourceChart.definition,
            ydefs: sourceChart.definition.ydefs.filter(
              y =>
                !sourceChart.invalidYRows[y.field] &&
                sourceChart.validYRows[y.field] / sourceChart.rowsAdded >= ChartLimits.VALID_VALUE_RATIO_LIMIT
            ),
          },
        };
      }

      addedChart.availableColumns = this.availableColumns;
      this.charts.push(addedChart);

      // mutates the chart object that was just pushed
      this.groupPieOtherBuckets(addedChart);
    }

    this.charts = [
      ...this.charts.filter(x => x.isGivenDefinition),
      ..._sortBy(
        this.charts.filter(x => !x.isGivenDefinition),
        chart => -getChartScore(chart)
      ),
    ];
  }

  /**
   * For pie charts, folds small buckets into a single `'Other'` bucket.
   * A bucket is folded when its share of the total cardinality is below the
   * ratio limit, or when it is not among the `countLimit` largest remaining
   * buckets. A limit of 0 disables the respective rule. Mutates
   * `chart.buckets` and `chart.bucketKeysOrdered`; does nothing for other
   * chart types.
   *
   * @param chart Chart to process in place.
   */
  groupPieOtherBuckets(chart: ProcessedChart) {
    if (chart.definition.chartType !== 'pie') {
      return; // only for pie charts
    }

    const ratioLimit = chart.definition.pieRatioLimit ?? ChartLimits.PIE_RATIO_LIMIT;
    const countLimit = chart.definition.pieCountLimit ?? ChartLimits.PIE_COUNT_LIMIT;
    if (ratioLimit == 0 && countLimit == 0) {
      return; // no grouping if limit is 0
    }

    const otherBucket: any = {};
    let newBuckets: any = {};

    // adds every field of the given bucket to the accumulated 'Other' bucket
    const addToOther = (bucket: any) => {
      for (const field in bucket) {
        otherBucket[field] = (otherBucket[field] ?? 0) + bucket[field];
      }
    };

    const cardSum = _sum(Object.values(chart.buckets).map(bucket => computeChartBucketCardinality(bucket)));
    if (cardSum == 0) {
      return; // no buckets to process
    }

    for (const [bucketKey, bucket] of Object.entries(chart.buckets)) {
      if (computeChartBucketCardinality(bucket) / cardSum < ratioLimit) {
        addToOther(bucket);
      } else {
        newBuckets[bucketKey] = bucket;
      }
    }

    // countLimit 0 means "no count limit"; without this guard a zero count
    // limit combined with a non-zero ratio limit folded every bucket into 'Other'
    if (countLimit > 0 && Object.keys(newBuckets).length > countLimit) {
      const sortedBucketKeys = _sortBy(
        Object.entries(newBuckets),
        ([, bucket]) => -computeChartBucketCardinality(bucket)
      ).map(([key]) => key);

      const newBuckets2: any = {};
      sortedBucketKeys.forEach((key, index) => {
        if (index < countLimit) {
          newBuckets2[key] = newBuckets[key];
        } else {
          addToOther(newBuckets[key]);
        }
      });
      newBuckets = newBuckets2;
    }

    if (Object.keys(otherBucket).length > 0) {
      if ('Other' in newBuckets) {
        // the data already has a category literally named 'Other':
        // merge it instead of silently overwriting it
        addToOther(newBuckets['Other']);
      }
      newBuckets['Other'] = otherBucket;
    }

    chart.buckets = newBuckets;
    // append 'Other' only when it is not an existing key, so it never appears twice
    const orderedKeys = chart.bucketKeysOrdered.includes('Other')
      ? chart.bucketKeysOrdered
      : [...chart.bucketKeysOrdered, 'Other'];
    chart.bucketKeysOrdered = orderedKeys.filter(key => key in newBuckets);
  }

  /**
   * Adds one row to one chart: computes the bucket key for the row, tracks
   * `minX` / `maxX`, creates the bucket when needed and aggregates the numeric
   * values into it. Rows with a null x value or without a bucket key are
   * ignored; rows whose x value is not a date on a `date:` chart are counted
   * in `invalidXRows`.
   *
   * @param chart Chart to update in place.
   * @param row Source row.
   * @param dateParsed Parsed date of the chart's x column, if it parsed as a date.
   * @param numericColumns Numeric values of the row keyed by column name.
   * @param stringColumns String values of the row keyed by column name (currently unused here).
   */
  applyRawData(
    chart: ProcessedChart,
    row: any,
    dateParsed: ChartDateParsed,
    numericColumns: { [key: string]: number },
    stringColumns: { [key: string]: string }
  ) {
    if (chart.definition.xdef == null) {
      return;
    }
    if (row[chart.definition.xdef.field] == null) {
      return;
    }

    // optional chaining: transformFunction may be unset (addRow guards it the same way)
    if (dateParsed == null && chart.definition.xdef.transformFunction?.startsWith('date:')) {
      chart.invalidXRows += 1;
      return; // skip if date is invalid
    }

    const [bucketKey, bucketKeyParsed] = computeChartBucketKey(dateParsed, chart, row);
    if (!bucketKey) {
      return; // skip if no bucket key
    }

    if (bucketKeyParsed) {
      chart.bucketKeyDateParsed[bucketKey] = bucketKeyParsed;
    }

    if (chart.minX == null || bucketKey < chart.minX) {
      chart.minX = bucketKey;
    }
    if (chart.maxX == null || bucketKey > chart.maxX) {
      chart.maxX = bucketKey;
    }

    if (!chart.buckets[bucketKey]) {
      chart.buckets[bucketKey] = {};
      if (chart.definition.xdef.sortOrder == 'natural') {
        // natural order = order of first appearance
        chart.bucketKeysOrdered.push(bucketKey);
      }
    }

    aggregateChartNumericValuesFromSource(chart, bucketKey, numericColumns, row);
    chart.rowsAdded += 1;
  }
}