SYNC: Merge pull request #4 from dbgate/feature/charts

This commit is contained in:
Jan Prochazka
2025-06-09 09:15:21 +02:00
committed by Diflow
parent 6f69205818
commit f03cffe3f8
22 changed files with 1687 additions and 122 deletions

View File

@@ -0,0 +1,84 @@
/** Chart kinds that a `ChartDefinition` can request via its `chartType` field. */
export type ChartTypeEnum = 'bar' | 'line' | 'pie' | 'polarArea';
/**
 * How the X field value of a row is turned into a bucket key.
 * 'identity' uses the raw field value as the key; the 'date:*' variants
 * truncate a parsed date to the named unit (see computeChartBucketKey).
 */
export type ChartXTransformFunction =
| 'identity'
| 'date:minute'
| 'date:hour'
| 'date:day'
| 'date:month'
| 'date:year';
/** How the numeric values of one Y field are combined inside a single bucket. */
export type ChartYAggregateFunction = 'sum' | 'first' | 'last' | 'min' | 'max' | 'count' | 'avg';
/**
 * Fallback values for the optional settings of `ChartXFieldDefinition`.
 */
export const ChartConstDefaults = {
  // Must be one of the `ChartXFieldDefinition.sortOrder` values. 'ascKeys' is the
  // same fallback ChartProcessor.finalize() applies when no sort order is given.
  sortOrder: 'ascKeys',
  windowAlign: 'end',
  windowSize: 100,
  // Maximum distance (in buckets) between the first and the last bucket before an
  // auto-detected timeline is re-aggregated to the next coarser date unit.
  parentAggregateLimit: 200,
};
/**
 * Thresholds that steer chart auto-detection and pie-chart grouping.
 */
export const ChartLimits = {
AUTODETECT_CHART_LIMIT: 10, // limit for auto-detecting charts, to avoid too many charts
AUTODETECT_MEASURES_LIMIT: 10, // limit for auto-detecting measures, to avoid too many measures
// number of processed rows at which ChartProcessor.addRow() calls applyLimitsOnCharts();
// while fewer rows have been processed, new auto-detected charts and measures are always accepted
APPLY_LIMIT_AFTER_ROWS: 100,
MAX_DISTINCT_VALUES: 10, // max number of distinct values to keep in topDistinctValues
VALID_VALUE_RATIO_LIMIT: 0.5, // limit for valid value ratio, y defs below this will not be used in auto-detect
PIE_RATIO_LIMIT: 0.05, // limit for other values in pie chart, if the value is below this, it will be grouped into "Other"
PIE_COUNT_LIMIT: 10, // limit for number of pie chart slices, if the number of slices is above this, it will be grouped into "Other"
};
/** Describes the X axis of a chart: which row field is bucketed and how. */
export interface ChartXFieldDefinition {
// name of the row property that supplies the X value
field: string;
title?: string;
// how the X value is converted into a bucket key
transformFunction: ChartXTransformFunction;
// order of bucketKeysOrdered: 'natural' keeps first-seen order, '*Keys' sorts by bucket key,
// '*Values' sorts by the bucket cardinality; ChartProcessor.finalize() falls back to 'ascKeys'
sortOrder?: 'natural' | 'ascKeys' | 'descKeys' | 'ascValues' | 'descValues';
// NOTE(review): windowAlign/windowSize are not read by the processing code in this file set —
// presumably consumed by the rendering side; confirm
windowAlign?: 'start' | 'end';
windowSize?: number;
// bucket distance at which an auto-detected timeline is re-aggregated to a coarser date unit;
// defaults to ChartConstDefaults.parentAggregateLimit
parentAggregateLimit?: number;
}
/** Describes one measure (Y series) of a chart. */
export interface ChartYFieldDefinition {
// name of the row property that supplies the numeric value
field: string;
title?: string;
// how values falling into the same bucket are combined
aggregateFunction: ChartYAggregateFunction;
}
/** Complete description of a chart: its type, one X axis definition and any number of measures. */
export interface ChartDefinition {
chartType: ChartTypeEnum;
title?: string;
pieRatioLimit?: number; // limit for pie chart, if the value is below this, it will be grouped into "Other"
pieCountLimit?: number; // limit for number of pie chart slices, if the number of slices is above this, it will be grouped into "Other"
xdef: ChartXFieldDefinition;
ydefs: ChartYFieldDefinition[];
}
/**
 * Date split into calendar components. Only `year` is mandatory; bucket keys
 * truncated to a coarser unit simply omit the finer components.
 */
export interface ChartDateParsed {
year: number;
// 1-based month (tryParseChartDate stores Date.getMonth() + 1)
month?: number;
day?: number;
hour?: number;
minute?: number;
second?: number;
// digits after the decimal point of the seconds, kept as text
fraction?: string;
}
/** A column name that was seen in at least one processed row. */
export interface ChartAvailableColumn {
field: string;
}
/** Working state and result of aggregating rows for one chart definition. */
export interface ProcessedChart {
// smallest / largest bucket key seen so far (compared as strings)
minX?: string;
maxX?: string;
// number of rows that were accepted into a bucket of this chart
rowsAdded: number;
buckets: { [key: string]: any }; // key is the bucket key, value is aggregated data
// bucket keys in display order
bucketKeysOrdered: string[];
bucketKeyDateParsed: { [key: string]: ChartDateParsed }; // key is the bucket key, value is parsed date
isGivenDefinition: boolean; // true if the chart was created with a given definition, false if it was created from raw data
// rows skipped because the X field could not be parsed as a date for a 'date:*' transform
invalidXRows: number;
invalidYRows: { [key: string]: number }; // key is the y field, value is the count of invalid rows
validYRows: { [key: string]: number }; // key is the field, value is the count of valid rows
topDistinctValues: { [key: string]: Set<any> }; // key is the field, value is the set of distinct values
availableColumns: ChartAvailableColumn[];
definition: ChartDefinition;
}

View File

@@ -0,0 +1,374 @@
import {
ChartAvailableColumn,
ChartDateParsed,
ChartDefinition,
ChartLimits,
ProcessedChart,
} from './chartDefinitions';
import _sortBy from 'lodash/sortBy';
import _sum from 'lodash/sum';
import {
aggregateChartNumericValuesFromSource,
autoAggregateCompactTimelineChart,
computeChartBucketCardinality,
computeChartBucketKey,
fillChartTimelineBuckets,
tryParseChartDate,
} from './chartTools';
import { getChartScore, getChartYFieldScore } from './chartScoring';
/**
 * Streams rows into one or more charts and aggregates them into buckets.
 *
 * When constructed with definitions, exactly those charts are filled. When
 * constructed without any, charts are auto-detected: one line chart per date
 * column, with every other non-null column as a candidate 'sum' measure.
 *
 * Usage: `addRow()` / `addRows()` for all data, then `finalize()`; the result is
 * in `charts` and `availableColumns`.
 */
export class ChartProcessor {
  /** Charts that are still receiving rows; `finalize()` derives `charts` from them. */
  chartsProcessing: ProcessedChart[] = [];
  /** Finished charts, filled by `finalize()`. */
  charts: ProcessedChart[] = [];
  /** Every column name seen in any row, keyed by field name. */
  availableColumnsDict: { [field: string]: ChartAvailableColumn } = {};
  /** Values of `availableColumnsDict`, materialized by `finalize()`. */
  availableColumns: ChartAvailableColumn[] = [];
  /** When true, charts are created from the data itself instead of given definitions. */
  autoDetectCharts = false;
  /** Number of rows passed to `addRow()` so far. */
  rowsAdded = 0;

  /**
   * @param givenDefinitions charts to fill; when empty, auto-detection is switched on
   */
  constructor(public givenDefinitions: ChartDefinition[] = []) {
    for (const definition of givenDefinitions) {
      this.chartsProcessing.push(ChartProcessor.createEmptyChart(definition, true));
    }
    this.autoDetectCharts = this.givenDefinitions.length === 0;
  }

  /** Builds the empty working state for a chart that has not received any row yet. */
  private static createEmptyChart(definition: ChartDefinition, isGivenDefinition: boolean): ProcessedChart {
    return {
      definition,
      rowsAdded: 0,
      bucketKeysOrdered: [],
      buckets: {},
      bucketKeyDateParsed: {},
      isGivenDefinition,
      invalidXRows: 0,
      invalidYRows: {},
      availableColumns: [],
      validYRows: {},
      topDistinctValues: {},
    };
  }

  /**
   * Classifies the columns of one row (date / numeric / short string), extends the
   * auto-detected charts if enabled, and aggregates the row into every chart.
   *
   * @param row plain object mapping column name to value
   */
  addRow(row: any) {
    const dateColumns: { [key: string]: ChartDateParsed } = {};
    const numericColumns: { [key: string]: number } = {};
    const numericColumnsForAutodetect: { [key: string]: number } = {};
    const stringColumns: { [key: string]: string } = {};

    for (const [key, value] of Object.entries(row)) {
      // Number('') and Number('  ') are 0, so blank strings must be excluded explicitly;
      // otherwise empty text columns would be counted as valid numeric measures.
      let number = NaN;
      if (typeof value === 'number') {
        number = value;
      } else if (typeof value === 'string' && value.trim() !== '') {
        number = Number(value);
      }

      this.availableColumnsDict[key] = {
        field: key,
      };

      // identifier-like columns are numeric but meaningless as measures
      const keyLower = key.toLowerCase();
      const keyIsId = keyLower.endsWith('_id') || keyLower === 'id' || key.endsWith('Id');

      const parsedDate = tryParseChartDate(value);
      if (parsedDate) {
        dateColumns[key] = parsedDate;
        continue;
      }

      if (!isNaN(number) && isFinite(number)) {
        numericColumns[key] = number;
        if (!keyIsId) {
          numericColumnsForAutodetect[key] = number; // for auto-detecting charts
        }
        continue;
      }

      if (typeof value === 'string' && isNaN(number) && value.length < 100) {
        stringColumns[key] = value;
      }
    }

    if (this.autoDetectCharts) {
      // create charts from data, if there are no given definitions
      for (const datecol in dateColumns) {
        let usedChart = this.chartsProcessing.find(
          chart =>
            !chart.isGivenDefinition &&
            chart.definition.xdef.field === datecol &&
            chart.definition.xdef.transformFunction?.startsWith('date:')
        );

        if (
          !usedChart &&
          (this.rowsAdded < ChartLimits.APPLY_LIMIT_AFTER_ROWS ||
            this.chartsProcessing.length < ChartLimits.AUTODETECT_CHART_LIMIT)
        ) {
          usedChart = ChartProcessor.createEmptyChart(
            {
              chartType: 'line',
              xdef: {
                field: datecol,
                transformFunction: 'date:day',
              },
              ydefs: [],
            },
            false
          );
          this.chartsProcessing.push(usedChart);
        }

        if (!usedChart) {
          // chart limit reached and no chart exists for this date column - nothing to extend
          continue;
        }

        for (const [key, value] of Object.entries(row)) {
          if (value == null) continue;
          if (key === datecol) continue; // skip date column itself

          const hasYDef = usedChart.definition.ydefs.some(y => y.field === key);
          if (
            !hasYDef &&
            (this.rowsAdded < ChartLimits.APPLY_LIMIT_AFTER_ROWS ||
              usedChart.definition.ydefs.length < ChartLimits.AUTODETECT_MEASURES_LIMIT)
          ) {
            usedChart.definition.ydefs.push({
              field: key,
              aggregateFunction: 'sum',
            });
          }
        }
      }
    }

    // apply on all charts with this date column
    for (const chart of this.chartsProcessing) {
      this.applyRawData(
        chart,
        row,
        dateColumns[chart.definition.xdef.field],
        chart.isGivenDefinition ? numericColumns : numericColumnsForAutodetect,
        stringColumns
      );
    }

    // switch timelines that grew too wide to a coarser date unit
    for (let i = 0; i < this.chartsProcessing.length; i++) {
      this.chartsProcessing[i] = autoAggregateCompactTimelineChart(this.chartsProcessing[i]);
    }

    this.rowsAdded += 1;
    if (this.rowsAdded === ChartLimits.APPLY_LIMIT_AFTER_ROWS) {
      this.applyLimitsOnCharts();
    }
  }

  /**
   * When more charts were auto-detected than AUTODETECT_CHART_LIMIT allows, keeps only the
   * best-scoring ones and trims their measures to the AUTODETECT_MEASURES_LIMIT best-scoring
   * fields. Charts with a given definition are never removed.
   */
  applyLimitsOnCharts() {
    const autodetectProcessingCharts = this.chartsProcessing.filter(chart => !chart.isGivenDefinition);
    if (autodetectProcessingCharts.length <= ChartLimits.AUTODETECT_CHART_LIMIT) {
      return;
    }

    // rank first, cut afterwards - cutting before ranking would keep the oldest charts, not the best
    const keptAutodetectCharts = _sortBy(autodetectProcessingCharts, chart => -getChartScore(chart)).slice(
      0,
      ChartLimits.AUTODETECT_CHART_LIMIT
    );

    for (const chart of keptAutodetectCharts) {
      chart.definition.ydefs = _sortBy(chart.definition.ydefs, yfield => -getChartYFieldScore(chart, yfield)).slice(
        0,
        ChartLimits.AUTODETECT_MEASURES_LIMIT
      );
    }

    this.chartsProcessing = [...this.chartsProcessing.filter(chart => chart.isGivenDefinition), ...keptAutodetectCharts];
  }

  /** Convenience wrapper calling `addRow()` for every argument, in order. */
  addRows(...rows: any[]) {
    for (const row of rows) {
      this.addRow(row);
    }
  }

  /**
   * Turns the working charts into the final `charts` list: applies limits, orders the bucket
   * keys, fills gaps in timelines, drops unusable measures from auto-detected charts and groups
   * small pie slices. Charts that did not accept any row are skipped. Given-definition charts
   * come first, auto-detected charts follow ordered by descending score.
   */
  finalize() {
    this.applyLimitsOnCharts();
    this.availableColumns = Object.values(this.availableColumnsDict);

    for (const chart of this.chartsProcessing) {
      let addedChart: ProcessedChart = chart;
      if (chart.rowsAdded === 0) {
        continue; // skip empty charts
      }

      const sortOrder = chart.definition.xdef.sortOrder ?? 'ascKeys';
      if (sortOrder != 'natural') {
        if (sortOrder == 'ascKeys' || sortOrder == 'descKeys') {
          if (chart.definition.xdef.transformFunction.startsWith('date:')) {
            addedChart = autoAggregateCompactTimelineChart(addedChart);
            fillChartTimelineBuckets(addedChart);
          }
          addedChart.bucketKeysOrdered = _sortBy(Object.keys(addedChart.buckets));
          if (sortOrder == 'descKeys') {
            addedChart.bucketKeysOrdered.reverse();
          }
        }
        if (sortOrder == 'ascValues' || sortOrder == 'descValues') {
          addedChart.bucketKeysOrdered = _sortBy(Object.keys(addedChart.buckets), key =>
            computeChartBucketCardinality(addedChart.buckets[key])
          );
          if (sortOrder == 'descValues') {
            addedChart.bucketKeysOrdered.reverse();
          }
        }
      }

      if (!addedChart.isGivenDefinition) {
        // keep only measures that never failed to parse and were present often enough
        const source = addedChart;
        addedChart = {
          ...source,
          definition: {
            ...source.definition,
            ydefs: source.definition.ydefs.filter(
              y =>
                !source.invalidYRows[y.field] &&
                source.validYRows[y.field] / source.rowsAdded >= ChartLimits.VALID_VALUE_RATIO_LIMIT
            ),
          },
        };
      }

      addedChart.availableColumns = this.availableColumns;
      this.charts.push(addedChart);
      this.groupPieOtherBuckets(addedChart);
    }

    this.charts = [
      ...this.charts.filter(x => x.isGivenDefinition),
      ..._sortBy(
        this.charts.filter(x => !x.isGivenDefinition),
        chart => -getChartScore(chart)
      ),
    ];
  }

  /**
   * For pie charts, merges small slices into a single "Other" bucket. A slice is merged when its
   * share of the total cardinality is below the ratio limit, or when it is not among the
   * `countLimit` largest remaining slices. A limit of 0 disables the respective rule.
   * Mutates `chart.buckets` and `chart.bucketKeysOrdered`.
   */
  groupPieOtherBuckets(chart: ProcessedChart) {
    if (chart.definition.chartType !== 'pie') {
      return; // only for pie charts
    }
    const ratioLimit = chart.definition.pieRatioLimit ?? ChartLimits.PIE_RATIO_LIMIT;
    const countLimit = chart.definition.pieCountLimit ?? ChartLimits.PIE_COUNT_LIMIT;
    if (ratioLimit == 0 && countLimit == 0) {
      return; // no grouping if limit is 0
    }

    const otherBucket: any = {};
    let newBuckets: any = {};
    const cardSum = _sum(Object.values(chart.buckets).map(bucket => computeChartBucketCardinality(bucket)));
    if (cardSum == 0) {
      return; // no buckets to process
    }

    for (const [bucketKey, bucket] of Object.entries(chart.buckets)) {
      if (computeChartBucketCardinality(bucket) / cardSum < ratioLimit) {
        for (const field in bucket) {
          otherBucket[field] = (otherBucket[field] ?? 0) + bucket[field];
        }
      } else {
        newBuckets[bucketKey] = bucket;
      }
    }

    // countLimit 0 means "no count limit"; without the guard every slice would end up in "Other"
    if (countLimit > 0 && Object.keys(newBuckets).length > countLimit) {
      const sortedBucketKeys = _sortBy(
        Object.entries(newBuckets),
        ([, bucket]) => -computeChartBucketCardinality(bucket)
      ).map(([key]) => key);
      const newBuckets2: { [key: string]: any } = {};
      sortedBucketKeys.forEach((key, index) => {
        if (index < countLimit) {
          newBuckets2[key] = newBuckets[key];
        } else {
          for (const field in newBuckets[key]) {
            otherBucket[field] = (otherBucket[field] ?? 0) + newBuckets[key][field];
          }
        }
      });
      newBuckets = newBuckets2;
    }

    if (Object.keys(otherBucket).length > 0) {
      newBuckets['Other'] = otherBucket;
    }
    chart.buckets = newBuckets;
    chart.bucketKeysOrdered = [...chart.bucketKeysOrdered, 'Other'].filter(key => key in newBuckets);
  }

  /**
   * Aggregates one row into one chart.
   *
   * The row is ignored when the chart has no X definition, when the row has no value for the
   * X field, or when no bucket key can be computed. A row whose X value is not a date although
   * the chart uses a 'date:*' transform is counted in `invalidXRows`.
   *
   * @param chart chart to update (mutated)
   * @param row source row
   * @param dateParsed parsed value of the chart's X field, if that column was recognized as a date
   * @param numericColumns numeric values of the row that may be used as measures
   * @param stringColumns short string values of the row (currently not read by this method)
   */
  applyRawData(
    chart: ProcessedChart,
    row: any,
    dateParsed: ChartDateParsed,
    numericColumns: { [key: string]: number },
    stringColumns: { [key: string]: string }
  ) {
    if (chart.definition.xdef == null) {
      return;
    }
    if (row[chart.definition.xdef.field] == null) {
      return;
    }
    if (dateParsed == null && chart.definition.xdef.transformFunction.startsWith('date:')) {
      chart.invalidXRows += 1;
      return; // skip if date is invalid
    }

    const [bucketKey, bucketKeyParsed] = computeChartBucketKey(dateParsed, chart, row);
    if (!bucketKey) {
      return; // skip if no bucket key
    }
    if (bucketKeyParsed) {
      chart.bucketKeyDateParsed[bucketKey] = bucketKeyParsed;
    }

    if (chart.minX == null || bucketKey < chart.minX) {
      chart.minX = bucketKey;
    }
    if (chart.maxX == null || bucketKey > chart.maxX) {
      chart.maxX = bucketKey;
    }

    if (!chart.buckets[bucketKey]) {
      chart.buckets[bucketKey] = {};
      if (chart.definition.xdef.sortOrder == 'natural') {
        // natural order = order of first appearance
        chart.bucketKeysOrdered.push(bucketKey);
      }
    }

    aggregateChartNumericValuesFromSource(chart, bucketKey, numericColumns, row);
    chart.rowsAdded += 1;
  }
}

View File

@@ -0,0 +1,23 @@
import _sortBy from 'lodash/sortBy';
import _sum from 'lodash/sum';
import { ChartLimits, ChartYFieldDefinition, ProcessedChart } from './chartDefinitions';
/**
 * Rates a chart for ranking auto-detected charts: 5 points per accepted row plus
 * the sum of its best measure scores (at most AUTODETECT_MEASURES_LIMIT of them).
 */
export function getChartScore(chart: ProcessedChart): number {
  const measureScores = chart.definition.ydefs.map(ydef => getChartYFieldScore(chart, ydef));
  const bestFirst = _sortBy(measureScores).reverse();
  const countedScores = bestFirst.slice(0, ChartLimits.AUTODETECT_MEASURES_LIMIT);
  return chart.rowsAdded * 5 + _sum(countedScores);
}
/**
 * Rates one measure of a chart; higher is better.
 *
 * Counters that were never written for the field (no valid row, no invalid row,
 * no distinct value) count as 0, so the result is always a finite number.
 *
 * @param chart chart whose row statistics are used
 * @param yField measure to rate
 * @returns the score of the measure
 */
export function getChartYFieldScore(chart: ProcessedChart, yField: ChartYFieldDefinition): number {
  const validRows = chart.validYRows[yField.field] ?? 0;
  const invalidRows = chart.invalidYRows[yField.field] ?? 0;
  const distinctValues = chart.topDistinctValues[yField.field]?.size ?? 0;

  let res = 0;
  res += validRows * 5; // score for valid Y rows
  res += distinctValues * 20; // score for distinct values in Y field
  res += chart.rowsAdded * 2; // base score for rows added
  res -= invalidRows * 50; // penalty for invalid Y rows
  return res;
}

View File

@@ -0,0 +1,542 @@
import _toPairs from 'lodash/toPairs';
import _sumBy from 'lodash/sumBy';
import {
ChartConstDefaults,
ChartDateParsed,
ChartLimits,
ChartXTransformFunction,
ProcessedChart,
} from './chartDefinitions';
import { addMinutes, addHours, addDays, addMonths, addYears } from 'date-fns';
/**
 * Renders a chart as plain text for debugging: a header line with the chart type and
 * X transform, followed by one `key: field=value, ...` line per ordered bucket key.
 */
export function getChartDebugPrint(chart: ProcessedChart) {
  const { chartType, xdef } = chart.definition;
  const lines = [`Chart: ${chartType} (${xdef.transformFunction})\n`];
  chart.bucketKeysOrdered.forEach(bucketKey => {
    const renderedPairs = _toPairs(chart.buckets[bucketKey]).map(([field, value]) => `${field}=${value}`);
    lines.push(`${bucketKey}: ${renderedPairs.join(', ')}\n`);
  });
  return lines.join('');
}
/**
 * Tries to interpret a value as a date and splits it into calendar components.
 *
 * Accepted inputs:
 * - a valid `Date` object (local-time components are used),
 * - a string `YYYY-MM-DD`, optionally followed by ` ` or `T` and `HH:MM:SS`, optional
 *   fractional seconds and an optional `Z` / `±HH:MM` suffix. The suffix is accepted but
 *   not applied - components are returned exactly as written.
 *
 * @param dateInput value of any type
 * @returns the parsed components, or null when the value is not a date, is an Invalid Date,
 *          or names an impossible calendar value (e.g. month 13, February 30, hour 25)
 */
export function tryParseChartDate(dateInput: any): ChartDateParsed | null {
  if (dateInput instanceof Date) {
    if (isNaN(dateInput.getTime())) {
      return null; // Invalid Date would produce NaN components and NaN bucket keys
    }
    return {
      year: dateInput.getFullYear(),
      month: dateInput.getMonth() + 1,
      day: dateInput.getDate(),
      hour: dateInput.getHours(),
      minute: dateInput.getMinutes(),
      second: dateInput.getSeconds(),
      fraction: undefined, // Date object does not have fraction
    };
  }

  if (typeof dateInput !== 'string') return null;

  const m = dateInput.match(
    /^(\d{4})-(\d{2})-(\d{2})(?:[ T](\d{2}):(\d{2}):(\d{2})(?:\.(\d+))?(Z|[+-]\d{2}:\d{2})?)?$/
  );
  if (!m) return null;

  const year = parseInt(m[1], 10);
  const month = parseInt(m[2], 10);
  const day = parseInt(m[3], 10);
  // the time part is optional; missing groups are undefined
  const hour = m[4] ? parseInt(m[4], 10) : 0;
  const minute = m[5] ? parseInt(m[5], 10) : 0;
  const second = m[6] ? parseInt(m[6], 10) : 0;

  // the pattern only checks the digit count, so the ranges must be validated separately
  const isLeapYear = (year % 4 === 0 && year % 100 !== 0) || year % 400 === 0;
  const daysInMonth = [31, isLeapYear ? 29 : 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
  if (month < 1 || month > 12) return null;
  if (day < 1 || day > daysInMonth[month - 1]) return null;
  if (hour > 23 || minute > 59 || second > 59) return null;

  return {
    year,
    month,
    day,
    hour,
    minute,
    second,
    fraction: m[7] || undefined,
  };
}
/** Left-pads a value with zeros and returns the last two characters of the result. */
function pad2Digits(number) {
  const padded = '00' + number;
  // padded always has at least two characters, so this equals slice(-2)
  return padded.substring(padded.length - 2);
}
/**
 * Formats a parsed date as the bucket key of the given date transform
 * (`YYYY`, `YYYY-MM`, `YYYY-MM-DD`, `YYYY-MM-DD HH` or `YYYY-MM-DD HH:MM`).
 * Returns an empty string for any other transform.
 */
export function stringifyChartDate(value: ChartDateParsed, transform: ChartXTransformFunction): string {
  // every finer key is the coarser key plus one more component
  const yearKey = `${value.year}`;
  if (transform === 'date:year') return yearKey;

  const monthKey = `${yearKey}-${pad2Digits(value.month)}`;
  if (transform === 'date:month') return monthKey;

  const dayKey = `${monthKey}-${pad2Digits(value.day)}`;
  if (transform === 'date:day') return dayKey;

  const hourKey = `${dayKey} ${pad2Digits(value.hour)}`;
  if (transform === 'date:hour') return hourKey;

  if (transform === 'date:minute') return `${hourKey}:${pad2Digits(value.minute)}`;

  return '';
}
/**
 * Returns the date one unit (year, month, day, hour or minute - chosen by `transform`)
 * after `value`, truncated to that unit. Missing components of `value` default to the
 * first month / first day / zero hour and minute. Nothing is returned for a non-date transform.
 */
export function incrementChartDate(value: ChartDateParsed, transform: ChartXTransformFunction): ChartDateParsed {
  const start = new Date(value.year, (value.month ?? 1) - 1, value.day ?? 1, value.hour ?? 0, value.minute ?? 0);

  // keeps the first `depth` components: 1 = year only ... 5 = up to the minute
  const truncate = (date: Date, depth: number): ChartDateParsed => {
    const res: ChartDateParsed = { year: date.getFullYear() };
    if (depth >= 2) res.month = date.getMonth() + 1;
    if (depth >= 3) res.day = date.getDate();
    if (depth >= 4) res.hour = date.getHours();
    if (depth >= 5) res.minute = date.getMinutes();
    return res;
  };

  if (transform === 'date:year') return truncate(addYears(start, 1), 1);
  if (transform === 'date:month') return truncate(addMonths(start, 1), 2);
  if (transform === 'date:day') return truncate(addDays(start, 1), 3);
  if (transform === 'date:hour') return truncate(addHours(start, 1), 4);
  if (transform === 'date:minute') return truncate(addMinutes(start, 1), 5);
}
/**
 * Computes the bucket a row belongs to.
 *
 * For 'date:*' transforms the key is the date formatted by `stringifyChartDate` and the second
 * tuple element is the date truncated to the transform's unit. When no parsed date is available
 * both elements are null (instead of throwing on the missing date). For 'identity' and unknown
 * transforms the key is the raw value of the X field and the parsed part is null.
 *
 * @param dateParsed parsed X value of the row, or null/undefined when it is not a date
 * @param chart chart whose X definition selects the transform and the field
 * @param row source row
 * @returns `[bucketKey, truncatedDate]`
 */
export function computeChartBucketKey(
  dateParsed: ChartDateParsed,
  chart: ProcessedChart,
  row: any
): [string, ChartDateParsed] {
  const transform = chart.definition.xdef.transformFunction;
  switch (transform) {
    case 'date:year':
    case 'date:month':
    case 'date:day':
    case 'date:hour':
    case 'date:minute': {
      if (!dateParsed) {
        return [null, null];
      }
      const { year, month, day, hour, minute } = dateParsed;
      let truncated: ChartDateParsed;
      if (transform === 'date:year') {
        truncated = { year };
      } else if (transform === 'date:month') {
        truncated = { year, month };
      } else if (transform === 'date:day') {
        truncated = { year, month, day };
      } else if (transform === 'date:hour') {
        truncated = { year, month, day, hour };
      } else {
        truncated = { year, month, day, hour, minute };
      }
      // same formatter as fillChartTimelineBuckets, so generated and observed keys always agree
      return [stringifyChartDate(dateParsed, transform), truncated];
    }
    case 'identity':
    default:
      return [row[chart.definition.xdef.field], null];
  }
}
/**
 * Estimates how many buckets of the given date unit lie between `begin` and `end`.
 * Year and month distances are exact; finer units treat every year as 365 days and
 * every month as 30 days, so the result is only a rough approximation.
 * Returns NaN for a non-date transform.
 */
export function computeDateBucketDistance(
  begin: ChartDateParsed,
  end: ChartDateParsed,
  transform: ChartXTransformFunction
): number {
  const years = end.year - begin.year;
  const months = end.month - begin.month;
  const days = end.day - begin.day;
  const hours = end.hour - begin.hour;
  const minutes = end.minute - begin.minute;

  if (transform === 'date:year') {
    return years;
  }
  if (transform === 'date:month') {
    return years * 12 + months;
  }
  if (transform === 'date:day') {
    return years * 365 + months * 30 + days;
  }
  if (transform === 'date:hour') {
    return years * 365 * 24 + months * 30 * 24 + days * 24 + hours;
  }
  if (transform === 'date:minute') {
    return years * 365 * 24 * 60 + months * 30 * 24 * 60 + days * 24 * 60 + hours * 60 + minutes;
  }
  return NaN;
}
/**
 * Compares two parsed dates down to the unit of the given date transform.
 * The result is the difference of the first component that differs (year first, then
 * month, day, hour, minute), or the difference of the transform's own unit when all
 * coarser components are equal. Nothing is returned for a non-date transform.
 */
export function compareChartDatesParsed(
  a: ChartDateParsed,
  b: ChartDateParsed,
  transform: ChartXTransformFunction
): number {
  const levels: string[] = ['date:year', 'date:month', 'date:day', 'date:hour', 'date:minute'];
  const fields: Array<'year' | 'month' | 'day' | 'hour' | 'minute'> = ['year', 'month', 'day', 'hour', 'minute'];

  const finestIndex = levels.indexOf(transform);
  if (finestIndex >= 0) {
    for (let i = 0; i < finestIndex; i++) {
      const field = fields[i];
      if (a[field] !== b[field]) {
        return a[field] - b[field];
      }
    }
    const finestField = fields[finestIndex];
    return a[finestField] - b[finestField];
  }
}
/**
 * Derives the bucket key of the next coarser date unit by cutting the finest component
 * off the key (e.g. `2023-10-05` with 'date:day' gives `2023-10`). Returns null for
 * 'date:year', which has no parent; nothing is returned for a non-date transform.
 */
function getParentDateBucketKey(bucketKey: string, transform: ChartXTransformFunction): string | null {
  if (transform === 'date:year') {
    return null; // no parent for year
  }
  const childLevels: string[] = ['date:month', 'date:day', 'date:hour', 'date:minute'];
  // length of the parent key for each child level: YYYY, YYYY-MM, YYYY-MM-DD, YYYY-MM-DD HH
  const parentKeyLengths = [4, 7, 10, 13];
  const levelIndex = childLevels.indexOf(transform);
  if (levelIndex >= 0) {
    return bucketKey.slice(0, parentKeyLengths[levelIndex]);
  }
}
/**
 * Returns the next coarser date transform (minute -> hour -> day -> month -> year),
 * or null for 'date:year' and for anything that is not a date transform.
 */
function getParentDateBucketTransform(transform: ChartXTransformFunction): ChartXTransformFunction | null {
  // ordered from coarsest to finest; the parent is the preceding entry
  const coarseToFine: ChartXTransformFunction[] = ['date:year', 'date:month', 'date:day', 'date:hour', 'date:minute'];
  const position = coarseToFine.indexOf(transform);
  return position > 0 ? coarseToFine[position - 1] : null;
}
/**
 * Truncates a parsed date to the unit one level coarser than `transform`
 * (e.g. 'date:day' keeps year and month). Returns null for 'date:year' and for
 * anything that is not a date transform.
 */
function getParentKeyParsed(date: ChartDateParsed, transform: ChartXTransformFunction): ChartDateParsed | null {
  // number of components the parent keeps: month -> 1 (year), day -> 2, hour -> 3, minute -> 4
  const childLevels: string[] = ['date:month', 'date:day', 'date:hour', 'date:minute'];
  const keptComponents = childLevels.indexOf(transform) + 1;
  if (keptComponents === 0) {
    return null;
  }

  const parent: ChartDateParsed = { year: date.year };
  if (keptComponents >= 2) parent.month = date.month;
  if (keptComponents >= 3) parent.day = date.day;
  if (keptComponents >= 4) parent.hour = date.hour;
  return parent;
}
/**
 * Builds a new chart that holds the same data as `chart` but bucketed by the next
 * coarser date unit. Row statistics are copied over, buckets are re-aggregated from
 * the child buckets, and minX/maxX are recomputed from the resulting bucket keys.
 *
 * @returns the coarser chart, or null when the chart has a given definition or its
 *          transform has no parent
 */
function createParentChartAggregation(chart: ProcessedChart): ProcessedChart | null {
  if (chart.isGivenDefinition) {
    // a user-supplied definition must keep the transform it asked for
    return null;
  }

  const childTransform = chart.definition.xdef.transformFunction;
  const parentTransform = getParentDateBucketTransform(childTransform);
  if (!parentTransform) {
    return null;
  }

  const parentChart: ProcessedChart = {
    definition: {
      ...chart.definition,
      xdef: {
        ...chart.definition.xdef,
        transformFunction: parentTransform,
      },
    },
    rowsAdded: chart.rowsAdded,
    bucketKeysOrdered: [],
    buckets: {},
    bucketKeyDateParsed: {},
    isGivenDefinition: false,
    invalidXRows: chart.invalidXRows,
    invalidYRows: { ...chart.invalidYRows }, // copy invalid Y rows
    validYRows: { ...chart.validYRows }, // copy valid Y rows
    topDistinctValues: { ...chart.topDistinctValues }, // copy top distinct values
    availableColumns: chart.availableColumns,
  };

  Object.keys(chart.buckets).forEach(childKey => {
    const parentKey = getParentDateBucketKey(childKey, childTransform);
    if (!parentKey) {
      // skip if the bucket is already a parent
      return;
    }
    parentChart.bucketKeyDateParsed[parentKey] = getParentKeyParsed(chart.bucketKeyDateParsed[childKey], childTransform);
    aggregateChartNumericValuesFromChild(parentChart, parentKey, chart.buckets[childKey]);
  });

  const sortedParentKeys = Object.keys(parentChart.buckets).sort();
  const hasBuckets = sortedParentKeys.length > 0;
  parentChart.minX = hasBuckets ? sortedParentKeys[0] : null;
  parentChart.maxX = hasBuckets ? sortedParentKeys[sortedParentKeys.length - 1] : null;
  return parentChart;
}
/**
 * Coarsens a timeline chart until the distance between its first and last bucket is
 * below the aggregate limit (`xdef.parentAggregateLimit`, defaulting to
 * `ChartConstDefaults.parentAggregateLimit`).
 *
 * The chart is returned as it is at the point where coarsening stops: when its date range
 * is unknown, when it is not a date chart, when it is already compact enough, or when no
 * coarser chart can be created.
 */
export function autoAggregateCompactTimelineChart(chart: ProcessedChart) {
  let current = chart;
  for (;;) {
    const rangeStart = current.bucketKeyDateParsed[current.minX];
    const rangeEnd = current.bucketKeyDateParsed[current.maxX];
    if (!(rangeStart && rangeEnd)) {
      return current; // no valid date range to measure
    }

    const transform = current.definition.xdef.transformFunction;
    if (!transform.startsWith('date:')) {
      return current; // cannot aggregate non-date charts
    }

    const limit = current.definition.xdef.parentAggregateLimit ?? ChartConstDefaults.parentAggregateLimit;
    if (computeDateBucketDistance(rangeStart, rangeEnd, transform) < limit) {
      return current; // compact enough already
    }

    const coarser = createParentChartAggregation(current);
    if (!coarser) {
      return current; // cannot create parent aggregation
    }
    current = coarser;
  }
}
/**
 * Folds the numeric values of one source row into the bucket `bucketKey` of `chart`,
 * once per measure, and updates the per-field statistics.
 *
 * A measure without a numeric value in this row is skipped; it is counted in
 * `invalidYRows` only when the row holds a truthy (i.e. non-numeric) value for it.
 * 'avg' is stored as a `[sum, count]` pair.
 *
 * @param chart chart to update (mutated); the bucket must already exist
 * @param bucketKey key of the target bucket
 * @param numericColumns numeric values of the row by field name
 * @param row the original row, used to tell "missing" from "not numeric"
 */
export function aggregateChartNumericValuesFromSource(
  chart: ProcessedChart,
  bucketKey: string,
  numericColumns: { [key: string]: number },
  row: any
) {
  for (const { field, aggregateFunction } of chart.definition.ydefs) {
    const value = numericColumns[field];

    if (value == null) {
      if (row[field]) {
        chart.invalidYRows[field] = (chart.invalidYRows[field] || 0) + 1; // value present but not numeric
      }
      continue;
    }

    chart.validYRows[field] = (chart.validYRows[field] || 0) + 1;

    let seenValues = chart.topDistinctValues[field];
    if (!seenValues) {
      seenValues = new Set();
      chart.topDistinctValues[field] = seenValues;
    }
    if (seenValues.size < ChartLimits.MAX_DISTINCT_VALUES) {
      seenValues.add(value);
    }

    const bucket = chart.buckets[bucketKey];
    switch (aggregateFunction) {
      case 'sum':
        bucket[field] = (bucket[field] || 0) + (value || 0);
        break;
      case 'first':
        if (bucket[field] === undefined) {
          bucket[field] = value;
        }
        break;
      case 'last':
        bucket[field] = value;
        break;
      case 'min':
        bucket[field] = bucket[field] === undefined ? value : Math.min(bucket[field], value);
        break;
      case 'max':
        bucket[field] = bucket[field] === undefined ? value : Math.max(bucket[field], value);
        break;
      case 'count':
        bucket[field] = (bucket[field] || 0) + 1;
        break;
      case 'avg':
        if (bucket[field] === undefined) {
          bucket[field] = [value, 1]; // [sum, count]
        } else {
          bucket[field][0] += value;
          bucket[field][1] += 1;
        }
        break;
    }
  }
}
/**
 * Merges the already aggregated values of a finer-grained (child) bucket into the
 * bucket `bucketKey` of `chart`, creating that bucket when needed.
 *
 * 'sum' and 'count' are added, 'min' / 'max' are combined, 'avg' pairs (`[sum, count]`)
 * are added component-wise. The child bucket is never modified: the first 'avg' pair is
 * copied, not stored by reference.
 *
 * @param chart parent chart to update (mutated)
 * @param bucketKey key of the parent bucket
 * @param childBucketValues aggregated values of one child bucket, by field name
 * @throws Error for 'first' / 'last' measures, which cannot be merged without row order
 */
export function aggregateChartNumericValuesFromChild(
  chart: ProcessedChart,
  bucketKey: string,
  childBucketValues: { [key: string]: any }
) {
  for (const ydef of chart.definition.ydefs) {
    const childValue = childBucketValues[ydef.field];
    if (childValue == undefined) {
      continue; // skip if the field is not present in the child bucket
    }
    if (!chart.buckets[bucketKey]) {
      chart.buckets[bucketKey] = {};
    }
    const bucket = chart.buckets[bucketKey];

    switch (ydef.aggregateFunction) {
      case 'sum':
      case 'count':
        bucket[ydef.field] = (bucket[ydef.field] || 0) + (childValue || 0);
        break;
      case 'min':
        bucket[ydef.field] = bucket[ydef.field] === undefined ? childValue : Math.min(bucket[ydef.field], childValue);
        break;
      case 'max':
        bucket[ydef.field] = bucket[ydef.field] === undefined ? childValue : Math.max(bucket[ydef.field], childValue);
        break;
      case 'avg':
        if (bucket[ydef.field] === undefined) {
          // copy the pair - sharing the array would let later additions corrupt the child bucket
          bucket[ydef.field] = [childValue[0], childValue[1]];
        } else {
          bucket[ydef.field][0] += childValue[0];
          bucket[ydef.field][1] += childValue[1];
        }
        break;
      case 'first':
      case 'last':
        throw new Error(`Cannot aggregate ${ydef.aggregateFunction} for ${ydef.field} in child bucket`);
    }
  }
}
/**
 * Adds an empty bucket for every date step between the chart's first and last bucket
 * that has no bucket yet, so the timeline has no gaps. Does nothing when the parsed
 * date of `minX` or `maxX` is unknown. Mutates `chart.buckets` and `chart.bucketKeyDateParsed`.
 */
export function fillChartTimelineBuckets(chart: ProcessedChart) {
  const rangeStart = chart.bucketKeyDateParsed[chart.minX];
  const rangeEnd = chart.bucketKeyDateParsed[chart.maxX];
  if (!rangeStart || !rangeEnd) {
    return; // cannot fill timeline buckets without valid date range
  }

  const transform = chart.definition.xdef.transformFunction;
  for (
    let cursor = rangeStart;
    compareChartDatesParsed(cursor, rangeEnd, transform) <= 0;
    cursor = incrementChartDate(cursor, transform)
  ) {
    const key = stringifyChartDate(cursor, transform);
    if (chart.buckets[key]) {
      continue; // bucket already holds data
    }
    chart.buckets[key] = {};
    chart.bucketKeyDateParsed[key] = cursor;
  }
}
/** Adds up the values of all fields of a bucket; used to rank and compare buckets by size. */
export function computeChartBucketCardinality(bucket: { [key: string]: any }): number {
  const fieldNames = Object.keys(bucket);
  return _sumBy(fieldNames, fieldName => bucket[fieldName]);
}

View File

@@ -23,3 +23,5 @@ export * from './FreeTableGridDisplay';
export * from './FreeTableModel';
export * from './CustomGridDisplay';
export * from './ScriptDrivedDeployer';
export * from './chartDefinitions';
export * from './chartProcessor';

View File

@@ -0,0 +1,376 @@
import exp from 'constants';
import { ChartProcessor } from '../chartProcessor';
import { getChartDebugPrint } from '../chartTools';
// Fixture: four rows with an ISO timestamp, a numeric value, a category label and an
// `_id`-suffixed numeric column. The first three rows fall on consecutive days; the fourth
// is about ten months later.
const DS1 = [
{
timestamp: '2023-10-01T12:00:00Z',
value: 42.5,
category: 'B',
related_id: 12,
},
{
timestamp: '2023-10-02T10:05:00Z',
value: 12,
category: 'A',
related_id: 13,
},
{
timestamp: '2023-10-03T07:10:00Z',
value: 57,
category: 'A',
related_id: 5,
},
{
timestamp: '2024-08-03T07:10:00Z',
value: 33,
category: 'B',
related_id: 22,
},
];
// Fixture: rows with two timestamp columns (ts1, ts2) and two price columns. Prices are
// numeric strings in the first three rows and plain numbers in the last one. The ten
// `dummy*` columns exist only in the first row.
const DS2 = [
{
ts1: '2023-10-01T12:00:00Z',
ts2: '2024-10-01T12:00:00Z',
dummy1: 1,
dummy2: 1,
dummy3: 1,
dummy4: 1,
dummy5: 1,
dummy6: 1,
dummy7: 1,
dummy8: 1,
dummy9: 1,
dummy10: 1,
price1: '11',
price2: '22',
},
{
ts1: '2023-10-02T10:05:00Z',
ts2: '2024-10-02T10:05:00Z',
price1: '12',
price2: '23',
},
{
ts1: '2023-10-03T07:10:00Z',
ts2: '2024-10-03T07:10:00Z',
price1: '13',
price2: '24',
},
{
ts1: '2023-11-04T12:00:00Z',
ts2: '2024-11-04T12:00:00Z',
price1: 1,
price2: 2,
},
];
// Fixture: timestamp and numeric value plus a `bitval` column holding true, false and null.
const DS3 = [
{
timestamp: '2023-10-01T12:00:00Z',
value: 42.5,
bitval: true,
},
{
timestamp: '2023-10-02T10:05:00Z',
value: 12,
bitval: false,
},
{
timestamp: '2023-10-03T07:10:00Z',
value: 57,
bitval: null,
},
];
// Fixture: rows without any date column - a numeric `object_id`, a name, and two
// measures stored as numeric strings.
const DS4 = [
{
object_id: 710293590,
ObjectName: 'Journal',
Total_Reserved_kb: '68696',
RowsCount: '405452',
},
{
object_id: 182291709,
ObjectName: 'Employee',
Total_Reserved_kb: '732008',
RowsCount: '1980067',
},
{
object_id: 23432525,
ObjectName: 'User',
Total_Reserved_kb: '325352',
RowsCount: '2233',
},
{
object_id: 4985159,
ObjectName: 'Project',
Total_Reserved_kb: '293523',
RowsCount: '1122',
},
];
describe('Chart processor', () => {
test('Simple by day test, autodetected', () => {
const processor = new ChartProcessor();
processor.addRows(...DS1.slice(0, 3));
processor.finalize();
expect(processor.charts.length).toEqual(1);
const chart = processor.charts[0];
expect(chart.definition.xdef.transformFunction).toEqual('date:day');
expect(chart.definition.ydefs).toEqual([
expect.objectContaining({
field: 'value',
}),
]);
expect(chart.bucketKeysOrdered).toEqual(['2023-10-01', '2023-10-02', '2023-10-03']);
});
test('By month grouped, autedetected', () => {
const processor = new ChartProcessor();
processor.addRows(...DS1.slice(0, 4));
processor.finalize();
expect(processor.charts.length).toEqual(1);
const chart = processor.charts[0];
expect(chart.definition.xdef.transformFunction).toEqual('date:month');
expect(chart.bucketKeysOrdered).toEqual([
'2023-10',
'2023-11',
'2023-12',
'2024-01',
'2024-02',
'2024-03',
'2024-04',
'2024-05',
'2024-06',
'2024-07',
'2024-08',
]);
});
// With chart auto-detection switched off, no charts are expected, but the processor should
// still report the DS1 columns, in this order.
test('Detect columns', () => {
  const chartProcessor = new ChartProcessor();
  chartProcessor.autoDetectCharts = false;
  chartProcessor.addRows(...DS1);
  chartProcessor.finalize();

  expect(chartProcessor.charts.length).toEqual(0);

  // One partial matcher per expected column; only the `field` property is checked.
  const expectedFields = ['timestamp', 'value', 'category', 'related_id'];
  expect(chartProcessor.availableColumns).toEqual(
    expectedFields.map(field => expect.objectContaining({ field }))
  );
});
// An explicitly supplied pie definition (category -> sum of related_id) must be used as given:
// one chart, keys in 'natural' order, and the summed values per category.
test('Explicit definition', () => {
  // The definition stays inline so the string literals keep their contextual (literal) types.
  const chartProcessor = new ChartProcessor([
    {
      chartType: 'pie',
      xdef: { field: 'category', transformFunction: 'identity', sortOrder: 'natural' },
      ydefs: [{ field: 'related_id', aggregateFunction: 'sum' }],
    },
  ]);
  chartProcessor.addRows(...DS1);
  chartProcessor.finalize();

  expect(chartProcessor.charts.length).toEqual(1);

  const [categoryChart] = chartProcessor.charts;
  expect(categoryChart.definition.xdef.transformFunction).toEqual('identity');
  expect(categoryChart.bucketKeysOrdered).toEqual(['B', 'A']);
  expect(categoryChart.buckets).toEqual({
    B: { related_id: 34 },
    A: { related_id: 18 },
  });
});
// DS2 carries two date columns (ts1, ts2); auto-detection is expected to emit one per-day chart
// for each of them, both summing price1 and price2.
test('Two data sets with different date columns', () => {
  // Both charts are expected to share the same measures.
  const expectedMeasures = ['price1', 'price2'].map(field =>
    expect.objectContaining({ field, aggregateFunction: 'sum' })
  );
  // Partial matcher for a per-day chart definition on the given date column.
  const dayChartOn = (field: string) =>
    expect.objectContaining({
      xdef: expect.objectContaining({ field, transformFunction: 'date:day' }),
      ydefs: expectedMeasures,
    });

  const chartProcessor = new ChartProcessor();
  chartProcessor.addRows(...DS2);
  chartProcessor.finalize();

  expect(chartProcessor.charts.length).toEqual(2);
  expect(chartProcessor.charts[0].definition).toEqual(dayChartOn('ts1'));
  expect(chartProcessor.charts[1].definition).toEqual(dayChartOn('ts2'));
});
// DS3 has a true/false/null `bitval` column; the auto-detected chart is expected to use only
// `value` as a measure, i.e. `bitval` must not appear in ydefs.
test('Exclude boolean fields in autodetected', () => {
  const chartProcessor = new ChartProcessor();
  chartProcessor.addRows(...DS3);
  chartProcessor.finalize();

  expect(chartProcessor.charts.length).toEqual(1);

  const [detectedChart] = chartProcessor.charts;
  const { xdef, ydefs } = detectedChart.definition;
  expect(xdef.transformFunction).toEqual('date:day');
  expect(ydefs).toEqual([expect.objectContaining({ field: 'value' })]);
});
// A definition that uses the same column (`object_id`) as both the x field and the measure
// should survive processing: one chart, with the y definition still present.
test('Added field manual from GUI', () => {
  const chartProcessor = new ChartProcessor([
    {
      chartType: 'bar',
      xdef: { field: 'object_id', transformFunction: 'identity' },
      ydefs: [{ field: 'object_id', aggregateFunction: 'sum' }],
    },
  ]);
  chartProcessor.addRows(...DS4);
  chartProcessor.finalize();

  expect(chartProcessor.charts.length).toEqual(1);

  const [objectIdChart] = chartProcessor.charts;
  const { xdef, ydefs } = objectIdChart.definition;
  expect(xdef.transformFunction).toEqual('identity');
  expect(ydefs).toEqual([expect.objectContaining({ field: 'object_id', aggregateFunction: 'sum' })]);
});
// Cases: [xdef.sortOrder, expected order of bucket keys] for DS4 grouped by ObjectName.
const PieMainTestData = [
  ['natural', ['Journal', 'Employee', 'User', 'Project']],
  ['ascKeys', ['Employee', 'Journal', 'Project', 'User']],
  ['descKeys', ['User', 'Project', 'Journal', 'Employee']],
  ['ascValues', ['Project', 'User', 'Journal', 'Employee']],
  ['descValues', ['Employee', 'Journal', 'User', 'Project']],
];
// Checks that every sort order yields the expected key sequence while the bucket contents stay the same.
// DS4 stores RowsCount as strings; the expected buckets hold numbers.
// NOTE(review): the title says "Pie chart" but the definition uses chartType 'bar' — presumably so that
// small slices are not merged into "Other"; confirm the intent before renaming either.
test.each(PieMainTestData)('Pie chart - used space for DB objects (%s)', (xSortOrder, expectedKeys) => {
  const chartProcessor = new ChartProcessor([
    {
      chartType: 'bar',
      xdef: {
        field: 'ObjectName',
        transformFunction: 'identity',
        // The table is untyped, so the sort order reaches here as a plain union and must be cast.
        sortOrder: xSortOrder as any,
      },
      ydefs: [{ field: 'RowsCount', aggregateFunction: 'sum' }],
    },
  ]);
  chartProcessor.addRows(...DS4);
  chartProcessor.finalize();

  expect(chartProcessor.charts.length).toEqual(1);

  const [objectChart] = chartProcessor.charts;
  expect(objectChart.bucketKeysOrdered).toEqual(expectedKeys);
  expect(objectChart.buckets).toEqual({
    Employee: { RowsCount: 1980067 },
    Journal: { RowsCount: 405452 },
    Project: { RowsCount: 1122 },
    User: { RowsCount: 2233 },
  });
});
// Cases for merging small pie slices into an "Other" bucket. Tuple layout:
// [description, pieRatioLimit, pieCountLimit, expected bucket key order, expected buckets].
// The explicit tuple type lets test.each hand typed arguments to the test body, so no casts are needed there.
const PieOtherTestData: Array<[string, number, number, string[], Record<string, { RowsCount: number }>]> = [
  [
    // Ratio limit 0.1: User and Project are expected to be merged (2233 + 1122 = 3355).
    'ratio',
    0.1,
    5,
    ['Employee', 'Journal', 'Other'],
    {
      Employee: { RowsCount: 1980067 },
      Journal: { RowsCount: 405452 },
      Other: { RowsCount: 3355 },
    },
  ],
  [
    // Count limit 1: only Employee is expected to stay on its own (405452 + 2233 + 1122 = 408807).
    'count',
    0,
    1,
    ['Employee', 'Other'],
    {
      Employee: { RowsCount: 1980067 },
      Other: { RowsCount: 408807 },
    },
  ],
];
// Runs a pie chart over DS4 (ObjectName -> sum of RowsCount) with the given limits and checks
// both the resulting key order and the aggregated bucket values.
test.each(PieOtherTestData)(
  'Pie limit test - %s',
  (_description, pieRatioLimit, pieCountLimit, expectedOrder, expectedBuckets) => {
    const processor = new ChartProcessor([
      {
        chartType: 'pie',
        pieRatioLimit,
        pieCountLimit,
        xdef: {
          field: 'ObjectName',
          transformFunction: 'identity',
        },
        ydefs: [
          {
            field: 'RowsCount',
            aggregateFunction: 'sum',
          },
        ],
      },
    ]);
    processor.addRows(...DS4);
    processor.finalize();

    expect(processor.charts.length).toEqual(1);

    const chart = processor.charts[0];
    expect(chart.bucketKeysOrdered).toEqual(expectedOrder);
    expect(chart.buckets).toEqual(expectedBuckets);
  }
);
});