SYNC: Merge pull request #6 from dbgate/feature/ai-assistant

This commit is contained in:
Jan Prochazka
2025-07-23 15:45:36 +02:00
committed by Diflow
parent 082d0aa02f
commit c07e19c898
15 changed files with 883 additions and 174 deletions

View File

@@ -0,0 +1,41 @@
<script>
  /* npm i highlight.js sql-formatter */
  import hljs from 'highlight.js/lib/core';
  import sqlGrammar from './sqlGrammar';
  import { onMount, afterUpdate } from 'svelte';

  // SQL text to render with syntax highlighting.
  export let code = '';

  // Bound to the <pre> element currently in the DOM. The {#key code} block
  // below re-creates that element whenever `code` changes.
  let domCode;

  onMount(() => {
    hljs.registerLanguage('sql', sqlGrammar);
  });

  // Svelte runs afterUpdate callbacks after the initial onMount as well as
  // after every later update, so this one hook also covers the first paint.
  // Highlighting from onMount too meant the same element was highlighted
  // twice on mount, which is redundant and can make highlight.js log warnings.
  afterUpdate(() => {
    if (domCode) {
      hljs.highlightElement(domCode);
    }
  });
</script>
{#key code}
<!--
The `sql` class hints the language; highlight.js will
read it even though we register the grammar explicitly.
-->
<pre bind:this={domCode} class="sql">{code}</pre>
{/key}
<style>
  pre {
    margin: 0;
    padding: 0.5em;
  }
</style>

View File

@@ -0,0 +1,34 @@
<script>
  import hljs from 'highlight.js/lib/core';
  import xmlGrammar from './xmlGrammar';
  import xmlFormat from 'xml-formatter';
  import { afterUpdate, onMount } from 'svelte';

  // Raw XML text to render with syntax highlighting.
  export let code = '';

  // Bound to the <pre> element currently in the DOM. Declared ahead of the
  // lifecycle hooks that read it.
  let codeBlock;

  // Pretty-printed XML, recomputed whenever `code` changes.
  $: formattedCode = xmlFormat(code, { indentation: ' ', throwOnFailure: false });

  onMount(() => {
    hljs.registerLanguage('xml', xmlGrammar);
  });

  // Runs after the initial mount and after every update. The {#key} block
  // below re-creates the <pre> on each change, so a fresh element is
  // highlighted every time.
  afterUpdate(() => {
    if (codeBlock) {
      hljs.highlightElement(codeBlock);
    }
  });
</script>
{#key formattedCode}
<!--
The `xml` class pins the language; without it highlight.js would
auto-detect among every grammar registered on the shared instance.
-->
<pre bind:this={codeBlock} class="xml">{formattedCode}</pre>
{/key}
<style>
  pre {
    margin: 0;
    padding: 0.5em;
  }
</style>

View File

@@ -0,0 +1,691 @@
/*
Language: SQL
Website: https://en.wikipedia.org/wiki/SQL
Category: common, database
*/
/*
Goals:
SQL is intended to highlight basic/common SQL keywords and expressions
- If pretty much every single SQL server supports it, then it's a candidate.
- It is NOT intended to include tons of vendor specific keywords (Oracle, MySQL,
PostgreSQL) although the list of data types is purposely a bit more expansive.
- For more specific SQL grammars please see:
- PostgreSQL and PL/pgSQL - core
- T-SQL - https://github.com/highlightjs/highlightjs-tsql
- sql_more (core)
*/
export default function(hljs) {
const regex = hljs.regex;
const COMMENT_MODE = hljs.COMMENT('--', '$');
const STRING = {
scope: 'string',
variants: [
{
begin: /'/,
end: /'/,
contains: [ { match: /''/ } ]
}
]
};
const QUOTED_IDENTIFIER = {
begin: /"/,
end: /"/,
contains: [ { match: /""/ } ]
};
const LITERALS = [
"true",
"false",
// Not sure it's correct to call NULL literal, and clauses like IS [NOT] NULL look strange that way.
// "null",
"unknown"
];
const MULTI_WORD_TYPES = [
"double precision",
"large object",
"with timezone",
"without timezone"
];
const TYPES = [
'bigint',
'binary',
'blob',
'boolean',
'char',
'character',
'clob',
'date',
'dec',
'decfloat',
'decimal',
'float',
'int',
'integer',
'interval',
'nchar',
'nclob',
'national',
'numeric',
'real',
'row',
'smallint',
'time',
'timestamp',
'varchar',
'varying', // modifier (character varying)
'varbinary'
];
const NON_RESERVED_WORDS = [
"add",
"asc",
"collation",
"desc",
"final",
"first",
"last",
"view"
];
// https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#reserved-word
const RESERVED_WORDS = [
"abs",
"acos",
"all",
"allocate",
"alter",
"and",
"any",
"are",
"array",
"array_agg",
"array_max_cardinality",
"as",
"asensitive",
"asin",
"asymmetric",
"at",
"atan",
"atomic",
"authorization",
"avg",
"begin",
"begin_frame",
"begin_partition",
"between",
"bigint",
"binary",
"blob",
"boolean",
"both",
"by",
"call",
"called",
"cardinality",
"cascaded",
"case",
"cast",
"ceil",
"ceiling",
"char",
"char_length",
"character",
"character_length",
"check",
"classifier",
"clob",
"close",
"coalesce",
"collate",
"collect",
"column",
"commit",
"condition",
"connect",
"constraint",
"contains",
"convert",
"copy",
"corr",
"corresponding",
"cos",
"cosh",
"count",
"covar_pop",
"covar_samp",
"create",
"cross",
"cube",
"cume_dist",
"current",
"current_catalog",
"current_date",
"current_default_transform_group",
"current_path",
"current_role",
"current_row",
"current_schema",
"current_time",
"current_timestamp",
"current_path",
"current_role",
"current_transform_group_for_type",
"current_user",
"cursor",
"cycle",
"date",
"day",
"deallocate",
"dec",
"decimal",
"decfloat",
"declare",
"default",
"define",
"delete",
"dense_rank",
"deref",
"describe",
"deterministic",
"disconnect",
"distinct",
"double",
"drop",
"dynamic",
"each",
"element",
"else",
"empty",
"end",
"end_frame",
"end_partition",
"end-exec",
"equals",
"escape",
"every",
"except",
"exec",
"execute",
"exists",
"exp",
"external",
"extract",
"false",
"fetch",
"filter",
"first_value",
"float",
"floor",
"for",
"foreign",
"frame_row",
"free",
"from",
"full",
"function",
"fusion",
"get",
"global",
"grant",
"group",
"grouping",
"groups",
"having",
"hold",
"hour",
"identity",
"in",
"indicator",
"initial",
"inner",
"inout",
"insensitive",
"insert",
"int",
"integer",
"intersect",
"intersection",
"interval",
"into",
"is",
"join",
"json_array",
"json_arrayagg",
"json_exists",
"json_object",
"json_objectagg",
"json_query",
"json_table",
"json_table_primitive",
"json_value",
"lag",
"language",
"large",
"last_value",
"lateral",
"lead",
"leading",
"left",
"like",
"like_regex",
"listagg",
"ln",
"local",
"localtime",
"localtimestamp",
"log",
"log10",
"lower",
"match",
"match_number",
"match_recognize",
"matches",
"max",
"member",
"merge",
"method",
"min",
"minute",
"mod",
"modifies",
"module",
"month",
"multiset",
"national",
"natural",
"nchar",
"nclob",
"new",
"no",
"none",
"normalize",
"not",
"nth_value",
"ntile",
"null",
"nullif",
"numeric",
"octet_length",
"occurrences_regex",
"of",
"offset",
"old",
"omit",
"on",
"one",
"only",
"open",
"or",
"order",
"out",
"outer",
"over",
"overlaps",
"overlay",
"parameter",
"partition",
"pattern",
"per",
"percent",
"percent_rank",
"percentile_cont",
"percentile_disc",
"period",
"portion",
"position",
"position_regex",
"power",
"precedes",
"precision",
"prepare",
"primary",
"procedure",
"ptf",
"range",
"rank",
"reads",
"real",
"recursive",
"ref",
"references",
"referencing",
"regr_avgx",
"regr_avgy",
"regr_count",
"regr_intercept",
"regr_r2",
"regr_slope",
"regr_sxx",
"regr_sxy",
"regr_syy",
"release",
"result",
"return",
"returns",
"revoke",
"right",
"rollback",
"rollup",
"row",
"row_number",
"rows",
"running",
"savepoint",
"scope",
"scroll",
"search",
"second",
"seek",
"select",
"sensitive",
"session_user",
"set",
"show",
"similar",
"sin",
"sinh",
"skip",
"smallint",
"some",
"specific",
"specifictype",
"sql",
"sqlexception",
"sqlstate",
"sqlwarning",
"sqrt",
"start",
"static",
"stddev_pop",
"stddev_samp",
"submultiset",
"subset",
"substring",
"substring_regex",
"succeeds",
"sum",
"symmetric",
"system",
"system_time",
"system_user",
"table",
"tablesample",
"tan",
"tanh",
"then",
"time",
"timestamp",
"timezone_hour",
"timezone_minute",
"to",
"trailing",
"translate",
"translate_regex",
"translation",
"treat",
"trigger",
"trim",
"trim_array",
"true",
"truncate",
"uescape",
"union",
"unique",
"unknown",
"unnest",
"update",
"upper",
"user",
"using",
"value",
"values",
"value_of",
"var_pop",
"var_samp",
"varbinary",
"varchar",
"varying",
"versioning",
"when",
"whenever",
"where",
"width_bucket",
"window",
"with",
"within",
"without",
"year",
];
// these are reserved words we have identified to be functions
// and should only be highlighted in a dispatch-like context
// ie, array_agg(...), etc.
const RESERVED_FUNCTIONS = [
"abs",
"acos",
"array_agg",
"asin",
"atan",
"avg",
"cast",
"ceil",
"ceiling",
"coalesce",
"corr",
"cos",
"cosh",
"count",
"covar_pop",
"covar_samp",
"cume_dist",
"dense_rank",
"deref",
"element",
"exp",
"extract",
"first_value",
"floor",
"json_array",
"json_arrayagg",
"json_exists",
"json_object",
"json_objectagg",
"json_query",
"json_table",
"json_table_primitive",
"json_value",
"lag",
"last_value",
"lead",
"listagg",
"ln",
"log",
"log10",
"lower",
"max",
"min",
"mod",
"nth_value",
"ntile",
"nullif",
"percent_rank",
"percentile_cont",
"percentile_disc",
"position",
"position_regex",
"power",
"rank",
"regr_avgx",
"regr_avgy",
"regr_count",
"regr_intercept",
"regr_r2",
"regr_slope",
"regr_sxx",
"regr_sxy",
"regr_syy",
"row_number",
"sin",
"sinh",
"sqrt",
"stddev_pop",
"stddev_samp",
"substring",
"substring_regex",
"sum",
"tan",
"tanh",
"translate",
"translate_regex",
"treat",
"trim",
"trim_array",
"unnest",
"upper",
"value_of",
"var_pop",
"var_samp",
"width_bucket",
];
// these functions can be used without parentheses (e.g. `current_date`)
const POSSIBLE_WITHOUT_PARENS = [
"current_catalog",
"current_date",
"current_default_transform_group",
"current_path",
"current_role",
"current_schema",
"current_transform_group_for_type",
"current_user",
"session_user",
"system_time",
"system_user",
"current_time",
"localtime",
"current_timestamp",
"localtimestamp"
];
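// these very "SQL like" multi-word keyword combos are matched as a single keyword.
// NOTE(review): they were meant to be worth +1 extra relevance each, but
// MULTI_WORD_KEYWORDS below sets `relevance: 0` - confirm which is intended.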
const COMBOS = [
"create table",
"insert into",
"primary key",
"foreign key",
"not null",
"alter table",
"add constraint",
"grouping sets",
"on overflow",
"character set",
"respect nulls",
"ignore nulls",
"nulls first",
"nulls last",
"depth first",
"breadth first"
];
const FUNCTIONS = RESERVED_FUNCTIONS;
const KEYWORDS = [
...RESERVED_WORDS,
...NON_RESERVED_WORDS
].filter((keyword) => {
return !RESERVED_FUNCTIONS.includes(keyword);
});
// `@`-prefixed names such as `@user_id`: the first character after `@` is a
// letter or digit, the rest may also contain underscores. Upper-case letters
// match as well because the grammar is declared case-insensitive.
const VARIABLE = {
scope: "variable",
match: /@[a-z0-9][a-z0-9_]*/,
};
// Operator symbols: single-character arithmetic/bitwise operators plus
// `&`/`&&`, `|`/`||`, `!`/`!=`, `<`/`<=`/`<=>`/`<<`/`<>` and `>`/`>>`/`>=`.
// `relevance: 0` keeps these matches out of the relevance score.
const OPERATOR = {
scope: "operator",
match: /[-+*/=%^~]|&&?|\|\|?|!=?|<(?:=>?|<|>)?|>[>=]?/,
relevance: 0,
};
// A name from FUNCTIONS counts as a call only when it is followed by optional
// whitespace and an opening parenthesis; within that match the name is
// looked up in the `built_in` keyword list.
const FUNCTION_CALL = {
match: regex.concat(/\b/, regex.either(...FUNCTIONS), /\s*\(/),
relevance: 0,
keywords: { built_in: FUNCTIONS }
};
// Turns a list of multi-word keyword combos into a single pattern that
// doesn't care how much whitespace separates the words.
// input: ["START QUERY", "NULLS FIRST"]
// output: a pattern equivalent to /\b(START\s+QUERY|NULLS\s+FIRST)\b/
function kws_to_regex(list) {
  // The `g` flag is required: without it only the first gap of each combo
  // is made flexible, so a combo of three or more words would tolerate
  // extra whitespace only between its first two words.
  const alternatives = list.map((kw) => kw.replace(/\s+/g, "\\s+"));
  return regex.concat(/\b/, regex.either(...alternatives), /\b/);
}
const MULTI_WORD_KEYWORDS = {
scope: "keyword",
match: kws_to_regex(COMBOS),
relevance: 0,
};
// Marks keywords as contributing no relevance by appending `|0` to them.
// An entry is left untouched when it already carries an explicit `|<number>`
// suffix, when it is listed in `exceptions`, or when `when(item)` is falsy.
// The grammar below uses this for keywords with less than 3 letters.
//
// list       - array of keyword strings
// exceptions - optional array of keywords that must never be changed
// when       - optional predicate selecting the keywords to mark; if it is
//              omitted nothing qualifies and the list is returned unchanged
//              (previously a missing predicate threw a TypeError)
// returns a new array; `list` itself is not modified
function reduceRelevancy(list, {
  exceptions, when
} = {}) {
  const exempt = exceptions || [];
  const qualifies = typeof when === "function" ? when : () => false;
  return list.map((item) => {
    const hasExplicitRelevance = /\|\d+$/.test(item);
    if (hasExplicitRelevance || exempt.includes(item) || !qualifies(item)) {
      return item;
    }
    return `${item}|0`;
  });
}
return {
name: 'SQL',
case_insensitive: true,
// does not include {} or HTML tags `</`
illegal: /[{}]|<\//,
keywords: {
$pattern: /\b[\w\.]+/,
keyword:
reduceRelevancy(KEYWORDS, { when: (x) => x.length < 3 }),
literal: LITERALS,
type: TYPES,
built_in: POSSIBLE_WITHOUT_PARENS
},
contains: [
{
scope: "type",
match: kws_to_regex(MULTI_WORD_TYPES)
},
MULTI_WORD_KEYWORDS,
FUNCTION_CALL,
VARIABLE,
STRING,
QUOTED_IDENTIFIER,
hljs.C_NUMBER_MODE,
hljs.C_BLOCK_COMMENT_MODE,
COMMENT_MODE,
OPERATOR
]
};
}

View File

@@ -0,0 +1,205 @@
/*
Language: HTML, XML
Website: https://www.w3.org/XML/
Category: common, web
Audit: 2020
*/
export default function (hljs) {
const regex = hljs.regex;
// XML names can have the following additional letters: https://www.w3.org/TR/xml/#NT-NameChar
// OTHER_NAME_CHARS = /[:\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]/;
// Element names start with NAME_START_CHAR followed by optional other Unicode letters, ASCII digits, hyphens, underscores, and periods
// const TAG_NAME_RE = regex.concat(/[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/, regex.optional(/[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]*:/), /[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]*/);;
// const XML_IDENT_RE = /[A-Z_a-z:\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]+/;
// const TAG_NAME_RE = regex.concat(/[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/, regex.optional(/[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]*:/), /[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]*/);
// however, to cater for performance and more Unicode support rely simply on the Unicode letter class
const TAG_NAME_RE = regex.concat(/[\p{L}_]/u, regex.optional(/[\p{L}0-9_.-]*:/u), /[\p{L}0-9_.-]*/u);
const XML_IDENT_RE = /[\p{L}0-9._:-]+/u;
// Character/entity references: named (`&amp;`), decimal (`&#160;`) and
// hexadecimal (`&#xA0;`) forms, all tagged as `symbol`.
const XML_ENTITIES = {
className: 'symbol',
begin: /&[a-z]+;|&#[0-9]+;|&#x[a-f0-9]+;/,
};
// Keyword-like tokens inside `<!...>` meta blocks: entered on a whitespace
// character, then a word that may start with `#` is tagged as `keyword`.
// NOTE(review): the trailing character class is `a-z1-9`, so the digit `0`
// is not accepted - confirm whether that omission is intentional.
const XML_META_KEYWORDS = {
begin: /\s/,
contains: [
{
className: 'keyword',
begin: /#?[a-z_][a-z1-9_-]+/,
illegal: /\n/,
},
],
};
// Same keyword handling, but for content delimited by parentheses.
const XML_META_PAR_KEYWORDS = hljs.inherit(XML_META_KEYWORDS, {
begin: /\(/,
end: /\)/,
});
// Copies of the stock single- and double-quoted string modes with the
// class name set to `string`, for use inside meta blocks.
const APOS_META_STRING_MODE = hljs.inherit(hljs.APOS_STRING_MODE, { className: 'string' });
const QUOTE_META_STRING_MODE = hljs.inherit(hljs.QUOTE_STRING_MODE, { className: 'string' });
const TAG_INTERNALS = {
endsWithParent: true,
illegal: /</,
relevance: 0,
contains: [
{
className: 'attr',
begin: XML_IDENT_RE,
relevance: 0,
},
{
begin: /=\s*/,
relevance: 0,
contains: [
{
className: 'string',
endsParent: true,
variants: [
{
begin: /"/,
end: /"/,
contains: [XML_ENTITIES],
},
{
begin: /'/,
end: /'/,
contains: [XML_ENTITIES],
},
{ begin: /[^\s"'=<>`]+/ },
],
},
],
},
],
};
return {
name: 'HTML, XML',
aliases: ['html', 'xhtml', 'rss', 'atom', 'xjb', 'xsd', 'xsl', 'plist', 'wsf', 'svg'],
case_insensitive: true,
unicodeRegex: true,
contains: [
{
className: 'meta',
begin: /<![a-z]/,
end: />/,
relevance: 10,
contains: [
XML_META_KEYWORDS,
QUOTE_META_STRING_MODE,
APOS_META_STRING_MODE,
XML_META_PAR_KEYWORDS,
{
begin: /\[/,
end: /\]/,
contains: [
{
className: 'meta',
begin: /<![a-z]/,
end: />/,
contains: [XML_META_KEYWORDS, XML_META_PAR_KEYWORDS, QUOTE_META_STRING_MODE, APOS_META_STRING_MODE],
},
],
},
],
},
hljs.COMMENT(/<!--/, /-->/, { relevance: 10 }),
{
begin: /<!\[CDATA\[/,
end: /\]\]>/,
relevance: 10,
},
XML_ENTITIES,
// xml processing instructions
{
className: 'meta',
end: /\?>/,
variants: [
{
begin: /<\?xml/,
relevance: 10,
contains: [QUOTE_META_STRING_MODE],
},
{
begin: /<\?[a-z][a-z0-9]+/,
},
],
},
{
className: 'tag',
/*
The lookahead pattern (?=...) ensures that 'begin' only matches
'<style' as a single word, followed by a whitespace or an
ending bracket.
*/
begin: /<style(?=\s|>)/,
end: />/,
keywords: { name: 'style' },
contains: [TAG_INTERNALS],
starts: {
end: /<\/style>/,
returnEnd: true,
subLanguage: ['css', 'xml'],
},
},
{
className: 'tag',
// See the comment in the <style tag about the lookahead pattern
begin: /<script(?=\s|>)/,
end: />/,
keywords: { name: 'script' },
contains: [TAG_INTERNALS],
starts: {
end: /<\/script>/,
returnEnd: true,
subLanguage: ['javascript', 'handlebars', 'xml'],
},
},
// we need this for now for jSX
{
className: 'tag',
begin: /<>|<\/>/,
},
// open tag
{
className: 'tag',
begin: regex.concat(
/</,
regex.lookahead(
regex.concat(
TAG_NAME_RE,
// <tag/>
// <tag>
// <tag ...
regex.either(/\/>/, />/, /\s/)
)
)
),
end: /\/?>/,
contains: [
{
className: 'name',
begin: TAG_NAME_RE,
relevance: 0,
starts: TAG_INTERNALS,
},
],
},
// close tag
{
className: 'tag',
begin: regex.concat(/<\//, regex.lookahead(regex.concat(TAG_NAME_RE, />/))),
contains: [
{
className: 'name',
begin: TAG_NAME_RE,
relevance: 0,
},
{
begin: />/,
relevance: 0,
endsParent: true,
},
],
},
],
};
}