hexo/node_modules/highlight.js/lib/languages/r.js

258 lines
8.0 KiB
JavaScript
Raw Normal View History

2023-10-03 11:14:36 +08:00
/*
Language: R
Description: R is a free software environment for statistical computing and graphics.
Author: Joe Cheng <joe@rstudio.org>
Contributors: Konrad Rudolph <konrad.rudolph@gmail.com>
Website: https://www.r-project.org
Category: common,scientific
*/
/**
 * Builds the highlight.js language definition for R.
 *
 * Reads `hljs.regex` (`either`, `lookahead`), `hljs.COMMENT`,
 * `hljs.HASH_COMMENT_MODE`, `hljs.BACKSLASH_ESCAPE` and
 * `hljs.END_SAME_AS_BEGIN` from the instance passed in, and returns a plain
 * object with `name`, `keywords` and `contains`.
 *
 * @type LanguageFn
 */
function r(hljs) {
  const regex = hljs.regex;
  // Identifiers in R cannot start with `_`, but they can start with `.` if it
  // is not immediately followed by a digit.
  // R also supports quoted identifiers, which are near-arbitrary sequences
  // delimited by backticks (`…`), which may contain escape sequences. These are
  // handled in a separate mode. See `test/markup/r/names.txt` for examples.
  // FIXME: Support Unicode identifiers.
  const IDENT_RE = /(?:(?:[a-zA-Z]|\.[._a-zA-Z])[._a-zA-Z0-9]*)|\.(?!\d)/;
  const NUMBER_TYPES_RE = regex.either(
    // Special case: only hexadecimal binary powers can contain fractions
    /0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/,
    // Hexadecimal numbers without fraction and optional binary power
    /0[xX][0-9a-fA-F]+(?:[pP][+-]?\d+)?[Li]?/,
    // Decimal numbers
    /(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?[Li]?/
  );
  // Alternation picks the first alternative that matches, so every
  // multi-character operator (including `**`) has to be listed before the
  // single-character class; otherwise the class would consume just one `*`.
  const OPERATORS_RE = /[=!<>:]=|\|\||&&|:::?|<-|<<-|->>|->|\|>|\*\*|[-+*\/?!$&|:<=>@^~]/;
  // `[[` is listed before the single-bracket class for the same reason.
  const PUNCTUATION_RE = regex.either(
    /[()]/,
    /[{}]/,
    /\[\[/,
    /[[\]]/,
    /\\/,
    /,/
  );

  return {
    name: 'R',

    keywords: {
      $pattern: IDENT_RE,
      keyword:
        'function if in break next repeat else for while',
      literal:
        'NULL NA TRUE FALSE Inf NaN NA_integer_|10 NA_real_|10 '
        + 'NA_character_|10 NA_complex_|10',
      built_in:
        // Builtin constants
        'LETTERS letters month.abb month.name pi T F '
        // Primitive functions
        // These are all the functions in `base` that are implemented as a
        // `.Primitive`, minus those functions that are also keywords.
        + 'abs acos acosh all any anyNA Arg as.call as.character '
        + 'as.complex as.double as.environment as.integer as.logical '
        + 'as.null.default as.numeric as.raw asin asinh atan atanh attr '
        + 'attributes baseenv browser c call ceiling class Conj cos cosh '
        + 'cospi cummax cummin cumprod cumsum digamma dim dimnames '
        + 'emptyenv exp expression floor forceAndCall gamma gc.time '
        + 'globalenv Im interactive invisible is.array is.atomic is.call '
        + 'is.character is.complex is.double is.environment is.expression '
        + 'is.finite is.function is.infinite is.integer is.language '
        + 'is.list is.logical is.matrix is.na is.name is.nan is.null '
        + 'is.numeric is.object is.pairlist is.raw is.recursive is.single '
        + 'is.symbol lazyLoadDBfetch length lgamma list log max min '
        + 'missing Mod names nargs nzchar oldClass on.exit pos.to.env '
        + 'proc.time prod quote range Re rep retracemem return round '
        + 'seq_along seq_len seq.int sign signif sin sinh sinpi sqrt '
        + 'standardGeneric substitute sum switch tan tanh tanpi tracemem '
        + 'trigamma trunc unclass untracemem UseMethod xtfrm',
    },

    contains: [
      // Roxygen comments
      hljs.COMMENT(
        /#'/,
        /$/,
        { contains: [
          {
            // Handle `@examples` separately to cause all subsequent code
            // until the next `@`-tag on its own line to be kept as-is,
            // preventing highlighting. This code is example R code, so nested
            // doctags shouldn't be treated as such. See
            // `test/markup/r/roxygen.txt` for an example.
            scope: 'doctag',
            match: /@examples/,
            starts: {
              end: regex.lookahead(regex.either(
                // end if another doc comment
                /\n^#'\s*(?=@[a-zA-Z]+)/,
                // or a line with no comment
                /\n^(?!#')/
              )),
              endsParent: true
            }
          },
          {
            // Handle `@param` to highlight the parameter name following
            // after.
            scope: 'doctag',
            begin: '@param',
            end: /$/,
            contains: [
              {
                scope: 'variable',
                variants: [
                  { match: IDENT_RE },
                  // Backtick-quoted parameter name, allowing escapes.
                  { match: /`(?:\\.|[^`\\])+`/ }
                ],
                endsParent: true
              }
            ]
          },
          {
            // Any other `@tag`.
            scope: 'doctag',
            match: /@[a-zA-Z]+/
          },
          {
            // Backslash-introduced words such as `\code`.
            scope: 'keyword',
            match: /\\[a-zA-Z]+/
          }
        ] }
      ),

      hljs.HASH_COMMENT_MODE,

      {
        scope: 'string',
        contains: [ hljs.BACKSLASH_ESCAPE ],
        variants: [
          // Raw strings: `r` or `R`, a quote, an optional run of dashes and
          // one of `(`, `{`, `[`; closed by the mirrored sequence.
          // NOTE(review): END_SAME_AS_BEGIN is presumably what forces the
          // dash run captured by `end` to equal the one captured by `begin`.
          hljs.END_SAME_AS_BEGIN({
            begin: /[rR]"(-*)\(/,
            end: /\)(-*)"/
          }),
          hljs.END_SAME_AS_BEGIN({
            begin: /[rR]"(-*)\{/,
            end: /\}(-*)"/
          }),
          hljs.END_SAME_AS_BEGIN({
            begin: /[rR]"(-*)\[/,
            end: /\](-*)"/
          }),
          hljs.END_SAME_AS_BEGIN({
            begin: /[rR]'(-*)\(/,
            end: /\)(-*)'/
          }),
          hljs.END_SAME_AS_BEGIN({
            begin: /[rR]'(-*)\{/,
            end: /\}(-*)'/
          }),
          hljs.END_SAME_AS_BEGIN({
            begin: /[rR]'(-*)\[/,
            end: /\](-*)'/
          }),
          // Ordinary double- and single-quoted strings.
          {
            begin: '"',
            end: '"',
            relevance: 0
          },
          {
            begin: "'",
            end: "'",
            relevance: 0
          }
        ],
      },

      // Matching numbers immediately following punctuation and operators is
      // tricky since we need to look at the character ahead of a number to
      // ensure the number is not part of an identifier, and we cannot use
      // negative look-behind assertions. So instead we explicitly handle all
      // possible combinations of (operator|punctuation), number.
      // TODO: replace with negative look-behind when available
      // { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/ },
      // { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+([pP][+-]?\d+)?[Li]?/ },
      // { begin: /(?<![a-zA-Z0-9._])(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[Li]?/ }
      {
        relevance: 0,
        variants: [
          {
            scope: {
              1: 'operator',
              2: 'number'
            },
            match: [
              OPERATORS_RE,
              NUMBER_TYPES_RE
            ]
          },
          {
            // Operators of the form `%...%` followed by a number.
            scope: {
              1: 'operator',
              2: 'number'
            },
            match: [
              /%[^%]*%/,
              NUMBER_TYPES_RE
            ]
          },
          {
            scope: {
              1: 'punctuation',
              2: 'number'
            },
            match: [
              PUNCTUATION_RE,
              NUMBER_TYPES_RE
            ]
          },
          {
            scope: { 2: 'number' },
            match: [
              /[^a-zA-Z0-9._]|^/, // not part of an identifier, or start of document
              NUMBER_TYPES_RE
            ]
          }
        ]
      },

      // Operators/punctuation when they're not directly followed by numbers
      {
        // Relevance boost for the most common assignment form.
        scope: { 3: 'operator' },
        match: [
          IDENT_RE,
          /\s+/,
          /<-/,
          /\s+/
        ]
      },

      {
        scope: 'operator',
        relevance: 0,
        variants: [
          { match: OPERATORS_RE },
          { match: /%[^%]*%/ }
        ]
      },

      {
        scope: 'punctuation',
        relevance: 0,
        match: PUNCTUATION_RE
      },

      {
        // Escaped identifier
        begin: '`',
        end: '`',
        contains: [ { begin: /\\./ } ]
      }
    ]
  };
}
// Expose the R language definition function as this module's CommonJS export.
module.exports = r;