(function (global, factory) { typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) : typeof define === 'function' && define.amd ? define(['exports'], factory) : (global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.markedSmartypants = {})); })(this, (function (exports) { 'use strict'; const tags_to_skip = /<(\/?)(?:pre|code|kbd|script|math)[^>]*>/i; /** * @param text text to be parsed * @param attr value of the smart_quotes="" attribute */ const SmartyPants = (text = '', attr = '1') => { var do_quotes; var do_backticks; var do_dashes; var do_ellipses; var do_stupefy; var convert_quot = 0; if (typeof attr === 'number') { attr = attr.toString(); } else { attr = attr.replace(/\s/g, ''); } /** * Parse attributes: * 0 : do nothing * 1 : set all * 2 : set all, using old school en- and em- dash shortcuts * 3 : set all, using inverted old school en and em- dash shortcuts * * q : quotes * b : backtick quotes (``double'' only) * B : backtick quotes (``double'' and `single') * d : dashes * D : old school dashes * i : inverted old school dashes * e : ellipses * w : convert " entities to " for Dreamweaver users */ if (attr === '0') { // Do nothing return text; } else if (attr === '1') { // Do everything, turn all options on. do_quotes = 1; do_backticks = 1; do_dashes = 1; do_ellipses = 1; } else if (attr === '2') { // Do everything, turn all options on, use old school dash shorthand. do_quotes = 1; do_backticks = 1; do_dashes = 2; do_ellipses = 1; } else if (attr === '3') { // Do everything, turn all options on, use inverted old school dash shorthand. do_quotes = 1; do_backticks = 1; do_dashes = 3; do_ellipses = 1; } else if (attr === '-1') { // Special "stupefy" mode. 
do_stupefy = 1; } else { for (let i = 0; i < attr.length; i++) { let c = attr[i]; if (c === 'q') { do_quotes = 1; } if (c === 'b') { do_backticks = 1; } if (c === 'B') { do_backticks = 2; } if (c === 'd') { do_dashes = 1; } if (c === 'D') { do_dashes = 2; } if (c === 'i') { do_dashes = 3; } if (c === 'e') { do_ellipses = 1; } if (c === 'w') { convert_quot = 1; } } } var tokens = _tokenize(text); var result = ''; /** * Keep track of when we're inside
or tags.
*/
var in_pre = 0;
/**
* This is a cheat, used to get some context
* for one-character tokens that consist of
* just a quote char. What we do is remember
* the last character of the previous text
* token, to use as context to curl single-
* character quote tokens correctly.
*/
var prev_token_last_char = '';
for (let i = 0; i < tokens.length; i++) {
let cur_token = tokens[i];
if (cur_token[0] === 'tag') {
result = result + cur_token[1];
let matched = tags_to_skip.exec(cur_token[1]);
if (matched) {
if (matched[1] === '/') {
in_pre = 0;
}
else {
in_pre = 1;
}
}
}
else {
let t = cur_token[1];
let last_char = t.substring(t.length - 1, t.length); // Remember last char of this token before processing.
if (!in_pre) {
t = ProcessEscapes(t);
if (convert_quot) {
t = t.replace(/$quot;/g, '"');
}
if (do_dashes) {
if (do_dashes === 1) {
t = EducateDashes(t);
}
if (do_dashes === 2) {
t = EducateDashesOldSchool(t);
}
if (do_dashes === 3) {
t = EducateDashesOldSchoolInverted(t);
}
}
if (do_ellipses) {
t = EducateEllipses(t);
}
// Note: backticks need to be processed before quotes.
if (do_backticks) {
t = EducateBackticks(t);
if (do_backticks === 2) {
t = EducateSingleBackticks(t);
}
}
if (do_quotes) {
if (t === '\'') {
// Special case: single-character ' token
if (/\S/.test(prev_token_last_char)) {
t = '’';
}
else {
t = '‘';
}
}
else if (t === '"') {
// Special case: single-character " token
if (/\S/.test(prev_token_last_char)) {
t = '”';
}
else {
t = '“';
}
}
else {
// Normal case:
t = EducateQuotes(t);
}
}
if (do_stupefy) {
t = StupefyEntities(t);
}
}
prev_token_last_char = last_char;
result = result + t;
}
}
return result;
};
/**
 * Convert straight single and double quotes in a run of text into curly
 * quote characters, choosing opening or closing forms from the
 * surrounding context.
 *
 * @param {string} str String
 * @return {string} The string, with "educated" curly quotes.
 *
 * Example input:  "Isn't this fun?"
 * Example output: “Isn’t this fun?”
 */
const EducateQuotes = (str) => {
  /**
   * Make our own "punctuation" character class, because JavaScript regular
   * expressions have no POSIX-style [:punct:] class.
   */
  const punct_class = '[!"#\$\%\'()*+,-./:;<=>?\@\[\\\]\^_`{|}~]'; // eslint-disable-line no-useless-escape

  /**
   * Special case if the very first character is a quote
   * followed by punctuation at a non-word-break. Close the quotes by brute force:
   */
  str = str.replace(new RegExp(`^'(?=${punct_class}\\B)`), '’');
  str = str.replace(new RegExp(`^"(?=${punct_class}\\B)`), '”');

  /**
   * Special case for double sets of quotes, e.g.:
   *   He said, "'Quoted' words in a larger quote."
   * The `g` flag makes every occurrence in the string get this treatment,
   * not only the first one.
   */
  str = str.replace(/"'(?=\w)/g, '“‘');
  str = str.replace(/'"(?=\w)/g, '‘“');

  /**
   * Special case for decade abbreviations (the '80s), applied to every
   * occurrence:
   */
  str = str.replace(/'(?=\d\d)/g, '’');

  const close_class = '[^\\ \\t\\r\\n\\[\\{\\(\\-]';
  const not_close_class = '[\\ \\t\\r\\n\\[\\{\\(\\-]';
  // En and em dashes, as decimal entities or as literal characters.
  const dec_dashes = '&#8211;|&#8212;|–|—';

  /**
   * Contexts after which a quote followed by a word character is an
   * opening quote:
   *   \s            a whitespace char, or
   *   &nbsp;        a non-breaking space entity, or
   *   --            dashes, or
   *   &[mn]dash;    named dash entities, or
   *   dec_dashes    decimal entities / literal dashes, or
   *   &#x201[34];   hex dash entities
   */
  const opening_context = `\\s|&nbsp;|--|&[mn]dash;|${dec_dashes}|&#x201[34];`;

  /**
   * Get most opening single quotes:
   */
  str = str.replace(new RegExp(`(${opening_context})'(?=\\w)`, 'g'), '$1‘');

  /**
   * Single closing quotes: either preceded by a character in close_class,
   * or followed by whitespace or by an 's' at a word ending position.
   * The latter handles something like "Custer's Last Stand."
   */
  str = str.replace(new RegExp(`(${close_class})'`, 'g'), '$1’');
  str = str.replace(new RegExp(`(${not_close_class}?)'(?=\\s|s\\b)`, 'g'), '$1’');

  /**
   * Any remaining single quotes should be opening ones:
   */
  str = str.replace(/'/g, '‘');

  /**
   * Get most opening double quotes:
   */
  str = str.replace(new RegExp(`(${opening_context})"(?=\\w)`, 'g'), '$1“');

  /**
   * Double closing quotes: either preceded by a character in close_class,
   * or followed by whitespace.
   */
  str = str.replace(new RegExp(`(${close_class})"`, 'g'), '$1”');
  str = str.replace(new RegExp(`(${not_close_class}?)"(?=\\s)`, 'g'), '$1”');

  /**
   * Any remaining quotes should be opening ones.
   */
  str = str.replace(/"/g, '“');

  return str;
};
/**
 * Translate ``backticks'' -style double quotes into curly double quotes.
 * A pair of backticks becomes an opening quote and a pair of apostrophes
 * becomes a closing quote; single apostrophes are left alone.
 *
 * @param {string} str String
 * @return {string} The string with both two-character sequences replaced.
 *
 * Example input:  ``Isn't this fun?''
 * Example output: “Isn't this fun?”
 */
const EducateBackticks = (str) => str
  .replace(/``/g, '“')
  .replace(/''/g, '”');
/**
 * Translate `backticks' -style single quotes into curly single quotes.
 * Every backtick becomes an opening quote and every apostrophe becomes a
 * closing quote.
 *
 * @param {string} str String
 * @return {string} The string with all backticks and apostrophes replaced.
 *
 * Example input:  `Isn't this fun?'
 * Example output: ‘Isn’t this fun?’
 */
const EducateSingleBackticks = (str) => {
  const opened = str.replace(/`/g, '‘');
  return opened.replace(/'/g, '’');
};
/**
 * Translate every "--" into an em-dash.
 *
 * @param {string} str String
 * @return {string} The string with each "--" replaced by an em-dash.
 */
const EducateDashes = (str) => str.replace(/--/g, '—');
/**
 * Translate "---" into an em-dash and "--" into an en-dash.
 * Triple dashes are handled first so they are not consumed as a double
 * dash followed by a stray hyphen.
 *
 * @param {string} str String
 * @return {string} The string with both dash shorthands replaced.
 */
const EducateDashesOldSchool = (str) => str
  .replace(/---/g, '—')
  .replace(/--/g, '–');
/**
 * Translate "--" into an em-dash and "---" into an en-dash, i.e. the
 * inverse of the mapping used by EducateDashesOldSchool().
 *
 * Two reasons for this variant: first, it stays compatible with text
 * written when "--" was only used for em-dashes; second, em-dashes are
 * more common than en-dashes, so the shorter shortcut goes to the more
 * common character. (Thanks to Aaron Swartz for the idea.)
 *
 * Triple dashes are handled first so they are not consumed as a double
 * dash followed by a stray hyphen.
 *
 * @param {string} str String
 * @return {string} The string with both dash shorthands replaced.
 */
const EducateDashesOldSchoolInverted = (str) => {
  const with_en_dashes = str.replace(/---/g, '–');
  return with_en_dashes.replace(/--/g, '—');
};
/**
 * Translate "..." into an ellipsis character. The spaced-out form
 * ". . ." (single spaces between the dots) is converted as well.
 *
 * @param {string} str String
 * @return {string} The string with both dot sequences replaced.
 *
 * Example input:  Huh...?
 * Example output: Huh…?
 */
const EducateEllipses = (str) => str
  .replace(/\.\.\./g, '…')
  .replace(/\. \. \./g, '…');
/**
 * Translate the typographic characters produced by the educate functions
 * back to their ASCII counterparts.
 *
 * @param {string} str String
 * @return {string} The string with dashes, curly quotes and ellipses
 *                  replaced by plain ASCII.
 *
 * Example input:  “Hello — world.”
 * Example output: "Hello -- world."
 */
const StupefyEntities = (str) => {
  // Each entry pairs a typographic character with its ASCII stand-in.
  const ascii_equivalents = [
    [/–/g, '-'], // en-dash
    [/—/g, '--'], // em-dash
    [/‘/g, '\''], // open single quote
    [/’/g, '\''], // close single quote
    [/“/g, '"'], // open double quote
    [/”/g, '"'], // close double quote
    [/…/g, '...'], // ellipsis
  ];
  return ascii_equivalents.reduce(
    (out, [pattern, plain]) => out.replace(pattern, plain),
    str,
  );
};
/**
 * Process backslash escape sequences. Each escaped character is replaced
 * by its numeric HTML entity, so that the later educate passes no longer
 * see the raw character. This is useful if you want to force a "dumb"
 * quote or other character to appear.
 *
 *   Escape  Value   Entity
 *   ------  -----   ------
 *   \\      \       &#92;
 *   \"      "       &#34;
 *   \'      '       &#39;
 *   \.      .       &#46;
 *   \-      -       &#45;
 *   \`      `       &#96;
 *
 * @param {string} str String
 * @return {string} The string after processing the escape sequences above.
 */
const ProcessEscapes = (str) => {
  const escaped_entities = {
    '\\': '&#92;',
    '"': '&#34;',
    '\'': '&#39;',
    '.': '&#46;',
    '-': '&#45;',
    '`': '&#96;',
  };
  // A single left-to-right pass: a doubled backslash is consumed as one
  // escape before the character after it is considered.
  return str.replace(/\\([\\"'.\-`])/g, (_match, ch) => escaped_entities[ch]);
};
/**
 * Split a string of HTML markup into tag and text tokens.
 *
 * HTML comments (<!-- ... -->), processing instructions (<? ... ?>) and
 * ordinary tags (<...>) become 'tag' tokens; everything between them
 * becomes 'text' tokens. Empty text runs are not emitted.
 *
 * Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin.
 *
 * @param {string} str String containing HTML markup.
 * @return {Array} Array of two-element arrays; the first element is
 *                 either 'tag' or 'text', the second is the token's
 *                 source text.
 */
const _tokenize = (str) => {
  // Every alternative must consume at least one character: a zero-length
  // match would leave lastIndex unchanged and the loop below would never end.
  const tag_pattern = /<!--[\s\S]*?-->|<\?.*?\?>|<[^>]*>/g;
  const tokens = [];
  let pos = 0;
  let matched = tag_pattern.exec(str);
  while (matched !== null) {
    if (pos < matched.index) {
      tokens.push(['text', str.substring(pos, matched.index)]);
    }
    tokens.push(['tag', matched[0]]);
    pos = tag_pattern.lastIndex;
    matched = tag_pattern.exec(str);
  }
  if (pos < str.length) {
    tokens.push(['text', str.substring(pos)]);
  }
  return tokens;
};
/**
 * Build an extension object with a `tokenizer.inlineText` override and a
 * `hooks.postprocess` hook that runs SmartyPants over the final HTML.
 * (Presumably intended to be passed to marked's `use()`; confirm against
 * the caller.)
 *
 * @param {Object} [options]
 * @param {string|number} [options.config=2] value forwarded to SmartyPants
 *        as its `attr` argument.
 * @return {Object} object with `tokenizer` and `hooks` members.
 */
function markedSmartypants({
  config = 2,
} = {}) {
  const tokenizer = {
    // Emit inline text exactly as matched (don't escape inlineText), so
    // the postprocess hook still sees the raw quote characters.
    inlineText(src) {
      const cap = this.rules.inline.text.exec(src);
      if (cap) {
        const [matchedText] = cap;
        return {
          type: 'text',
          raw: matchedText,
          text: matchedText,
        };
      }
      /* istanbul ignore next */
      // should never happen
      return undefined;
    },
  };

  const hooks = {
    postprocess(html) {
      return SmartyPants(html, config);
    },
  };

  return { tokenizer, hooks };
}
// Publish the extension factory on the `exports` object handed to this
// UMD factory function, then close the factory and the wrapper call.
exports.markedSmartypants = markedSmartypants;
}));