/*** @fileoverview* some functions for browser-side pretty printing of code contained in html.** The lexer should work on a number of languages including C and friends,* Java, Python, Bash, SQL, HTML, XML, CSS, Javascript, and Makefiles.* It works passably on Ruby, PHP and Awk and a decent subset of Perl, but,* because of commenting conventions, doesn't work on Smalltalk, Lisp-like, or* CAML-like languages.** If there's a language not mentioned here, then I don't know it, and don't* know whether it works.  If it has a C-like, Bash-like, or XML-like syntax* then it should work passably.** Usage:* 1) include this source file in an html page via* <script type="text/javascript" src="/path/to/prettify.js"></script>* 2) define style rules.  See the example page for examples.* 3) mark the <pre> and <code> tags in your source with class=prettyprint.*    You can also use the (html deprecated) <xmp> tag, but the pretty printer*    needs to do more substantial DOM manipulations to support that, so some*    css styles may not be preserved.* That's it.  I wanted to keep the API as simple as possible, so there's no* need to specify which language the code is in.** Change log:* cbeust, 2006/08/22*   Java annotations (start with "@") are now captured as literals ("lit")*//*global console, document, navigator, setTimeout, window *//*** Split {@code prettyPrint} into multiple timeouts so as not to interfere with* UI events.* If set to {@code false}, {@code prettyPrint()} is synchronous.*/
window['PR_SHOULD_USE_CONTINUATION'] = true;/** the number of characters between tab columns */
window['PR_TAB_WIDTH'] = 8;/** Walks the DOM returning a properly escaped version of innerHTML.* @param {Node} node* @param {Array.<string>} out output buffer that receives chunks of HTML.*/
window['PR_normalizedHtml']/** Contains functions for creating and registering new language handlers.* @type {Object}*/= window['PR']/** Pretty print a chunk of code.** @param {string} sourceCodeHtml code as html* @return {string} code as html, but prettier*/= window['prettyPrintOne']/** find all the < pre > and < code > tags in the DOM with class=prettyprint* and prettify them.* @param {Function} opt_whenDone if specified, called when the last entry*     has been finished.*/= window['prettyPrint'] = void 0;/** browser detection. @extern */
window['_pr_isIE6'] = function() {
// Test the user agent string once.  The result is falsy (not necessarily
// the boolean false) when navigator or navigator.userAgent is falsy.
var isIE6 = navigator && navigator.userAgent && /\bMSIE 6\./.test(navigator.userAgent);
// Replace this function with one that simply returns the cached answer, so
// the user-agent test only ever runs on the first call.
window['_pr_isIE6'] = function() {
return isIE6;};
return isIE6;};(function() {
/**
 * Splits input on space and returns an Object whose keys are the non-empty
 * parts.  Every key maps to null; only the presence of a key is meaningful.
 */
function wordSet(words) {
  // Build a lookup object keyed by each non-empty, space-separated word.
  var parts = words.split(' ');
  var result = {};
  var idx = parts.length;
  // Walk from the last word to the first; empty strings produced by
  // leading, trailing or doubled spaces are ignored.
  while (idx-- > 0) {
    var word = parts[idx];
    if (word !== '') {
      result[word] = null;
    }
  }
  return result;
}
// Keyword lists for the supported language families.  Each list is a
// space-separated string whose final value ends with a space, so a list can
// be extended by plain concatenation.  sourceDecorator turns a list into a
// lookup object via wordSet.
var FLOW_CONTROL_KEYWORDS = "break continue do else for if return while ";
var C_KEYWORDS = FLOW_CONTROL_KEYWORDS+"auto case char const default "+"double enum extern float goto int long register short signed sizeof "+"static struct switch typedef union unsigned void volatile ";
// Base list extended by the C++, Java and JavaScript lists below (and, via
// the Java list, by the C# list).
var COMMON_KEYWORDS = C_KEYWORDS+"catch class delete false import "+"new operator private protected public this throw true try ";
var CPP_KEYWORDS = COMMON_KEYWORDS+"alignof align_union asm axiom bool "+"concept concept_map const_cast constexpr decltype "+"dynamic_cast explicit export friend inline late_check "+"mutable namespace nullptr reinterpret_cast static_assert static_cast "+"template typeid typename typeof using virtual wchar_t where ";
var JAVA_KEYWORDS = COMMON_KEYWORDS+"boolean byte extends final finally implements import instanceof null "+"native package strictfp super synchronized throws transient ";
// The C# list is built on top of the Java list.
var CSHARP_KEYWORDS = JAVA_KEYWORDS+"as base by checked decimal delegate descending event "+"fixed foreach from group implicit in interface internal into is lock "+"object out override orderby params readonly ref sbyte sealed "+"stackalloc string select uint ulong unchecked unsafe ushort var ";
var JSCRIPT_KEYWORDS = COMMON_KEYWORDS+"debugger eval export function get null set undefined var with "+"Infinity NaN ";
var PERL_KEYWORDS = "caller delete die do dump elsif eval exit foreach for "+"goto if import last local my next no our print package redo require "+"sub undef unless until use wantarray while BEGIN END ";
var PYTHON_KEYWORDS = FLOW_CONTROL_KEYWORDS+"and as assert class def del "+"elif except exec finally from global import in is lambda "+"nonlocal not or pass print raise try with yield "+"False True None ";
var RUBY_KEYWORDS = FLOW_CONTROL_KEYWORDS+"alias and begin case class def"+" defined elsif end ensure false in module next nil not or redo rescue "+"retry self super then true undef unless until when yield BEGIN END ";
var SH_KEYWORDS = FLOW_CONTROL_KEYWORDS+"case done elif esac eval fi "+"function in local set then until ";
// Concatenation of the lists above, used by the language-agnostic default
// decorator.  Words that appear more than once are harmless because wordSet
// stores each word as an object key.
var ALL_KEYWORDS = (
CPP_KEYWORDS+CSHARP_KEYWORDS+JSCRIPT_KEYWORDS+PERL_KEYWORDS+PYTHON_KEYWORDS+RUBY_KEYWORDS+SH_KEYWORDS);/** token style for a string literal */
var PR_STRING = 'str';
/** token style for a keyword */
var PR_KEYWORD = 'kwd';
/** token style for a comment */
var PR_COMMENT = 'com';
/** token style for a type */
var PR_TYPE = 'typ';
/** token style for a literal value.  e.g. 1, null, true. */
var PR_LITERAL = 'lit';
/** token style for a punctuation string. */
var PR_PUNCTUATION = 'pun';
/** token style for plain text, i.e. text not classified as any other style. */
var PR_PLAIN = 'pln';
/** token style for an sgml tag. */
var PR_TAG = 'tag';
/** token style for a markup declaration such as a DOCTYPE. */
var PR_DECLARATION = 'dec';
/** token style for embedded source. */
var PR_SOURCE = 'src';
/** token style for an sgml attribute name. */
var PR_ATTRIB_NAME = 'atn';
/** token style for an sgml attribute value. */
var PR_ATTRIB_VALUE = 'atv';
/**
 * A class that indicates a section of markup that is not code, e.g. to allow
 * embedding of line numbers within code listings.
 */
var PR_NOCODE = 'nocode';
function isWordChar(ch) {
  // True when ch sorts within the ASCII ranges a-z or A-Z.
  var isLowerCase = ch >= 'a' && ch <= 'z';
  if (isLowerCase) {
    return true;
  }
  return ch >= 'A' && ch <= 'Z';
}
/**
 * Splice one array into another.
 * Like the python <code>
 * container[containerPosition:containerPosition+countReplaced] = inserted
 * </code>
 * @param {Array} inserted
 * @param {Array} container modified in place
 * @param {Number} containerPosition
 * @param {Number} countReplaced
 */
function spliceArrayInto(inserted, container, containerPosition, countReplaced) {
  var removeCount = countReplaced || 0;
  // Array.prototype.splice takes (start, deleteCount, ...items), so the two
  // leading arguments are temporarily prepended to `inserted` and the whole
  // array is passed as the argument list.
  inserted.unshift(containerPosition, removeCount);
  try {
    container.splice.apply(container, inserted);
  } finally {
    // Always restore `inserted` to its original contents, even if the
    // splice throws.
    inserted.splice(0, 2);
  }
}
/**
 * A set of tokens that can precede a regular expression literal in
 * javascript.
 * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full
 * list, but I've removed ones that might be problematic when seen in
 * languages that don't support regular expression literals.
 *
 * <p>Specifically, I've removed any keywords that can't precede a regexp
 * literal in a syntactically legal javascript program, and I've removed the
 * "in" keyword since it's not a keyword in many languages, and might be used
 * as a count of inches.
 *
 * <p>The link above does not accurately describe EcmaScript rules since
 * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works
 * very well in practice.
 *
 * @private
 */
var REGEXP_PRECEDER_PATTERN = (function () {
  // Tokens after which a "/" starts a regular expression literal rather
  // than a division.  "+", "-" and the dot forms are handled separately
  // below.  "~" also covers "=~" and "!~".
  var preceders = [
    "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=", "&=", "(", "*", "*=",
    "+=", ",", "-=", "->", "/", "/=", ":", "::", ";", "<", "<<", "<<=", "<=",
    "=", "==", "===", ">", ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[",
    "^", "^=", "^^", "^^=", "{", "|", "|=", "||", "||=", "~",
    "break", "case", "continue", "delete", "do", "else", "finally",
    "instanceof", "return", "throw", "try", "typeof"
  ];

  var alternatives = [
    '(?:(?:^|[^0-9.])\\.{1,3})',  // a dot that's not part of a number
    '(?:(?:^|[^\\+])\\+)',        // allow + but not ++
    '(?:(?:^|[^\\-])-)'           // allow - but not --
  ];

  for (var k = 0; k < preceders.length; ++k) {
    var preceder = preceders[k];
    if (isWordChar(preceder.charAt(0))) {
      // Keywords must start at a word boundary.
      alternatives.push('\\b' + preceder);
    } else {
      // Backslash-escape every character except = < > : &.
      alternatives.push(preceder.replace(/([^=<>:&])/g, '\\$1'));
    }
  }

  // The final "^" alternative lets the pattern match the empty string; the
  // "\s*$" suffix anchors the preceder at the end of the tested text.
  alternatives.push('^');
  return new RegExp('(?:' + alternatives.join('|') + ')\\s*$');
}());
// Global patterns for the characters that the escaping helpers below replace
// with entities: & < > (text and attributes) and " (attributes only).
var pr_amp = /&/g;
var pr_lt = /</g;
var pr_gt = />/g;
var pr_quot = /\"/g;/** like textToHtml but escapes double quotes to be attribute safe. */
function attribToHtml(str) {
  // The ampersand is escaped first so that the entities introduced by the
  // later replacements are not themselves re-escaped.
  var escaped = str.replace(pr_amp, '&amp;');
  escaped = escaped.replace(pr_lt, '&lt;');
  escaped = escaped.replace(pr_gt, '&gt;');
  return escaped.replace(pr_quot, '&quot;');
}
/** escapes html special characters to html. */
function textToHtml(str) {
  // The ampersand is escaped first so that the entities introduced by the
  // later replacements are not themselves re-escaped.
  var escaped = str.replace(pr_amp, '&amp;');
  escaped = escaped.replace(pr_lt, '&lt;');
  return escaped.replace(pr_gt, '&gt;');
}
// Global patterns, one per named HTML entity: &lt; &gt; &apos; &quot; &amp;
// and &nbsp;.
var pr_ltEnt = /&lt;/g;
var pr_gtEnt = /&gt;/g;
var pr_aposEnt = /&apos;/g;
var pr_quotEnt = /&quot;/g;
var pr_ampEnt = /&amp;/g;
var pr_nbspEnt = /&nbsp;/g;/** unescapes html to plain text. */
function htmlToText(html) {
  // Decodes the entities &lt; &gt; &apos; &quot; &amp; &nbsp; and numeric
  // character references (decimal "&#65;" or hexadecimal "&#x41;") in a
  // single left-to-right pass.  A single pass means the output of one
  // substitution is never decoded again, so "&amp;lt;" yields the text
  // "&lt;" rather than "<".  &nbsp; decodes to an ordinary space.
  //
  // @param {string} html
  // @return {string} the plain text equivalent.

  // Fast path: without an ampersand there is nothing to decode.
  if (html.indexOf('&') < 0) {
    return html;
  }

  var namedEntities = {
    'lt': '<',
    'gt': '>',
    'apos': "'",
    'quot': '"',
    'amp': '&',
    'nbsp': ' '
  };

  return html.replace(
      /&(#[xX][0-9a-fA-F]+|#[0-9]+|lt|gt|apos|quot|amp|nbsp);/g,
      function (entity, body) {
        if (body.charAt(0) !== '#') {
          return namedEntities[body];
        }
        var marker = body.charAt(1);
        var isHex = marker === 'x' || marker === 'X';
        // Skip "#" (and the "x" marker for hex) before parsing the digits.
        var codePoint = isHex
            ? parseInt(body.substring(2), 16)
            : parseInt(body.substring(1), 10);
        if (codePoint > 0x10FFFF) {
          // Not a valid code point: leave the reference untouched.
          return entity;
        }
        if (codePoint > 0xFFFF) {
          // String.fromCharCode works on UTF-16 code units, so characters
          // outside the BMP must be emitted as a surrogate pair.
          var offset = codePoint - 0x10000;
          return String.fromCharCode(
              0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
        }
        return String.fromCharCode(codePoint);
      });
}
/** is the given node's innerHTML normally unescaped? */
function isRawContent(node) {
  // Only XMP elements are treated as raw; the comparison is an exact,
  // case-sensitive match on the node's tagName.
  var tagName = node.tagName;
  return tagName === 'XMP';
}
function normalizedHtml(node, out) {
  // Serialises `node` into `out` as chunks of HTML.  Handles node types
  // 1 (element), 2 (attribute) and 3/4 (text / CDATA); any other node type
  // produces no output.
  var type = node.nodeType;

  if (type === 1) {
    var tag = node.tagName.toLowerCase();
    out.push('<', tag);
    var attrs = node.attributes;
    for (var a = 0; a < attrs.length; ++a) {
      var attribute = attrs[a];
      // Attributes whose `specified` flag is falsy are left out.
      if (attribute.specified) {
        out.push(' ');
        normalizedHtml(attribute, out);
      }
    }
    out.push('>');
    var kid = node.firstChild;
    while (kid) {
      normalizedHtml(kid, out);
      kid = kid.nextSibling;
    }
    // br, link and img get no end tag unless they actually have children.
    if (node.firstChild || !/^(?:br|link|img)$/.test(tag)) {
      out.push('<\/', tag, '>');
    }
  } else if (type === 2) {
    out.push(node.name.toLowerCase(), '="', attribToHtml(node.value), '"');
  } else if (type === 3 || type === 4) {
    out.push(textToHtml(node.nodeValue));
  }
}
// Tri-state cache: null until the first call to getInnerHtml, afterwards
// true when the probe's innerHTML contained no literal '<', false otherwise.
var PR_innerHtmlWorks = null;
function getInnerHtml(node) {
  if (PR_innerHtmlWorks === null) {
    // Probe once: put markup-looking text into a PRE and check whether
    // innerHTML reports it with the '<' characters escaped.
    var probe = document.createElement('PRE');
    probe.appendChild(
        document.createTextNode('<!DOCTYPE foo PUBLIC "foo bar">\n<foo />'));
    PR_innerHtmlWorks = !/</.test(probe.innerHTML);
  }

  if (!PR_innerHtmlWorks) {
    // innerHTML is not usable here: serialise the children by hand.
    var pieces = [];
    for (var kid = node.firstChild; kid; kid = kid.nextSibling) {
      normalizedHtml(kid, pieces);
    }
    return pieces.join('');
  }

  var content = node.innerHTML;
  // Raw-content elements have their innerHTML escaped before it is returned.
  return isRawContent(node) ? textToHtml(content) : content;
}
/**
 * returns a function that expand tabs to spaces.  This function can be fed
 * successive chunks of text, and will maintain its own internal state to
 * keep track of how tabs are expanded.
 * @return {function (string) : string} a function that takes
 *   plain text and return the text with tabs expanded.
 * @private
 */
function makeTabExpander(tabWidth) {
  // Column of the next character on the current line.  It is kept between
  // calls of the returned function, so text may be fed in chunks.
  var column = 0;

  // Returns a run of `count` spaces (the integer part of count; nothing for
  // non-positive or NaN counts).
  function padding(count) {
    var pad = '';
    for (var k = 1; k <= count; ++k) {
      pad += ' ';
    }
    return pad;
  }

  return function (plainText) {
    var pieces = [];
    var segmentStart = 0;
    var length = plainText.length;

    for (var idx = 0; idx < length; ++idx) {
      var ch = plainText.charAt(idx);
      if (ch === '\t') {
        // Distance to the next tab stop.
        var width = tabWidth - (column % tabWidth);
        pieces.push(plainText.substring(segmentStart, idx), padding(width));
        column += width;
        segmentStart = idx + 1;
      } else if (ch === '\n') {
        column = 0;
      } else {
        ++column;
      }
    }

    // No tab seen: hand back the input unchanged.
    if (pieces.length === 0) {
      return plainText;
    }
    pieces.push(plainText.substring(segmentStart));
    return pieces.join('');
  };
}
// Splits html into chunks: runs of text, comments, CDATA sections, tags, or
// a lone '<'.
var pr_chunkPattern = /(?:[^<]+|<!--[\s\S]*?-->|<!\[CDATA\[([\s\S]*?)\]\]>|<\/?[a-zA-Z][^>]*>|<)/g;
var pr_commentPrefix = /^<!--/;
// A CDATA section starts with "<![CDATA[".  The "!" is required for this
// prefix to match the CDATA chunks produced by pr_chunkPattern.
var pr_cdataPrefix = /^<!\[CDATA\[/;
var pr_brPrefix = /^<br\b/i;
var pr_tagNameRe = /^<(\/?)([a-zA-Z]+)/;

/**
 * split markup into chunks of html tags (style null) and
 * plain text (style {@link #PR_PLAIN}), converting tags which are
 * significant for tokenization (<br>) into their textual equivalent.
 *
 * Comments are dropped, CDATA sections contribute their raw content to the
 * source text, <br> becomes a newline, and every other tag is recorded in
 * `tags` together with the source position at which it occurred.  A tag
 * carrying the nocode class is recorded together with everything up to its
 * matching end tag, so its content never reaches the lexers.
 *
 * @param {string} s html where whitespace is considered significant.
 * @return {Object} source code and extracted tags.
 * @private
 */
function extractTags(s) {
  var matches = s.match(pr_chunkPattern);
  var sourceBuf = [];
  var sourceBufLen = 0;
  var extractedTags = [];
  if (matches) {
    for (var i = 0, n = matches.length; i < n; ++i) {
      var match = matches[i];
      if (match.length > 1 && match.charAt(0) === '<') {
        if (pr_commentPrefix.test(match)) {
          continue;
        }
        if (pr_cdataPrefix.test(match)) {
          // Strip the 9-character "<![CDATA[" prefix and the 3-character
          // "]]>" suffix; the content is taken verbatim.
          sourceBuf.push(match.substring(9, match.length - 3));
          sourceBufLen += match.length - 12;
        } else if (pr_brPrefix.test(match)) {
          sourceBuf.push('\n');
          ++sourceBufLen;
        } else if (match.indexOf(PR_NOCODE) >= 0 && isNoCodeTag(match)) {
          // Find the end tag that balances this nocode start tag, counting
          // nested tags of the same name.
          var name = match.match(pr_tagNameRe)[2];
          var depth = 1;
          var j;
          for (j = i + 1; j < n; ++j) {
            var name2 = matches[j].match(pr_tagNameRe);
            if (name2 && name2[2] === name) {
              if (name2[1] === '/') {
                if (--depth === 0) {
                  break;
                }
              } else {
                ++depth;
              }
            }
          }
          if (j < n) {
            // Record the whole nocode section as one extracted chunk.
            extractedTags.push(
                sourceBufLen, matches.slice(i, j + 1).join(''));
            i = j;
          } else {  // Ignore unclosed sections.
            extractedTags.push(sourceBufLen, match);
          }
        } else {
          extractedTags.push(sourceBufLen, match);
        }
      } else {
        var literalText = htmlToText(match);
        sourceBuf.push(literalText);
        sourceBufLen += literalText.length;
      }
    }
  }
  return {
    source: sourceBuf.join(''),
    tags: extractedTags
  };
}
/** True if the given tag contains a class attribute with the nocode class. */
function isNoCodeTag(tag) {
  // Rewrite every attribute as name="value" with double quotes, whatever
  // quoting the author used, so that a single pattern can inspect the class.
  var normalized = tag.replace(
      /\s(\w+)\s*=\s*(?:\"([^\"]*)\"|'([^\']*)'|(\S+))/g, ' $1="$2$3$4"');
  // "class" is matched case-insensitively; nocode must be a whole word.
  var nocodeClass = /[cC][lL][aA][sS][sS]=\"[^\"]*\bnocode\b/;
  return nocodeClass.test(normalized);
}
/**
 * Apply the given language handler to sourceCode and add the resulting
 * decorations to out.
 * @param {number} offset the index of sourceCode within the chunk of source
 *    whose decorations are already present on out.
 */
function appendDecorations(offset, sourceCode, langHandler, out) {
  // Nothing to lex: the handler is not even invoked for empty input.
  if (sourceCode) {
    var decorations = langHandler.call({}, sourceCode);
    if (offset) {
      // Positions sit at the even indices; shift each one so it is relative
      // to the enclosing chunk.  This modifies the handler's array in place.
      for (var k = decorations.length - 2; k >= 0; k -= 2) {
        decorations[k] += offset;
      }
    }
    out.push.apply(out, decorations);
  }
}
/**
 * Given triples of [style, pattern, context] returns a lexing function,
 * The lexing function interprets the patterns to find token boundaries and
 * returns a decoration list of the form
 * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
 * where index_n is an index into the sourceCode, and style_n is a style
 * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
 * all characters in sourceCode[index_n-1:index_n].
 *
 * The stylePatterns is a list whose elements have the form
 * [style : string, pattern : RegExp, context : RegExp, shortcut : string].
 *
 * Style is a style constant like PR_PLAIN, or can be a string of the
 * form 'lang-FOO', where FOO is a language extension describing the
 * language of the portion of the token in $1 after pattern executes.
 * E.g., if style is 'lang-lisp', and group 1 contains the text
 * '(hello (world))', then that portion of the token will be passed to the
 * registered lisp handler for formatting.
 * The text before and after group 1 will be restyled using this decorator
 * so decorators should take care that this doesn't result in infinite
 * recursion.  For example, the HTML lexer rule for SCRIPT elements looks
 * something like ['lang-js', /<[s]cript>(.+?)<\/script>/].  This may match
 * '<script>foo()<\/script>', which would cause the current decorator to
 * be called with '<script>' which would not match the same rule since
 * group 1 must not be empty, so it would be instead styled as PR_TAG by
 * the generic tag rule.  The handler registered for the 'js' extension would
 * then be called with 'foo()', and finally, the current decorator would
 * be called with '<\/script>' which would not match the original rule and
 * so the generic tag rule would identify it as a tag.
 *
 * Pattern must only match prefixes, and if it matches a prefix and context
 * is null or matches the last non-comment token parsed, then that match is
 * considered a token with the same style.
 *
 * Context is applied to the last non-whitespace, non-comment token
 * recognized.
 *
 * Shortcut is an optional string of characters, any of which, if the first
 * character, guarantee that this pattern and only this pattern matches.
 *
 * @param {Array} shortcutStylePatterns patterns that always start with
 *   a known character.  Must have a shortcut string.
 * @param {Array} fallthroughStylePatterns patterns that will be tried in
 *   order if the shortcut ones fail.  May have shortcuts.
 *
 * @return {function (string, number?) : Array.<number|string>} a
 *   function that takes source code and returns a list of decorations.
 */
function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
  // Maps a token's first character to the pattern entry registered for it.
  var shortcuts = {};
  (function () {
    var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns);
    // Iterating backwards means that when two entries claim the same
    // shortcut character, the entry listed first is written last and wins.
    for (var i = allPatterns.length; --i >= 0;) {
      var patternParts = allPatterns[i];
      var shortcutChars = patternParts[3];
      if (shortcutChars) {
        for (var c = shortcutChars.length; --c >= 0;) {
          shortcuts[shortcutChars.charAt(c)] = patternParts;
        }
      }
    }
  })();

  var nPatterns = fallthroughStylePatterns.length;
  var notWs = /\S/;

  /**
   * @param {string} sourceCode text to tokenize.
   * @param {number=} opt_basePos added to every position in the result;
   *   defaults to 0.
   * @return {Array.<number|string>} decoration list, always starting with
   *   [opt_basePos, PR_PLAIN].
   */
  var decorate = function (sourceCode, opt_basePos) {
    opt_basePos = opt_basePos || 0;
    var decorations = [opt_basePos, PR_PLAIN];
    var lastToken = '';
    var pos = 0;  // index into sourceCode
    var tail = sourceCode;

    while (tail.length) {
      var style;
      var token = null;
      var match = null;

      // Fast path: the first character may select a single pattern.
      var patternParts = shortcuts[tail.charAt(0)];
      if (patternParts) {
        match = tail.match(patternParts[1]);
        if (match && match[0]) {
          token = match[0];
          style = patternParts[0];
        } else {
          // A shortcut is supposed to guarantee a match.  If it does not
          // (or matches nothing), try the fallthrough patterns instead of
          // failing on a null match or looping forever on an empty token.
          match = null;
        }
      }

      if (!token) {
        for (var i = 0; i < nPatterns; ++i) {
          patternParts = fallthroughStylePatterns[i];
          var contextPattern = patternParts[2];
          if (contextPattern && !contextPattern.test(lastToken)) {
            // rule can't be used
            continue;
          }
          match = tail.match(patternParts[1]);
          if (match) {
            token = match[0];
            style = patternParts[0];
            break;
          }
        }
        if (!token) {  // make sure that we make progress
          style = PR_PLAIN;
          token = tail.substring(0, 1);
        }
      }

      var isEmbedded = 'lang-' === style.substring(0, 5);
      if (isEmbedded && !(match && match[1])) {
        // A 'lang-' rule without a captured group 1 is styled as generic
        // embedded source.
        isEmbedded = false;
        style = PR_SOURCE;
      }

      if (!isEmbedded) {
        decorations.push(opt_basePos + pos, style);
      } else {  // Treat group 1 as an embedded block of source code.
        var embeddedSource = match[1];
        // NOTE(review): indexOf finds the FIRST occurrence of group 1 in the
        // token, which is the wrong offset when the same text also occurs
        // earlier in the token (e.g. '<style>s</style>').
        var embeddedSourceStart = token.indexOf(embeddedSource);
        var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
        var lang = style.substring(5);
        if (!langHandlerRegistry.hasOwnProperty(lang)) {
          lang = /^\s*</.test(embeddedSource)
              ? 'default-markup' : 'default-code';
        }
        // Decorate the text before group 1 with this lexer, group 1 with
        // the handler for its language, and the text after it with this
        // lexer again.
        appendDecorations(
            opt_basePos + pos,
            token.substring(0, embeddedSourceStart),
            decorate, decorations);
        appendDecorations(
            opt_basePos + pos + embeddedSourceStart,
            token.substring(embeddedSourceStart, embeddedSourceEnd),
            langHandlerRegistry[lang], decorations);
        appendDecorations(
            opt_basePos + pos + embeddedSourceEnd,
            token.substring(embeddedSourceEnd),
            decorate, decorations);
      }

      pos += token.length;
      tail = tail.substring(token.length);
      // Context patterns are tested against the last token that is neither
      // a comment nor pure whitespace.
      if (style !== PR_COMMENT && notWs.test(token)) {
        lastToken = token;
      }
    }
    return decorations;
  };
  return decorate;
}
// Top-level lexer for markup.  It has no shortcut patterns; the fallthrough
// rules are tried in order: text, declarations, comments, <?...?> and
// <%...%> sections (embedded source), stray <% %> <? ?> punctuation, the
// content of xmp / script / style elements (embedded source, with script
// and style tagged as js and css), and finally generic tags.
var PR_MARKUP_LEXER = createSimpleLexer([], [[PR_PLAIN, /^[^<?]+/, null], [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/, null], [PR_COMMENT, /^<!--[\s\S]*?(?:-->|$)/, null],['lang-', /^<\?([\s\S]+?)(?:\?>|$)/, null], ['lang-', /^<%([\s\S]+?)(?:%>|$)/, null], [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/, null], ['lang-', /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i, null],['lang-js', /^<script\b[^>]*>([\s\S]+?)<\/script\b[^>]*>/i, null],['lang-css', /^<style\b[^>]*>([\s\S]+?)<\/style\b[^>]*>/i, null], [PR_TAG, /^<\/?\w[^<>]*>/, null]]);
// Splits a source chunk into (1) its opening tag, (2) its body and (3) its
// closing tag.
var PR_SOURCE_CHUNK_PARTS = /^(<[^>]*>)([\s\S]*)(<\/[^>]*>)$/;/** split markup on tags, comments, application directives, and other top* level constructs.  Tags are returned as a single token-attributes are* not yet broken out.* @private*/
function tokenizeMarkup(source) {
  var decorations = PR_MARKUP_LEXER(source);
  var i = 0;
  while (i < decorations.length) {
    if (decorations[i + 1] === PR_SOURCE) {
      var start = decorations[i];
      // A decoration runs up to the next one, or to the end of the source.
      var end = (i + 2 < decorations.length)
          ? decorations[i + 2] : source.length;
      var parts = source.substring(start, end).match(PR_SOURCE_CHUNK_PARTS);
      if (parts) {
        // Replace the single source decoration with three: opening tag,
        // source body, closing tag.
        var bodyStart = start + parts[1].length;
        var bodyEnd = bodyStart + (parts[2] || '').length;
        decorations.splice(
            i, 2,
            start, PR_TAG,
            bodyStart, PR_SOURCE,
            bodyEnd, PR_TAG);
      }
    }
    i += 2;
  }
  return decorations;
}
// Lexer for the inside of a single tag.  Shortcut rules: quoted attribute
// values (either quote style) and runs of < > / = punctuation.  Fallthrough
// rules: a name right after '<' is the tag name, a bare word right after '='
// is an unquoted attribute value, any other name is an attribute name, and
// whitespace is plain text.
var PR_TAG_LEXER = createSimpleLexer([[PR_ATTRIB_VALUE, /^\'[^\']*(?:\'|$)/, null, "'"], [PR_ATTRIB_VALUE, /^\"[^\"]*(?:\"|$)/, null, '"'], [PR_PUNCTUATION, /^[<>\/=]+/, null, '<>/=']], [[PR_TAG, /^[\w:\-]+/, /^</], [PR_ATTRIB_VALUE, /^[\w\-]+/, /^=/], [PR_ATTRIB_NAME, /^[\w:\-]+/, null], [PR_PLAIN, /^\s+/, null, ' \t\r\n']]);/** split tags attributes and their values out from the tag name, and* recursively lex source chunks.* @private*/
function splitTagAttributes(source, decorations) {
  var i = 0;
  while (i < decorations.length) {
    if (decorations[i + 1] === PR_TAG) {
      var start = decorations[i];
      // A decoration runs up to the next one, or to the end of the source.
      var end = (i + 2 < decorations.length)
          ? decorations[i + 2] : source.length;
      // Lex the tag's text and substitute the finer-grained decorations
      // for the single tag decoration.
      var subDecorations = PR_TAG_LEXER(source.substring(start, end), start);
      spliceArrayInto(subDecorations, decorations, i, 2);
      // Step over what was just inserted so it is not lexed again.
      i += subDecorations.length - 2;
    }
    i += 2;
  }
  return decorations;
}
/**
 * returns a function that produces a list of decorations from source text.
 *
 * This code treats ", ', and ` as string delimiters, and \ as a string
 * escape.  It does not recognize perl's qq() style strings.
 * It has no special handling for double delimiter escapes as in basic, or
 * the tripled delimiters used in python, but should work on those regardless
 * although in those cases a single string literal may be broken up into
 * multiple adjacent string literals.
 *
 * It recognizes C, C++, and shell style comments.
 *
 * @param {Object} options a set of optional parameters.
 * @return {function (string) : Array.<string|number>} a
 *     decorator that takes sourceCode as plain text and that returns a
 *     decoration list
 */
function sourceDecorator(options) {
  // Builds a decorator in two stages: a first lexer separates strings and
  // comments from everything else, then a second lexer breaks the remaining
  // plain text into identifiers, literals and punctuation, which are finally
  // classified as keywords or types.
  //
  // Options read: 'tripleQuotedStrings', 'multiLineStrings', 'hashComments',
  // 'cStyleComments', 'regexLiterals' (all truthy flags) and 'keywords'
  // (a space-separated string; treated as empty when missing).
  var shortcutStylePatterns = [];
  var fallthroughStylePatterns = [];

  if (options['tripleQuotedStrings']) {
    // '''multi-line-string''', 'single-line-string', and double-quoted
    shortcutStylePatterns.push([PR_STRING, /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/, null, '\'"']);
  } else if (options['multiLineStrings']) {
    // 'multi-line-string', "multi-line-string", `multi-line-string`
    shortcutStylePatterns.push([PR_STRING, /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/, null, '\'"`']);
  } else {
    // 'single-line-string', "single-line-string"
    shortcutStylePatterns.push([PR_STRING, /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/, null, '"\'']);
  }
  fallthroughStylePatterns.push([PR_PLAIN, /^(?:[^\'\"\`\/\#]+)/, null, ' \r\n']);
  if (options['hashComments']) {
    shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
  }
  if (options['cStyleComments']) {
    fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
    fallthroughStylePatterns.push([PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
  }
  if (options['regexLiterals']) {
    var REGEX_LITERAL = (
        // A regular expression literal starts with a slash that is
        // not followed by * or / so that it is not confused with
        // comments.
        '^/(?=[^/*])'
        // and then contains any number of raw characters,
        + '(?:[^/\\x5B\\x5C]'
        // escape sequences (\x5C),
        + '|\\x5C[\\s\\S]'
        // or non-nesting character sets (\x5B\x5D);
        + '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+'
        // finally closed by a /.
        + '(?:/|$)');
    // Only tried when the previous significant token can precede a regex.
    fallthroughStylePatterns.push([PR_STRING, new RegExp(REGEX_LITERAL), REGEXP_PRECEDER_PATTERN]);
  }

  var keywords = wordSet(options['keywords'] || '');

  options = null;

  /** splits the given string into comment, string, and "other" tokens.
    * @param {string} sourceCode as plain text
    * @return {Array.<number|string>} a decoration list.
    * @private
    */
  var splitStringAndCommentTokens = createSimpleLexer(
      shortcutStylePatterns, fallthroughStylePatterns);

  // Whitespace, identifiers, hex literals, other numeric literals, and
  // punctuation.  The hex rule allows zero or more suffix letters, so a
  // literal made only of digits such as 0x10 is one token.
  var styleLiteralIdentifierPuncRecognizer = createSimpleLexer([], [[PR_PLAIN, /^\s+/, null, ' \r\n'],[PR_PLAIN, /^[a-z_$@][a-z_$@0-9]*/i, null],[PR_LITERAL, /^0x[a-f0-9]+[a-z]*/i, null],[PR_LITERAL, /^(?:\d(?:_\d+)*\d*(?:\.\d*)?|\.\d+)(?:e[+\-]?\d+)?[a-z]*/i, null, '123456789'], [PR_PUNCTUATION, /^[^\s\w\.$@]+/, null]]);

  /** splits plain text tokens into more specific tokens, and then tries to
    * recognize keywords, and types.
    * @private
    */
  function splitNonStringNonCommentTokens(source, decorations) {
    for (var i = 0; i < decorations.length; i += 2) {
      var style = decorations[i + 1];
      if (style === PR_PLAIN) {
        var start, end, chunk, subDecs;
        start = decorations[i];
        end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
        chunk = source.substring(start, end);
        // Positions in subDecs are absolute because `start` is passed as
        // the base position.
        subDecs = styleLiteralIdentifierPuncRecognizer(chunk, start);
        for (var j = 0, m = subDecs.length; j < m; j += 2) {
          var subStyle = subDecs[j + 1];
          if (subStyle === PR_PLAIN) {
            var subStart = subDecs[j];
            // The last sub-token runs to the end of the chunk.  That end
            // must be the absolute position `end`, since subStart is
            // absolute too.
            var subEnd = j + 2 < m ? subDecs[j + 2] : end;
            var token = source.substring(subStart, subEnd);
            if (token === '.') {
              subDecs[j + 1] = PR_PUNCTUATION;
            } else if (Object.prototype.hasOwnProperty.call(keywords, token)) {
              // Own-property test: identifiers such as "constructor" or
              // "toString" exist on Object.prototype but are not keywords.
              subDecs[j + 1] = PR_KEYWORD;
            } else if (/^@?[A-Z][A-Z$]*[a-z][A-Za-z$]*$/.test(token)) {
              // classify types and annotations using Java's style conventions
              subDecs[j + 1] = token.charAt(0) === '@' ? PR_LITERAL : PR_TYPE;
            }
          }
        }
        spliceArrayInto(subDecs, decorations, i, 2);
        i += subDecs.length - 2;
      }
    }
    return decorations;
  }

  return function (sourceCode) {
    // Split into strings, comments, and other.
    // We do this because strings and comments are easily recognizable and can
    // contain stuff that looks like other tokens, so we want to mark those
    // early so we don't recurse into them.
    var decorations = splitStringAndCommentTokens(sourceCode);

    // Split non comment|string tokens on whitespace and word boundaries
    decorations = splitNonStringNonCommentTokens(sourceCode, decorations);

    return decorations;
  };
}
// Language-agnostic source decorator: it knows the keywords of every
// language listed in ALL_KEYWORDS and enables hash comments, C style
// comments, multi-line strings and regular expression literals together.
var decorateSource = sourceDecorator({
'keywords': ALL_KEYWORDS,
'hashComments': true,
'cStyleComments': true,
'multiLineStrings': true,
'regexLiterals': true});/** identify attribute values that really contain source code and recursively* lex them.* @private*/
function splitSourceAttributes(source, decorations) {
  // Looks for attribute values that follow an attribute whose name starts
  // with "on" or is exactly "style" (case-insensitive) and lexes those
  // values as source code.
  //
  // @param {string} source the markup being decorated.
  // @param {Array.<number|string>} decorations modified in place.
  // @return {Array.<number|string>} the same decorations array.
  var nextValueIsSource = false;
  for (var i = 0; i < decorations.length; i += 2) {
    var style = decorations[i + 1];
    var start, end;
    if (style === PR_ATTRIB_NAME) {
      start = decorations[i];
      end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
      nextValueIsSource = /^on|^style$/i.test(source.substring(start, end));
    } else if (style === PR_ATTRIB_VALUE) {
      if (nextValueIsSource) {
        start = decorations[i];
        end = i + 2 < decorations.length ? decorations[i + 2] : source.length;
        var attribValue = source.substring(start, end);
        var attribLen = attribValue.length;
        var quoted = (attribLen >= 2 && /^[\"\']/.test(attribValue) &&
                      attribValue.charAt(0) === attribValue.charAt(attribLen - 1));

        var attribSource;
        var attribSourceStart;
        var attribSourceEnd;
        if (quoted) {
          // Lex only the text between the quotes.
          attribSourceStart = start + 1;
          attribSourceEnd = end - 1;
          attribSource = attribValue.substring(1, attribLen - 1);
        } else {
          // Without surrounding quotes the whole value is source.
          attribSourceStart = start;
          attribSourceEnd = end;
          attribSource = attribValue;
        }

        var attribSourceDecorations = decorateSource(attribSource);
        // Make the positions relative to `source`.
        for (var j = 0, m = attribSourceDecorations.length; j < m; j += 2) {
          attribSourceDecorations[j] += attribSourceStart;
        }

        if (quoted) {
          // Keep the existing decoration for the opening quote, insert the
          // source decorations after it, and restore the attribute-value
          // style for the closing quote.
          attribSourceDecorations.push(attribSourceEnd, PR_ATTRIB_VALUE);
          spliceArrayInto(attribSourceDecorations, decorations, i + 2, 0);
        } else {
          spliceArrayInto(attribSourceDecorations, decorations, i, 2);
        }
      }
      nextValueIsSource = false;
    }
  }
  return decorations;
}
/**
 * returns a decoration list given a string of markup.
 *
 * This code recognizes a number of constructs.
 * <!-- ... --> comment
 * <!\w ... >   declaration
 * <\w ... >    tag
 * </\w ... >   tag
 * <?...?>      embedded source
 * <%...%>      embedded source
 * &[#\w]...;   entity
 *
 * It does not recognize %foo; doctype entities.
 *
 * It will recurse into any <style>, <script>, and on* attributes using
 * the source lexers.
 */
function decorateMarkup(sourceCode) {
  // Three passes over the same decoration list: split the markup into
  // top-level tokens, break tags into names / attributes / values, then lex
  // attribute values that contain source code.
  var topLevel = tokenizeMarkup(sourceCode);
  var withAttributes = splitTagAttributes(sourceCode, topLevel);
  return splitSourceAttributes(sourceCode, withAttributes);
}
/**
 * @param {string} sourceText plain text
 * @param {Array.<number|string>} extractedTags chunks of raw html preceded
 *   by their position in sourceText in order.
 * @param {Array.<number|string>} decorations style classes preceded by their
 *   position in sourceText in order.
 * @return {string} html
 * @private
 */
function recombineTagsAndDecorations(sourceText, extractedTags, decorations) {
  var pieces = [];
  var emittedUpTo = 0;      // index into sourceText of the text not yet output
  var openStyle = null;     // class of the span that is currently open
  var pendingStyle = null;  // class that applies to the text emitted next
  var tagIdx = 0;           // index into extractedTags
  var decIdx = 0;           // index into decorations
  var expandTabs = makeTabExpander(window['PR_TAB_WIDTH']);
  var adjacentSpaceRe = /([\r\n ]) /g;
  var startOrSpaceRe = /(^| ) /gm;
  var newlineRe = /\r\n?|\n/g;
  var trailingSpaceRe = /[ \r\n]$/;
  var lastWasSpace = true;  // the last text chunk emitted ended with a space.

  // Closes the currently open span, if there is one.
  function closeOpenSpan() {
    if (openStyle) {
      pieces.push('</span>');
      openStyle = null;
    }
  }

  // Emits sourceText[emittedUpTo:sourceIdx] as html wrapped in a span for
  // the pending style.
  function emitTextUpTo(sourceIdx) {
    if (!(sourceIdx > emittedUpTo)) {
      return;
    }
    // The style changed since the span was opened: close it first.
    if (openStyle !== pendingStyle) {
      closeOpenSpan();
    }
    if (!openStyle && pendingStyle) {
      openStyle = pendingStyle;
      pieces.push('<span class="', openStyle, '">');
    }
    // A space that follows another space (or a line break, or the start of
    // a line when the previous chunk ended in whitespace) becomes &nbsp;.
    var spaceRe = lastWasSpace ? startOrSpaceRe : adjacentSpaceRe;
    var expanded = expandTabs(sourceText.substring(emittedUpTo, sourceIdx));
    var htmlChunk = textToHtml(expanded).replace(spaceRe, '$1&nbsp;');
    lastWasSpace = trailingSpaceRe.test(htmlChunk);
    pieces.push(htmlChunk.replace(newlineRe, '<br />'));
    emittedUpTo = sourceIdx;
  }

  // Merge the two position-ordered lists.  When a tag and a decoration sit
  // at the same position, the tag goes first.
  for (;;) {
    var tagsLeft = tagIdx < extractedTags.length;
    var decsLeft = decIdx < decorations.length;
    var tagIsNext = tagsLeft &&
        (!decsLeft || extractedTags[tagIdx] <= decorations[decIdx]);

    if (tagIsNext) {
      emitTextUpTo(extractedTags[tagIdx]);
      // Extracted html is emitted outside of any styling span.
      closeOpenSpan();
      pieces.push(extractedTags[tagIdx + 1]);
      tagIdx += 2;
    } else if (decsLeft) {
      emitTextUpTo(decorations[decIdx]);
      pendingStyle = decorations[decIdx + 1];
      decIdx += 2;
    } else {
      break;
    }
  }

  emitTextUpTo(sourceText.length);
  closeOpenSpan();
  return pieces.join('');
}
/** Maps language-specific file extensions to handlers. */
var langHandlerRegistry = {};
/**
 * Register a language handler for the given file extensions.  An extension
 * that already has a handler keeps it; the attempt is reported through
 * console.log when window has a console property.
 * @param {function (string) : Array.<number|string>} handler
 *     a function from source code to a list of decorations.
 * @param {Array.<string>} fileExtensions
 */
function registerLangHandler(handler, fileExtensions) {
  // Extensions are processed from the last to the first.
  var remaining = fileExtensions.length;
  while (remaining-- > 0) {
    var extension = fileExtensions[remaining];
    if (langHandlerRegistry.hasOwnProperty(extension)) {
      if ('console' in window) {
        console.log('cannot override language handler %s', extension);
      }
    } else {
      langHandlerRegistry[extension] = handler;
    }
  }
}
// Built-in handlers.  'default-code' and 'default-markup' are the fallbacks
// used when no handler is registered for the requested language.
registerLangHandler(decorateSource, ['default-code']);
registerLangHandler(
decorateMarkup, ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']);
// C-like languages: hash comments are enabled alongside C style comments.
registerLangHandler(sourceDecorator({
'keywords': CPP_KEYWORDS,
'hashComments': true,
'cStyleComments': true}), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']);
registerLangHandler(sourceDecorator({
'keywords': CSHARP_KEYWORDS,
'hashComments': true,
'cStyleComments': true}), ['cs']);
// Java: C style comments only.
registerLangHandler(sourceDecorator({
'keywords': JAVA_KEYWORDS,
'cStyleComments': true}), ['java']);
// Shell dialects: hash comments and strings that may span lines.
registerLangHandler(sourceDecorator({
'keywords': SH_KEYWORDS,
'hashComments': true,
'multiLineStrings': true}), ['bsh', 'csh', 'sh']);
// Python: additionally recognises triple-quoted strings.
registerLangHandler(sourceDecorator({
'keywords': PYTHON_KEYWORDS,
'hashComments': true,
'multiLineStrings': true,
'tripleQuotedStrings': true}), ['cv', 'py']);
// Perl and Ruby: hash comments, multi-line strings and regex literals.
registerLangHandler(sourceDecorator({
'keywords': PERL_KEYWORDS,
'hashComments': true,
'multiLineStrings': true,
'regexLiterals': true}), ['perl', 'pl', 'pm']);
registerLangHandler(sourceDecorator({
'keywords': RUBY_KEYWORDS,
'hashComments': true,
'multiLineStrings': true,
'regexLiterals': true}), ['rb']);
// JavaScript: C style comments and regex literals; single-line strings.
registerLangHandler(sourceDecorator({
'keywords': JSCRIPT_KEYWORDS,
'cStyleComments': true,
'regexLiterals': true}), ['js']);
/**
 * Pretty prints one chunk of code.
 *
 * The handler is looked up in langHandlerRegistry by opt_langExtension.  When
 * that extension is missing or not registered, 'default-markup' is used if the
 * extracted plain text starts (after optional whitespace) with '<', and
 * 'default-code' otherwise.
 *
 * Any exception raised along the way is logged (when window exposes a console)
 * and the input is returned unchanged.
 *
 * @param {string} sourceCodeHtml code as html
 * @param {string=} opt_langExtension key of a registered language handler.
 * @return {string} code as html, but prettier; or sourceCodeHtml itself if
 *     prettifying failed.
 */
function prettyPrintOne(sourceCodeHtml, opt_langExtension) {
  try {
    var sourceAndExtractedTags = extractTags(sourceCodeHtml);
    /** Plain text. @type {string} */
    var source = sourceAndExtractedTags.source;
    /**
     * Even entries are positions in source in ascending order.  Odd entries
     * are tags that were extracted at that position.
     * @type {Array.<number|string>}
     */
    var extractedTags = sourceAndExtractedTags.tags;

    if (!langHandlerRegistry.hasOwnProperty(opt_langExtension)) {
      opt_langExtension =
          /^\s*</.test(source) ? 'default-markup' : 'default-code';
    }

    /**
     * Even entries are positions in source in ascending order.  Odd entries
     * are style markers (e.g., PR_COMMENT) that run from that position until
     * the end.
     * @type {Array.<number|string>}
     */
    var decorations =
        langHandlerRegistry[opt_langExtension].call({}, source);

    return recombineTagsAndDecorations(source, extractedTags, decorations);
  } catch (e) {
    if ('console' in window) {
      console.log(e);
      // Some consoles provide log() but not trace().  Calling a missing
      // trace() would throw from inside this handler and defeat the
      // "return the input unchanged" fallback below.
      if (console.trace) {
        console.trace();
      }
    }
    return sourceCodeHtml;
  }
}
/**
 * Finds all the pre, code and xmp elements in the document whose class
 * contains 'prettyprint' and prettifies them.
 *
 * Work is done in slices: when window['PR_SHOULD_USE_CONTINUATION'] is truthy
 * each slice runs for at most about 250ms and the next one is scheduled with
 * setTimeout; otherwise everything is processed in a single synchronous pass.
 *
 * An element nested inside another prettyprint pre/code/xmp element is left
 * alone.  An optional 'lang-xyz' class selects the handler passed on to
 * prettyPrintOne.
 *
 * @param {Function} opt_whenDone if specified, called when the last entry
 *     has been finished.
 */
function prettyPrint(opt_whenDone) {
  var isIE6 = window['_pr_isIE6']();

  // tagName is upper case in HTML documents but keeps its source case in
  // XML/XHTML ones, so tag names are always compared case-insensitively.
  var preformattedTagName = /^(?:pre|code|xmp)$/i;
  var preTagName = /^pre$/i;

  // Snapshot the element lists into a plain array before any DOM changes.
  var codeSegments = [
    document.getElementsByTagName('pre'),
    document.getElementsByTagName('code'),
    document.getElementsByTagName('xmp')];
  var elements = [];
  for (var i = 0; i < codeSegments.length; ++i) {
    for (var j = 0, n = codeSegments[i].length; j < n; ++j) {
      elements.push(codeSegments[i][j]);
    }
  }
  codeSegments = null;

  // Index of the next element to process; shared between slices.
  var k = 0;

  /** True if an ancestor of element is itself a prettyprint code block. */
  function hasPrettyPrintAncestor(element) {
    for (var p = element.parentNode; p; p = p.parentNode) {
      if (preformattedTagName.test(p.tagName) &&
          p.className && p.className.indexOf('prettyprint') >= 0) {
        return true;
      }
    }
    return false;
  }

  /**
   * Replaces a raw-content element with a new PRE that carries the same
   * specified attributes and the given markup.  Returns the new PRE.
   */
  function replaceWithPre(rawElement, html) {
    var pre = document.createElement('PRE');
    for (var a = 0; a < rawElement.attributes.length; ++a) {
      var attr = rawElement.attributes[a];
      if (!attr.specified) { continue; }
      if (attr.name.toLowerCase() === 'class') {
        pre.className = attr.value;  /* For IE 6 */
      } else {
        pre.setAttribute(attr.name, attr.value);
      }
    }
    pre.innerHTML = html;
    rawElement.parentNode.replaceChild(pre, rawElement);
    return pre;
  }

  function doWork() {
    var endTime = (window['PR_SHOULD_USE_CONTINUATION']
                   ? new Date().getTime() + 250  /* ms */
                   : Infinity);
    for (; k < elements.length && new Date().getTime() < endTime; k++) {
      var cs = elements[k];
      if (!(cs.className && cs.className.indexOf('prettyprint') >= 0)) {
        continue;
      }

      // An optional language extension, e.g. class="prettyprint lang-py".
      var langExtension = cs.className.match(/\blang-(\w+)\b/);
      if (langExtension) {
        langExtension = langExtension[1];
      }

      if (hasPrettyPrintAncestor(cs)) {
        continue;
      }

      var content = getInnerHtml(cs);
      // Drop a single trailing line break.
      content = content.replace(/(?:\r\n?|\n)$/, '');
      var newContent = prettyPrintOne(content, langExtension);

      if (!isRawContent(cs)) {
        cs.innerHTML = newContent;
      } else {
        cs = replaceWithPre(cs, newContent);
      }

      // On IE 6, swap <br> elements inside a PRE for CRLF text nodes.
      if (isIE6 && preTagName.test(cs.tagName)) {
        var lineBreaks = cs.getElementsByTagName('br');
        for (var b = lineBreaks.length; --b >= 0;) {
          var lineBreak = lineBreaks[b];
          lineBreak.parentNode.replaceChild(
              document.createTextNode('\r\n'), lineBreak);
        }
      }
    }
    if (k < elements.length) {
      // Out of time for this slice; resume later.
      setTimeout(doWork, 250);
    } else if (opt_whenDone) {
      opt_whenDone();
    }
  }

  doWork();
}
window['PR_normalizedHtml'] = normalizedHtml;
window['prettyPrintOne'] = prettyPrintOne;
window['prettyPrint'] = prettyPrint;
window['PR'] = {
'createSimpleLexer': createSimpleLexer,
'registerLangHandler': registerLangHandler,
'sourceDecorator': sourceDecorator,
'PR_ATTRIB_NAME': PR_ATTRIB_NAME,
'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE,
'PR_COMMENT': PR_COMMENT,
'PR_DECLARATION': PR_DECLARATION,
'PR_KEYWORD': PR_KEYWORD,
'PR_LITERAL': PR_LITERAL,
'PR_NOCODE': PR_NOCODE,
'PR_PLAIN': PR_PLAIN,
'PR_PUNCTUATION': PR_PUNCTUATION,
'PR_SOURCE': PR_SOURCE,
'PR_STRING': PR_STRING,
'PR_TAG': PR_TAG,
'PR_TYPE': PR_TYPE};})();
// Hand prettyPrint to addLoadEvent.  This statement sits after the `})();`
// above, so `prettyPrint` here resolves to the global assigned via
// window['prettyPrint'].
// NOTE(review): addLoadEvent is not defined in this part of the file;
// presumably it defers the call until the page has loaded -- confirm that it
// is defined before this script runs.
addLoadEvent(prettyPrint);
