Parsing BBCode in Javascript

I am using this (http://coursesweb.net/javascript/convert-bbcode-html-javascript_cs) as my script for parsing BBCode. I have extended the BBCodes that it can process, however I am encountering a problem when a newline immediately follows an opening tag, e.g.

  [code]
     code....
  [/code]

The problem does not occur if the code is 'inline': [code]code....[/code]

The regex being used to match what's inside these tags is (.*?) which I know does not match newlines. I have tried ([^\r\n]) to match newlines but this hasn't worked either.

I imagine it's a simple issue but I have little experience with regex so any help would be appreciated

EDIT: this is the full list of regexes I am using

  // Regex fragments substituted for {TOKEN} placeholders in BBCode/HTML
  // patterns. Each value is a *string* that is later handed to `new RegExp`,
  // so every backslash meant for the regex engine must be doubled here;
  // a single backslash before a non-escape character is silently dropped by
  // the string literal.
  var tokens = {
// Absolute URL with a scheme ("scheme://...") or a bare "www." address.
'URL' : '((?:(?:[a-z][a-z\\d+\\-.]*:\\/{2}(?:(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})+|[0-9.]+|\\[[a-z0-9.]+:[a-z0-9.]+:[a-z0-9.:]+\\])(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})*)*(?:\\?(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?(?:#(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?)|(?:www\\.(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})+(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})*)*(?:\\?(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?(?:#(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?)))',
// The single backslashes below are dropped by the string literal, which is
// harmless here: "-", "." and "/" end up as literals inside the class.
'LINK' : '([a-z0-9\-\./]+[^"\' ]*)',
// Backslashes are doubled so that "\\." stays a literal dot (not the
// any-character wildcard) and "\\-" stays a literal hyphen (not the range
// "+" to "/", which would also admit "," and ".").
'EMAIL' : '((?:[\\w!#$%&\'*+\\-\\/=?^`{|}~]+\\.)*(?:[\\w!#$%\'*+\\-\\/=?^`{|}~]|&)+@(?:(?:(?:(?:(?:[a-z0-9]{1}[a-z0-9\\-]{0,62}[a-z0-9]{1})|[a-z])\\.)+[a-z]{2,6})|(?:\\d{1,3}\\.){3}\\d{1,3}(?::\\d{1,5})?))',
// NOTE: "." does not match newline characters, so {TEXT} cannot span lines
// (e.g. "[code]" followed by a line break). "([\\s\\S]*?)" would.
'TEXT' : '(.*?)',
'SIMPLETEXT' : '([a-zA-Z0-9-+.,_ ]+)',
'INTTEXT' : '([a-zA-Z0-9-+,_. ]+)',
'IDENTIFIER' : '([a-zA-Z0-9-_]+)',
'COLOR' : '([a-z]+|#[0-9abcdef]+)',
'NUMBER'  : '([0-9]+)',
// Exactly one character that is neither CR nor LF (there is no quantifier).
'ALL'  : '([^\r\n])',

};

EDIT 2: Full JS for matching

// Finds every {TOKEN} placeholder in a pattern: an opening brace, one or more
// letters/underscores, optional trailing digits, then a closing brace.
// Global + case-insensitive, so String.prototype.match returns all of them.
var token_match = new RegExp('{[A-Z_]+[0-9]*}', 'gi');


/**
 * Builds a RegExp from a pattern that may contain {TOKEN} placeholders.
 *
 * Each placeholder whose name (braces and digits stripped) is a truthy key of
 * `tokens` is replaced by that token's regex fragment. All other text is
 * passed through preg_quote() (defined elsewhere; presumably it escapes regex
 * metacharacters) so it is matched literally.
 *
 * @param {string} str Pattern such as '[b]{TEXT}[/b]'.
 * @returns {RegExp} A 'gi' regex when the pattern contains at least one
 *     {...} placeholder, otherwise a 'g' regex of the escaped pattern.
 */
var _getRegEx = function(str) {
  // match() with a global regex returns null (not an empty array) when nothing
  // matches, so fall back to [] before reading .length.
  var matches = str.match(token_match) || [];
  var nrmatches = matches.length;
  var i = 0;
  var replacement = '';
  var remaining = str; // part of the pattern not yet consumed

  if (nrmatches === 0) {
    return new RegExp(preg_quote(str), 'g');        // no tokens so return the escaped string
  }

  for (; i < nrmatches; i += 1) {
    // Remove {, } and numbers from the placeholder so it can match the
    // keys in tokens
    var placeholder = matches[i];
    var token = placeholder.replace(/[{}0-9]/g, '');

    if (tokens[token]) {
      var start = remaining.indexOf(placeholder);

      // Escape everything before the placeholder, then append its fragment
      replacement += preg_quote(remaining.slice(0, start)) + tokens[token];

      // Drop everything up to the end of the placeholder so the next
      // iteration only sees (and escapes) the text that follows it
      remaining = remaining.slice(start + placeholder.length);
    }
  }

  // Whatever is left after the last known placeholder is literal text
  replacement += preg_quote(remaining);

  return new RegExp(replacement, 'gi');
};


/**
 * Turns a template containing {TOKEN} placeholders into a String.replace
 * replacement string by swapping each known placeholder for $1, $2, ...
 *
 * Numbers are handed out in order of first appearance, and a placeholder that
 * appears more than once (same name and digits) reuses its number.
 *
 * @param {string} str Template such as '<b>{TEXT}</b>'.
 * @returns {string} The template with known placeholders replaced by $n, or
 *     the unchanged template when it contains no placeholders.
 */
var _getTpls = function(str) {
  // match() with a global regex returns null (not an empty array) when nothing
  // matches, so fall back to [] before reading .length.
  var matches = str.match(token_match) || [];
  var nrmatches = matches.length;
  var i = 0;
  var replacement = '';
  var remaining = str; // part of the template not yet consumed
  var positions = {};
  var next_position = 0;

  if (nrmatches === 0) {
    return str;       // no tokens so return the string
  }

  for (; i < nrmatches; i += 1) {
    // Remove {, } and numbers from the placeholder so it can match the
    // keys in tokens
    var placeholder = matches[i];
    var token = placeholder.replace(/[{}0-9]/g, '');
    var position;

    // figure out what $# to use ($1, $2)
    // NOTE(review): a number is reserved even when the placeholder is not a
    // known token; confirm this lines up with the capture groups of the
    // regex the template is paired with.
    if (positions[placeholder]) {
      position = positions[placeholder];
    } else {
      // placeholder doesn't have a position so increment the next position
      // and record it
      next_position += 1;
      position = next_position;
      positions[placeholder] = position;
    }

    if (tokens[token]) {
      var start = remaining.indexOf(placeholder);

      replacement += remaining.slice(0, start) + '$' + position;
      remaining = remaining.slice(start + placeholder.length);
    }
  }

  // Text after the last known placeholder is copied through unchanged
  return replacement + remaining;
};

标签： javascript regex bbcode

2条回答

够拽才男人

2楼-- · 2020-04-15 05:32

This does the trick for me: (updated this one too to avoid confusion)

\[code\]([\s\S]*?)\[\/code\]

See regexpal and enter the following:

[code]
    code....
[/code]

[code]code.... [/code]

Update: Fixed the regex to the following and this works in the Chrome Console for me:

/\[code\]([\s\S]*?)\[\/code\]/g.exec("[code]hello world \n[/code]")

0人赞添加讨论(0) 举报

女痞

3楼-- · 2020-04-15 05:35

In JavaScript regular expressions, `.` does not match newline characters, so a pattern like (.*?) stops at the end of the line. Instead you have to use the [\s\S] trick described in this SO answer. Perhaps?

/\[code\][\s\S]*\[\/code\]/

Also, RegExps probably aren't the best choice for parsing syntax; the approach quickly becomes extremely overcomplicated. I would suggest parsing the string and building an Abstract Syntax Tree, then rendering the HTML from that.

0人赞添加讨论(0) 举报

Parsing BBCode in Javascript

采纳回答

编辑标签

举报内容

检举类型

检举原因

检举说明(必填)

打开微信“扫一扫”，打开网页后点击屏幕右上角分享按钮

付费偷看金额在0.1-10元之间