可以将文章内容翻译成中文,广告屏蔽插件可能会导致该功能失效(如失效,请关闭广告屏蔽插件后再试):
问题:
Two days ago I started working on a code parser and I'm stuck.
How can I split a string on commas that are not inside brackets? Let me show you what I mean:
I have this string to parse:
one, two, three, (four, (five, six), (ten)), seven
I would like to get this result:
array(
"one";
"two";
"three";
"(four, (five, six), (ten))";
"seven"
)
but instead I get:
array(
"one";
"two";
"three";
"(four";
"(five";
"six)";
"(ten))";
"seven"
)
How can I do this with a regular expression in PHP?
Thank you in advance !
回答1:
You can do that more easily:
preg_match_all('/[^(,\s]+|\([^)]+\)/', $str, $matches)
But that pattern stops at the first closing parenthesis, so it cannot handle nested groups. It would be better to use a real parser. Maybe something like this:
$str = 'one, two, three, (four, (five, six), (ten)), seven';

// Walk the input one character at a time, tracking how deeply nested we are
// inside parentheses. Commas and spaces only act as separators at depth 0;
// inside a group they are ordinary characters and are kept verbatim.
$stack = array();   // finished entries, in input order
$buffer = '';       // entry currently being assembled
$depth = 0;         // number of currently open '('
$pos = 0;
$total = strlen($str);

while ($pos < $total) {
    $char = $str[$pos++];

    if ($char === '(') {
        $depth++;
    } elseif ($depth === 0 && ($char === ',' || $char === ' ')) {
        // Top-level separator: a comma closes the current entry (if any),
        // a space is simply dropped. Neither is added to the buffer.
        if ($char === ',' && $buffer !== '') {
            $stack[] = $buffer;
            $buffer = '';
        }
        continue;
    } elseif ($char === ')') {
        if ($depth === 0) {
            // Unmatched ')': emit what we have, including the parenthesis.
            $stack[] = $buffer . $char;
            $buffer = '';
            continue;
        }
        $depth--;
    }

    $buffer .= $char;
}

// Flush the trailing entry, which has no comma after it.
if ($buffer !== '') {
    $stack[] = $buffer;
}

var_dump($stack);
回答2:
Hm... OK already marked as answered, but since you asked for an easy solution I will try nevertheless:
<?php
// Sample input; note the empty entries between consecutive commas.
$test = "one, two, three, , , ,(four, five, six), seven, (eight, nine)";

// Either a non-greedy "(...)" group, or a run of word characters.
$split = "/([(].*?[)])|(\w)+/";

$found = array();
preg_match_all($split, $test, $found);

// Index 0 holds the full-pattern matches; the other indexes are the
// individual capture groups and are not needed here.
print_r($found[0]);
exit();
?>
Output
Array
(
[0] => one
[1] => two
[2] => three
[3] => (four, five, six)
[4] => seven
[5] => (eight, nine)
)
回答3:
You can't, directly. You'd need, at minimum, variable-width lookbehind, and last I knew PHP's PCRE only has fixed-width lookbehind.
My first recommendation would be to first extract parenthesized expressions from the string. I don't know anything about your actual problem, though, so I don't know if that will be feasible.
回答4:
I can't think of a way to do it using a single regex, but it's quite easy to hack together something that works:
/**
 * Split a comma-separated string into trimmed entries, treating every
 * balanced parenthesized group (including nested groups) as opaque.
 *
 * Commas inside a balanced "(...)" never split. A group may be a whole entry
 * ("(a, b)") or part of one ("f(a, b)"). Parentheses that are not part of a
 * balanced group are treated as ordinary characters.
 *
 * @param string $data Comma-separated input.
 * @return array List of entries with surrounding whitespace trimmed. An
 *               empty input yields a single empty-string entry, as
 *               explode() does.
 */
function process($data)
{
    // One balanced group: "(" then any mix of non-parenthesis runs and
    // nested groups, then ")". (?R) recurses into the whole pattern, which
    // is what lets nested groups be consumed as part of the outer one.
    $pattern = '/\((?:[^()]++|(?R))*\)/';
    preg_match_all($pattern, $data, $matches, PREG_OFFSET_CAPTURE);

    $arr = array();
    $start = 0;   // byte offset where the current entry begins
    $cursor = 0;  // byte offset from which the next comma is searched

    foreach ($matches[0] as $group) {
        list($text, $at) = $group;

        // Split on every comma that lies before this group.
        while (($comma = strpos($data, ',', $cursor)) !== false && $comma < $at) {
            $arr[] = trim((string) substr($data, $start, $comma - $start));
            $start = $cursor = $comma + 1;
        }

        // Jump over the group so its commas are never considered.
        $cursor = $at + strlen($text);
    }

    // Commas after the last group (or all commas, when there is no group).
    while (($comma = strpos($data, ',', $cursor)) !== false) {
        $arr[] = trim((string) substr($data, $start, $comma - $start));
        $start = $cursor = $comma + 1;
    }

    // Final entry; the cast covers older PHP versions where substr()
    // returns false at end-of-string.
    $arr[] = trim((string) substr($data, $start));

    return $arr;
}
If you invoke it like this:
// Example input: two parenthesized groups whose inner commas must not split.
$data = "one, two, three, (four, five, six), seven, (eight, nine)";
print_r(process($data));
It outputs:
Array
(
[0] => one
[1] => two
[2] => three
[3] => (four, five, six)
[4] => seven
[5] => (eight, nine)
)
回答5:
Clumsy, but it does the job...
<?php
/**
 * Split a string on commas, ignoring commas inside balanced parentheses.
 *
 * Each balanced "(...)" group (nested groups included) is temporarily
 * swapped for a unique marker, the masked string is split on commas, and the
 * markers are then swapped back. Parentheses that are not part of a balanced
 * group are left as ordinary characters.
 *
 * @param string $string Comma-separated input.
 * @return array Entries with surrounding whitespace trimmed.
 */
function split_by_commas($string) {
    // Balanced group; (?R) recurses so nested groups belong to the outer one.
    preg_match_all('/\((?:[^()]++|(?R))*\)/', $string, $found, PREG_OFFSET_CAPTURE);

    // Lengthen the marker fence until it does not occur in the input, so a
    // marker can never be confused with text that was already there.
    $fence = '__';
    while (strpos($string, $fence) !== false) {
        $fence .= '_';
    }

    // Build the masked string by offset rather than by text replacement:
    // replacing by text would also hit identical text inside a larger group.
    $temp = array();
    $masked = '';
    $cursor = 0;
    foreach ($found[0] as $i => $hit) {
        list($submatch, $offset) = $hit;
        $marker = $fence . $i . $fence;
        $temp[$marker] = $submatch;
        $masked .= substr($string, $cursor, $offset - $cursor) . $marker;
        $cursor = $offset + strlen($submatch);
    }
    $masked .= substr($string, $cursor);

    $result = explode(",", $masked);
    foreach ($result as $key => $item) {
        $item = trim($item);
        // strtr restores markers anywhere in the entry, so "f(a, b)" works
        // as well as a bare "(a, b)".
        $result[$key] = $temp ? strtr($item, $temp) : $item;
    }
    return $result;
}
// Example input: the commas inside the two parenthesized groups must survive.
$test = "one, two, three, (four, five, six), seven, (eight, nine), ten";
print_r(split_by_commas($test));
?>
回答6:
I am afraid that it could be very difficult to parse nested brackets like
one, two, (three, (four, five))
only with RegExp.
回答7:
I feel that it's worth noting that you should always avoid regular expressions when you possibly can. To that end, you should know that for PHP 5.3+ you could use str_getcsv(). However, if you're working with files (or file streams), such as CSV files, then the function fgetcsv() might be what you need, and it's been available since PHP4.
Lastly, I'm surprised nobody used preg_split(), or did it not work as needed?