Detect position of first difference in 2 strings

2020-03-17 06:04发布

问题:

What is the cleanest way of finding the position of the first difference in any two strings in Javascript?

var a = 'in the';
var b = 'in he';
findFirstDiffPos(a, b); // 3

var c = 'in the beginning';
findFirstDiffPos(a, c); // 6 

回答1:

You can simply iterate through your strings and check it character-by-character.

document.body.innerHTML += findFirstDiffPos("in he", "in the") + "<br/>";
document.body.innerHTML += findFirstDiffPos("abcd", "abcde") + "<br/>";
document.body.innerHTML += findFirstDiffPos("zxc", "zxc");

function findFirstDiffPos(a, b)
{
   var shorterLength = Math.min(a.length, b.length);

   for (var i = 0; i < shorterLength; i++)
   {
       if (a[i] !== b[i]) return i;
   }

   if (a.length !== b.length) return shorterLength;

   return -1;
}

The output is 3 4 -1:
3: because strings differ at position 3
4: string abcd is a prefix of abcde, but they are of different length. The 4-th (0-based) character does not exist in string abcd. You can change this logic in accordance with your requirements
-1: strings are equal

Update: As @torazaburo mentioned in comments, the code can be even easier - just make a loop until the Math.max() of their length. It will work because s[i] for i >= s.length will return undefined and condition will result in true.

document.body.innerHTML += findFirstDiffPos("in he", "in the") + "<br/>";
document.body.innerHTML += findFirstDiffPos("abcd", "abcde") + "<br/>";
document.body.innerHTML += findFirstDiffPos("zxc", "zxc");

function findFirstDiffPos(a, b)
{
  var longerLength = Math.max(a.length, b.length);
  for (var i = 0; i < longerLength; i++)
  {
     if (a[i] !== b[i]) return i;
  }

  return -1;
}



回答2:

Looping

The looping approach could be written a bit more succinctly as

function findFirstDiffPos(a, b) {
  var i = 0;
  if (a === b) return -1;
  while (a[i] === b[i]) i++;
  return i;
}

According to jsperf, this alternative is an unsurprising 5-20 times faster than the other ones here.

Array#findIndex

Since we are trying to find the index at which a certain condition holds, this seems like a perfect application for findIndex:

function findFirstDiffPos(a, b) {
  if (a.length < b.length) [a, b] = [b, a];
  return [...a].findIndex((chr, i) => chr !== b[i]);
}

( We need the longer array to be the one we look up into, so we reverse the order if necessary. We use [...a] to convert the string into an array of characters.)

Disclaimer: This is an ES6 interface that you will have to polyfill on IE (but not Edge).

This alternative is a staggering 20 times slower than the straight loop.

Recursion

Just for fun, here is a recursive solution:

function findFirstDiffPos(a, b) {
  return function _iterate([headA, ...tailA], [headB, ...tailB], n) {
    return headA !== headB ? n : headA === undefined) ? -1 : _iterate(tailA, tailB, n+1);
  }(a.split(''), b.split(''), 0);
}

Regexp

Also in the "just for fun" category, a regexp solution. We will construct a regexp of the form /^(a(b(c)?)?)?/ from one string and match it against the other one, and check the length of the match.

function make_regexp(str) {
  var result = '';
  for (var i = str.length-1; i >= 0; i--)
    result = '(' + str[i] + result + ')?';
  return '^' + result;
}

function findFirstDiffPos(a, b) {
  return a === b ? -1 : b.match(make_regexp(a))[0].length;
}

Even if we precompile the regexp, this is still five times slower than a plain old loop.



回答3:

The function can use some ES5 features:

function firstDiff(a, b) {
  var idx;

  // Short ciruit if strings are the same
  if (a == b) return -1;

  // Go until difference found
  a.split('').every(function (c, i) {
    idx = i;
    return c == b[i]; 
  });
  return idx;
}

This will automatically return at the end of the shortest string.

Edit

A bit of code golf leads to the following:

// Concise for loop
function firstDiff(a, b) {
  for (var i=0; i<a.length; i++)
    if (a[i] != b[i]) return i;
  return i<b.length? i : -1;
}

Or using ECMAScript 2015 findIndex:

function firstDiff(a, b) {
  var i = a.split('').findIndex(function(c, i) {return c != b[i]});
  return a == b? -1 : i == -1? a.length : i;
}

But perhaps readability suffers. What is the criterion for selection?

For loop version of torazaburo's while loop effort (it's worthwhile using basic methods because they are usually much faster than iterators and not much more code, if any):

function findFirstDiffPos(a, b) {
  if (a === b) return -1;
  for (var i=0; a[i] == b[i]; i++) {}
  return i;
}


回答4:

For fun, here's a one liner. It's not particularly readable though

const findFirstDiffPos = (a, b) => [a, b].sort((a, b) => b.length - a.length).reduce((a, b) => [...a].findIndex((c, i) => c !== b[i]))