Maximum product prefix string

2020-02-26 11:13发布

问题:

The following is a demo question from a coding interview site called codility:

A prefix of a string S is any leading contiguous part of S. For example, "c" and "cod" are prefixes of the string "codility". For simplicity, we require prefixes to be non-empty.

The product of prefix P of string S is the number of occurrences of P multiplied by the length of P. More precisely, if prefix P consists of K characters and P occurs exactly T times in S, then the product equals K * T.

For example, S = "abababa" has the following prefixes:

  • "a", whose product equals 1 * 4 = 4,
  • "ab", whose product equals 2 * 3 = 6,
  • "aba", whose product equals 3 * 3 = 9,
  • "abab", whose product equals 4 * 2 = 8,
  • "ababa", whose product equals 5 * 2 = 10,
  • "ababab", whose product equals 6 * 1 = 6,
  • "abababa", whose product equals 7 * 1 = 7.

The longest prefix is identical to the original string. The goal is to choose such a prefix as maximizes the value of the product. In above example the maximal product is 10.

Below is my poor solution in Java requiring O(N^2) time. It is apparently possible to do this in O(N). I was thinking Kadanes algorithm. But I can't think of any way that I can encode some information at each step that lets me find the running max. Can any one think of an O(N) algorithm for this?

import java.util.HashMap;

class Solution {
    public int solution(String S) {
        int N = S.length();
        if(N<1 || N>300000){
            System.out.println("Invalid length");
            return(-1);
        }
        HashMap<String,Integer> prefixes = new HashMap<String,Integer>();
        for(int i=0; i<N; i++){
            String keystr = "";
            for(int j=i; j>=0; j--) {
                keystr += S.charAt(j);
                if(!prefixes.containsKey(keystr))
                    prefixes.put(keystr,keystr.length());
                else{
                    int newval = prefixes.get(keystr)+keystr.length();
                    if(newval > 1000000000)return 1000000000;
                    prefixes.put(keystr,newval);
                }
            }
        }
        int maax1 = 0;
        for(int val : prefixes.values())
            if(val>maax1)
                maax1 = val;
        return maax1;
    }
}

回答1:

Here's a O(n log n) version based on suffix arrays. There are O(n) construction algorithms for suffix arrays, I just don't have the patience to code them.

Example output (this output isn't O(n), but it's only to show that we can indeed compute all the scores):

4*1 a
3*3 aba
2*5 ababa
1*7 abababa
3*2 ab
2*4 abab
1*6 ababab

Basically you have to reverse the string, and compute the suffix array (SA) and the longest common prefix (LCP).

Then you have traverse the SA array backwards looking for LCPs that match the entire suffix (prefix in the original string). If there's a match, increment the counter, otherwise reset it to 1. Each suffix (prefix) receive a "score" (SCR) that corresponds to the number of times it appears in the original string.

#include <iostream>
#include <cstring>
#include <string>
#define MAX 10050
using namespace std;

int RA[MAX], tempRA[MAX];
int SA[MAX], tempSA[MAX];
int C[MAX];                
int Phi[MAX], PLCP[MAX], LCP[MAX];

int SCR[MAX];

void suffix_sort(int n, int k) {
    memset(C, 0, sizeof C);        

    for (int i = 0; i < n; i++)        
        C[i + k < n ? RA[i + k] : 0]++;

    int sum = 0;
    for (int i = 0; i < max(256, n); i++) {                     
        int t = C[i]; 
        C[i] = sum; 
        sum += t;
    }

    for (int i = 0; i < n; i++)        
        tempSA[C[SA[i] + k < n ? RA[SA[i] + k] : 0]++] = SA[i];

    memcpy(SA, tempSA, n*sizeof(int));
}

void suffix_array(string &s) {             
    int n = s.size();

    for (int i = 0; i < n; i++) 
        RA[i] = s[i] - 1;              

    for (int i = 0; i < n; i++) 
        SA[i] = i;

    for (int k = 1; k < n; k *= 2) {     
        suffix_sort(n, k);
        suffix_sort(n, 0);

        int r = tempRA[SA[0]] = 0;
        for (int i = 1; i < n; i++) {
            int s1 = SA[i], s2 = SA[i-1];
            bool equal = true;
            equal &= RA[s1] == RA[s2];
            equal &= RA[s1+k] == RA[s2+k];

            tempRA[SA[i]] = equal ? r : ++r;     
        }

        memcpy(RA, tempRA, n*sizeof(int));
    } 
}

void lcp(string &s) {
    int n = s.size();

    Phi[SA[0]] = -1;         
    for (int i = 1; i < n; i++)  
        Phi[SA[i]] = SA[i-1];  

    int L = 0;
    for (int i = 0; i < n; i++) {
        if (Phi[i] == -1) { 
            PLCP[i] = 0; 
            continue; 
        }
        while (s[i + L] == s[Phi[i] + L]) 
            L++;

        PLCP[i] = L;
        L = max(L-1, 0);                      
    }

    for (int i = 1; i < n; i++)                 
        LCP[i] = PLCP[SA[i]];
}

void score(string &s) {
    SCR[s.size()-1] = 1;

    int sum = 1;
    for (int i=s.size()-2; i>=0; i--) {
        if (LCP[i+1] < s.size()-SA[i]-1) {
            sum = 1;
        } else {
            sum++; 
        }
        SCR[i] = sum;
    }
}

int main() {
    string s = "abababa";
    s = string(s.rbegin(), s.rend()) +".";

    suffix_array(s);
    lcp(s);
    score(s);

    for(int i=0; i<s.size(); i++) {
        string ns = s.substr(SA[i], s.size()-SA[i]-1);
        ns = string(ns.rbegin(), ns.rend());
        cout << SCR[i] << "*" << ns.size() << " " << ns << endl;
    }
}

Most of this code (specially the suffix array and LCP implementations) I have been using for some years in contests. This version in special I adapted from this one I wrote some years ago.



回答2:

public class Main {
    public static void main(String[] args) {
        String input = "abababa";
        String prefix;
        int product;
        int maxProduct = 0;
        for (int i = 1; i <= input.length(); i++) {
            prefix = input.substring(0, i);
            String substr;
            int occurs = 0;
            for (int j = prefix.length(); j <= input.length(); j++) {
                substr = input.substring(0, j);
                if (substr.endsWith(prefix))
                    occurs++;
            }
            product = occurs*prefix.length();
            System.out.println("product of " + prefix + " = " +
                prefix.length() + " * " + occurs +" = " + product);
            maxProduct = (product > maxProduct)?product:maxProduct;
        }
        System.out.println("maxProduct = " + maxProduct);
    }
}


回答3:

I was working on this challenge for more than 4 days , reading a lot of documentation, I found a solution with O(N) .

I got 81%, the idea is simple using a window slide.

def solution(s: String): Int = {

var max = s.length  // length of the string 
var i, j = 1  // start with i=j=1 ( is the beginning of the slide and j the end of the slide )
val len = s.length // the length of the string 
val count = Array.ofDim[Int](len)  // to store intermediate results 

while (i < len - 1 || j < len) {
  if (i < len && s(0) != s(i)) {
    while (i < len && s(0) != s(i)) {  // if the begin of the slide is different from 
                                      // the first letter of the string skip it 
      i = i + 1
    }
  }
  j = i + 1
  var k = 1


  while (j < len && s(j).equals(s(k))) { // check for equality and update the array count 
    if (count(k) == 0) {
      count(k) = 1
    }
    count(k) = count(k) + 1
    max = math.max((k + 1) * count(k), max)
    k = k + 1
    j = j + 1
  }

  i = i + 1

}

max // return the max 

}