java codility training Genomic-range-query

2020-05-20 05:39发布

The task is:

A non-empty zero-indexed string S is given. String S consists of N characters from the set of upper-case English letters A, C, G, T.

This string actually represents a DNA sequence, and the upper-case letters represent single nucleotides.

You are also given non-empty zero-indexed arrays P and Q consisting of M integers. These arrays represent queries about minimal nucleotides. We represent the letters of string S as integers 1, 2, 3, 4 in arrays P and Q, where A = 1, C = 2, G = 3, T = 4, and we assume that A < C < G < T.

Query K requires you to find the minimal nucleotide from the range (P[K], Q[K]), 0 ≤ P[i] ≤ Q[i] < N.

For example, consider string S = GACACCATA and arrays P, Q such that:

P[0] = 0    Q[0] = 8
P[1] = 0    Q[1] = 2
P[2] = 4    Q[2] = 5
P[3] = 7    Q[3] = 7

The minimal nucleotides from these ranges are as follows:

    (0, 8) is A identified by 1,
    (0, 2) is A identified by 1,
    (4, 5) is C identified by 2,
    (7, 7) is T identified by 4.

Write a function:

class Solution { public int[] solution(String S, int[] P, int[] Q); } 

that, given a non-empty zero-indexed string S consisting of N characters and two non-empty zero-indexed arrays P and Q consisting of M integers, returns an array consisting of M characters specifying the consecutive answers to all queries.

The sequence should be returned as:

    a Results structure (in C), or
    a vector of integers (in C++), or
    a Results record (in Pascal), or
    an array of integers (in any other programming language).

For example, given the string S = GACACCATA and arrays P, Q such that:

P[0] = 0    Q[0] = 8
P[1] = 0    Q[1] = 2
P[2] = 4    Q[2] = 5
P[3] = 7    Q[3] = 7

the function should return the values [1, 1, 2, 4], as explained above.

Assume that:

    N is an integer within the range [1..100,000];
    M is an integer within the range [1..50,000];
    each element of array P, Q is an integer within the range [0..N − 1];
    P[i] ≤ Q[i];
    string S consists only of upper-case English letters A, C, G, T.

Complexity:

    expected worst-case time complexity is O(N+M);
    expected worst-case space complexity is O(N), 
         beyond input storage 
         (not counting the storage required for input arguments).

Elements of input arrays can be modified.

My solution is:

class Solution {
    public int[] solution(String S, int[] P, int[] Q) {
        final  char c[] = S.toCharArray();
        final int answer[] = new int[P.length];
        int tempAnswer;
        char tempC;

        for (int iii = 0; iii < P.length; iii++) {
            tempAnswer = 4;
            for (int zzz = P[iii]; zzz <= Q[iii]; zzz++) {
                tempC = c[zzz];
                if (tempC == 'A') {
                    tempAnswer = 1;
                    break;
                } else if (tempC == 'C') {
                    if (tempAnswer > 2) {
                        tempAnswer = 2;
                    }
                } else if (tempC == 'G') {
                    if (tempAnswer > 3) {
                        tempAnswer = 3;
                    }

                }
            }
            answer[iii] = tempAnswer;
        }

        return answer;
    }
}

It is not optimal, I believe it's supposed to be done within one loop, any hint how can I achieve it?

You can check quality of your solution here https://codility.com/train/ test name is Genomic-range-query.

30条回答
叼着烟拽天下
2楼-- · 2020-05-20 05:50

In case anyone cares about C:

#include <string.h>

struct Results solution(char *S, int P[], int Q[], int M) {    
    int i, a, b, N, *pA, *pC, *pG;
    struct Results result;

    result.A = malloc(sizeof(int) * M);
    result.M = M;

    // calculate prefix sums
    N = strlen(S);
    pA = malloc(sizeof(int) * N);
    pC = malloc(sizeof(int) * N);
    pG = malloc(sizeof(int) * N);
    pA[0] = S[0] == 'A' ? 1 : 0;
    pC[0] = S[0] == 'C' ? 1 : 0;
    pG[0] = S[0] == 'G' ? 1 : 0;
    for (i = 1; i < N; i++) {
        pA[i] = pA[i - 1] + (S[i] == 'A' ? 1 : 0);
        pC[i] = pC[i - 1] + (S[i] == 'C' ? 1 : 0);
        pG[i] = pG[i - 1] + (S[i] == 'G' ? 1 : 0);
    }

    for (i = 0; i < M; i++) {
        a = P[i] - 1;
        b = Q[i];

        if ((pA[b] - pA[a]) > 0) {
            result.A[i] = 1;
        } else if ((pC[b] - pC[a]) > 0) {
            result.A[i] = 2;
        } else if ((pG[b] - pG[a]) > 0) {
            result.A[i] = 3;
        } else {
            result.A[i] = 4;
        }
    }


    return result;
}
查看更多
啃猪蹄的小仙女
3楼-- · 2020-05-20 05:50

Here is my solution Using Segment Tree O(n)+O(log n)+O(M) time

public class DNAseq {


public static void main(String[] args) {
    String S="CAGCCTA";
    int[] P={2, 5, 0};
    int[] Q={4, 5, 6};
    int [] results=solution(S,P,Q);
    System.out.println(results[0]);
}

static class segmentNode{
    int l;
    int r;
    int min;
    segmentNode left;
    segmentNode right;
}



public static segmentNode buildTree(int[] arr,int l,int r){
    if(l==r){
        segmentNode n=new segmentNode();
        n.l=l;
        n.r=r;
        n.min=arr[l];
        return n;
    }
    int mid=l+(r-l)/2;
    segmentNode le=buildTree(arr,l,mid);
    segmentNode re=buildTree(arr,mid+1,r);
    segmentNode root=new segmentNode();
    root.left=le;
    root.right=re;
    root.l=le.l;
    root.r=re.r;

    root.min=Math.min(le.min,re.min);

    return root;
}

public static int getMin(segmentNode root,int l,int r){
    if(root.l>r || root.r<l){
        return Integer.MAX_VALUE;
    }
    if(root.l>=l&& root.r<=r) {
        return root.min;
    }
    return Math.min(getMin(root.left,l,r),getMin(root.right,l,r));
}
public static int[] solution(String S, int[] P, int[] Q) {
    int[] arr=new int[S.length()];
    for(int i=0;i<S.length();i++){
        switch (S.charAt(i)) {
        case 'A':
            arr[i]=1;
            break;
        case 'C':
            arr[i]=2;
            break;
        case 'G':
            arr[i]=3;
            break;
        case 'T':
            arr[i]=4;
            break;
        default:
            break;
        }
    }

    segmentNode root=buildTree(arr,0,S.length()-1);
    int[] result=new int[P.length];
    for(int i=0;i<P.length;i++){
        result[i]=getMin(root,P[i],Q[i]);
    }
    return result;
} }
查看更多
贼婆χ
4楼-- · 2020-05-20 05:50

scala solution 100/100

import scala.annotation.switch
import scala.collection.mutable

object Solution {
  def solution(s: String, p: Array[Int], q: Array[Int]): Array[Int] = {

    val n = s.length

    def arr = mutable.ArrayBuffer.fill(n + 1)(0L)

    val a = arr
    val c = arr
    val g = arr
    val t = arr

    for (i <- 1 to n) {
      def inc(z: mutable.ArrayBuffer[Long]): Unit = z(i) = z(i - 1) + 1L

      def shift(z: mutable.ArrayBuffer[Long]): Unit = z(i) = z(i - 1)

      val char = s(i - 1)
      (char: @switch) match {
        case 'A' => inc(a); shift(c); shift(g); shift(t);
        case 'C' => shift(a); inc(c); shift(g); shift(t);
        case 'G' => shift(a); shift(c); inc(g); shift(t);
        case 'T' => shift(a); shift(c); shift(g); inc(t);
      }
    }

    val r = mutable.ArrayBuffer.fill(p.length)(0)

    for (i <- p.indices) {
      val start = p(i)
      val end = q(i) + 1
      r(i) =
        if (a(start) != a(end)) 1
        else if (c(start) != c(end)) 2
        else if (g(start) != g(end)) 3
        else if (t(start) != t(end)) 4
        else 0
    }

    r.toArray
  }
}
查看更多
Lonely孤独者°
5楼-- · 2020-05-20 05:53

I think I'm using dynamic programming. Here is my solution. Little space. Code is mot really clean, just show my idea.

class Solution {
public int[] solution(String S, int[] P, int[] Q) {
    int[] preDominator = new int[S.length()];
    int A = -1;
    int C = -1;
    int G = -1;
    int T = -1;

    for (int i = 0; i < S.length(); i++) {
        char c = S.charAt(i);
        if (c == 'A') { 
            A = i;
            preDominator[i] = -1;
        } else if (c == 'C') {
            C = i;
            preDominator[i] = A;
        } else if (c == 'G') {
            G = i;
            preDominator[i] = Math.max(A, C);
        } else {
            T = i;
            preDominator[i] = Math.max(Math.max(A, C), G);
        }
    }

    int N = preDominator.length;
    int M = Q.length;
    int[] result = new int[M];
    for (int i = 0; i < M; i++) {
        int p = P[i];
        int q = Math.min(N, Q[i]);
        for (int j = q;;) {
            if (preDominator[j] < p) {
                char c = S.charAt(j);
                if (c == 'A') {
                    result[i] = 1;
                } else if (c == 'C') {
                    result[i] = 2;
                } else if (c == 'G') {
                    result[i] = 3;
                } else {
                    result[i] = 4;
                }
                break;
            }
            j = preDominator[j];
        }
    }
    return result;
}

}

查看更多
forever°为你锁心
6楼-- · 2020-05-20 05:54

Here's my Java (100/100) Solution:

class Solution {
    private ImpactFactorHolder[] mHolder;
    private static final int A=0,C=1,G=2,T=3;

    public int[] solution(String S, int[] P, int[] Q) { 
        mHolder = createImpactHolderArray(S);

        int queriesLength = P.length;
        int[] result = new int[queriesLength];

        for (int i = 0; i < queriesLength; ++i ) {
            int value = 0;
            if( P[i] == Q[i]) {
              value = lookupValueForIndex(S.charAt(P[i])) + 1;
            } else {
             value = calculateMinImpactFactor(P[i], Q[i]);
            }
            result[i] = value;
        }
        return result;    

    }

    public int calculateMinImpactFactor(int P, int Q) {
        int minImpactFactor = 3;

        for (int nucleotide = A; nucleotide <= T; ++nucleotide ) {
            int qValue = mHolder[nucleotide].mOcurrencesSum[Q];
            int pValue = mHolder[nucleotide].mOcurrencesSum[P];
            // handling special cases when the less value is assigned on the P index
            if( P-1 >= 0 ) {
                pValue = mHolder[nucleotide].mOcurrencesSum[P-1] == 0 ? 0 : pValue;
            } else if ( P == 0 ) {
                pValue = mHolder[nucleotide].mOcurrencesSum[P] == 1 ? 0 : pValue;
            }

            if ( qValue - pValue > 0) {
                minImpactFactor = nucleotide;
                break;
            } 
        }        
        return minImpactFactor + 1;
    } 

    public int lookupValueForIndex(char nucleotide) {
        int value = 0;
        switch (nucleotide) {
            case 'A' :
                    value = A;
                    break;
                case 'C' :
                    value = C;
                    break;
                case 'G':
                   value = G;
                    break;
                case 'T':
                    value = T;
                    break;
                default:                    
                    break;
        }
        return value;
    }

    public ImpactFactorHolder[] createImpactHolderArray(String S) {
        int length = S.length();
        ImpactFactorHolder[] holder = new ImpactFactorHolder[4];
        holder[A] = new ImpactFactorHolder(1,'A', length);
        holder[C] = new ImpactFactorHolder(2,'C', length);
        holder[G] = new ImpactFactorHolder(3,'G', length);
        holder[T] = new ImpactFactorHolder(4,'T', length);
        int i =0;
        for(char c : S.toCharArray()) {
            int nucleotide = lookupValueForIndex(c);
            ++holder[nucleotide].mAcum;
            holder[nucleotide].mOcurrencesSum[i] = holder[nucleotide].mAcum;  
            holder[A].mOcurrencesSum[i] = holder[A].mAcum;
            holder[C].mOcurrencesSum[i] = holder[C].mAcum;
            holder[G].mOcurrencesSum[i] = holder[G].mAcum;
            holder[T].mOcurrencesSum[i] = holder[T].mAcum;
            ++i;
        }

        return holder;
    }

    private static class ImpactFactorHolder {
        public ImpactFactorHolder(int impactFactor, char nucleotide, int length) {
            mImpactFactor = impactFactor;
            mNucleotide = nucleotide;
            mOcurrencesSum = new int[length];
            mAcum = 0;
        }
        int mImpactFactor;
        char mNucleotide;
        int[] mOcurrencesSum;
        int mAcum;
    }
}

Link: https://codility.com/demo/results/demoJFB5EV-EG8/ I'm looking forward to implement a Segment Tree similar to @Abhishek Kumar solution

查看更多
戒情不戒烟
7楼-- · 2020-05-20 05:55

In ruby (100/100)

def interval_sum x,y,p
    p[y+1] - p[x]
end

def solution(s,p,q)

    #Hash of arrays with prefix sums

    p_sums = {}
    respuesta = []


    %w(A C G T).each do |letter|
        p_sums[letter] = Array.new s.size+1, 0
    end 

    (0...s.size).each do |count|
        %w(A C G T).each do |letter|
            p_sums[letter][count+1] = p_sums[letter][count] 
        end if count > 0

        case s[count]
        when 'A'
            p_sums['A'][count+1] += 1
        when 'C'
            p_sums['C'][count+1] += 1
        when 'G'
            p_sums['G'][count+1] += 1
        when 'T'
            p_sums['T'][count+1] += 1
        end 

    end




    (0...p.size).each do |count|


        x = p[count]
        y = q[count]


        if interval_sum(x, y, p_sums['A']) > 0 then
            respuesta << 1
            next
        end 

        if interval_sum(x, y, p_sums['C']) > 0 then
            respuesta << 2
            next
        end 

        if interval_sum(x, y, p_sums['G']) > 0 then
            respuesta << 3
            next
        end 

        if interval_sum(x, y, p_sums['T']) > 0 then
            respuesta << 4
            next
        end 

    end

    respuesta

end
查看更多
登录 后发表回答