用 Java 寻找 DNA 字符串的超序列（supersequence）：DNA、Java

2023-09-11 03:56:06 作者:三封情书

我正在为一个“寻找超序列（supersequence）”的算法苦苦挣扎。

输入是一组字符串

String A = "caagccacctacatca";
String B = "cgagccatccgtaaagttg";
String C = "agaacctgctaaatgctaga";
 

结果应该是一组正确对齐的字符串（下一步再把它们合并）

String E = "ca ag cca  cc ta    cat  c a";
String F = "c gag ccat ccgtaaa g  tt  g";
String G = " aga acc tgc  taaatgc t a ga";
 

感谢您提出的任何建议（我已经在这个任务上卡了一天多了）

合并之后得到的超串将是

  cagagaccatgccgtaaatgcattacga
 
近亲不能结婚,人类祖先又不懂,他们是如何繁衍至今的

在“这种情况”下，超序列的定义大致如下

字符串 R 包含在超序列 S 中，当且仅当 R 中的所有字符都出现在超序列 S 中，并且出现的顺序与它们在输入序列 R 中的顺序一致。

我尝试过的“解决方案”（同样，这是一种错误的做法）是：

 /**
 * Experimental column-alignment attempt for a set of lower-case strings.
 *
 * <p>Each input string is laid out in a row of {@link #size} columns. A column may
 * hold a given letter only once across all strings; {@link #map} records which
 * (letter, column) pairs are already used. The space-separated pieces of every
 * laid-out row are collected as "chunks" and printed.
 *
 * <p>Known limitation: {@link #size} is the length of the longest input, so later
 * strings can run out of free columns; see {@link #make_residue(String)}.
 *
 * <p>Requires {@code java.util.HashSet} and {@code java.util.Stack} to be imported.
 */
public class Solution4
{
    /** Number of rows in {@link #map}: one per letter 'a'..'z'. */
    private static final int ALPHABET_SIZE = 26;

    /** {@code map[letter - 'a'][column]} is true once that letter occupies that column. */
    static boolean[][] map = null;

    /** Number of columns available for alignment. */
    static int size = 0;

    /**
     * Lays out the three sample strings and prints each row and the resulting chunks.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        String A = "caagccacctacatca";
        String B = "cgagccatccgtaaagttg";
        String C = "agaacctgctaaatgctaga";

        Stack<String> data = new Stack<String>();
        data.push(A);
        data.push(B);
        data.push(C);

        // max_size no longer drains the stack, so no defensive clones are needed.
        size = max_size(data);

        System.out.println(size+" "+ALPHABET_SIZE);
        map = new boolean[ALPHABET_SIZE][size];

        HashSet<String> chunks = new HashSet<String>();
        // Visit from the top of the stack downwards, i.e. in pop order (C, B, A).
        for(int k=data.size()-1; k>=0; k--)
        {
            String a = data.get(k);

            char[] residue = make_residue(a);

            System.out.println("---");
            System.out.println("OLD     : "+a);
            System.out.println("RESIDUE : "+String.valueOf(residue));

            String[] r = String.valueOf(residue).split(" ");

            for(int i=0; i<r.length; i++)
            {
                // split(" ") yields empty strings for runs of spaces; skip those.
                if(r[i].isEmpty()) continue;
                chunks.add(r[i]);
            }
        }

        for(String chunk : chunks)
        {
            System.out.println("CHUNK   : "+chunk);
        }
    }

    /**
     * Places the characters of {@code candidate} into a row of {@link #size} columns.
     *
     * <p>Characters are placed strictly left to right: each one goes to the first
     * column, at or after both its own index and the column following the previous
     * placement, that is still free for that letter in {@link #map}. Unused columns
     * hold a space.
     *
     * <p>If no free column remains, the character is written to the last column
     * the candidate could occupy, overwriting whatever is there.
     *
     * @param candidate string of letters 'a'..'z' to lay out
     * @return a new array of length {@link #size}
     */
    static char[] make_residue(String candidate)
    {
        char[] result = new char[size];
        for(int k=0; k<result.length; k++) result[k]=' ';

        int next = 0; // first column not yet used by this candidate
        for(int i=0; i<candidate.length(); i++)
        {
            char c = candidate.charAt(i);
            int pos = find_position_for(c, Math.max(i, next));
            if(pos==-1)
            {
                int last = Math.min(candidate.length(), size)-1;
                if(last>=0) result[last] = c;
            }
            else
            {
                result[pos] = c;
                next = pos+1;
            }
        }
        return result;
    }

    /**
     * Finds and reserves the first column at or after {@code offset} that is free
     * for {@code character}.
     *
     * @param character letter in 'a'..'z'
     * @param offset    first column to consider
     * @return the reserved column, or -1 if none is free
     * @throws IllegalArgumentException if {@code character} has no row in {@link #map}
     */
    static int find_position_for(char character, int offset)
    {
        int row = rowOf(character);

        for(int i=offset; i<size; i++)
        {
            if(!map[row][i])
            {
                map[row][i]=true;
                return i;
            }
        }
        return -1;
    }

    /**
     * Inserts a single space into {@code a} at index {@code from}.
     *
     * @param a    source string
     * @param from index at which the space is inserted
     * @return the string with the tail shifted one position to the right
     */
    static String move_right(String a, int from)
    {
        return a.substring(0, from)+" "+a.substring(from);
    }

    /**
     * Reports whether a (row, column) cell of {@link #map} is occupied.
     *
     * @param character row index into {@link #map} (letter minus 'a')
     * @param position  column index
     * @return the stored flag
     */
    static boolean taken(int character, int position)
    {
        return map[character][position];
    }

    /**
     * Marks {@code position} as occupied for {@code character}.
     *
     * @param character letter in 'a'..'z'
     * @param position  column index
     * @throws IllegalArgumentException if {@code character} has no row in {@link #map}
     */
    static void take(char character, int position)
    {
        map[rowOf(character)][position]=true;
    }

    /**
     * Returns the length of the longest string on the stack without modifying it.
     *
     * @param stack strings to inspect
     * @return the greatest length, or 0 for an empty stack
     */
    static int max_size(Stack<String> stack)
    {
        int max=0;
        for(String s : stack)
        {
            if(s.length()>max) max=s.length();
        }
        return max;
    }

    /** Maps a letter to its row in {@link #map}, rejecting letters without a row. */
    private static int rowOf(char character)
    {
        int row = character-'a';
        if(row<0 || row>=map.length)
        {
            throw new IllegalArgumentException("unsupported character: "+character);
        }
        return row;
    }

}
 

解决方案

找到任意一个公共超序列并不是一件困难的事：

对于您的例子，一个可能的解法大致如下：

/**
 * Demonstrates a greedy merge of three fixed strings into one common
 * supersequence (not necessarily the shortest one).
 */
public class SuperSequenceTest {

    /**
     * Merges the three sample strings and prints the result prefixed with
     * {@code "SUPERSEQUENCE "}.
     *
     * <p>On every step one character is chosen and every string whose next unread
     * character equals it is advanced. A character shared by two strings is
     * preferred (first/second, then first/third, then second/third); otherwise the
     * next character of the third, then the second, then the first string is used.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String first = "caagccacctacatca";
        final String second = "cgagccatccgtaaagttg";
        final String third = "agaacctgctaaatgctaga";

        int p = 0;
        int q = 0;
        int r = 0;
        final StringBuilder merged = new StringBuilder();

        while (true) {
            final int x = headOf(first, p);
            final int y = headOf(second, q);
            final int z = headOf(third, r);
            if (x < 0 && y < 0 && z < 0) {
                break;
            }

            final int chosen;
            if (x >= 0 && (x == y || x == z)) {
                chosen = x;
            } else if (y >= 0 && y == z) {
                chosen = y;
            } else if (z >= 0) {
                chosen = z;
            } else if (y >= 0) {
                chosen = y;
            } else {
                chosen = x;
            }

            merged.append((char) chosen);
            // chosen is never negative, so an exhausted string (-1) is never advanced.
            if (x == chosen) {
                p++;
            }
            if (y == chosen) {
                q++;
            }
            if (z == chosen) {
                r++;
            }
        }
        System.out.println("SUPERSEQUENCE " + merged);
    }

    /** Returns the character of {@code text} at {@code at}, or -1 once {@code text} is exhausted. */
    private static int headOf(String text, int at) {
        return at < text.length() ? text.charAt(at) : -1;
    }

}

但真正要解决的是已知的“最短公共超序列”（Shortest Common Supersequence）问题 http://en.wikipedia.org/wiki/Shortest_common_supersequence ，而这个问题并不那么容易。

有很多研究都在关注这个课题。

请参阅例如:

http://www.csd.uwo.ca/~lila/pdfs/Towards%20a%20DNA%20solution%20to%20the%20Shortest%20Common%20Superstring%20Problem.pdf

http://www.ncbi.nlm.nih.gov/pubmed/14534185

I am struggling with a "find supersequence" algorithm.

The input is a set of strings

String A = "caagccacctacatca";
String B = "cgagccatccgtaaagttg";
String C = "agaacctgctaaatgctaga";

The result would be a properly aligned set of strings (and the next step should be to merge them)

String E = "ca ag cca  cc ta    cat  c a";
String F = "c gag ccat ccgtaaa g  tt  g";
String G = " aga acc tgc  taaatgc t a ga";

Thank you for any advice (I have been sitting on this task for more than a day)

After the merge, the superstring would be

cagagaccatgccgtaaatgcattacga

The definition of supersequence in "this case" would be something like

The string R is contained in supersequence S if and only if all characters in a string R are present in supersequence S in the order in which they occur in the input sequence R.

The "solution" I tried (and again, it's the wrong way of doing it) is:

/**
 * Experimental column-alignment attempt for a set of lower-case strings.
 *
 * <p>Each input string is laid out in a row of {@link #size} columns. A column may
 * hold a given letter only once across all strings; {@link #map} records which
 * (letter, column) pairs are already used. The space-separated pieces of every
 * laid-out row are collected as "chunks" and printed.
 *
 * <p>Known limitation: {@link #size} is the length of the longest input, so later
 * strings can run out of free columns; see {@link #make_residue(String)}.
 *
 * <p>Requires {@code java.util.HashSet} and {@code java.util.Stack} to be imported.
 */
public class Solution4
{
    /** Number of rows in {@link #map}: one per letter 'a'..'z'. */
    private static final int ALPHABET_SIZE = 26;

    /** {@code map[letter - 'a'][column]} is true once that letter occupies that column. */
    static boolean[][] map = null;

    /** Number of columns available for alignment. */
    static int size = 0;

    /**
     * Lays out the three sample strings and prints each row and the resulting chunks.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        String A = "caagccacctacatca";
        String B = "cgagccatccgtaaagttg";
        String C = "agaacctgctaaatgctaga";

        Stack<String> data = new Stack<String>();
        data.push(A);
        data.push(B);
        data.push(C);

        // max_size no longer drains the stack, so no defensive clones are needed.
        size = max_size(data);

        System.out.println(size+" "+ALPHABET_SIZE);
        map = new boolean[ALPHABET_SIZE][size];

        HashSet<String> chunks = new HashSet<String>();
        // Visit from the top of the stack downwards, i.e. in pop order (C, B, A).
        for(int k=data.size()-1; k>=0; k--)
        {
            String a = data.get(k);

            char[] residue = make_residue(a);

            System.out.println("---");
            System.out.println("OLD     : "+a);
            System.out.println("RESIDUE : "+String.valueOf(residue));

            String[] r = String.valueOf(residue).split(" ");

            for(int i=0; i<r.length; i++)
            {
                // split(" ") yields empty strings for runs of spaces; skip those.
                if(r[i].isEmpty()) continue;
                chunks.add(r[i]);
            }
        }

        for(String chunk : chunks)
        {
            System.out.println("CHUNK   : "+chunk);
        }
    }

    /**
     * Places the characters of {@code candidate} into a row of {@link #size} columns.
     *
     * <p>Characters are placed strictly left to right: each one goes to the first
     * column, at or after both its own index and the column following the previous
     * placement, that is still free for that letter in {@link #map}. Unused columns
     * hold a space.
     *
     * <p>If no free column remains, the character is written to the last column
     * the candidate could occupy, overwriting whatever is there.
     *
     * @param candidate string of letters 'a'..'z' to lay out
     * @return a new array of length {@link #size}
     */
    static char[] make_residue(String candidate)
    {
        char[] result = new char[size];
        for(int k=0; k<result.length; k++) result[k]=' ';

        int next = 0; // first column not yet used by this candidate
        for(int i=0; i<candidate.length(); i++)
        {
            char c = candidate.charAt(i);
            int pos = find_position_for(c, Math.max(i, next));
            if(pos==-1)
            {
                int last = Math.min(candidate.length(), size)-1;
                if(last>=0) result[last] = c;
            }
            else
            {
                result[pos] = c;
                next = pos+1;
            }
        }
        return result;
    }

    /**
     * Finds and reserves the first column at or after {@code offset} that is free
     * for {@code character}.
     *
     * @param character letter in 'a'..'z'
     * @param offset    first column to consider
     * @return the reserved column, or -1 if none is free
     * @throws IllegalArgumentException if {@code character} has no row in {@link #map}
     */
    static int find_position_for(char character, int offset)
    {
        int row = rowOf(character);

        for(int i=offset; i<size; i++)
        {
            if(!map[row][i])
            {
                map[row][i]=true;
                return i;
            }
        }
        return -1;
    }

    /**
     * Inserts a single space into {@code a} at index {@code from}.
     *
     * @param a    source string
     * @param from index at which the space is inserted
     * @return the string with the tail shifted one position to the right
     */
    static String move_right(String a, int from)
    {
        return a.substring(0, from)+" "+a.substring(from);
    }

    /**
     * Reports whether a (row, column) cell of {@link #map} is occupied.
     *
     * @param character row index into {@link #map} (letter minus 'a')
     * @param position  column index
     * @return the stored flag
     */
    static boolean taken(int character, int position)
    {
        return map[character][position];
    }

    /**
     * Marks {@code position} as occupied for {@code character}.
     *
     * @param character letter in 'a'..'z'
     * @param position  column index
     * @throws IllegalArgumentException if {@code character} has no row in {@link #map}
     */
    static void take(char character, int position)
    {
        map[rowOf(character)][position]=true;
    }

    /**
     * Returns the length of the longest string on the stack without modifying it.
     *
     * @param stack strings to inspect
     * @return the greatest length, or 0 for an empty stack
     */
    static int max_size(Stack<String> stack)
    {
        int max=0;
        for(String s : stack)
        {
            if(s.length()>max) max=s.length();
        }
        return max;
    }

    /** Maps a letter to its row in {@link #map}, rejecting letters without a row. */
    private static int rowOf(char character)
    {
        int row = character-'a';
        if(row<0 || row>=map.length)
        {
            throw new IllegalArgumentException("unsupported character: "+character);
        }
        return row;
    }

}

解决方案

Finding any common supersequence is not a difficult task:

In your example possible solution would be something like:

/**
 * Demonstrates a greedy merge of three fixed strings into one common
 * supersequence (not necessarily the shortest one).
 */
public class SuperSequenceTest {

    /**
     * Merges the three sample strings and prints the result prefixed with
     * {@code "SUPERSEQUENCE "}.
     *
     * <p>On every step one character is chosen and every string whose next unread
     * character equals it is advanced. A character shared by two strings is
     * preferred (first/second, then first/third, then second/third); otherwise the
     * next character of the third, then the second, then the first string is used.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String first = "caagccacctacatca";
        final String second = "cgagccatccgtaaagttg";
        final String third = "agaacctgctaaatgctaga";

        int p = 0;
        int q = 0;
        int r = 0;
        final StringBuilder merged = new StringBuilder();

        while (true) {
            final int x = headOf(first, p);
            final int y = headOf(second, q);
            final int z = headOf(third, r);
            if (x < 0 && y < 0 && z < 0) {
                break;
            }

            final int chosen;
            if (x >= 0 && (x == y || x == z)) {
                chosen = x;
            } else if (y >= 0 && y == z) {
                chosen = y;
            } else if (z >= 0) {
                chosen = z;
            } else if (y >= 0) {
                chosen = y;
            } else {
                chosen = x;
            }

            merged.append((char) chosen);
            // chosen is never negative, so an exhausted string (-1) is never advanced.
            if (x == chosen) {
                p++;
            }
            if (y == chosen) {
                q++;
            }
            if (z == chosen) {
                r++;
            }
        }
        System.out.println("SUPERSEQUENCE " + merged);
    }

    /** Returns the character of {@code text} at {@code at}, or -1 once {@code text} is exhausted. */
    private static int headOf(String text, int at) {
        return at < text.length() ? text.charAt(at) : -1;
    }

}

However the real problem to solve is to find the solution for the known problem of Shortest Common Supersequence http://en.wikipedia.org/wiki/Shortest_common_supersequence, which is not that easy.

There is a lot of research on this topic.

See for instance:

http://www.csd.uwo.ca/~lila/pdfs/Towards%20a%20DNA%20solution%20to%20the%20Shortest%20Common%20Superstring%20Problem.pdf

http://www.ncbi.nlm.nih.gov/pubmed/14534185