在 Java 中寻找 DNA 序列的超序列（supersequence）

2023-09-11 03:56:06 作者：三封情书

我正在为一个“寻找超序列（supersequence）”的算法而苦恼。

输入是一组字符串

String A = "caagccacctacatca";
String B = "cgagccatccgtaaagttg";
String C = "agaacctgctaaatgctaga";

结果应该是一组正确对齐的字符串（下一步再把它们合并）：

String E = "ca ag cca  cc ta    cat  c a";
String F = "c gag ccat ccgtaaa g  tt  g";
String G = " aga acc tgc  taaatgc t a ga";

感谢您提出的任何建议（我坐在这个任务超过一天）

合并之后得到的超序列是：

  cagagaccatgccgtaaatgcattacga

在“这种情况”下，超序列的定义大致如下：

字符串 R 包含在超序列 S 中，当且仅当 R 中的所有字符都出现在 S 中，并且出现的顺序与它们在输入序列 R 中的顺序一致。

我尝试过的“解决方案”（同样，这是一种错误的做法）如下：

 公共类Solution4
{
    静态布尔[] []图= NULL;
    静态INT大小= 0;

    公共静态无效的主要（字串[] args）
    {
        字符串A =caagccacctacatca;
        字符串B =cgagccatccgtaaagttg;
        字符串C =agaacctgctaaatgctaga;

        堆栈数据=新的堆栈（）;
        data.push（A）;
        data.push（B）;
        data.push（C）;


        叠clone1 = data.clone（）;
        叠clone2 = data.clone（）;

        INT长度= 26;
        大小= MAX_SIZE（数据）;

        的System.out.println（尺寸++长度）;
        图=新的布尔[26] [尺寸]

        的char []结果=新的char [大小]

        HashSet的＆LT;字符串＆GT;块=新的HashSet＆LT;字符串＆GT;（）;
        而（！clone1.isEmpty（））
        {
            字符串= clone1.pop（）;

            的char []残渣= make_residue（一）;

            的System.out.println（---）;
            的System.out.println（旧+一）;
            的System.out.println（渣+将String.valueOf（渣））;


            的String [] R =将String.valueOf（渣）.split（）;

            的for（int i = 0; I＆LT; r.length;我++）
            {
                如果（R [I] .equals（））继续;
                //chunks.add(spaces.substring(0,i)+r[i]）;
                chunks.add（R [I]）;
            }
        }

        对于（字符串块：块）
        {
            的System.out.println（程序块+块）;
        }
    }

    静态的char [] make_residue（字符串候选人）
    {
        的char []结果=新的char [大小]
        的for（int i = 0; I＆LT; candidate.length（）;我++）
        {
            INT POS = find_position_for（candidate.charAt（ⅰ），ⅰ）;
            对于（INT J =; J＆LT; POS; J ++）结果[J] ='';
            如果（POS ==  -  1）结果[candidate.length（） -  1] = candidate.charAt（ⅰ）;
            别的结果[POS] = candidate.charAt（ⅰ）;
        }
        返回结果;
    }

    静态INT find_position_for（CHAR字符，诠释抵消）
    {
        人物 -  =（（int）的'A'）;

        的for（int i =抵消; I＆LT;大小;我++）
        {
        //的System.out.println（确认+将String.valueOf（（char）的（字符+（（int）的'A'）））+在+ I）;
            如果（！地图[人物] [I]）
            {
                图[人物] [我] =真;
                返回我;
            }
        }
        返回-1;
    }

    静态字符串move_right（字符串一，从int）
    {
        返回a.substring（0，距离）++ a.substring（从）;
    }


    拍摄静态布尔（INT性格，诠释的位置）
    {返回地图[人物] [位置] }

    静态无效取（CHAR字符，INT位置）
    {
        //System.out.println("taking+将String.valueOf（字符）+在+位置+（char_index  - +（字符 - （（int）的'一个'））+））;
        图[人物 - （（int）的'一'）] [位置] =真;
    }

    静态INT MAX_SIZE（堆栈堆栈）
    {
        INT最大= 0;
        而（！stack.isEmpty（））
        {
            字符串s = stack.pop（）;
            如果（s.length（）＆GT;最多）最大= s.length（）;
        }

        返回最大值;
    }

}

解决方案

找出任意一个公共超序列并不是一件难事：

对于你的例子，一种可能的解法大致如下：

公共类SuperSequenceTest {

 公共静态无效的主要（字串[] args）{
    字符串A =caagccacctacatca;
    字符串B =cgagccatccgtaaagttg;
    字符串C =agaacctgctaaatgctaga;

    INT的iA = 0;
    INT IB = 0;
    INT IC = 0;

    的char [] A = A.toCharArray（）;
    的char [] B = B.toCharArray（）;
    的char [] C = C.toCharArray（）;


    StringBuilder的SB =新的StringBuilder（）;

    而集团（IA＆LT;则为a.length || IB＆LT; b.length个|| IC标签; c.length）{
        如果（IA＆LT;则为a.length和放大器;与Ⅰb＆LT; b.length个和放大器;＆功放IC标签; c.length和放大器;及（一[IA] == B〔IB]）及及（一[IA] == C [k]））{
            sb.append（A [IA]）;
            的iA +;
            IB ++;
            IC ++;
        }
        否则，如果（IA＆LT;则为a.length和放大器;与Ⅰb＆LT; b.length个和放大器;＆放大器;一个[IA] == B〔IB]）{
            sb.append（A [IA]）;
            的iA +;
            IB ++;
        }
        否则，如果（IA＆LT;则为a.length和放大器;＆功放IC标签; c.length和放大器;＆放大器;一个[IA] == C [k]）{
            sb.append（A [IA]）;
            的iA +;
            IC ++;
        }
        否则，如果（IB＆LT; b.length个和放大器;＆功放IC标签; c.length和放大器;和b的[ib] == C [k]）{
            sb.append（B的[ib]）;
            IB ++;
            IC ++;
        } 其他 {
            如果（IC标签; c.length）{
                sb.append（C [k]）;
                IC ++;
            }
            否则，如果（IB＆LT; b.length个）{
                sb.append（B的[ib]）;
                IB ++;
            }否则如果（IA＆LT;则为a.length）{
                sb.append（A [IA]）;
                的iA +;
            }
        }
    }
    的System.out.println（超层+ sb.toString（））;
}

}

但真正要解决的是著名的“最短公共超序列”（Shortest Common Supersequence）问题，参见 http://en.wikipedia.org/wiki/Shortest_common_supersequence ，而这个问题并不那么容易。

已经有很多研究关注这个课题。

请参阅例如：

http://www.csd.uwo.ca/~lila/pdfs/Towards%20a%20DNA%20solution%20to%20the%20Shortest%20Common%20Superstring%20Problem.pdf

http://www.ncbi.nlm.nih.gov/pubmed/14534185

I am struggling with a "find supersequence" algorithm.

The input is a set of strings:

String A = "caagccacctacatca";
String B = "cgagccatccgtaaagttg";
String C = "agaacctgctaaatgctaga";

The result should be a properly aligned set of strings (the next step would be to merge them):

String E = "ca ag cca  cc ta    cat  c a";
String F = "c gag ccat ccgtaaa g  tt  g";
String G = " aga acc tgc  taaatgc t a ga";

Thank you for any advice (I am sitting on this task for more than a day)

After merging, the supersequence would be:

cagagaccatgccgtaaatgcattacga

The definition of a supersequence in "this case" would be something like:

The string R is contained in supersequence S if and only if all characters in a string R are present in supersequence S in the order in which they occur in the input sequence R.

The "solution" I tried (and, again, it's the wrong way of doing it) is:

/**
 * Experimental aligner that spreads the characters of several lowercase
 * strings over a shared grid of columns.
 *
 * <p>The grid {@link #map} has one row per letter {@code 'a'..'z'} and
 * {@link #size} columns. A cell set to {@code true} means that letter has
 * already claimed that column. Each string is laid out by claiming, for every
 * character, the first free column at or after the character's own index.
 *
 * <p>Known limitation of the approach (not fixed here): occupancy is tracked
 * per letter, so two different letters of the same string may claim the same
 * column, and the later one then replaces the earlier one in the residue.
 */
public class Solution4
{
    /** Number of rows in {@link #map}: one per lowercase letter. */
    private static final int ALPHABET_SIZE = 26;

    /** Occupancy grid indexed as {@code map[letter - 'a'][column]}. */
    static boolean[][] map = null;

    /** Number of columns in {@link #map} and length of every residue. */
    static int size = 0;

    /**
     * Lays out the three sample sequences (last pushed first) and prints each
     * residue followed by the distinct non-empty chunks found in them.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        String A = "caagccacctacatca";
        String B = "cgagccatccgtaaagttg";
        String C = "agaacctgctaaatgctaga";

        Stack<String> data = new Stack<String>();
        data.push(A);
        data.push(B);
        data.push(C);

        // max_size no longer empties the stack, so no defensive clones are needed.
        size = max_size(data);

        System.out.println(size+" "+ALPHABET_SIZE);
        map = new boolean[ALPHABET_SIZE][size];

        HashSet<String> chunks = new HashSet<String>();
        while(!data.isEmpty())
        {
            String a = data.pop();

            char[] residue = make_residue(a);

            System.out.println("---");
            System.out.println("OLD     : "+a);
            System.out.println("RESIDUE : "+String.valueOf(residue));

            // Consecutive blanks make split(" ") produce empty strings, never " ".
            for(String piece : String.valueOf(residue).split(" "))
            {
                if(piece.isEmpty()) continue;
                chunks.add(piece);
            }
        }

        for(String chunk : chunks)
        {
            System.out.println("CHUNK   : "+chunk);
        }
    }

    /**
     * Places every character of {@code candidate} into a blank-filled array of
     * {@link #size} cells, claiming columns in {@link #map} as it goes.
     *
     * <p>A character for which no free column exists is written to index
     * {@code candidate.length() - 1}.
     *
     * @param candidate lowercase {@code a-z} string, at most {@link #size} long
     * @return a new array of length {@link #size}; unclaimed cells hold {@code ' '}
     */
    static char[] make_residue(String candidate)
    {
        char[] result = new char[size];
        // Start from an all-blank row so gaps are real spaces (not NUL chars)
        // and no later gap-filling can wipe out an already placed character.
        java.util.Arrays.fill(result, ' ');

        for(int i=0; i<candidate.length(); i++)
        {
            char current = candidate.charAt(i);
            int pos = find_position_for(current, i);
            if(pos==-1) result[candidate.length()-1] = current;
            else        result[pos] = current;
        }
        return result;
    }

    /**
     * Claims the first column at or after {@code offset} that is still free
     * for {@code character} and marks it as taken.
     *
     * @param character lowercase letter {@code 'a'..'z'}
     * @param offset    first column to consider
     * @return the claimed column, or {@code -1} if none is free
     */
    static int find_position_for(char character, int offset)
    {
        int row = character - 'a';

        for(int column=offset; column<size; column++)
        {
            if(!map[row][column])
            {
                map[row][column] = true;
                return column;
            }
        }
        return -1;
    }

    /**
     * Inserts a single blank into {@code a} before index {@code from}.
     *
     * @param a    source string
     * @param from index at which the blank is inserted
     * @return {@code a} with one extra space at {@code from}
     */
    static String move_right(String a, int from)
    {
        return a.substring(0, from)+" "+a.substring(from);
    }

    /**
     * Reports whether a grid cell is already claimed.
     *
     * @param character row index, i.e. {@code letter - 'a'}
     * @param position  column index
     * @return the value stored in {@code map[character][position]}
     */
    static boolean taken(int character, int position)
    {
        return map[character][position];
    }

    /**
     * Marks the cell for the given letter and column as claimed.
     *
     * @param character lowercase letter {@code 'a'..'z'}
     * @param position  column index
     */
    static void take(char character, int position)
    {
        map[character - 'a'][position] = true;
    }

    /**
     * Returns the length of the longest string on the stack.
     *
     * <p>The stack is only read; its contents are left in place.
     *
     * @param stack strings to measure
     * @return the maximum length, or {@code 0} for an empty stack
     */
    static int max_size(Stack<String> stack)
    {
        int max = 0;
        for(String s : stack)
        {
            if(s.length()>max) max = s.length();
        }
        return max;
    }

}

解决方案

Finding any common supersequence is not a difficult task:

In your example possible solution would be something like:

/**
 * Builds a common supersequence of several strings with a simple greedy merge.
 *
 * <p>The result contains every input as a subsequence, but it is not
 * guaranteed to be the shortest such string.
 */
public class SuperSequenceTest {

    /**
     * Merges the three sample DNA strings and prints the result prefixed with
     * {@code "SUPERSEQUENCE "}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String A = "caagccacctacatca";
        String B = "cgagccatccgtaaagttg";
        String C = "agaacctgctaaatgctaga";

        System.out.println("SUPERSEQUENCE " + commonSupersequence(A, B, C));
    }

    /**
     * Greedily merges any number of strings into one common supersequence.
     *
     * <p>Each input has a cursor on its next unconsumed character. On every
     * step the character shared by the largest number of cursors is emitted
     * and all cursors showing it advance. When several characters are tied,
     * the one belonging to the latest input wins.
     *
     * @param sequences strings to merge; none may be {@code null}
     * @return a string containing every input as a subsequence; empty when
     *         there is nothing to merge
     */
    static String commonSupersequence(String... sequences) {
        int[] cursors = new int[sequences.length];
        int remaining = 0;
        for (String sequence : sequences) {
            remaining += sequence.length();
        }

        StringBuilder merged = new StringBuilder(remaining);
        while (remaining > 0) {
            char best = 0;
            int bestVotes = 0;
            // Scan from the last input backwards with a strict comparison so
            // that ties are resolved in favour of the latest input.
            for (int i = sequences.length - 1; i >= 0; i--) {
                if (cursors[i] >= sequences[i].length()) {
                    continue;
                }
                char candidate = sequences[i].charAt(cursors[i]);
                int votes = countHeads(sequences, cursors, candidate);
                if (votes > bestVotes) {
                    best = candidate;
                    bestVotes = votes;
                }
            }

            merged.append(best);
            for (int i = 0; i < sequences.length; i++) {
                if (cursors[i] < sequences[i].length()
                        && sequences[i].charAt(cursors[i]) == best) {
                    cursors[i]++;
                    remaining--;
                }
            }
        }
        return merged.toString();
    }

    /** Counts the unfinished inputs whose next character equals {@code wanted}. */
    private static int countHeads(String[] sequences, int[] cursors, char wanted) {
        int votes = 0;
        for (int i = 0; i < sequences.length; i++) {
            if (cursors[i] < sequences[i].length()
                    && sequences[i].charAt(cursors[i]) == wanted) {
                votes++;
            }
        }
        return votes;
    }

}

However, the real problem to solve is the well-known Shortest Common Supersequence problem ( http://en.wikipedia.org/wiki/Shortest_common_supersequence ), which is not that easy.

There is a lot of research on this topic.

See for instance:

http://www.csd.uwo.ca/~lila/pdfs/Towards%20a%20DNA%20solution%20to%20the%20Shortest%20Common%20Superstring%20Problem.pdf

http://www.ncbi.nlm.nih.gov/pubmed/14534185

上一篇：宝石迷阵板代迷阵

下一篇：鲜明的回文子串数回文、鲜明、子串数

相关推荐