我正在为一个“寻找超序列”(supersequence)的算法而苦恼。
输入是一组字符串
字符串A =caagccacctacatca;
字符串B =cgagccatccgtaaagttg;
字符串C =agaacctgctaaatgctaga;
结果将正确对齐组字符串(和下一步应该合并)
字符串E = "ca ag cca cc ta cat c a";
字符串F = "c gag ccat ccgtaaa g tt g";
字符串G = " aga acc tgc taaatgc t a ga";
感谢您提出的任何建议(我坐在这个任务超过一天)
合并后弦将
cagagaccatgccgtaaatgcattacga
在“这种情况”下,超序列的定义大致如下:
字符串 R 包含于超序列 S 中,当且仅当 R 中的所有字符都按照它们在输入序列 R 中出现的顺序出现在超序列 S 中。
解决方案我试图(和做一遍它的错误的方式)是:
公共类Solution4
{
静态布尔[] []图= NULL;
静态INT大小= 0;
公共静态无效的主要(字串[] args)
{
字符串A =caagccacctacatca;
字符串B =cgagccatccgtaaagttg;
字符串C =agaacctgctaaatgctaga;
堆栈数据=新的堆栈();
data.push(A);
data.push(B);
data.push(C);
叠clone1 = data.clone();
叠clone2 = data.clone();
INT长度= 26;
大小= MAX_SIZE(数据);
的System.out.println(尺寸++长度);
图=新的布尔[26] [尺寸]
的char []结果=新的char [大小]
HashSet的<字符串>块=新的HashSet<字符串>();
而(!clone1.isEmpty())
{
字符串= clone1.pop();
的char []残渣= make_residue(一);
的System.out.println(---);
的System.out.println(旧+一);
的System.out.println(渣+将String.valueOf(渣));
的String [] R =将String.valueOf(渣).split();
的for(int i = 0; I< r.length;我++)
{
如果(R [I] .equals())继续;
//chunks.add(spaces.substring(0,i)+r[i]);
chunks.add(R [I]);
}
}
对于(字符串块:块)
{
的System.out.println(程序块+块);
}
}
静态的char [] make_residue(字符串候选人)
{
的char []结果=新的char [大小]
的for(int i = 0; I< candidate.length();我++)
{
INT POS = find_position_for(candidate.charAt(ⅰ),ⅰ);
对于(INT J =; J< POS; J ++)结果[J] ='';
如果(POS == - 1)结果[candidate.length() - 1] = candidate.charAt(ⅰ);
别的结果[POS] = candidate.charAt(ⅰ);
}
返回结果;
}
静态INT find_position_for(CHAR字符,诠释抵消)
{
人物 - =((int)的'A');
的for(int i =抵消; I<大小;我++)
{
//的System.out.println(确认+将String.valueOf((char)的(字符+((int)的'A')))+在+ I);
如果(!地图[人物] [I])
{
图[人物] [我] =真;
返回我;
}
}
返回-1;
}
静态字符串move_right(字符串一,从int)
{
返回a.substring(0,距离)++ a.substring(从);
}
拍摄静态布尔(INT性格,诠释的位置)
{返回地图[人物] [位置] }
静态无效取(CHAR字符,INT位置)
{
//System.out.println("taking+将String.valueOf(字符)+在+位置+(char_index - +(字符 - ((int)的'一个'))+));
图[人物 - ((int)的'一')] [位置] =真;
}
静态INT MAX_SIZE(堆栈堆栈)
{
INT最大= 0;
而(!stack.isEmpty())
{
字符串s = stack.pop();
如果(s.length()>最多)最大= s.length();
}
返回最大值;
}
}
解决方案
查找任何常见的超层并不是一个困难的任务:
在您的例子可能的解决办法是这样的:
公共类SuperSequenceTest {
公共静态无效的主要(字串[] args){
字符串A =caagccacctacatca;
字符串B =cgagccatccgtaaagttg;
字符串C =agaacctgctaaatgctaga;
INT的iA = 0;
INT IB = 0;
INT IC = 0;
的char [] A = A.toCharArray();
的char [] B = B.toCharArray();
的char [] C = C.toCharArray();
StringBuilder的SB =新的StringBuilder();
而集团(IA<则为a.length || IB< b.length个|| IC标签; c.length){
如果(IA<则为a.length和放大器;与Ⅰb< b.length个和放大器;&功放IC标签; c.length和放大器;及(一[IA] == B〔IB])及及(一[IA] == C [k])){
sb.append(A [IA]);
的iA +;
IB ++;
IC ++;
}
否则,如果(IA<则为a.length和放大器;与Ⅰb< b.length个和放大器;&放大器;一个[IA] == B〔IB]){
sb.append(A [IA]);
的iA +;
IB ++;
}
否则,如果(IA<则为a.length和放大器;&功放IC标签; c.length和放大器;&放大器;一个[IA] == C [k]){
sb.append(A [IA]);
的iA +;
IC ++;
}
否则,如果(IB< b.length个和放大器;&功放IC标签; c.length和放大器;和b的[ib] == C [k]){
sb.append(B的[ib]);
IB ++;
IC ++;
} 其他 {
如果(IC标签; c.length){
sb.append(C [k]);
IC ++;
}
否则,如果(IB< b.length个){
sb.append(B的[ib]);
IB ++;
}否则如果(IA<则为a.length){
sb.append(A [IA]);
的iA +;
}
}
}
的System.out.println(超层+ sb.toString());
}
}
但真正需要解决的是已知的“最短公共超序列”(Shortest Common Supersequence)问题 http://en.wikipedia.org/wiki/Shortest_common_supersequence ,而这并不那么容易。
关于这一课题已有大量研究。
请参阅例如:
http://www.csd.uwo.ca/~lila/pdfs/Towards%20a%20DNA%20solution%20to%20the%20Shortest%20Common%20Superstring%20Problem.pdf
http://www.ncbi.nlm.nih.gov/pubmed/14534185
I am struggling with a "find supersequence" algorithm.
The input is a set of strings:
String A = "caagccacctacatca";
String B = "cgagccatccgtaaagttg";
String C = "agaacctgctaaatgctaga";
The result would be a properly aligned set of strings (the next step would be to merge them):
String E = "ca ag cca cc ta cat c a";
String F = "c gag ccat ccgtaaa g tt g";
String G = " aga acc tgc taaatgc t a ga";
Thank you for any advice (I have been stuck on this task for more than a day).
after merge the superstring would be
cagagaccatgccgtaaatgcattacga
The definition of supersequence in "this case" would be something like
The string R is contained in supersequence S if and only if all characters in a string R are present in supersequence S in the order in which they occur in the input sequence R.
The "solution" I tried (and again, it is the wrong way of doing it) is:
/**
 * Experimental attempt at aligning a set of lowercase strings column by column.
 *
 * <p>Each input string is spread over {@link #size} columns. A letter may occupy a
 * given column only once across all inputs; the shared {@link #map} records which
 * (letter, column) pairs are already in use. The aligned form of each input (its
 * "residue") is then split on spaces into chunks.
 *
 * <p>NOTE: this is a heuristic only. Every character is placed independently, starting
 * the search at its own index, so the residue is not guaranteed to keep the characters
 * of the input in their original relative order.
 */
public class Solution4
{
    /** Number of rows in {@link #map}: one per letter 'a'..'z'. */
    private static final int ALPHABET_SIZE = 26;

    /** map[letter - 'a'][column] is true once that letter has claimed that column. */
    static boolean[][] map = null;

    /** Number of columns in the alignment; main() sets it to the longest input length. */
    static int size = 0;

    /**
     * Aligns three sample strings and prints, for each of them, the original string and
     * its residue, followed by the distinct non-empty chunks found in all residues.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        String A = "caagccacctacatca";
        String B = "cgagccatccgtaaagttg";
        String C = "agaacctgctaaatgctaga";
        Stack<String> data = new Stack<String>();
        data.push(A);
        data.push(B);
        data.push(C);

        // max_size() empties the stack it is given, so iterate over a separate copy.
        Stack<String> pending = new Stack<String>();
        pending.addAll(data);

        size = max_size(data);
        System.out.println(size+" "+ALPHABET_SIZE);
        map = new boolean[ALPHABET_SIZE][size];

        HashSet<String> chunks = new HashSet<String>();
        while(!pending.isEmpty())
        {
            String a = pending.pop();
            char[] residue = make_residue(a);
            System.out.println("---");
            System.out.println("OLD : "+a);
            System.out.println("RESIDUE : "+String.valueOf(residue));
            String[] r = String.valueOf(residue).split(" ");
            for(int i=0; i<r.length; i++)
            {
                // Consecutive or leading spaces make split() return empty strings; skip them.
                if(r[i].length()==0) continue;
                chunks.add(r[i]);
            }
        }
        for(String chunk : chunks)
        {
            System.out.println("CHUNK : "+chunk);
        }
    }

    /**
     * Spreads the characters of {@code candidate} over {@link #size} columns.
     *
     * <p>Each character is written to the column returned by
     * {@link #find_position_for(char, int)}; columns that receive no character hold a
     * space. When no free column is left for a character it is written to index
     * {@code candidate.length() - 1}, which may replace a character placed earlier.
     *
     * @param candidate lowercase string to align; must not be longer than {@link #size}
     * @return a new array of length {@link #size} holding the aligned characters
     */
    static char[] make_residue(String candidate)
    {
        char[] result = new char[size];
        // Start from an all-space row so unused columns are real separators rather than
        // '\0', and so padding can never overwrite a character that was already placed.
        for(int k=0; k<result.length; k++) result[k]=' ';

        for(int i=0; i<candidate.length(); i++)
        {
            char c = candidate.charAt(i);
            int pos = find_position_for(c,i);
            if(pos==-1) result[candidate.length()-1] = c;
            else result[pos] = c;
        }
        return result;
    }

    /**
     * Claims the first column at or after {@code offset} that {@code character} has not
     * used yet.
     *
     * @param character letter in the range 'a'..'z'
     * @param offset    first column to consider
     * @return the claimed column, or -1 when every column from {@code offset} on is taken
     */
    static int find_position_for(char character, int offset)
    {
        int row = character-'a';
        for(int i=offset; i<size; i++)
        {
            if(!taken(row,i))
            {
                take(character,i);
                return i;
            }
        }
        return -1;
    }

    /**
     * Inserts a single space into {@code a} in front of index {@code from}.
     *
     * @param a    source string
     * @param from index at which the space is inserted
     * @return {@code a} with one extra space at {@code from}
     */
    static String move_right(String a, int from)
    {
        return a.substring(0, from)+" "+a.substring(from);
    }

    /**
     * Tells whether a (letter, column) pair is already in use.
     *
     * @param character letter index, i.e. {@code letter - 'a'}
     * @param position  column index
     * @return the current value of {@code map[character][position]}
     */
    static boolean taken(int character, int position)
    { return map[character][position]; }

    /**
     * Marks a (letter, column) pair as used.
     *
     * @param character the letter itself ('a'..'z'), not its index
     * @param position  column index
     */
    static void take(char character, int position)
    {
        map[character-((int)'a')][position]=true;
    }

    /**
     * Returns the length of the longest string on the stack.
     *
     * <p>The stack is emptied as a side effect.
     *
     * @param stack strings to measure; empty after the call
     * @return the largest {@code length()} seen, or 0 for an empty stack
     */
    static int max_size(Stack<String> stack)
    {
        int max=0;
        while(!stack.isEmpty())
        {
            String s = stack.pop();
            if(s.length()>max) max=s.length();
        }
        return max;
    }
}
解决方案
Finding any common supersequence is not a difficult task:
In your example possible solution would be something like:
/**
 * Builds one (not necessarily shortest) common supersequence of three fixed strings
 * with a greedy three-way merge and prints it.
 */
public class SuperSequenceTest {

    /**
     * Walks the three sequences with one cursor each. On every step a single character
     * is emitted and every cursor currently pointing at that character moves forward.
     *
     * <p>Choice of the emitted character, in priority order:
     * <ol>
     *   <li>the current character of A, if it equals the current character of B or C;</li>
     *   <li>the current character of B, if it equals the current character of C;</li>
     *   <li>otherwise the current character of C, then B, then A, whichever is still
     *       available first.</li>
     * </ol>
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String A = "caagccacctacatca";
        String B = "cgagccatccgtaaagttg";
        String C = "agaacctgctaaatgctaga";

        final char[] first = A.toCharArray();
        final char[] second = B.toCharArray();
        final char[] third = C.toCharArray();

        int posFirst = 0;
        int posSecond = 0;
        int posThird = 0;
        StringBuilder merged = new StringBuilder();

        while (true) {
            final boolean hasFirst = posFirst < first.length;
            final boolean hasSecond = posSecond < second.length;
            final boolean hasThird = posThird < third.length;
            if (!hasFirst && !hasSecond && !hasThird) {
                break;
            }

            final boolean firstMatchesSecond =
                    hasFirst && hasSecond && first[posFirst] == second[posSecond];
            final boolean firstMatchesThird =
                    hasFirst && hasThird && first[posFirst] == third[posThird];
            final boolean secondMatchesThird =
                    hasSecond && hasThird && second[posSecond] == third[posThird];

            final char next;
            if (firstMatchesSecond || firstMatchesThird) {
                next = first[posFirst];
            } else if (secondMatchesThird) {
                next = second[posSecond];
            } else if (hasThird) {
                // No two cursors agree: fall back to C, then B, then A.
                next = third[posThird];
            } else if (hasSecond) {
                next = second[posSecond];
            } else {
                next = first[posFirst];
            }
            merged.append(next);

            // Consume the emitted character from every sequence that is waiting on it.
            if (hasFirst && first[posFirst] == next) {
                posFirst++;
            }
            if (hasSecond && second[posSecond] == next) {
                posSecond++;
            }
            if (hasThird && third[posThird] == next) {
                posThird++;
            }
        }

        System.out.println("SUPERSEQUENCE " + merged.toString());
    }
}
However, the real problem to solve is the well-known Shortest Common Supersequence problem (http://en.wikipedia.org/wiki/Shortest_common_supersequence), which is not that easy.
There is a lot of research on this topic.
See for instance:
http://www.csd.uwo.ca/~lila/pdfs/Towards%20a%20DNA%20solution%20to%20the%20Shortest%20Common%20Superstring%20Problem.pdf
http://www.ncbi.nlm.nih.gov/pubmed/14534185
上一篇:宝石迷阵板代迷阵