提问人:Hugo Oliveira 提问时间:11/10/2023 更新时间:11/10/2023 访问量:28
求解罗莎琳德问题“寻找共享主题”的 Julia 算法中的执行时间问题
Problem with execution time in Julia algorithm for solving Rosalind problem "Finding a Shared Motif"
问:
代码上下文:我正在尝试解决罗莎琳德问题“寻找共享主题”(https://rosalind.info/problems/lcsm/)。我使用的方法包括识别最短序列(长度 n)并存储每个可能的子字符串(最小值 2,最大值 n-1)。然后,我搜索每个序列中最长的公共子字符串。代码实际上返回了正确的答案,但它需要很长时间(在我的 PC 中需要 ~20 分钟)。如何优化它以花费更少的时间?
法典:
#Reading the document in fasta format
if !isfile("i.txt")
error("'i.txt' não é um arquivo")
end
input = open("i.txt", "r")
#Every sequence (seq) is stored in a vector of chars (seqs)
global seqs = Vector{Vector{Char}}()
global seq = Vector{Char}()
global seq_descr = Vector{String}()
for line in eachline(input)
if startswith(line, '>')
if !isempty(seq)
push!(seqs, seq)
end
push!(seq_descr, line[2:end])
global seq = Vector{Char}()
else
seq = vcat(seq, collect(line))
end
end
if !isempty(seq)
push!(seqs, seq)
end
#Finding the shortest sequence in seqs
function shortest_sequence(seqs)
shortest_seq = seqs[1]
for seq in seqs
if length(seq) < length(shortest_seq)
shortest_seq = seq
end
end
return shortest_seq
end
minor = shortest_sequence(seqs)
#Finding all possible substrings of a sequence by sliding a window of size n
function all_possible_substrings(x)
results = String[]
for n in 2:length(x)-1 #The shortest substring allowed is 2, and the longest is length(x)-1
for i in 1:n-1 #Limit the first index to n-1 to avoid out of bounds
sub = x[i:n] #Saving the substring in a variable
push!(results, join(sub))
end
end
return results #Return a vector of all possible substrings
end
motifs = all_possible_substrings(minor)
sorted_substrings = sort(motifs, by=x->length(x), rev=true) #Sorting the substrings by decreasing length
#println(sorted_substrings)
function find_common_motif(seqs)
for motif in sorted_substrings
temp = []
for seq in seqs #Check if the motif occurs in every sequence, if so, push 1 to temp
if occursin(motif, join(seq))
push!(temp, 1)
end
end
if sum(temp) == length(seqs) #If the motif occurs in every sequence (sum(temp) == length(seqs)), return the motif
return motif
else
continue
end
end
end
common_motif = find_common_motif(seqs)
println(common_motif)
close(input)
答: 暂无答案
评论
push!
不断分配新的内存。如果预先分配内存,它已经产生了很大的不同。find_common_motif
temp = []
push!