求解罗莎琳德问题“寻找共享主题”的 Julia 算法中的执行时间问题-解网

问：

代码上下文：我正在尝试解决罗莎琳德问题“寻找共享主题”（https://rosalind.info/problems/lcsm/）。我使用的方法包括识别最短序列（长度 n）并存储每个可能的子字符串（最小值 2，最大值 n-1）。然后，我搜索每个序列中最长的公共子字符串。代码实际上返回了正确的答案，但它需要很长时间（在我的 PC 中需要 ~20 分钟）。如何优化它以花费更少的时间？

法典：

#Reading the document in fasta format
if !isfile("i.txt")
    error("'i.txt' não é um arquivo")
end

input = open("i.txt", "r")

#Every sequence (seq) is stored in a vector of chars (seqs)
global seqs = Vector{Vector{Char}}()
global seq = Vector{Char}()
global seq_descr = Vector{String}()

for line in eachline(input)
    if startswith(line, '>')
        if !isempty(seq)
            push!(seqs, seq)    
        end
        push!(seq_descr, line[2:end])
        global seq = Vector{Char}()
    else 
        seq = vcat(seq, collect(line))
    end
end

if !isempty(seq)
    push!(seqs, seq)
end

#Finding the shortest sequence in seqs
function shortest_sequence(seqs)
    shortest_seq = seqs[1]
    for seq in seqs
        if length(seq) < length(shortest_seq)
            shortest_seq = seq
        end
    end
    return shortest_seq
end

minor = shortest_sequence(seqs)

#Finding all possible substrings of a sequence by sliding a window of size n
function all_possible_substrings(x)
    results = String[]
    for n in 2:length(x)-1      #The shortest substring allowed is 2, and the longest is length(x)-1 
        for i in 1:n-1          #Limit the first index to n-1 to avoid out of bounds
            sub = x[i:n]        #Saving the substring in a variable
            push!(results, join(sub)) 
        end
    end
    return results            #Return a vector of all possible substrings
end

motifs = all_possible_substrings(minor)

sorted_substrings = sort(motifs, by=x->length(x), rev=true) #Sorting the substrings by decreasing length
#println(sorted_substrings)

function find_common_motif(seqs)
    for motif in sorted_substrings
        temp = []
        for seq in seqs                     #Check if the motif occurs in every sequence, if so, push 1 to temp
            if occursin(motif, join(seq))   
                push!(temp, 1)
            end
        end
    if sum(temp) == length(seqs)            #If the motif occurs in every sequence (sum(temp) == length(seqs)), return the motif
            return motif
        else
            continue
        end
    end
end

common_motif = find_common_motif(seqs)
println(common_motif)

close(input)

茱莉亚生物信息学罗莎琳德

代码中有几件事使其效率低下。您应该从阅读 docs.julialang.org/en/v1/manual/performance-tips 开始，特别是，您应该将所有代码放在函数中，不惜一切代价避免全局变量（例如访问全局变量），并避免使用非类型化容器，例如 .在可能的情况下，预先分配代替 ing。find_common_motiftemp = []push!

答： 暂无答案

上一个：如何控制智能体绘制顺序以将某些智能体放置在彼此之上？

下一个：傅里叶空间中三维网格的数值积分

求解罗莎琳德问题“寻找共享主题”的 Julia 算法中的执行时间问题

Problem with execution time in Julia algorithm for solving Rosalind problem "Finding a Shared Motif"

评论