/ 猿问

# 查找字符串中最长的重复序列

2019-12-26 14:19:14

## 3 回答

from collections import defaultdict

def getsubs(loc, s):

substr = s[loc:]

i = -1

while(substr):

yield substr

substr = s[loc:i]

i -= 1

def longestRepetitiveSubstring(r, minocc=3):

occ = defaultdict(int)

# tally all occurrences of all substrings

for i in range(len(r)):

for sub in getsubs(i,r):

occ[sub] += 1

# filter out all substrings with fewer than minocc occurrences

occ_minocc = [k for k,v in occ.items() if v >= minocc]

if occ_minocc:

maxkey =  max(occ_minocc, key=len)

return maxkey, occ[maxkey]

else:

raise ValueError("no repetitions of any substring of '%s' with %d or more occurrences" % (r,minocc))

('helloworld', 3)

ibeautiful

from collections import Counter

times=3

for n in range(1,len(a)/times+1)[::-1]:

substrings=[a[i:i+n] for i in range(len(a)-n+1)]

freqs=Counter(substrings)

if freqs.most_common(1)[0][1]>=3:

seq=freqs.most_common(1)[0][0]

break

print "sequence '%s' of length %s occurs %s or more times"%(seq,n,times)

>>> sequence 'helloworld' of length 10 occurs 3 or more times

from collections import Counter

times=3

for n in range(1,len(a)/times+1):

substrings=[a[i:i+n] for i in range(len(a)-n+1)]

freqs=Counter(substrings)

if freqs.most_common(1)[0][1]<3:

n-=1

break

else:

seq=freqs.most_common(1)[0][0]

print "sequence '%s' of length %s occurs %s or more times"%(seq,n,times)

• 3 回答
• 0 关注
• 372 浏览

0/150