Longest common subsequence
pystrgrp: https://drive.google.com/open?id=1Ig_ATnmLUJIuHbFPRGlvZdM3Xd5Yp32U
Example: clustering "sequence,id" strings with pystrgrp and collecting the clusters in a pandas DataFrame
from pystrgrp import Strgrp
def pystrgrp(strings, threshold=0.7):
    """Cluster ``"<seq>,<id>"`` records using ``Strgrp``.

    Each record is stripped of surrounding whitespace, split into a
    sequence and an identifier, and registered with ``clusters.add``.

    Args:
        strings: Iterable of text records of the form ``"<seq>,<id>"``.
        threshold: Value handed to the ``Strgrp`` constructor; defaults to
            the previously hard-coded ``0.7``. (Presumably the similarity
            threshold used for grouping -- confirm against pystrgrp.)

    Returns:
        The populated ``Strgrp`` instance.

    Raises:
        ValueError: If a record contains no comma.
    """
    clusters = Strgrp(threshold)
    for record in strings:
        # Split on the LAST comma only, so a sequence that itself contains
        # commas no longer breaks the two-way unpacking. ``ident`` (rather
        # than ``id``) avoids shadowing the builtin.
        seq, ident = record.strip().rsplit(',', 1)
        clusters.add(seq, ident)
    return clusters
# Sample "<seq>,<id>" records used to demonstrate the clustering.
data = [
    '12345,1',
    '1234567,2',
    '1234568,3',
    '2345678,4',
    '2345679,5',
    '345678,6',
    '1234578,7',
    '3456789,8',
    'abcdefg,9',
    'bcdefg,10',
]
data.sort()  # ascending order, same as sorted(..., reverse=0)

grps = pystrgrp(data)
grps  # notebook-style display of the cluster container

# Materialise the groups so they can be indexed and re-iterated.
grps_list = list(grps)
grps_list  # notebook-style display of the list of groups
import pandas as pd
# Flatten the clusters into (cluster index, sequence, id) rows.
# Rows are collected in a plain list and the DataFrame is built once at the
# end: calling pd.concat inside the loop re-copies the whole frame on every
# row (quadratic) and starts from an empty DataFrame.
rows = []
for cluster_no, group in enumerate(grps_list):
    for item in group:
        seq, ident = item.key(), item.value()
        print(cluster_no, seq, ident)
        rows.append((cluster_no, seq, ident))
df = pd.DataFrame(rows, columns=['cluster', 'seq', 'id'])
df  # notebook-style display of the resulting table
No comments:
Post a Comment
Note: Only a member of this blog may post a comment.