Friday, December 28, 2018

LCS

Longest common subsequence

pystrgrp: https://drive.google.com/open?id=1Ig_ATnmLUJIuHbFPRGlvZdM3Xd5Yp32U

Example: clustering "sequence,id" strings with pystrgrp and tabulating the clusters with pandas

from pystrgrp import Strgrp

def pystrgrp(strings, threshold=0.7):
    """Cluster ``"sequence,id"`` records using ``Strgrp``.

    Args:
        strings: Iterable of text records, each holding a sequence and an
            id separated by a single comma. Surrounding whitespace is
            stripped; records that are empty after stripping are skipped.
        threshold: Value passed to the ``Strgrp`` constructor. Defaults to
            0.7, the value that used to be hard-coded here (presumably the
            similarity cutoff for grouping -- confirm against the pystrgrp
            documentation).

    Returns:
        The ``Strgrp`` instance after every record has been added to it.

    Raises:
        ValueError: If a non-empty record does not contain exactly one comma.
    """
    clusters = Strgrp(threshold)
    for record in (raw.strip() for raw in strings):
        if not record:
            # Blank lines (e.g. a trailing newline when reading a file)
            # carry no data and would otherwise fail the unpacking below.
            continue
        # ``seq_id`` rather than ``id`` so the builtin is not shadowed.
        seq, seq_id = record.split(',')
        clusters.add(seq, seq_id)
    return clusters

# Sample "sequence,id" records, put into ascending order before clustering.
data = [
    '12345,1', '1234567,2', '1234568,3', '2345678,4', '2345679,5',
    '345678,6', '1234578,7', '3456789,8', 'abcdefg,9', 'bcdefg,10',
]
data.sort()

# Cluster the records; the bare name echoes the result in an interactive
# session (notebook / REPL).
grps = pystrgrp(data)
grps

# Materialise whatever iterating over the clusters yields into a list.
grps_list = list(grps)
grps_list

import pandas as pd

# Flatten the clusters into (cluster index, seq, id) rows. The rows are
# collected first and the DataFrame is built once at the end: calling
# pd.concat inside the loop re-copies the whole frame for every row.
rows = []
for cluster_idx, group in enumerate(grps_list):
    for item in group:
        # Read each accessor once and reuse it for both the log line and the row.
        seq, seq_id = item.key(), item.value()
        print(cluster_idx, seq, seq_id)
        rows.append((cluster_idx, seq, seq_id))

# Passing explicit columns keeps the schema stable even when there are no rows.
df = pd.DataFrame(rows, columns=['cluster', 'seq', 'id'])

df

No comments:

Post a Comment

Note: Only a member of this blog may post a comment.