"""Identify a person from a DNA sequence by comparing STR repeat counts.

Usage: <script> DATABASE.csv SEQUENCE.txt

The CSV database has a header row whose first cell labels the name column
and whose remaining cells are the STR motifs. Each following row is a
person's name followed by one repeat count per motif. The text file holds
the DNA sequence to profile.
"""
import csv
from sys import argv


def longest_run(dna, motif):
    """Return the longest number of back-to-back repeats of motif in dna.

    Returns 0 when the motif never occurs or when motif is empty.
    """
    size = len(motif)
    if size == 0:
        return 0

    best = 0
    for start in range(len(dna) - size + 1):
        # A position directly preceded by the motif sits inside a run that
        # was already counted from an earlier start and cannot be longer.
        if start >= size and dna.startswith(motif, start - size):
            continue
        run = 0
        while dna.startswith(motif, start + run * size):
            run += 1
        if run > best:
            best = run
    return best


def str_profile(dna, motifs):
    """Return the longest consecutive repeat count in dna for each motif."""
    return [longest_run(dna, motif) for motif in motifs]


def matching_names(people, counts):
    """Return the names of all rows whose counts equal the given profile.

    people is a sequence of CSV rows (name first, then one count per STR,
    as strings); counts is the list of integer repeat counts to look for.
    Every STR column is compared, regardless of how many there are.
    """
    # The CSV values are strings, so compare against the string form.
    wanted = [str(count) for count in counts]
    return [row[0] for row in people if row and row[1:] == wanted]


def main(args=None):
    """Print every matching name from the database, or "No match"."""
    args = argv if args is None else args
    if len(args) != 3:
        raise SystemExit("Usage: <script> DATABASE.csv SEQUENCE.txt")

    # newline="" is what the csv module asks for when opening files.
    with open(args[1], newline="") as database:
        rows = list(csv.reader(database))

    with open(args[2], "r") as sequence_file:
        # Drop newlines and any other whitespace so they cannot break a run.
        dna = "".join(sequence_file.read().split())

    if not rows:
        print("No match")
        return

    # rows[0] is the header; its first cell labels the name column.
    motifs = rows[0][1:]
    found = matching_names(rows[1:], str_profile(dna, motifs))

    if not found:
        print("No match")
        return
    for name in found:
        print(name)


if __name__ == "__main__":
    main()