import csv
from sys import argv
def longest_run(dna, motif):
    """Return the longest run of back-to-back copies of ``motif`` in ``dna``.

    A run is counted from every possible start position, because a longer
    run can begin inside a shorter one when the motif overlaps itself
    (e.g. motif "ABA" in "ABABAABA"). Returns 0 when the motif never occurs
    or when ``motif`` is empty.
    """
    size = len(motif)
    if size == 0:
        # An empty motif would match forever; treat it as "no repeats".
        return 0

    best = 0
    for start in range(len(dna) - size + 1):
        repeats = 0
        pos = start
        # Walk forward one motif-length at a time while copies keep coming.
        while dna.startswith(motif, pos):
            repeats += 1
            pos += size
        if repeats > best:
            best = repeats
    return best


def str_profile(dna, motifs):
    """Return the longest run length in ``dna`` for each motif, in order."""
    return [longest_run(dna, motif) for motif in motifs]


def find_matches(people, profile):
    """Return the names of all rows whose STR counts equal ``profile``.

    ``people`` is the list of CSV data rows (header excluded); each row is
    ``[name, count, count, ...]`` with the counts stored as strings.
    ``profile`` is the list of integer counts measured from the DNA.
    Every STR column is compared, regardless of how many there are.
    """
    # CSV cells are strings, so compare against the string form of the counts.
    wanted = [str(count) for count in profile]
    # Blank CSV lines come back as empty lists; skip them.
    return [row[0] for row in people if row and row[1:] == wanted]


def main(args):
    """Identify whose DNA is in a sequence file.

    ``args`` mirrors ``sys.argv``: ``args[1]`` is the CSV database whose
    header row is ``name,STR1,STR2,...`` and ``args[2]`` is a text file
    holding the DNA sequence. Prints each matching name, or "No match".
    Returns a process exit status (0 on success, 1 on bad usage).
    """
    if len(args) != 3:
        print("Usage: python {} DATABASE.csv SEQUENCE.txt".format(args[0] if args else "dna.py"))
        return 1

    with open(args[1], newline="") as database:
        rows = list(csv.reader(database))

    with open(args[2], "r") as sequence_file:
        # Drop newlines and any other whitespace so they can never break up
        # or pad the sequence; a sequence wrapped over lines is re-joined.
        dna = "".join(sequence_file.read().split())

    if not rows:
        print("No match")
        return 0

    # rows[0] is the header; its first cell labels the name column and the
    # remaining cells are the STR motifs to look for.
    motifs = rows[0][1:]
    matches = find_matches(rows[1:], str_profile(dna, motifs))

    if not matches:
        print("No match")
    for name in matches:
        print(name)
    return 0


if __name__ == "__main__":
    raise SystemExit(main(argv))