Facebook
From Cobalt Mockingjay, 5 years ago, written in Python.
Embed
Download Paste or View Raw
Hits: 140
  1. import csv
  2. from sys import argv
  3.  
  4. r = csv.reader(open(argv[1]))
  5. names = list(r) #convert csv to list
  6. countermax = 1 #set counter
  7. countersmax = 1
  8. #names[0] is a header and [1:] is the name of the str's.
  9. #it starts from 1 because names[0][0] is the names.
  10. sequencelist = names[0][1:]
  11. values = []
  12. namelist = []
  13. strvalue = []
  14. ret = False
  15.    
  16. txtf = open(argv[2], "r")
  17. for lines in txtf:
  18.     dna = lines #convert txt to string
  19.    
  20. for n in range(len(sequencelist)):
  21.     for x in range(len(dna)):
  22.         counter = 1  
  23.         l = len(sequencelist[n]) #length of the sequence for iteration
  24.         #conditionals for control the recursion, if dna[x:x+l] (l is the length of str) equals str, we should control "is next one str" therefore we should add dna[x:x+l] == dna[x+l:x+2*l] and we set counter.
  25.         if dna[x:x+l] == sequencelist[n]:
  26.             while dna[x:x+l] == dna[x+l:x+2*l]:
  27.                 counter += 1
  28.                 x = x+l
  29.         #there are different recursions therefore we should take biggest one, and when we find bigger we should set countermax as a bigger one. and we have values list and this means biggest STR values.      
  30.         if counter > countermax:
  31.             countermax = counter
  32.             values.append(countermax)
  33.     countermax = 1 #when we done we should set countermax again for next values.
  34.  
  35. for numbers in range(len(names)-1):
  36.   #this is for "name" database. now we have values and we should compare with database.
  37.     m = names[numbers+1][1:] #names[numbers][0] is a "names" part. for example values are like this: Albus 3 5 7 9 11 as you see names[1][0] is Albus but we need 3,5,7,9,11 part. Therefore we should start from one and this means: names[numbers+1][1:]
  38.    
  39.     namelist.append(m) #and we have a new list a.k.a "namelist" for this values.
  40.    
  41. for x in range(len(values)):
  42.     new = str(values[x]) #we took values from dna sequences but they are in integer but namelist values are strings for comparison we should convert them to strings.
  43.     strvalue.append(new)
  44.  
  45.  
  46.  
  47. if argv[1] == "databases/large.csv":
  48. #problem starts here, we have a missing values. for example Albus values ['15', '49', '38', '5', '14', '44', '14', '12'] but our values ['15', '38', '5', '14', '44', '14', '12'] as you see 49 is missing. because of this condition, I skipped the namelist[x][1]. namelist[x][1] is 49 and my values don't include this.
  49.     for x in range(len(namelist)):
  50.         if namelist[x][0] == strvalue[0] and namelist[x][2] == strvalue[1] and namelist[x][3] == strvalue[2] and namelist[x][4] == strvalue[3] and namelist[x][5] == strvalue[4] and namelist[x][6] == strvalue[5] and namelist[x][7] == strvalue[6]:
  51.             print(names[x+1][0]) #if this condition is correct we should take names[numbers][0] for print the names.
  52.             ret = True
  53.        
  54. if argv[1] == "databases/small.csv":
  55.     for x in range(len(namelist)):
  56.         if namelist[x][0] == strvalue[0] and namelist[x][2] == strvalue[1]:
  57.             print(names[x][0])
  58.             ret = True
  59.            
  60. if ret == False:
  61.     print("No match")