#!/usr/local2/bin/python2.3
"""Remove duplicate molecules from a molecule file (a4_rm_doubles.py).

Usage: a4_rm_doubles.py molfile

Every line of ``molfile`` is split on whitespace: the first field is the
identity key (presumably the SMILES string, given the ``.ism`` outputs) and
the second field is the molecule title.  Two lines with the same first field
are considered the same molecule.  This is slow but safe -- the point is to
be paranoid about losing anything.

Input files (derived from the ``molfile`` name, cut at its first ``_out``):
    <prefix>_metals.txt       titles of metal-containing molecules
    <prefix>_metals_keep.txt  titles of metal molecules that must be kept
Both metal lists are generated by a3.2_size_cutoff_filter.py.

Output files (the name part between the first ``_`` and the first ``_out``):
    a4<part>_out.ism      first occurrence of every molecule
    a4<part>_doubles.ism  every later occurrence
    a4<part>.log          "<title>\\tidentical with <first title>" per double

A molecule whose title is in the metals list but not in the keep list is
dropped and written nowhere.
"""

import string
import sys

from openeye.oechem import *


def _read_id_list(path):
    """Return the IDs listed one per line in *path* as dict keys.

    A dict is used as a set (the builtin ``set`` does not exist in
    Python 2.3) so membership tests do not scan a list per molecule.
    """
    ids = {}
    listing = open(path, 'r')
    try:
        for entry in listing:
            # Strip only the line terminator.  The original ``line[:-1]``
            # cut off the last character of the final ID when the file
            # did not end with a newline.
            entry = entry.rstrip('\r\n')
            # Titles taken from split() are never empty, so an empty entry
            # could never match a titled molecule; dropping it keeps blank
            # lines from matching untitled molecules.
            if entry:
                ids[entry] = None
    finally:
        listing.close()
    return ids


def _filter_duplicates(molecules, metals, metals_keep,
                       unique_out, doubles_out, log):
    """Split the lines of *molecules* into unique and duplicate molecules.

    molecules    -- iterable of input lines ("<key> <title> ...")
    metals       -- container of titles flagged as metal-containing
    metals_keep  -- container of metal titles that must not be dropped
    unique_out   -- file-like object receiving first occurrences
    doubles_out  -- file-like object receiving later occurrences
    log          -- file-like object receiving one line per duplicate
    """
    first_title = {}  # identity key -> title of its first occurrence
    for line in molecules:
        fields = line.split()
        if not fields:
            # Blank line: nothing to keep, and indexing it used to crash.
            continue
        key = fields[0]
        # A line without a title cannot be in a metal list; keep it rather
        # than abort half-way through with an IndexError.
        if len(fields) > 1:
            title = fields[1]
        else:
            title = ''

        # Metal molecules are dropped unless explicitly white-listed.
        if title in metals and title not in metals_keep:
            continue

        if key in first_title:
            doubles_out.write(line)
            log.write(title + '\tidentical with ' + first_title[key] + '\n')
        else:
            first_title[key] = title
            unique_out.write(line)


def main():
    """Run the duplicate filter on the file named by ``sys.argv[1]``."""
    try:
        mol_path = sys.argv[1]
    except IndexError:
        # Same text the former ``print`` statement produced (print added
        # the final newline).
        sys.stdout.write('\nUsage: a4_rm_doubles.py molfile\n\n')
        raise SystemExit()

    # NOTE: find() returns -1 when the marker is absent, which produces odd
    # slices; this is kept as-is so existing file names do not change.
    underscore_pos = mol_path.find('_')
    out_pos = mol_path.find('_out')
    out_prefix = 'a4' + mol_path[underscore_pos:out_pos]
    metal_prefix = mol_path[:out_pos]

    molf = open(mol_path, 'r')
    try:
        # Read every input before creating any output, so a missing metal
        # list no longer leaves freshly truncated, empty output files.
        metal_list_keep = _read_id_list(metal_prefix + '_metals_keep.txt')
        metal_list = _read_id_list(metal_prefix + '_metals.txt')

        outputs = []
        try:
            for suffix in ('_out.ism', '_doubles.ism', '.log'):
                outputs.append(open(out_prefix + suffix, 'w'))
            outf1, outf2, log = outputs
            _filter_duplicates(molf, metal_list, metal_list_keep,
                               outf1, outf2, log)
        finally:
            # Close whatever was opened, even after an error.
            for handle in outputs:
                handle.close()
    finally:
        molf.close()


# Deliberately not guarded by ``if __name__ == '__main__'``: the original
# script ran unconditionally when loaded, and that behaviour is preserved.
main()