#!/usr/bin/python
"""Remove duplicate molecules from a set of molecule files.

Every file in the current directory whose name is accepted by
``start_and_end_with(name, start, end)`` is read with OEChem.  Molecules
are compared by their isomeric SMILES string: the first molecule seen
for a given SMILES is written to ``b9_<end[:-4]>.ism``, every later one
is only reported in ``b9_doubles.log``.

This is time consuming, but safe - nothing gets lost silently.

Usage: b9_remove_doubles.py start-file-pattern end-file-pattern
"""
import os
import string
import sys

from openeye.oechem import *
from var_functions import start_and_end_with


#**************************************************************************
def check_4_doubles(smi_dir, infile, log, ofs):
    """Copy the not-yet-seen molecules of ``infile`` to ``ofs``.

    Parameters:
        smi_dir: dict mapping isomeric SMILES -> title of the first
            molecule seen with that SMILES.  Updated in place.
        infile:  path of the molecule file to read.
        log:     writable file-like object; one line is written for each
            duplicate molecule.
        ofs:     open ``oemolostream`` receiving the unique molecules.

    Returns:
        ``smi_dir`` (the same dict object that was passed in).
    """
    # Use a private input stream instead of relying on a module global.
    ifs = oemolistream()
    if not ifs.open(infile):
        # Unreadable or unrecognised file: report it and carry on with
        # the remaining files instead of failing half-way through.
        sys.stderr.write('Cannot open ' + infile + ' - skipped\n')
        return smi_dir
    try:
        for mol in ifs.GetOEMols():
            ism = OECreateIsoSmiString(mol)
            if ism in smi_dir:
                log.write(mol.GetTitle()+' is equal to '+smi_dir[ism]+'\n')
            else:
                smi_dir[ism] = mol.GetTitle()
                OEWriteMolecule(ofs, mol)
    finally:
        ifs.close()
    return smi_dir
#**************************************************************************


try:
    start = sys.argv[1]
    end = sys.argv[2]
except IndexError:
    print('\nUsage: b9_remove_doubles.py start-file-pattern end-file-pattern\n')
    raise SystemExit()

# NOTE: end[:-4] strips the last four characters of the end pattern, i.e.
# it assumes the pattern finishes with a dot plus three-letter extension.
out_name = 'b9_'+end[:-4]+'.ism'

ofs_1 = oemolostream()
if not ofs_1.open(out_name):
    raise SystemExit('Cannot open output file ' + out_name)

log = open('b9_doubles.log', 'a')
ism_mol_dir = {}
try:
    # Sorted so that the molecule kept for each SMILES does not depend on
    # the arbitrary order in which the OS lists the directory.
    for fname in sorted(os.listdir('.')):
        if fname == out_name:
            # The output file lives in the scanned directory; never feed
            # it back into the duplicate check.
            continue
        if start_and_end_with(fname, start, end):
            ism_mol_dir = check_4_doubles(ism_mol_dir, fname, log, ofs_1)
finally:
    log.close()
    ofs_1.close()