#!/usr/local2/bin/python2.3
import string, os, shutil
from openeye.oechem import *

##### Looks for identical molecules and removes them
##### (time consuming, but safe -> paranoia about losing something).
#
# Input : ORIGINAL_MOLS/a3_kegg_012506.ism
#         Each line is split on whitespace; field 0 is the duplicate-detection
#         key and field 1 is the molecule ID (presumably a SMILES string and a
#         KEGG compound/drug ID, judging by the file names -- confirm).
# Output: ORIGINAL_MOLS/a4_kegg_012506.ism          first occurrence of each key
#         ORIGINAL_MOLS/a4_kegg_012506_doubles.ism  later occurrences of a key
#         ORIGINAL_MOLS/a4_doubles.log              "<id>\tidentical with <first id>"
#
# Lines whose ID is in metal_list but not in metal_list_keep are dropped
# entirely: they are written to no output file and never recorded as a
# "first occurrence".

molf = open('ORIGINAL_MOLS/a3_kegg_012506.ism', 'r')
outf1 = open('ORIGINAL_MOLS/a4_kegg_012506.ism', 'w')
outf2 = open('ORIGINAL_MOLS/a4_kegg_012506_doubles.ism', 'w')
log = open('ORIGINAL_MOLS/a4_doubles.log', 'w')

# Maps field 0 of a line to the ID (field 1) of the first line that carried it.
ism_mol_dir = {}

metal_list = [
    'C00032', 'C00324', 'C00923', 'C00924', 'C00985', 'C00995', 'C00998',
    'C01783', 'C01913', 'C02139', 'C02880', 'C03184', 'C03444', 'C03516',
    'C04536', 'C06229', 'C06767', 'C07269', 'C07597', 'C07695', 'C07870',
    'C08193', 'C11092', 'C11093', 'C11151', 'C11336', 'C11340', 'C11829',
    'C11830', 'C11831', 'C11832', 'C12217', 'C12218', 'C12862', 'C13086',
    'C13104', 'C13427', 'C14282', 'C14413', 'D00237', 'D00582', 'D00614',
    'D00728', 'D00861', 'D00864', 'D00991', 'D00992', 'D01194', 'D01208',
    'D01363', 'D01398', 'D01416', 'D01424', 'D01503', 'D01611', 'D01626',
    'D01643', 'D01757', 'D01867', 'D01938', 'D02012', 'D02033',
]

# Entries of metal_list that must NOT be dropped.
metal_list_keep = [
    'C00032', 'C00923', 'C00924', 'C00995', 'C00998', 'C02139', 'C02880',
    'C03184', 'C03516', 'C04536', 'C06767', 'C07597', 'C11829', 'C11830',
    'C11831', 'C11832', 'D01611',
]

# IDs to drop: in metal_list but not in metal_list_keep.  Computed once here
# instead of re-testing both lists for every input line.  A dict is used as a
# set so the script keeps running on the Python 2.3 named in the shebang.
metal_drop = dict.fromkeys(
    [mol for mol in metal_list if mol not in metal_list_keep])

# try/finally guarantees the output and log files are flushed and closed even
# if a malformed line aborts the loop.
try:
    for line in molf:
        fields = line.split()
        if not fields:
            # Blank / whitespace-only line (e.g. a trailing newline): nothing
            # to classify, and indexing it would raise IndexError.
            continue

        # A non-blank line with a single field still raises IndexError here,
        # so a key without an ID is never silently discarded.
        mol_key = fields[0]
        mol_id = fields[1]

        if mol_id in metal_drop:
            continue

        if mol_key in ism_mol_dir:
            # Duplicate: keep the line aside and record which ID it matches.
            outf2.write(line)
            log.write(mol_id + '\tidentical with ' + ism_mol_dir[mol_key] + '\n')
        else:
            # First time this key is seen: remember its ID and keep the line.
            ism_mol_dir[mol_key] = mol_id
            outf1.write(line)
finally:
    log.close()
    outf1.close()
    outf2.close()
    molf.close()