#!/usr/local2/bin/python2.3 import string, os from openeye.oechem import * ifs = oemolistream() ifs.open('mol_062905.sdf') ifs.SetFormat(OEFormat_SDF) ofs_r1 = oemolostream() ofs_r1.open('a_kegg_filter_1_r.sdf') ofs_r2 = oemolostream() ofs_r2.open('a_kegg_filter_1_r.ism') ofs_p1 = oemolostream() ofs_p1.open('a_kegg_filter_1_p.sdf') ofs_p2 = oemolostream() ofs_p2.open('a_kegg_filter_1_p.ism') ofs_c1 = oemolostream() ofs_c1.open('a_kegg_filter_1_c.sdf') ofs_c2 = oemolostream() ofs_c2.open('a_kegg_filter_1_c.ism') ofs_l1 = oemolostream() ofs_l1.open('a_kegg_filter_l.sdf') ofs_l2 = oemolostream() ofs_l2.open('a_kegg_filter_l.ism') ofs_1 = oemolostream() ofs_2 = oemolostream() ofs_1.open('a_kegg_filter_1_prior.sdf') log = open('a_kegg_filter_1.log','w') #c = 1 count, count_r, count_p, count_f, count_l, count_k, count_c = 0,0,0,0,0,0,0 for mol in ifs.GetOEMols(): # print mol.GetTitle() count+=1 smi = OECreateAbsSmiString(mol) if mol.NumAtoms() >= 4: carbon = 0 polymer = 0 rest = 0 fragments = 1 # Check for the existence of carbons, at least 4 atoms in total and undefined rests and polymers in original Kegg-entry for atom in mol.GetAtoms(): if string.find(atom.GetName(), 'R') !=-1: rest = 1 elif string.find(atom.GetName(), '#') !=-1: rest = 1 elif string.find(atom.GetName(), '*') !=-1: polymer = 1 elif atom.IsCarbon()==1: carbon = 1 if string.find(smi, '.') !=-1: fragments = 2 if rest == 1: count_r+=1 OEWriteMolecule(ofs_r1, mol) OEWriteMolecule(ofs_r2, mol) log.write(mol.GetTitle()+' has an undefined alkyl-rest -> a_kegg_filter_1_r.*\n') elif polymer == 1: count_p+=1 OEWriteMolecule(ofs_p1, mol) OEWriteMolecule(ofs_p2, mol) log.write(mol.GetTitle()+' is a polymer -> a_kegg_filter_1_p.*\n') elif carbon == 0: count_c+=1 OEWriteMolecule(ofs_c1, mol) OEWriteMolecule(ofs_c2, mol) log.write(mol.GetTitle()+' has no carbon -> a_kegg_filter_1_c.*\n') elif fragments == 2: count_f+=1 OEWriteMolecule(ofs_1, mol) log.write(mol.GetTitle()+' has more than 1 fragment\n') else: OEWriteMolecule(ofs_1, mol) count_k+=1 else: OEWriteMolecule(ofs_l1, mol) OEWriteMolecule(ofs_l2, mol) log.write(mol.GetTitle()+' has less than 4 atoms -> a_kegg_filter_l.*\n') count_l+=1 print str(count)+' in total, '+str(count_k)+' kept, '+str(count_l)+' <4 atoms, '+str(count_p)+' polymer, '+str(count_r)+' undef.alkylrest, '+str(count_c)+' !carbon, '+str(count_f)+' >1 molecules' log.write(str(count)+' in total, '+str(count_k)+' kept, '+str(count_l)+' <4 atoms, '+str(count_p)+' polymer, '+str(count_r)+' undef.alkylrest, '+str(count_c)+' !carbon, '+str(count_f)+' >1 molecules') ifs.close() ofs_p1.close() ofs_p2.close() ofs_c1.close() ofs_c2.close() ofs_l1.close() ofs_l2.close() ofs_1.close() os.system('corina.nirvana -d errorfile=a_kegg_filter_1_corina_error.sdf,no3d,wb,rs,neu -t tracefile=a_kegg_filter_1_f_corina.trc -i t=sdf -o t=sdf a_kegg_filter_1_prior.sdf a_kegg_filter_1_prior_corina.sdf') # Doesn't work because sdconver removes all stereoinfo -> would lead to an explosion of numbers in the database #os.system('sdconvert -st -isd a_kegg_filter_1_prior.sdf -omae a_kegg_filter_1_prior.mae') #os.system('neutralizer a_kegg_filter_1_prior.mae a_kegg_filter_1_prior_neutralized.mae') #os.system('sdconvert -imae a_kegg_filter_1_prior_neutralized.mae -osd a_kegg_filter_1_prior_neutralized.sdf') #os.system('rm -rf a_kegg_filter_1_prior.mae a_kegg_filter_1_prior_neutralized.mae a_kegg_filter_1_prior.mae a_kegg_filter_1_prior_neutralized.mae') # Deprotonate every acid, filter for size and metal content. Put everything in just one sdf-file ifs_1 = oemolistream() ifs_1.open('a_kegg_filter_1_prior_corina.sdf') ofs_1.open('a_kegg_filter_1.sdf') ofs_2.open('a_kegg_filter_1.ism') rxn1 = OEUniMolecularRxn('[#8:97]=[#6:96]([#8h1:99])[A,a:101]>>[*:97]=[*:96]([#8-h0:99])[*:101]') rxn2 = OEUniMolecularRxn('[#8:98][#15:99][#8h1:100]>>[*:98][*:99][#8-h0:100]') rxn3 = OEUniMolecularRxn('[#8:98][#16:99][#8h1:100]>>[*:98][*:99][#8-h0:100]') csp2 = OESubSearch() phosphorus = OESubSearch() csp2.Init('[#6X3][#7,#8,#16]') phosphorus.Init('[#15X4][#7,#8,#16]') for mol in ifs_1.GetOEMols(): if mol.NumAtoms() < 76: if csp2.SingleMatch(mol) == 1 or phosphorus.SingleMatch(mol) == 1: metal = 0 for atom in mol.GetAtoms(): if atom.IsMetal(): metal = 1 break if metal == 0: rxn1(mol) rxn2(mol) rxn3(mol) OEWriteMolecule(ofs_1, mol) OEWriteMolecule(ofs_2, mol) else: log.write('No Pattern:'+mol.GetTitle()+' '+OECreateIsoSmiString(mol)+'\n') # print OECreateIsoSmiString(mol) else: log.write('To Big:'+mol.GetTitle()+' '+OECreateIsoSmiString(mol)+'\n') ofs_1.close() ofs_2.close() ifs_1.close() log.close()