-
Notifications
You must be signed in to change notification settings - Fork 7
/
my_io_pdb.svl
4748 lines (3809 loc) · 145 KB
/
my_io_pdb.svl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#svl
// io_pdb.svl Protein Data Bank format
//
// 07-jul-2011 (ps) Corrected inconsistencies between leap and Amber export
// 06-nov-2011 (kk) Use amber10.mdb to standardize hyb/ion/hintlp
// 14-oct-2011 (ms) replaced ChainLetter with cLetter
// 07-oct-2011 (kk) default translate_prime action * => '
// 18-sep-2011 (kk) Use std attr if proper subset of standard atoms
// 26-jul-2011 (kk) Protect against zero matrix in MTRIX records
// 07-jul-2011 (kk) parse_specification_list requires MOL_ID first
// 05-jul-2011 (kk) Adjust formal charges on metal-organic LINKs
// 05-jul-2011 (kk) Reverted to SystemPush/Pop
// 27-jun-2011 (kk) Append atom collections (ie ALTLOC) on read.
// 02-jun-2011 (kk) Allow variant text in REMARK 350 "CHAINS:" line
// 18-apr-2011 (kk) Full path for gzip; fix use_charges on read
// 13-apr-2011 (kk) Fix read line in fread_PDB_model
// 09-mar-2011 (kk) Force use_element for all-left-justified aname files
// 09-mar-2011 (kk) Support multi_entry option (catenated pdb files)
// 09-mar-2011 (kk) Force consecutive_atoms if there are duplicate atom ids
// 24-feb-2011 (kk) Crystal contact shells
// 08-feb-2011 (kk) Protect from false identity matrix in MTRIX records
// 25-jan-2011 (kk) Protect against false nres in SEQRES
// 19-jan-2011 (ah) write MOE_VERSION as XXXX.XX (2 places after .)
// 13-jan-2011 (kk) Amberize NME, ACE
// 07-jan-2011 (kk) Option to preserve order of atoms in file
// 22-dec-2010 (kk) Crystal Contact sets
// 08-dec-2010 (kk) Amberize monoatomic ions (ions94.lib)
// 02-dec-2010 (kk) Write bOrders as repeated CONECTS
// 29-nov-2010 (kk) fwrite_PDB writes some cards in opt
// 26-nov-2010 (kk) Corrected Inert/active logic for variants
// 15-nov-2010 (kk) SPRSDE/OBSLTE accommodate codes > 4 chars
// 12-nov-2010 (kk) Relax BIOMOL card recognition
// 04-nov-2010 (kk) Protect ApplyCTAB from hydrogen-suppressed dictionaries
// 23-sep-2010 (kk) Protect GetLine from non-ASCII chars
// 10-sep-2010 (kk) Turn contacts off during mol_Create etc
// 25-aug-2010 (kg) Fixed GLH (NE2->OE2)
// 16-aug-2010 (kk) Fixed assignment of types to empty residues
// 06-aug-2010 (kk) Add Amber variant residues to standard table
// 01-jul-2010 (kk) WriteStruct fixed
// 28-apr-2010 (kk) Set Sequence Variants to Inert
// 26-apr-2010 (kk) Fixed HXT vs Hprime in standardize atom names
// 08-apr-2010 (kk) Strip null revdat records
// 16-mar-2010 (kk) Stricter test for valid date in header
// 10-mar-2010 (kk) Use first char after dot for chain letter on write
// 10-mar-2010 (kk) Write Occupancy fixed; write symmetric connect
// 24-feb-2010 (kk) Set BIOMT operation chain names
// 10-feb-2010 (kk) Removed SIGUIJ etc from extension in SplitHets
// 26-jan-2010 (kk) force_TER option
// 26-jan-2010 (kk) Move pdb_SplitHets to fread_PDB from ReadPDB
// 18-jan-2010 (kk) SSBOND format correction (col. 60+)
// 18-jan-2010 (kk) Corrected GLH/ASH AMBER naming logic
// 07-jan-2010 (kk) Give mixed case element names to AutoTypeFromPosition
// 27-oct-2009 (kk) Check 600 Ctab for long bonds
// 20-oct-2009 (kk) Restore RCSB autoconect default
// 05-oct-2009 (kk) cSetName on SplitHets
// 24-aug-2009 (kk) Only clear UID/INS on write if > 1 residue
// 27-jul-2009 (kk) Hydrogen order; NH2+; Occ/Temp precision control
// 11-jul-2009 (kk) Multiple/selected biomolecule support
// 08-jul-2009 (kk) Write all conects for MOE
// 02-jul-2009 (kk) Correct ring hydrogens PDB names; AMBER output
// 30-jun-2009 (kk) SRC_MOE inconsistent between write/read
// 02-jun-2009 (kk) link support in pdb_ConnectAndType
// 02-jun-2009 (kk) trust element columns in V.3.15+
// 07-may-2009 (kk) BIOMT bug fix
// 16-mar-2009 (kk) Activate split hets logic in ReadPDB
// 06-mar-2009 (kk) Improved empty res alignment & numbering logic
// 24-feb-2009 (kk) Allow for extra H's after ctab application
// 18-feb-2009 (kk) apply link; require equal uids+ins for micro het logic
// 16-feb-2009 (kk) seqres collation errors fixed
// 13-feb-2009 (kk) Split hets from protein chains
// 21-jan-2009 (kk) Write CRYST1, HELIX & SHEET records
// 20-jan-2009 (kk) Use TER to separate residues with identical rFullName
// 15-jan-2009 (kk) Complete check for pH
// 15-jan-2009 (kk) Improved slist processing
// 15-jan-2009 (kk) v3.20 adjustments (SPLIT, DBREF1,2)
// 15-jan-2009 (kk) Improved alt_loc processing
// 10-oct-2008 (kk) Protect from empty file
// 09-oct-2008 (kk) Force use_elements on MOE-written PDB files
// 30-sep-2008 (kk) PDB v2.0, PDB v3.0 & IUPAC hydrogen naming
// 22-sep-2008 (kk) BIOMT parsing error fixed
// 03-sep-2008 (kk) MatchCTAB isH error fixed
// 28-aug-2008 (kk) ApplyCTAB in pdb_ConnectAndType for cif reader
// 07-aug-2008 (kk) HN convention removed for amide hydrogens
// 17-jul-2008 (kk) Incorporated ah's code for applying BIOMT records
// 23-jun-2008 (kk) UID INS write policy changed
// 09-apr-2008 (kk) FREE R parse corrected; R VALUE added
// 12-mar-2008 (kk) CAVEAT char num corrected
// 26-nov-2007 (kk) ILE CD from CHARMM
// 20-nov-2007 (kk) use aOccupancy & aSetOccupancy
// 08-nov-2007 (kk) 600 atom names packed
// 06-nov-2007 (kk) allowance for non-std revdat field
// 06-nov-2007 (kk) HET fields corrected
// 04-oct-2007 (kk) remark 2 allowances to pick up resolution
// 25-sep-2007 (kk) std amino glu & asp fix-ups
// 19-sep-2007 (kk) Write Version in remark 99; read remark "0"
// 13-sep-2007 (kk) protect ExtractCTAB from "non-elements"
// 24-aug-2007 (kk) N & UNK in AMINO/NUCLEIC tables
// 23-aug-2007 (kk) TidySeqNum; translate saccharide atom names to * form
// 14-aug-2007 (kk) simple wash in read
// 16-jul-2007 (kk) explicit HIS typing; create non-match SEQRES chains
// 12-jul-2007 (kk) get & apply ctab from REM 600
// 19-jun-2007 (kk) Proper AutoConnect for overlapping residues
// 24-may-2007 (kk) allow 76 char records; check EC vals
// 18-may-2007 (kk) Strip moldata in mol_Finalize; BOND_TOL decrease
// 24-apr-2007 (kk) guarantee undirected bondlist; update READ_DEFAULTS
// 18-apr-2007 (kk) app uniq bondlist when finished read model
// 17-apr-2007 (kk) pdb_GenerateImages argument bugfix
// 12-apr-2007 (kk) val-attr pairs bug (terminating ";")
// 29-mar-2007 (kk) dna-rna bonds are standard
// 26-mar-2007 (kk) proper charges for nucleic acid PO4
// 19-mar-2007 (kk) parse revdat; HEADER code field expanded to 8 chars
// 16-mar-2007 (kk) parse hetsyn & dbref records; add ftnote
// 15-mar-2007 (lc) add parser for the FORMUL entry line
// 08-mar-2007 (kk) keep unit_cell_Z from CRYST1; fix res from REMARK logic
// 27-feb-2007 (kk) only perturb res order to bring atoms together (1mts)
// 26-feb-2007 (kk) keep all revdat
// 08-jan-2007 (kk) collate_res on all models (eg 1f8h)
// 19-dec-2006 (kk) no autoconnect => no autotype on std res
// 19-dec-2006 (kk) rcsb atom names
// 29-nov-2006 (kk) withhold waters from AutoConnect
// 24-nov-2006 (kk) atom_serial; model_num bug fixes
// 10-nov-2006 (kk) make multi-model tags
// 01-nov-2006 (kk) options to return raw atom data, use element col
// 01-nov-2006 (kk) Autoconnect LP's with H's
// 30-oct-2006 (kk) allow duplicate terminal O
// 27-oct-2006 (kk) AutoConnect... adds to, not replaces input bonds
// 26-oct-2006 (kk) guess a chain char to write
// 23-oct-2006 (kk) 'LP ' not read as Phosphorus
// 23-oct-2006 (kk) selected atoms on write bugfix
// 23-oct-2006 (kk) parse ANISOU etc
// 10-oct-2006 (kk) Verbatim modres & seqadv (leave dups & non-informative)
// 03-oct-2006 (kk) model_num vector from fread_PDB
// 28-sep-2006 (kk) Include all PDB Record Types in read; adjusted seqadv
// 26-sep-2006 (kk) fixed symmetry option handling
// 22-sep-2006 (kk) fixed out-of-order problem (BX3 in 1mts.ent)
// 08-sep-2006 (jd) Fixed ignore_hetero mask (missing not)
// 07-sep-2006 (kk) Allow non-uniq anames if no alt loc chars
// 31-aug-2006 (kk) Allow file to be fnum in fread_PDB
// 31-aug-2006 (kk) Restrict H-renaming; default occ to 1.0;
// 31-aug-2006 (kk) Restore ignore_conect etc
// 10-may-2006 (kk) Export pdb_GenerateImages, pdb_ConnectAndType
// 10-may-2006 (kk) Re-write to mol vectors complete
// 07-apr-2006 (kk) Autoimport ReadPDB, WritePDB
// 02-mar-2006 (kk) Default Histidine to HID
// 30-jan-2006 (kk) Bond & Type mol vector
// 17-jan-2006 (kk) Split reading header & models
// 13-dec-2005 (kk) Bugfix: iupac->rcsb hydrogen names
// 13-dec-2005 (kk) IUPAC option for hydrogen names
// 30-nov-2005 (kk) Removed db_ImportPDB (dbimport.svl)
// 28-nov-2005 (kk) all hydrogens bonded...
// 01-nov-2005 (kk) water split bug
// 31-oct-2005 (kk) fread_PDB gunzips *.gz,*.Z
// 28-oct-2005 (kk) Respect element column if format valid
// 27-oct-2005 (kk) collect contiguous waters only, rather than all
// 19-oct-2005 (kk) activate header_only option
// 13-sep-2005 (al) change 'PDB' to 'pdb'
// 20-jun-2005 (pl) read deuterium
// 20-jun-2005 (pl) use filename for header if no header or all blank
// 13-jun-2005 (pl) re-enabled multiple model reading
// 10-feb-2005 (kk) write_CONECT bugfix
// 17-jan-2005 (kk) atom name format repaired
// 03-jan-2005 (kk) handle BABEL inconsistency
// 29-dec-2004 (kk) correctly rotate all HNames of form H*[0-9]
// 29-dec-2004 (kk) include hets from atom name adjustment
// 15-sep-2004 (jd) improved bond list validation
// 16-jul-2004 (jd) added fwrite_PDB
// 04-may-2004 (lc) bug fix on an ReadPDB option
// 12-apr-2004 (pl) fixed date handling on missing dates
// 10-mar-2004 (lc) refine Ryoka's changes. fix fopen in pdb_fread
// 11-sep-2003 (rk) PDB vs IUPAC convention for hydrogen names
// 17-feb-2004 (jd) missing db_Close in function db_ImportPDB
// 28-jan-2004 (pl) converted to new cell parameters stuff
// 15-apr-2003 (kk) bugfix handling invalid dates
// 23-jan-2003 (m2) changed symmetries: options to non_crys, all
// 07-jan-2003 (kk) pdb field interpreting in low-level read (pdb_read)
// 07-jan-2003 (m2) added support for hoh and hetero pdb_open panel
// 02-dec-2002 (kk) crys_sym options for ReadPDB
// 28-aug-2002 (kk) in GetLine account for lines < 80 chars
// 23-apr-2002 (kk) fix aname write: 4 letter names; HN's on pep N
// 22-mar-2002 (kk) bugfix: N's connected to bad amino res
// 08-feb-2002 (kk) nucleic attr : enforce sp3 backbone
// 07-feb-2002 (kk) alignment fixup; histidine match bug, HOH unbond bug
// 06-feb-2002 (kk) increased microhet radius from 0.10 to 0.25
// 31-jan-2002 (kk) atom occupancy written to aScalar
// 20-jan-2002 (kk) use mol_XXX functions
// 15-may-2001 (kk) BOND_TOL increased for auto-bonding polymer links
// 15-may-2001 (kk) uracil fix in set_nucleic_attr
// 15-may-2001 (kk) exclusion of out-bonded amino fix in set_amino_attr
// 27-feb-2001 (kk) attribute fixes (part. histidine)
// 21-feb-2001 (kk) auto-connect fixes
// 20-feb-2001 (kk) move open_Op stuff to f_sys.svl
// 15-feb-2001 (kk) relax test for PDB file
// 05-jan-2001 (kk) integrated with new open system
// 14-jul-2000 (kk) move most read logic to SVL; correct serial num for TER
// 01-oct-1999 (kk) bugfix in fix_hydrogens; module name change (RCSB)
// 14-jul-1999 (kk) write rUID's if not all zero; re-start at each chain
// 09-jul-1999 (kk) fix_hydrogens ignores non-standard names
// 09-jul-1999 (kk) bad charges bug on write
// 09-aug-1997 (kk) created
//
// COPYRIGHT (C) 1997-2011 CHEMICAL COMPUTING GROUP INC. ALL RIGHTS RESERVED.
//
// PERMISSION TO USE, COPY, MODIFY AND DISTRIBUTE THIS SOFTWARE IS HEREBY
// GRANTED PROVIDED THAT: (1) UNMODIFIED OR FUNCTIONALLY EQUIVALENT CODE
// DERIVED FROM THIS SOFTWARE MUST CONTAIN THIS NOTICE; (2) ALL CODE DERIVED
// FROM THIS SOFTWARE MUST ACKNOWLEDGE THE AUTHOR(S) AND INSTITUTION(S); (3)
// THE NAMES OF THE AUTHOR(S) AND INSTITUTION(S) NOT BE USED IN ADVERTISING
// OR PUBLICITY PERTAINING TO THE DISTRIBUTION OF THE SOFTWARE WITHOUT
// SPECIFIC, WRITTEN PRIOR PERMISSION; (4) ALL CODE DERIVED FROM THIS SOFTWARE
// BE EXECUTED WITH THE MOLECULAR OPERATING ENVIRONMENT (MOE) LICENSED FROM
// CHEMICAL COMPUTING GROUP INC.
//
// CHEMICAL COMPUTING GROUP INC. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
// SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
// AND IN NO EVENT SHALL CHEMICAL COMPUTING GROUP INC. BE LIABLE FOR ANY
// SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
// RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
// CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#set title 'MyPDB File I/O'
#set class 'MOE:myfile-io'
#set version '2012.08'
// TBD : option to re-name atoms on read - all or H only
// TBD : Use new oReparent when ready
// TBD : Replace pro_AtomCheck; fix simple wash; 5' to 3'
// TBD : TER-less files -> attempt to split chains
// TBD : options; split_hets etc
// TBD : trusting SRC_MOE/checksum on header END/PDB
// TBD : document use_link/connect policy (1pph, 2oun)
// TBD : option to choose first alt loc per res regardless of occ or value.
function pro_AtomCheck, pro_PeptideFlags;
function AutoConnectFromPosition, AutoTypeFromPosition;
function crys_StandardShortGroup, crys_GetGroupIndex;
function AtomSurfaceArea, aSlogP, aIdealVSA;
function Write_Prompt;
// Text used to identify the program that produced a PDB file.
// SRC_MOE_WRITE is a write format (one number printed with two decimals and
// one free field); SRC_MOE_MATCH and SRC_PDB_REMED are wildcard patterns.
// NOTE(review): presumably SRC_MOE_WRITE/SRC_MOE_MATCH are the write and read
// sides of the same "written by MOE" line and SRC_PDB_REMED recognises the
// RCSB format V. 3.x compliance remark -- the points of use are outside this
// section, confirm there.
const SRC_MOE_WRITE = ' MOE v{n:.2f} (Chemical Computing Group Inc.) {}';
const SRC_MOE_MATCH = ' MOE * (Chemical Computing Group Inc.)*';
const SRC_PDB_REMED = '*COMPLIES WITH FORMAT V. 3.*';
// Tagged vector of default option values for writing PDB files.
// NOTE(review): the code that merges caller options with these defaults is
// outside this section; the per-option notes below are the original ones.
const PDB_WRITE_DEFAULTS = [
use_chain_id: 1, // try to extract chain id from cname
cryst1: 0, // write the CRYST1 record
hnames: 'PDB v3.0', // or IUPAC or "old" PDB
amber: 0, // "Amberize"
atom_prop: [], // B-factor data
occupancy: [], // occupancy data
force_TER: 0 // write chains verbatim with TER recs
];
// Tagged vector of default option values for the low-level PDB reader.
// Options without a trailing note are not explained anywhere in this section.
// NOTE(review): split_hets is 0 here but 1 in READ_PDB_DEFAULTS; presumably
// the higher-level reader overrides this value -- confirm at the merge site.
const FREAD_PDB_DEFAULTS = [
multi_model: 0, // if true, read all models in NMR (eg) files
collate_res: 1, // if true, align SEQRES to ATOM res
ignore_conect: 1, // if true, do not apply CONECT records
auto_connect: 1, // if true, apply AutoConnect
use_link: 0, // if true, apply & require links for inter-res
ignore_hoh: 0,
ignore_hetero: 0,
save_variants: 1,
alt_loc: 0,
split_hets: 0,
use_element: 0, // if true, apply element column of PDB file
chain_tag: 'auto',
gen_symm: 0, // if true, apply symmetries from MTRIXn records
gen_biomt: 0, // if non-zero, create specified biomol
biomol: 0,
crystal_contacts: 0,
crystal_contact_rad: 4.5,
translate_primes: 2, // 0:none, 1:'->*,'OP1'->O1P', 2: vice versa
consecutive_atoms: 0, // If on, residue atoms must be contiguous
preserve_order: 0,
atom_data: 0
];
// Tagged vector of default option values that apply to the higher-level
// PDB read only (split_hets repeats a key of FREAD_PDB_DEFAULTS with a
// different value).
// NOTE(review): how the two default sets are combined is decided outside
// this section.
const READ_PDB_DEFAULTS = [
split_hets: 1, // Remove trailing hets from protein chains.
install_cryst: 0, // Set space group & cell dimensions.
use_charges: 0,
multi_entry: 1
];
//===================== Extensions, Utilities & Brevity =======================
// Add atoms to the named object collection: the new collection is the
// duplicate-free concatenation of atoms and the collection's current members.
local function oAppendSet [setname, atoms]
    local members = cat [atoms, oGetCollection setname];
    oSetCollection [setname, uniq members];
endfunction
// Arithmetic mean of v: the sum of v times invz of its length.
local function mean v
    return add v * invz length v;
endfunction
// Remove leading and trailing whitespace from string s and reduce every
// interior run of whitespace to its last character.
// A whitespace character is kept only when it comes after the first
// non-blank (p > 0) and the character that follows it is non-blank.
// The "following character" mask is built with rotlpoke [m, 0] rather than a
// circular rotl: with a circular rotate the last character of s sees s(1) as
// its successor, so a final trailing blank was kept whenever s started with a
// non-blank character.
local function trim_ws s
    local m = not isspace s;            // non-blank mask
    local p = pscan m;                  // running count of non-blanks
    local next_nonblank = rotlpoke [m, 0];
    return (s | m or (next_nonblank and ltE [0, p, last p]));
endfunction
// Remove whitespace after the last non-blank character of string s.
// Leading and interior whitespace is left as it is.
local function trim_trailing_ws s
    local nonblank = not isspace s;
    local seen = pscan nonblank;        // running count of non-blanks
    // keep non-blanks and everything located before the last non-blank
    return s | (nonblank or (seen < last seen));
endfunction
// Apply sread to the paired elements of v (apt), take the first element of
// each result (app first) and transpose the collection (tr).
// NOTE(review): callers in this file pass [strings, format] and then index
// the result by field number (e.g. A(1), A(3) in make_ctab), so the result is
// presumably one vector per format field -- confirm against sread's return.
local function apt_sread v = tr app first apt sread v;
// Field indices for extra residue-level and atom-level data kept in the mol
// vector by this file. They are consecutive offsets starting at
// MOL_RES_EXTEND and MOL_ATOM_EXTEND respectively.
// NOTE(review): the meaning of each field is only implied by its name in this
// section (e.g. ALOC ~ alternate location, OCC ~ occupancy) -- confirm where
// the fields are filled in.
const MOL_RES_STD = MOL_RES_EXTEND;
const MOL_RES_IDX = MOL_RES_EXTEND+1;
const MOL_RES_ALOC = MOL_RES_EXTEND+2;
const MOL_ATOM_ORDER = MOL_ATOM_EXTEND;
const MOL_ATOM_ALOC = MOL_ATOM_EXTEND+1;
const MOL_ATOM_OCC = MOL_ATOM_EXTEND+2;
const MOL_ATOM_ACTIVE = MOL_ATOM_EXTEND+3;
const MOL_ATOM_CTABH = MOL_ATOM_EXTEND+4;
const MOL_ATOM_SERIAL = MOL_ATOM_EXTEND+5;
const MOL_ATOM_TEMP = MOL_ATOM_EXTEND+6;
const MOL_ATOM_ELCOL = MOL_ATOM_EXTEND+7;
const MOL_ATOM_QCOL = MOL_ATOM_EXTEND+8;
const MOL_ATOM_REC = MOL_ATOM_EXTEND+9;
const MOL_ATOM_LITERAL = MOL_ATOM_EXTEND+10;
// Truncate each level of mol to the number of fields found at the same level
// of mol_Extract[], which removes the extension fields defined by the
// MOL_RES_* / MOL_ATOM_* constants of this file.
local function mol_DropExtend mol = apt keep [mol, app length mol_Extract[]];
// Chain-level accessors: each returns one field of mol(2), selected by the
// corresponding MOL_CHAIN_* index (name, tag, header, number of residues).
local function mol_cName mol = mol(2)(MOL_CHAIN_NAME);
local function mol_cTag mol = mol(2)(MOL_CHAIN_TAG);
local function mol_cHdr mol = mol(2)(MOL_CHAIN_HEADER);
local function mol_nRes mol = mol(2)(MOL_CHAIN_NRES);
// Residue-level accessors: each returns one field of mol(3), selected by the
// corresponding MOL_RES_* index (name, UID, insertion code, type, number of
// atoms, and the MOL_RES_STD extension field of this file).
local function mol_rName mol = mol(3)(MOL_RES_NAME);
local function mol_rUID mol = mol(3)(MOL_RES_UID);
local function mol_rINS mol = mol(3)(MOL_RES_INS);
local function mol_rType mol = mol(3)(MOL_RES_TYPE);
local function mol_nAtoms mol = mol(3)(MOL_RES_NATOMS);
local function mol_rStd mol = mol(3)(MOL_RES_STD);
// Return mol with the MOL_RES_STD field of the residue level replaced by v.
local function mol_rSetStd [mol, v] = poke [mol, [3, MOL_RES_STD], v];
// Atom-level accessors: each returns one field of mol(4), selected by the
// corresponding MOL_ATOM_* index (backbone flag, ionization, hintlp,
// geometry, bond lists, element).
local function mol_aBck mol = mol(4)(MOL_ATOM_BACKBONE);
local function mol_aIon mol = mol(4)(MOL_ATOM_ION);
local function mol_aHLP mol = mol(4)(MOL_ATOM_HINTLP);
local function mol_aHyb mol = mol(4)(MOL_ATOM_GEOM);
local function mol_aBnd mol = mol(4)(MOL_ATOM_BONDS);
local function mol_aEle mol = mol(4)(MOL_ATOM_EL);
// Covalent radius of each atom's element.
local function mol_aRad mol = el_COV_Radius mol_aEle mol;
// Mask of "light" atoms: elements with at most one proton.
local function mol_aLht mol = 1 >= el_Protons mol_aEle mol;
// For each atom, the number of light atoms (see mol_aLht) in its bond list.
local function mol_aNumH mol = app add apt get [[mol_aLht mol], mol_aBnd mol];
// Return mol with the per-atom bond lists replaced by B.
local function mol_aSetBnd [mol, B] = poke [mol, [4, MOL_ATOM_BONDS], B];
// Return mol with its per-atom bond lists rebuilt as an undirected graph:
// the current bond lists are turned into an edge list, the atom count is
// appended, and graph_uneighbors produces the new neighbor lists.
local function mol_SymmetrizeBondlist mol
    local edges = graph_edges mol_aBnd mol;
    local neighbors = graph_uneighbors cat [edges, mol_aCount mol];
    return mol_aSetBnd [mol, neighbors];
endfunction
// Expand a per-residue vector v to per-atom length by repeating each value
// once for every atom of its residue.
local function str_R2A [mol, v] = stretch [v, mol_nAtoms mol];
// Expand a per-chain vector v to per-atom length (chain -> residue -> atom).
local function str_C2A [mol, v] = str_R2A [mol, stretch [v, mol_nRes mol]];
// For each atom, the index of its residue within mol.
local function mol_aRnum mol = str_R2A [mol, igen mol_rCount mol];
// For each atom, the index of its chain within mol.
local function mol_aCnum mol = str_C2A [mol, igen mol_cCount mol];
// Full-name helpers.
// ResName : rName_rUID_rINS; AtomName : Resname_aName
// res_name builds one token per residue from parallel vectors of residue
// name, UID and insertion code, joined with underscores.
local function res_name [r, u, i] = apt twrite ['{}_{}_{}', r, u, i];
// Indices of the three residue fields that make up a full residue name.
const RES_NAME = [MOL_RES_NAME, MOL_RES_UID, MOL_RES_INS];
// Full residue names for the residues of a mol vector.
local function mol_rFullName mol = res_name mol(3)[RES_NAME];
// Full residue names for residue objects r.
local function rFullName r = res_name [rName r, rUID r, rINS r];
// Full atom names for a mol vector: the atom names are grouped per residue
// and each is prefixed with its residue's full name and an underscore.
local function mol_aFullName mol = cat tok_cat [
mol_rFullName mol, '_', split [mol_aName mol, mol_nAtoms mol]
];
// Upper-case result of el_UnitedElement applied to the elements of atoms.
local function aUnitedElement atoms = toupper el_UnitedElement aElement atoms;
// Return one alternate-location character per atom in A.
// The characters are recovered from object collections whose names match
// 'PDB_ALTLOC_[A-Z,a-z,0-9]': an atom found in such a collection gets the
// last character of the collection name, every other atom gets a blank.
local function aAltLoc A
// names of all matching collections, and the atoms stored in each
local alt_char = findmatch ['PDB_ALTLOC_[A-Z,a-z,0-9]', oCollections[]];
local alt_atoms = app oGetCollection alt_char;
// one character per collected atom: last char of the collection's name
alt_char = stretch [app last app string alt_char, app length alt_atoms];
// position of each atom of A in the flattened list (0 if absent)
local x = indexof [A, cat alt_atoms];
return unpack [alt_char[pack x], [" "], x];
endfunction
// Default geometry for each element name in ele.
// Names that match an entry of ELEMENT_SYM (compared in upper case) get
// el_DefaultGeometry; all other names get 'sp3'.
local function ele2hyb ele
local m = indexof toupper [ele, ELEMENT_SYM];
return unpack [el_DefaultGeometry (ele | m), 'sp3', m];
endfunction
// Case-insensitive lookup of ele in the list ['D', ELEMENT_SYM]; the result
// is non-zero for names that are 'D' or an element symbol, 0 otherwise.
local function is_el ele = indexof toupper [ele, cat ['D', ELEMENT_SYM]];
// Normalize a vector of element names so that every entry is usable as an
// element: 'D' becomes 'H', anything not recognized by is_el becomes 'LP',
// and names equal to the upper-case form of an ELEMENT_SYM entry are replaced
// by that entry as spelled in ELEMENT_SYM.
local function safe_elements ele
ele | ele == 'D' = 'H';
ele | not is_el ele = 'LP';
const T = ELEMENT_SYM;
// x is the index in T of each name that matches an upper-cased symbol
local x = indexof [ele, toupper T];
ele | x = T[pack x];
return ele;
endfunction
//====== REMARK 600 - get ion/hyb/hintlp & bonds from CTAB structure ==========
// Build one dictionary entry from the lines of a REMARK 600 CTAB section.
// v : vector of strings, one per line; lines that are empty after trim_ws
//     are discarded first.
// Returns [] when the atom count is below 1 or the atom and bond counts of
// the first line do not account for the number of lines. Otherwise returns a
// tagged vector with a single tag (the leading token of the first line) whose
// value holds the atom names and the per-atom ion, hyb, hlp, bonds, ele and
// Hdeg data of the molecule built by mol_ExtractFromCTAB.
local function make_ctab v
v = v | app length app trim_ws v;
// leading token of every line and the remainder of the line
local [rname, s] = apt_sread [v, '{t:}{c:*}'];
// first line: skip one token, then read the atom and bond counts
local [natoms, nbonds] = first sread [s(1), '{t:X}{n:}{n:}'];
if (natoms < 1) or (natoms + nbonds + 1 <> length v) then
return []; // nothing or inconsistent
endif;
// the lines after the first are the atom lines followed by the bond lines
local [A, B] = split [dropfirst s, [natoms, nbonds]];
const ATOM_INFO = '{c:*4}{c:X}{c:*4}{c:X}{t:2}{c:X}{n:3}{c:X}{c:}{c:}';
A = apt_sread [A, ATOM_INFO];
local atom_info, bond_info;
atom_info(CTAB_A_SYM) = safe_elements A(3); // element
atom_info(CTAB_A_CHARGE) = A(4); // formal charge
atom_info(CTAB_A_STEREO) = CTAB_AS_OTHER; // unknown(?)
atom_info(CTAB_A_HCOUNT) = -1; // assume full valence mol
atom_info[[CTAB_A_APO, CTAB_A_RGROUP]] = -1;
atom_info[[CTAB_A_X, CTAB_A_Y, CTAB_A_Z]] = 0;
atom_info[[CTAB_A_MASS, CTAB_A_RADICAL, CTAB_A_NUM]] = 0;
// atom names (first 4-character field) as tokens; bonds refer to these
local Aname = app token A(1);
if nbonds then
const BOND_INFO = '{c:*4}{c:X}{c:*4}{c:X}{t:4}{c:X}{c:}';
B = apt_sread [B, BOND_INFO];
// bond type name -> position in the list (0 when the name is not listed)
B(3) = indexof [B(3), ['SING','DOUB','TRIP','QUAD','AROM']];
B(3) = mput [B(3), B(3) == 5, CTAB_BT_ARO]; // 1-4 are ok (ctabfcn.htm)
// keep only the bond records whose type is non-zero
B = B || [B(3)];
bond_info(CTAB_B_FROM) = indexof [app token B(1), Aname];
bond_info(CTAB_B_TO) = indexof [app token B(2), Aname];
bond_info(CTAB_B_TYPE) = B(3);
bond_info(CTAB_B_STEREO) = 0;
endif
local [mol] = mol_ExtractFromCTAB [atom_info, bond_info];
// aname holds three parallel name vectors: the untrimmed first field, the
// trimmed second field and the trimmed first field of each atom line
return tag [rname(1), [[
aname: [
Aname,
app token app trim_ws A(2),
app token app trim_ws A(1)
],
ion: mol_aIon mol,
hyb: mol_aHyb mol,
hlp: mol_aHLP mol,
bonds: mol_aBnd mol,
ele: mol_aEle mol,
Hdeg: mol_aNumH mol
]]];
endfunction
// Collect the CTAB dictionary entries found in REMARK 600 text.
// s : vector of strings, one per line. A line whose characters 6..15 spell
//     'DICTIONARY' starts a new section.
// Returns [] when no line starts a section; otherwise the lines are grouped
// with split/mtoc on the section-start mask, only the groups that begin with
// a section-start line are kept, and the make_ctab results of those groups
// are merged with tagcat.
local function extract_600 s
    local is_start = 'DICTIONARY' == app token apt get [s, [5 + igen 10]];
    if allfalse is_start then
        return [];
    endif
    local counts = mtoc is_start;
    local groups = split [s, counts];
    local has_header = app first split [is_start, counts];
    return tagcat app make_ctab (groups | has_header);
endfunction
//======== Reading & writing RCSB title records ===============================
// CHAIN, SYNONYM and EC (in COMPND) along with KEYWDS, EXPDTA, MDLTYP
// and AUTHOR are lists. EXPDTA and MDLTYP are semi-colon separated,
// the others are comma-separated. The AUTHOR list is specifically to
// be separated by commas followed by a non-space. Any commas, semi-colons
// or colons in a list item are supposed to be escaped.
// In practice, escape characters are hardly ever seen in the PDB, and there
// are almost two thousand semi-colon separated SYNONYM or KEYWD lists in
// the PDB (c 2010.12). In lists other than AUTHOR, the intended separators
// are usually - but not always - followed by a space.
// NOTE: mmcif files contain verbatim data for lists
// Mask of the characters of string v that lie inside parentheses.
// A character is inside when more "(" than ")" have been seen up to and
// including it, and at least one ")" is still to come at or after it (so an
// unmatched "(" does not mark the rest of the string as nested).
// Returns all zeros when v contains no "(".
// The previous test compared the number of ")" still to come with the number
// of "(" seen so far using ">", which is never true inside a single balanced
// pair such as "(a, b)", so separators inside ordinary parentheses were not
// protected from splitting.
local function nested v
    local opened = pscan (v == "(");    // "(" seen so far    !!! 3e6p "["
    if not anytrue last opened then return zero v; endif;
    local closed = pscan (v == ")");    // ")" seen so far
    local to_come = reverse pscan reverse (v == ")");   // ")" from here on
    return (opened > closed) and to_come;
endfunction
// Split on any valid split character; paste together tokens separated
// by commas that terminate with digit...
// Split string s into a vector of tokens at the characters in split_char.
// A candidate separator is ignored when it is preceded by a backslash, when
// it lies inside parentheses (see nested), or when it sits directly between
// two digits. Each piece is passed through trim_ws, backslashes are removed
// from the pieces, and empty pieces are dropped.
local function split_list [s, split_char]
// separator mask; the 1 poked into the first position of the shifted
// backslash mask means the first character is never taken as a separator
local x = indexof [s, split_char] and not rotrpoke [s == "\\", 1];
x | nested s = 0;
local d = isdigit s;
// NOTE(review): x is already a 0/1 mask here (result of "and"), so x == 1
// selects every remaining separator, not only the first character of
// split_char; the digit rule therefore applies to all separators although
// the comment above this function speaks of commas -- confirm the intent.
x | x == 1 and rotlpoke [d,0] and rotrpoke [d,0] = 0;
// a blank is appended to s below and marked as the final separator here
x = append [x, 1];
s = app trim_ws app droplast split [cat [s, " "], mtoc rotr x];
s = s || s <> "\\";
return app token (s | app length s);
endfunction
// Return string s with a backslash spliced in at the position of every
// character that occurs in escapes.
local function add_escapes [s, escapes]
    local positions = x_pack indexof [s, escapes];
    return splice [s, positions, 0, "\\"];
endfunction
// Escape every colon, comma and semi-colon of string s with a backslash.
local function escape_all s = add_escapes [s, ":,;"];
// Escape only the colons of string s with a backslash.
local function escape_colons s = add_escapes [s, ":"];
// Break string s into a vector of lines for a field of width len.
// s is cut into pieces at its whitespace characters, pieces longer than len
// are cut further into chunks of at most len characters, and the pieces are
// then appended to the current line for as long as the line stays shorter
// than len. Trailing whitespace is removed from every line.
// NOTE(review): the exact piece boundaries depend on mtoc/split and on the
// adjustment of the first count (inc first n) -- confirm against the SVL
// reference before changing this function.
local function wrap [s, len]
local m = isspace s, n = mtoc m;
local w = split [s, n = poke [n, 1, inc first n]];
// m is reused: mask of the pieces that are longer than len
if anytrue (m = n > len) then
w = splice [w, inc x_pack m, -1, apt split [w | m, len]];
endif
local i, j = 1;
s = [];
for i = 1, length w loop
// a whitespace character at the end of the piece does not count
// towards the line length
if (len > (length s(j) + length w(i) - isspace last w(i))) then
s(j) = cat [s(j), w(i)];
else
s(j = inc j) = w(i);
endif
endloop
return app trim_trailing_ws s;
endfunction
// Join the lines in v into a single string.
// Every element of v is flattened, empty lines are dropped, the remaining
// lines are each prefixed with a blank and concatenated, and the result is
// passed through trim_ws. Whitespace characters selected by the mask m are
// then removed.
// NOTE(review): m combines the whitespace mask with the "-" mask rotated by
// one and the whitespace mask rotated by two; presumably this removes the
// blank inserted after a line that ended in a hyphen attached to a word --
// confirm the direction of rot.
local function unwrap v
v = app cat v;
v = trim_ws cat apt cat [" ", v | app length v];
local m1 = v == "-", m2 = isspace v;
local m = m2 and rot [m1, 1] and not rot [m2, 2];
return (v | not m);
endfunction
// Write string s to fkey as a record named attr.
// s is wrapped to 69 columns; the first line is written with the record name
// alone, every further line with the record name and a continuation number
// that starts at 2.
local function write_string [fkey, attr, s]
    local lines = app token wrap [s, 69];
    fwrite [fkey, '{t:-10}{t:-70}\n', attr, first lines];
    local more = dropfirst lines;
    apt fwrite [fkey, '{t:-6} {n:3} {t:-69}\n', attr, inc x_id more, more];
endfunction
// Write the list stored under data.(attr) as a record named toupper attr.
// Colons, commas and semi-colons inside the items are escaped, the items are
// joined with split_char followed by a blank, the separator after the last
// item is dropped, and the text is written with write_string.
local function write_list [fkey, data, attr, split_char]
local s = app token app escape_all app string data.(attr);
// drop [..., -2] removes the separator and blank written after the last item
s = drop [cat apt swrite ['{}{} ', s, split_char], -2];
write_string [fkey, toupper attr, s];
endfunction
// Remove every backslash of string v that is immediately followed by a
// semi-colon, comma or colon, and pass the result through trim_ws.
local function strip_escapes v
local m = "\\" == v and rotlpoke [indexof [v, ";,:"], 0];
return trim_ws (v | not m);
endfunction
// Specification-list attributes whose values are lists: on write their items
// get all separators escaped (write_specification_list), on read they are
// excluded from escape stripping (parse_mol_list).
const LIST_ITEMS = ['CHAIN','EC','SYNONYM'];
// Write the specification list stored under data.(attr) to fkey as records
// named toupper attr.
// Nothing is written when data is flat or data.(attr) is null. Every tag of
// the list is written as a "MOL_ID: tag;" line followed by one
// "ATTRIBUTE: value, value;" entry per attribute, wrapped to 69 columns.
// j counts the lines written so far; it is the continuation number printed
// on every line except the very first.
local function write_specification_list [fkey, data, attr]
if isflat data then return []; endif;
if isnull (data = data.(attr)) then return []; endif;
attr = toupper attr;
local i, j = 0, k;
local [T, V] = untag data;
for i = 1, length T loop
local [t, v] = untag V(i);
if isnull v then continue; endif;
local s;
// only the first line of the whole record has no continuation number
if (j = inc j) > 1 then
s = twrite ['{t:-6} {n:3} MOL_ID: {};', attr, j, T(i)];
else
s = twrite ['{t:-6} MOL_ID: {};', attr, T(i)];
endif
fwrite [fkey, '{t:-80}\n', s];
t = toupper t;
for k = 1, length v loop
s = app string v(k);
// list attributes escape ":", "," and ";"; other values only ":"
if indexof [t(k), LIST_ITEMS] then
s = app escape_all s;
else
s = app escape_colons s;
endif
// join the items with ", " and drop the separator after the last one
s = drop [cat apt swrite ['{}, ', app token s], -2];
s = app token wrap [swrite ['{}: {};', t(k), s], 69];
apt fwrite [fkey, '{t:6} {n:3} {t:-69}\n', attr, j + x_id s, s];
j = j + length s;
endloop
endloop
endfunction
// Parse the lines of one MOL_ID group of a specification list (COMPND or
// SOURCE) into a tagged vector of attribute: value pairs.
// v : the text lines of the group (converted to strings on entry).
// A line starts a new attribute when an unescaped colon terminates its first
// word; other lines continue the value of the preceding attribute. Only the
// first occurrence of a repeated attribute name starts a new value. Values
// are joined with unwrap and a terminating ";" is removed. The values of the
// attributes named in LIST_ITEMS are split into items with split_list; all
// other values have their escapes stripped and become tokens. Attributes with
// an empty value are left out of the result.
// The list attributes are handled by one loop over LIST_ITEMS, the same
// constant that selects the values exempt from strip_escapes below, so the
// two sets cannot drift apart.
local function parse_mol_list v
    v = app string v;
    local m = ":" == v and not apt rotrpoke [v == "\\", 1];
    local x = apt indexof [1, m or (isspace v and not app m_first v)];
    local s = apt peek [v|x, pack x];
    if length s then
        x | x = pack x * not isspace s;
    endif
    local attr = app token app trim_ws apt keep [v | x, dec pack x];

    // "When necessary to fully describe hybrid molecules, tokens may"
    // "appear more than once for a given MOL_ID."
    // There must be FRAGMENT records then ... so, if there are multiple
    // FRAGMENTS then ...
    x | x = (m = m_uniq attr) * pack x; // ** See 3e6p
    attr = attr | m;

    v | x = apt drop [v | x, inc pack x];
    v = app unwrap split [v, mtoc x];
    m = apt eqL [";", app last v];
    v | m = app droplast (v | m);

    // values of list attributes are split at unescaped "," and ";"
    local item;
    for item in LIST_ITEMS loop
        if (x = indexof [item, attr]) then
            v(x) = split_list [v(x), ",;"];
        endif
    endloop

    m = m_diff [attr, LIST_ITEMS];
    v | m = app token app strip_escapes (v | m);
    return tag ([attr, v] || [app anytrue v]);
endfunction
// Parse the specification list held in data.(record_type).
// The first element of every entry of data.(record_type) is taken as one
// text line. If the first line does not match '*MOL_ID: [1-9]*' the lines
// are joined with single blanks and returned as one string with escapes
// stripped. Otherwise the lines are grouped per MOL_ID line, each group
// (without its MOL_ID line) is parsed with parse_mol_list, and the non-empty
// results are returned tagged by the number read from the MOL_ID line.
// NOTE(review): the function header below ends with a ";" unlike the other
// definitions in this file -- confirm that this is intended.
local function parse_specification_list [data, record_type];
local v = app token app first data.(record_type);
local m = m_findmatch ['*MOL_ID: [1-9]*', v];
if not anytrue first m then
v = droplast cat apt cat [app string v, " "]; // !!! unwrapping !!!
return strip_escapes v;
endif;
// skip one word of each MOL_ID line and read the number that follows
local T = totok app first apt sread [app string (v|m), '{t:X}{n:}'];
v = app dropfirst split [v, mtoc m];
v = app parse_mol_list v;
return tag [T | app length v, v | app length v];
endfunction
// Upper-case three-letter month names; the position in the vector is the
// month number used by date_to_DMY and date_to_YMD.
const MONTHS = [
'JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC'
];
// Convert a date of the form year-month-day (fields separated by "-", month
// given as a number) into a token of the form DD-MMM-YY, with the month name
// taken from MONTHS and the year cut to its last two characters.
// Returns '' when the month number is not in 1..12.
local function date_to_DMY date
    local [Y, M, D] = fieldsplit [string date, "-"];
    M = atoi token M;
    if not leE [1, M, 12] then
        return '';
    endif
    return token cat [D, "-", string MONTHS(M), "-", keep [Y, -2]];
endfunction
// Convert a date of the form DD-MMM-YY (fields separated by "-", "_" or a
// blank, month given by name) into a token of the form YYYY-MM-DD.
// Returns [] when the date is empty, a field is empty, the day or year
// contains a non-digit, or the month name is not in MONTHS.
// Years below 100 are expanded: 72 and above become 19xx, lower values 20xx.
local function date_to_YMD date
date = trim_ws cat date;
if isnull date then return []; endif;
local [D,M,Y] = fieldsplit [date, "-_ "];
if anyfalse app length [D,M,Y] then return []; endif;
if anyfalse isdigit [D,Y] then return []; endif;
if not (M = indexof [token toupper M, MONTHS]) then return []; endif;
[Y,D] = atoi app token [Y,D];
if Y < 100 then
Y = Y + select [1900, 2000, Y >= 72]; // 1SBT 08-NOV-72
endif
date = swrite ['{n:4}-{n:2}-{n:2}', Y, M, D];
// the number fields are blank padded; turn the padding into zeros
date | date == " " = "0";
return token date;
endfunction
// A formul string has the form [ CBX:"2(C1 H1 O2) ", MG:"2(MG1 ++) "]
// parse_formulas converts each formula string into a formula vector. Only the
// contents inside the brackets are considered. Charges are also included in
// the output.
// e.g. "2(C6 H12 O6)" =>[C:6, H:12, O:6], "MG1 ++" => [Mg:1, +:2].
// formulas : tagged vector of name: formula-string pairs.
// Returns [names, formula tokens, formula vectors], or [] as soon as one
// formula has anything other than exactly one "(" and one ")" (or none).
// Empty fields produced by fieldsplit (a blank next to a bracket, a trailing
// or a doubled blank) are skipped; they used to be recorded as an element
// with an empty name and a count of 1.
local function parse_formulas formulas
    local [names, formula_str] = untag formulas;
    local formula_vec = [];
    local i;
    for i = 1, length formula_str loop
        formula_str(i) = trim_ws formula_str(i);
        local fstr = formula_str(i);

        // Keep only the content inside the brackets
        local idx = indicesof ["()", fstr];
        if app length idx === [1,1] then
            fstr = drop [keep [fstr, idx(2)-1], idx(1)];
        elseif not (idx === [[],[]]) then
            return []; // unhealthy brackets (too harsh!)
        endif

        local s, v = [];
        for s in fieldsplit [fstr, " "] loop
            if isnull s then continue; endif    // empty field: nothing to parse

            if indexof ["+", s] then
                // positive charge: "++" counts the signs, "2+" gives a number
                if last s == "." then s = droplast s; endif
                if alltrue (s=="+") then
                    v.'+' = length s;
                else
                    v.'+' = atoi token s;
                endif
            elseif indexof ["-", s] then
                // negative charge, stored as a positive count under '-'
                if last s == "." then s = droplast s; endif
                if alltrue (s=="-") then
                    v.'-' = length s;
                else
                    v.'-' = abs atoi token s;
                endif
            elseif isnull s(2) then
                // a single letter: one atom of that element
                v.(token s(1)) = 1;
            elseif isalpha s(2) then
                // two-letter element symbol; a missing count means 1
                v.(twrite ['{}{}',s(1),tolower s(2)]) =
                    max [1, atoi token drop [s,2]]
                ;
            else
                // one-letter element symbol followed by its count
                v.(token s(1)) = atoi token drop [s,1];
            endif
        endloop
        formula_vec(i) = v;
    endloop
    return [names, app token formula_str, formula_vec];
endfunction
//=============== fread_PDB_header ============================================
// NOTE: In the RCSB data as of 2007-05-24 there are many entries that violate
// PDB format by extending lengths of some records beyond their documented
// values. REMARK, MODRES, SEQADV & COMPND are the most common offenders.
// To handle these, we read to the 80th column unless the HEADER line tells us
// that the pdb code is written into cols 73+.
// SSBOND & LINK are officially 78 chars long.
// CAVEAT is officially 70 chars long.
// PDB_FMT: sread formats for the header (title) section, one per record type.
// The table is written as a tagged vector (record name : format) and untagged
// immediately, so PDB_FMT(1) holds the record names and PDB_FMT(2) the
// matching formats. The order of the entries matters: the reader looks up a
// record name's position in PDB_RECS (which starts with these names) and uses
// that position to pick the format.
// NOTE(review): items with a trailing X (e.g. {c:*10X}) look like skipped
// columns, and adjacent quoted literals (REVDAT, DBREF, SSBOND, LINK, ...)
// appear to be joined into a single format -- confirm against the sread
// documentation before editing any width.
const PDB_FMT = untag [
HEADER: '{c:*10X}{c:*40}{c:*12}{c:*8}{c:*2X}{t:4}', // len: date 12, ID 8
USER : '{c:*10X}{c:*}', // USER gets whole line
OBSLTE: '{c:*11X}{c:*9}{c:*11X}{t:*}',
CAVEAT: '{c:*15X}{c:*61}', // document says 70.
TITLE : '{c:*10X}{c:*70}', // switched to 80 chars in fmt 3.20
SPLIT : '{c:*11X}{t:*}',
COMPND: '{c:*10X}{c:*70}', // attr-value pairs
SOURCE: '{c:*10X}{c:*70}', // attr-value pairs
KEYWDS: '{c:*10X}{c:*70}', // comma-separated list
EXPDTA: '{c:*10X}{c:*70}', // semi-colon separated list
NUMMDL: '{c:*10X}{n:4}',
MDLTYP: '{c:*10X}{c:*70}', // semi-colon separated list
AUTHOR: '{c:*10X}{c:*70}', // comma-separated list
REVDAT: '{c:*7X}{n:3}{c:*2}{c:*10}{c:X}{c:*5}{c:*3X}{n:1}{c:*7X}'
'{t:6}{c:X}{t:6}{c:X}{t:6}{c:X}{t:6}', // continuation as 2-chars
SPRSDE: '{c:*11X}{c:*9}{c:*11X}{t:*}',
JRNL : '{c:*10X}{c:*66}',
REMARK: '{c:*6X}{c:*}', // parse digit later
FTNOTE: '{c:*6X}{n:4}{c:*66}',
DBREF : '{c:*6X}{c:X}{t:4}{c:X}{c:}{c:X}{n:4}{c:}{c:X}{n:4}{c:}{c:X}'
'{t:6}{c:X}{t:8}{c:X}{t:12}{c:X}{n:5}{c:}{c:X}{n:5}{c:}',
DBREF1: '{c:*6X}{c:X}{t:4}{c:X}{c:}{c:X}{n:4}{c:}{c:X}{n:4}{c:}{c:X}'
'{t:6}{c:*15X}{t:20}',
DBREF2: '{c:*18X}{t:22}{c:*4X}{n:13}{c:}{n:13}{c:}',
SEQADV: '{c:*12X}{t:3}{c:X}{c:}{c:X}{n:4}{c:}{c:X}{t:4}{c:X}{t:9}'
'{c:X}{t:3}{c:X}{n:5}{c:X}{c:*27}',
SEQRES: '{c:*6X}{n:5}{c:1}{n:5}{c:X}{t:4*13}',
MODRES: '{c:*12X}{t:3}{c:X}{c:}{c:X}{n:4}{c:}{c:X}{t:3}{c:*47}',
HET : '{c:*6X}{c:X}{t:3}{c:*2X}{c:}{n:4}{c:}{c:*2X}{n:5}{c:*5X}{c:*40}',
HETNAM: '{c:*6X}{c:*2X}{c:*2X}{c:X}{t:3}{c:X}{c:*58}',
HETSYN: '{c:*6X}{c:*2X}{c:*2X}{c:X}{t:3}{c:X}{c:*58}',
FORMUL: '{c:*6X}{c:*6X}{t:3}{c:*4X}{c:*51}',
HELIX : '{c:*10X}{c:*66}', // 72-76 is officially length of helix
SHEET : '{c:*10X}{c:*60}',
TURN : '{c:*10X}{c:*60}',
SSBOND: '{c:*7X}{n:3}{c:X}'
'{t:3}{c:X}{c:1}{c:X}{n:4}{c:1}{c:*3X}'
'{t:3}{c:X}{c:1}{c:X}{n:4}{c:1}{c:*23X}'
'{t:6}{c:X}{t:6}{c:X}{n:5}',
LINK : '{c:*12X}'
'{t:4}{c:}{t:3}{c:X}{c:}{n:4}{c:}{c:*15X}'
'{t:4}{c:}{t:3}{c:X}{c:}{n:4}{c:}'
'{c:*2X}{t:6}{c:X}{t:6}{c:X}{n:5}',
HYDBND: '{c:*10X}{c:*62}', // undocumented, but still found in archive
SLTBRG: '{c:*10X}{c:*62}', // ''
CISPEP: '{c:*10X}{c:*60}',
SITE : '{c:*10X}{c:*60}',
CRYST1: '{c:*6X}{c:*66}',
ORIGX1: '{c:*10X}{c:*60}',
ORIGX2: '{c:*10X}{c:*60}',
ORIGX3: '{c:*10X}{c:*60}',
SCALE1: '{c:*6X}{c:*4X}{n:10}{n:10}{n:10}{c:*5X}{n:10}',
SCALE2: '{c:*6X}{c:*4X}{n:10}{n:10}{n:10}{c:*5X}{n:10}',
SCALE3: '{c:*6X}{c:*4X}{n:10}{n:10}{n:10}{c:*5X}{n:10}',
MTRIX1: '{c:*6X}{c:X}{n:3}{n:10}{n:10}{n:10}{c:*5X}{n:10}{c:*4X}{c:1}',
MTRIX2: '{c:*6X}{c:X}{n:3}{n:10}{n:10}{n:10}{c:*5X}{n:10}{c:*4X}{c:1}',
MTRIX3: '{c:*6X}{c:X}{n:3}{n:10}{n:10}{n:10}{c:*5X}{n:10}{c:*4X}{c:1}'
];
// Names of the header (title section) records: the tags of the PDB_FMT table.
const TITLE_RECS = PDB_FMT(1);

// Records of the coordinate section; the header reader stops at the first
// record whose name appears in this list.
const ATOM_RECS = [
    'MODEL', 'ATOM', 'HETATM', 'SIGATM', 'ANISOU', 'SIGUIJ', 'TER', 'ENDMDL',
    'CONECT'
];

// Every recognised record name, title records first, so that the leading
// positions line up with the formats stored in PDB_FMT(2).
const PDB_RECS = cat [TITLE_RECS, ATOM_RECS];
global function myfread_PDB_header [file, first_line]
local i,x,m,n,v;
local pdb, L, R, pdb_code;
local fnum = file;
if type file == 'tok' then
fnum = fopenr file;
endif
local fmts = PDB_FMT(2);
fmts = cat [fmts, rep ['{c:*}', length PDB_RECS - length fmts]];
// ================ Line reader & parser ==================================
function GetLine []
loop
if length first_line then
L = first_line; first_line = [];
else
L = freadb [fnum, 'line', 1];
endif
if isnull L then return 0; endif; // End of file
R = first sread [keep [L = first L, 6], '{t:}'];
if R == 'END' then return 0; endif; // End of entry
until (i = indexof [R, PDB_RECS]) endloop
if (n = length L) < 80 then
L = cat [L, rep [" ", 80 - n]];
elseif length pdb_code then
const atom_idx = 72 + igen 4;
L[atom_idx] = " "; // see NOTE above
endif;
L | not (isprint L or isspace L) = "?"; // !!! sread is broken
L = first sread [L, fmts(i)];
return 1;
endfunction
// ====== Read the TITLE records - stop at the coordinate section ========
while GetLine []
while not indexof [R, ATOM_RECS]
loop
if R == 'HEADER' then // first instance only
if isnull pdb.(R) then
pdb.HEADER = droplast L;
pdb_code = last L;
if pdb_code == ' ' or pdb_code <> token keep [L(3), 4] then
pdb_code = [];
endif
endif
elseif indexof [R, TITLE_RECS] then
pdb.(R) = append [pdb.(R), L];
endif
R = [];
endloop
if type file == 'tok' then
fclose fnum;
endif
// ======== Finished reading; now clean-up record data ===================
pdb = untag pdb;
pdb = tag [tolower first pdb, last pdb];
if length R then
pdb.first_line = L;
else
L = [];
endif
pdb.date = date_to_YMD pdb.header(2);
pdb.code = trim_ws pdb.header(3);
pdb.header = trim_ws pdb.header(1);
pdb.title = unwrap app first pdb.title;
if length pdb.split then pdb.split = cat app cat pdb.split; endif
pdb.compnd = parse_specification_list [pdb, 'compnd'];
pdb.source = parse_specification_list [pdb, 'source'];
pdb.keywds = split_list [unwrap app first pdb.keywds, ",;"];
pdb.expdta = split_list [unwrap app first pdb.expdta, ";,"];
pdb.mdltyp = split_list [unwrap app first pdb.mdltyp, ";,"];
if length pdb.author then
v = unwrap app first pdb.author;
pdb.author = app token app trim_ws fieldsplit [v, ","];
endif
// REVDAT
if length pdb.revdat then
local revdat = apt keep [pdb.revdat, 5];
local revfield = apt drop [pdb.revdat, 5];
revfield = revfield || revfield <> '';
revdat = tr revdat;
if (revdat(3)(1)(2) == "-") then // no continuation lines
revdat(3) = apt cat revdat[[2,3]];
m = rep [1, l_length revdat];
else
m = not atoi app token revdat(2);
endif
revdat = revdat || [m];
revdat(3) = app date_to_YMD revdat(3);
revdat(6) = app cat split [revfield, mtoc m];
revdat = apt get [revdat[[1,3,4,5,6]], [x_sort revdat(1)]];
revdat = revdat || [app anytrue revdat(2)];
if l_length revdat then
pdb.revdat = apt get [revdat, [x_sort revdat(2)]];
else
pdb.revdat = [];
endif
endif
// OBSLTE, SPRSDE
function group_rec_management rec_man
if isnull rec_man then return []; endif;
local _date = app date_to_YMD app first rec_man;
local _codes = app cat app last rec_man;
m = _date <> '';
return tag [_date | m, app cat split [_codes, mtoc m]];
endfunction
pdb.obslte = group_rec_management pdb.obslte;
pdb.sprsde = group_rec_management pdb.sprsde;
// From the REMARK records extract the resolution, R Free, R value,
// mean B, refinement program, pH & format compliance statement.
function extract_remarks s
v = apt keep [s, 4];
m = app alltrue (isdigit v or isspace v);
v = tr app first apt sread [s | m, '{n:4}{c:*}'];
s = s | not m;
[x,m] = sam v(1);
v = tag [totok v(1)[x|m], split [v(2)[x], mtoc m]];
if length s then v.remark = s; endif
return v;
endfunction
pdb.remark = extract_remarks app first pdb.remark;
// REMARK 2: Resolution
v = app string findmatch [' RESOLUTION.*', app token pdb.remark.'2'];
x = apt indexof [1, isdigit v];
if anytrue x then
pdb.res = first sread [drop [(v|x)(1), dec (pack x)(1)], '{n:}'];
endif;
// REMARK 3: Refinement details - R free/value; mean B; program
function extract_remark_value [rem_text, ref_pattern]
local rf = string first findmatch [ref_pattern, rem_text];
if isnull rf then return []; endif;
rf = token trim_ws drop [rf, indexof [":", rf]];
if rf == 'NULL' then rf = []; endif;
return rf;
endfunction
v = app token app trim_ws pdb.remark.'3';
pdb.free_R = pdb.R_free = tonum extract_remark_value [v, [
'FREE R VALUE :*', // Refinment
'FREE R VALUE (NO CUTOFF) :*' // All data
]];
pdb.R_value = tonum extract_remark_value [v, [
'R VALUE (WORKING SET) :*',
'R VALUE (WORKING SET, NO CUTOFF) :*'
]];
pdb.mean_B = tonum extract_remark_value [
v, 'MEAN B * (OVERALL, A\*\*:*'
];
pdb.program = extract_remark_value [v, 'PROGRAM *:*'];
const EXP_DETAILS_REMARKS = ['200','210','230','240','245','265'];
v = app token cat tagget [pdb.remark, EXP_DETAILS_REMARKS];
pdb.pH = tonum extract_remark_value [v, '* PH *:*'];
v = app token pdb.remark.'4';
pdb.format = first findmatch [ '*COMPLIES WITH FORMAT V.*', v];
if length pdb.format then