Skip to content

Commit

Permalink
to_json.hpp: refactoring and changes in (undocumented) API
Browse files Browse the repository at this point in the history
The code is now split into a .cpp file and an .hpp file.

Class JsonWriter (which was undocumented) is no longer part of the API.
Instead, we have struct JsonWriteOptions and cif::write_json_to_stream().
write_mmjson_to_stream() and Python's as_json() were kept for backward
compatibility.
  • Loading branch information
wojdyr committed Sep 20, 2024
1 parent 8a4b6ca commit 8593a02
Show file tree
Hide file tree
Showing 7 changed files with 317 additions and 292 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ add_library(gemmi_cpp
src/mmcif.cpp src/mmread_gz.cpp src/monlib.cpp src/mtz.cpp src/mtz2cif.cpp
src/pdb.cpp src/polyheur.cpp src/read_cif.cpp src/read_map.cpp
src/resinfo.cpp src/riding_h.cpp src/select.cpp src/sprintf.cpp
src/to_mmcif.cpp src/to_pdb.cpp src/topo.cpp
src/to_json.cpp src/to_mmcif.cpp src/to_pdb.cpp src/topo.cpp
src/xds_ascii.cpp)
add_library(gemmi::gemmi_cpp ALIAS gemmi_cpp)
set_property(TARGET gemmi_cpp PROPERTY POSITION_INDEPENDENT_CODE ON)
Expand Down
296 changes: 23 additions & 273 deletions include/gemmi/to_json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,298 +4,48 @@

#ifndef GEMMI_TO_JSON_HPP_
#define GEMMI_TO_JSON_HPP_
#include <cctype> // for isdigit
#include <ostream> // for ostream
#include <set> // for set
#include <string> // for string
#include <vector> // for vector
#include "cifdoc.hpp"
#include "numb.hpp" // for is_numb
#include "util.hpp" // for starts_with

namespace gemmi {
namespace cif {

class JsonWriter {
public:
bool comcifs = false; // conform to the COMCIFS CIF-JSON draft
struct JsonWriteOptions {
bool as_comcifs = false; // conform to the COMCIFS CIF-JSON draft
bool group_ddl2_categories = false; // for mmJSON
bool with_data_keyword = false; // for mmJSON
bool bare_tags = false; // "tag" instead of "_tag"
bool values_as_arrays = false; // "_tag": ["value"]
bool lowercase_names = true; // write case-insensitive names as lower case
int quote_numbers = 1; // 0=never (no s.u.), 1=mix, 2=always
std::string cif_dot = "null"; // how to convert '.' from CIF
explicit JsonWriter(std::ostream& os) : os_(os), linesep_("\n ") {}
void write_json(const Document& d);
// Preset: switch every option needed for the COMCIFS CIF-JSON draft.
void set_comcifs() {
  cif_dot = "false";        // CIF '.' is emitted as false
  quote_numbers = 2;        // 2 = always quote numbers
  values_as_arrays = true;  // "_tag": ["value"]
  comcifs = true;
}
// Preset: switch every option needed for mmJSON output.
void set_mmjson() {
  quote_numbers = 0;             // 0 = never quote numbers (no s.u.)
  lowercase_names = false;       // keep the original case of names
  values_as_arrays = true;       // "tag": ["value"]
  bare_tags = true;              // "tag" instead of "_tag"
  with_data_keyword = true;
  group_ddl2_categories = true;
}

private:
std::ostream& os_;
std::string linesep_;

// Lengthens (n > 0) or shortens (n < 0) the line separator by |n| characters;
// added characters are spaces.
void change_indent(int n) {
  const size_t new_length = linesep_.size() + n;
  linesep_.resize(new_length, ' ');
}

// Returns the part of the tag up to and including the first dot
// (the category with trailing dot). The result is empty when category
// grouping is disabled or the tag contains no dot.
std::string get_tag_category(const std::string& tag) const {
  if (group_ddl2_categories) {
    const size_t dot = tag.find('.');
    if (dot != std::string::npos)
      return tag.substr(0, dot + 1);
  }
  return std::string{};
}

// Returns the category (as given by get_tag_category() for the first tag)
// if every other tag of the loop starts with it; otherwise, or when the loop
// has no tags, returns an empty string.
std::string get_loop_category(const Loop& loop) const {
  std::string common;
  if (loop.tags.empty())
    return common;
  common = get_tag_category(loop.tags[0]);
  for (size_t k = 1; k != loop.tags.size(); ++k) {
    if (!starts_with(loop.tags[k], common)) {
      common.clear();
      break;
    }
  }
  return common;
}

// based on tao/json/internal/escape.hpp
//
// Writes s, starting at byte offset pos, to os with JSON string escaping
// applied (the surrounding quotes are NOT written):
//  - backslash and double quote are backslash-escaped,
//  - control characters (< 32) use the short escapes \b \f \n \r \t
//    or \u00XX, and DEL (127) is written as \u007f,
//  - if to_lower is true, ASCII 'A'-'Z' are converted to lower case,
//  - all other bytes (including bytes >= 128) are copied unchanged.
// An offset at or beyond the end of s writes nothing.
static void escape(std::ostream& os, const std::string& s, size_t pos,
                   bool to_lower) {
  static const char* h = "0123456789abcdef";
  // Clamp the offset: with pos > s.size() the scan pointer would start
  // beyond the end pointer and `p != e` would never stop inside the buffer.
  if (pos > s.size())
    pos = s.size();
  const char* p = s.data() + pos;
  const char* l = p;  // start of the pending run that needs no escaping
  const char* const e = s.data() + s.size();
  while (p != e) {
    const unsigned char c = *p;
    if (c == '\\') {
      os.write(l, p - l);
      l = ++p;
      os << "\\\\";
    } else if (c == '"') {
      os.write(l, p - l);
      l = ++p;
      os << "\\\"";
    } else if (c < 32) {
      os.write(l, p - l);
      l = ++p;
      switch (c) {
        case '\b': os << "\\b"; break;
        case '\f': os << "\\f"; break;
        case '\n': os << "\\n"; break;
        case '\r': os << "\\r"; break;
        case '\t': os << "\\t"; break;
        default: os << "\\u00" << h[(c & 0xf0) >> 4] << h[c & 0x0f];
      }
    } else if (to_lower && c >= 'A' && c <= 'Z') {
      os.write(l, p - l);
      l = ++p;
      os.put(c + 32);  // ASCII upper -> lower
    } else if (c == 127) {
      os.write(l, p - l);
      l = ++p;
      os << "\\u007f";
    } else {
      ++p;  // ordinary character: keep it in the pending run
    }
  }
  os.write(l, p - l);  // flush whatever is left of the pending run
}

// Writes s (from offset pos, optionally lower-cased) as a double-quoted,
// escaped JSON string.
void write_string(const std::string& s, size_t pos=0, bool to_lower=false) {
  const char quote = '"';
  os_.put(quote);
  escape(os_, s, pos, to_lower);
  os_.put(quote);
}

// Writes a CIF numeric string as a syntactically valid JSON number.
// The caller (write_value) passes only non-empty values that it has
// already classified as numbers.
// Adjustments made, because JSON is stricter than CIF:
//  - a leading '+' is dropped,
//  - a number starting with '.' (also after a sign, e.g. -.5) gets a '0'
//    in front of the dot,
//  - zeros used as left-padding are removed,
//  - a dot not followed by a digit gets a '0' appended after it,
//  - a trailing "(...)" part is omitted.
void write_as_number(const std::string& value) {
  // std::isdigit() is only defined for values representable as unsigned char
  auto is_digit = [](char ch) {
    return std::isdigit(static_cast<unsigned char>(ch)) != 0;
  };
  // if we are here, value is not empty
  size_t pos = 0;
  if (value[0] == '+') {         // in JSON the number cannot start with +
    pos = 1;
  } else if (value[0] == '-') {  // make handling -001 easier
    os_.put('-');
    pos = 1;
  }
  // in JSON numbers cannot start with dot; check after the optional sign,
  // so that -.5 becomes -0.5 and +.5 becomes 0.5
  if (value[pos] == '.')
    os_.put('0');
  // in JSON left-padding with 0s is not allowed
  while (value[pos] == '0' && is_digit(value[pos+1]))
    ++pos;
  // in JSON dot must be followed by digit
  const size_t dotpos = value.find('.');
  if (dotpos != std::string::npos && !is_digit(value[dotpos+1])) {
    os_ << value.substr(pos, dotpos+1-pos) << '0';
    pos = dotpos + 1;
  }
  if (value.back() != ')')
    os_ << value.c_str() + pos;
  else  // stop before the opening parenthesis
    os_ << value.substr(pos, value.find('(', pos) - pos);
}

void write_value(const std::string& value) {
if (value == "?")
os_ << "null";
else if (value == ".")
os_ << cif_dot;
else if (quote_numbers < 2 && is_numb(value) &&
// exception: 012 (but not 0.12) is assumed to be a string
(value[0] != '0' || value[1] == '.' || value[1] == '\0') &&
(quote_numbers == 0 || value.back() != ')'))
write_as_number(value);
else
write_string(as_string(value));
}

// Opens a JSON object for category `cat` (given with its trailing dot) and
// advances *tag_pos so that following tags are written without the category
// part. Does nothing if cat is empty.
void open_cat(const std::string& cat, size_t* tag_pos) {
  if (cat.empty())
    return;
  const size_t name_len = cat.size() - 1;  // category without the trailing dot
  change_indent(+1);
  write_string(cat.substr(0, name_len), bare_tags ? 1 : 0, lowercase_names);
  os_ << ": {" << linesep_;
  *tag_pos += name_len;
}

// Closes the JSON object opened by open_cat(): writes the closing brace,
// restores *tag_pos and clears cat. Does nothing if cat is empty.
void close_cat(std::string& cat, size_t* tag_pos) {
  if (cat.empty())
    return;
  const size_t name_len = cat.size() - 1;
  change_indent(-1);
  os_ << linesep_ << '}';
  *tag_pos -= name_len;
  cat.clear();
}

// Writes a loop column by column: each tag becomes a key whose value is
// the array of that column's values. If all tags share a category (see
// get_loop_category) the columns are wrapped in an object for that category.
void write_loop(const Loop& loop) {
  const size_t width = loop.tags.size();
  std::string cat = get_loop_category(loop);
  size_t tag_pos = bare_tags ? 1 : 0;
  open_cat(cat, &tag_pos);
  for (size_t col = 0; col < width; ++col) {
    if (col > 0)
      os_ << "," << linesep_;
    write_string(loop.tags[col], tag_pos, lowercase_names);
    os_ << ": [";
    // values are stored row by row, so a column is every width-th value
    bool first_in_column = true;
    for (size_t idx = col; idx < loop.values.size(); idx += width) {
      if (!first_in_column)
        os_.put(',');
      first_in_column = false;
      write_value(loop.values[idx]);
    }
    os_.put(']');
  }
  close_cat(cat, &tag_pos);
}


// works for both block and frame
void write_map(const std::string& name, const std::vector<Item>& items) {
write_string(name, 0, lowercase_names);
os_ << ": ";
change_indent(+1);
char first = '{';
bool has_frames = false;
std::string cat;
size_t tag_pos = bare_tags ? 1 : 0;
// When grouping into categories, only consecutive tags are grouped.
std::set<std::string> seen_cats;
for (const Item& item : items) {
switch (item.type) {
case ItemType::Pair:
if (!cat.empty() && !starts_with(item.pair[0], cat))
close_cat(cat, &tag_pos);
os_ << first << linesep_;
if (group_ddl2_categories && cat.empty()) {
cat = get_tag_category(item.pair[0]);
if (seen_cats.insert(cat).second)
open_cat(cat, &tag_pos);
}
write_string(item.pair[0], tag_pos, lowercase_names);
os_ << ": ";
if (values_as_arrays)
os_.put('[');
write_value(item.pair[1]);
if (values_as_arrays)
os_.put(']');
first = ',';
break;
case ItemType::Loop:
if (!item.loop.values.empty()) {
close_cat(cat, &tag_pos);
os_ << first << linesep_;
write_loop(item.loop);
first = ',';
}
break;
case ItemType::Frame:
has_frames = true;
break;
case ItemType::Comment:
break;
case ItemType::Erased:
break;
}
}
if (has_frames) { // usually, we don't have any frames
os_ << first << linesep_ << "\"Frames\": ";
change_indent(+1);
first = '{';
for (const Item& item : items)
if (item.type == ItemType::Frame) {
os_ << first << linesep_;
write_map(item.frame.name, item.frame.items);
first = ',';
}
change_indent(-1);
os_ << linesep_ << '}';
}
close_cat(cat, &tag_pos);
change_indent(-1);
os_ << linesep_ << '}';
// Returns default options adjusted for the COMCIFS CIF-JSON draft.
static JsonWriteOptions comcifs() {
  JsonWriteOptions preset;
  preset.cif_dot = "false";
  preset.quote_numbers = 2;
  preset.values_as_arrays = true;
  preset.as_comcifs = true;
  return preset;
}

// Returns default options adjusted for mmJSON output.
static JsonWriteOptions mmjson() {
  JsonWriteOptions preset;
  preset.quote_numbers = 0;
  preset.lowercase_names = false;
  preset.values_as_arrays = true;
  preset.bare_tags = true;
  preset.with_data_keyword = true;
  preset.group_ddl2_categories = true;
  return preset;
}
};

// Writes the whole document as a single JSON object.
// With `comcifs` set, a "CIF-JSON" wrapper object with a "Metadata" member
// is written first and closed at the end. Each block is written by
// write_map(); blocks are separated by commas. Block names get the "data_"
// prefix when `with_data_keyword` is set.
inline void JsonWriter::write_json(const Document& d) {
os_.put('{');
if (comcifs) {
// the raw string is emitted verbatim, including its line breaks
os_ << R"(
"CIF-JSON": {
"Metadata": {
"cif-version": "2.0",
"schema-name": "CIF-JSON",
"schema-version": "1.0.0",
"schema-uri": "http://www.iucr.org/resources/cif/cif-json.json"
},)";
// blocks are nested one level deeper inside the "CIF-JSON" object
change_indent(+1);
}
for (const Block& block : d.blocks) {
// comma before every block except the first one
if (&block != &d.blocks[0])
os_.put(',');
// start mmJSON with {"data_ so it can be easily recognized
if (&block != &d.blocks[0] || comcifs || !with_data_keyword)
os_ << linesep_;
write_map((with_data_keyword ? "data_" : "") + block.name, block.items);
}
// close the "CIF-JSON" wrapper object opened above
if (comcifs)
os_ << "\n }";
os_ << "\n}\n";
}
GEMMI_DLL void write_json_to_stream(std::ostream& os, const Document& doc,
const JsonWriteOptions& options);

inline void write_mmjson_to_stream(std::ostream& os, const Document& doc) {
cif::JsonWriter writer(os);
writer.set_mmjson();
writer.write_json(doc);
write_json_to_stream(os, doc, JsonWriteOptions::mmjson());
}

} // namespace cif
Expand Down
18 changes: 9 additions & 9 deletions prog/cif2json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

#include <iostream>
#include "gemmi/cifdoc.hpp" // for Document
#include "gemmi/to_json.hpp" // for JsonWriter
#include "gemmi/to_json.hpp" // for write_json_to_stream
#include "gemmi/fstream.hpp" // for Ofstream
#include "gemmi/read_cif.hpp" // for read_cif_gz
#define GEMMI_PROG cif2json
Expand Down Expand Up @@ -62,23 +62,23 @@ void convert(const std::string& input, const std::string& output,
cif::Document doc = gemmi::read_cif_gz(input);
apply_cif_doc_modifications(doc, options);
gemmi::Ofstream os(output, &std::cout);
cif::JsonWriter writer(os.ref());
cif::JsonWriteOptions json_options;
if (options[Comcifs])
writer.set_comcifs();
json_options = cif::JsonWriteOptions::comcifs();
if (options[Mmjson])
writer.set_mmjson();
json_options = cif::JsonWriteOptions::mmjson();
if (options[Bare])
writer.bare_tags = true;
json_options.bare_tags = true;
if (options[Numb]) {
char first_letter = options[Numb].arg[0];
if (first_letter == 'q')
writer.quote_numbers = 2;
json_options.quote_numbers = 2;
else if (first_letter == 'n')
writer.quote_numbers = 0;
json_options.quote_numbers = 0;
}
if (options[CifDot])
writer.cif_dot = options[CifDot].arg;
writer.write_json(doc);
json_options.cif_dot = options[CifDot].arg;
cif::write_json_to_stream(os.ref(), doc, json_options);
}

} // anonymous namespace
Expand Down
6 changes: 2 additions & 4 deletions prog/convert.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright 2017 Global Phasing Ltd.

#include "gemmi/to_cif.hpp"
#include "gemmi/to_json.hpp"
#include "gemmi/to_json.hpp" // for write_json_to_stream
#include "gemmi/polyheur.hpp" // for setup_entities, remove_waters, ...
#include "gemmi/modify.hpp" // for remove_hydrogens, remove_anisou
#include "gemmi/align.hpp" // for assign_label_seq_id
Expand Down Expand Up @@ -465,9 +465,7 @@ void convert(gemmi::Structure& st,
if (output_type == CoorFormat::Mmcif) {
write_cif_to_stream(os.ref(), doc, cif_write_options(options[CifStyle]));
} else /*output_type == CoorFormat::Mmjson*/ {
cif::JsonWriter writer(os.ref());
writer.set_mmjson();
writer.write_json(doc);
cif::write_mmjson_to_stream(os.ref(), doc);
}
} else if (output_type == CoorFormat::Pdb) {
gemmi::PdbWriteOptions opt;
Expand Down
2 changes: 1 addition & 1 deletion prog/json2cif.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Copyright 2017 Global Phasing Ltd.

#include "gemmi/to_cif.hpp" // for JsonWriter
#include "gemmi/to_cif.hpp" // for write_cif_to_stream
#include "gemmi/fstream.hpp" // for Ofstream
#include "gemmi/read_cif.hpp" // for read_cif_gz

Expand Down
Loading

0 comments on commit 8593a02

Please sign in to comment.