perf-data-loader: db load order and fixed utility function mapping for text/string PK data types (#142)

* using PrimaryStringGen for text/string datatype with primary key

* added subcommand to generate the load order of all tables in the database
shaharuk-yb authored Aug 16, 2024
1 parent a6d2723 commit b264719
Showing 4 changed files with 196 additions and 81 deletions.
10 changes: 9 additions & 1 deletion PERF-DATALOADER.md
@@ -18,14 +18,15 @@ cd benchbase
### How to use:
```
$ ./perf-data-loader --help
Usage: ./perf-data-loader --config <config_file> --table-name <table_name> --rows <rows> [--gen-config-only] [--load-only]
Usage: ./perf-data-loader --config <config_file> --table-name <table_name> --rows <rows> [--gen-config-only] [--load-only] [--gen-load-order]
Short forms: -c <config_file> -t <table_name> -r <rows>
Options:
-c, --config Configuration file
-t, --table-name Table name
-r, --rows Number of rows
--gen-config-only Only generate the loader/config file
--load-only Only load data into the database
--gen-load-order Generate table load order in the provided database
-h, --help Display this help message
```
- to only generate the loader file (skipping the actual load). This will generate the yaml file <table-name>_loader.yaml, which can then be used to load the data.
@@ -42,6 +43,13 @@ Options:
```
./perf-data-loader --config <config_file> --table-name <table_name> --rows <rows>
```
- to generate the load order of all the tables in the database. Knowing the load order is useful when the db schema
contains foreign key references. This command prints the load order/levels in which you can populate the tables:
start with the tables at level 0, then level 1, and so on. It also generates a load_order.json file containing the
load order in json format; a sketch of consuming that file follows the command below.
```shell
./perf-data-loader -c <config_file> --gen-load-order
```
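
The emitted load_order.json maps each level to the tables at that level. As a rough illustration, a consumer could walk the file level by level as in the sketch below (assumes Jackson is on the classpath; the class name and file shape are inferred from the loader's JSON output, not part of this repo):

```java
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;

import java.io.File;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

// Hypothetical consumer of load_order.json; not part of this repository.
public class LoadOrderReader {
    public static void main(String[] args) throws Exception {
        Map<Integer, List<String>> levels = new ObjectMapper().readValue(
                new File("load_order.json"),
                new TypeReference<Map<Integer, List<String>>>() {});
        // TreeMap sorts the keys so levels are visited in order: 0, 1, 2, ...
        for (Map.Entry<Integer, List<String>> e : new TreeMap<>(levels).entrySet()) {
            System.out.println("Load level " + e.getKey() + ": " + String.join(", ", e.getValue()));
        }
    }
}
```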

the input yaml file should have the following content
```
37 changes: 34 additions & 3 deletions perf-data-loader
@@ -6,18 +6,20 @@ TABLE_NAME=""
ROWS=""
GENERATE_ONLY=false
LOAD_ONLY=false
GEN_LOAD_ORDER=false
JAR_PATH=""

# Function to display help
function display_help {
echo "Usage: $0 --config <config_file> --table-name <table_name> --rows <rows> [--gen-config-only] [--load-only]"
echo "Usage: $0 --config <config_file> --table-name <table_name> --rows <rows> [--gen-config-only] [--load-only] [--gen-load-order]"
echo "Short forms: -c <config_file> -t <table_name> -r <rows>"
echo "Options:"
echo " -c, --config Configuration file"
echo " -t, --table-name Table name"
echo " -r, --rows Number of rows"
echo " --gen-config-only Only generate the loader/config file"
echo " --load-only Only load data into the database"
echo " --gen-load-order Generate table load order in the provided database"
echo " -h, --help Display this help message"
exit 0
}
@@ -72,6 +74,10 @@ while [[ $# -gt 0 ]]; do
LOAD_ONLY=true
shift # past argument
;;
--gen-load-order)
GEN_LOAD_ORDER=true
shift # past argument
;;
-h|--help)
display_help
;;
@@ -83,8 +89,10 @@ while [[ $# -gt 0 ]]; do
done

# Ensure that --gen-config-only, --load-only, and --gen-load-order are not provided simultaneously
if [ "$GENERATE_ONLY" = true ] && [ "$LOAD_ONLY" = true ]; then
echo "Error: Cannot use --gen-config-only and --load-only simultaneously."
if { [ "$GENERATE_ONLY" = true ] && [ "$LOAD_ONLY" = true ]; } || \
{ [ "$GENERATE_ONLY" = true ] && [ "$GEN_LOAD_ORDER" = true ]; } || \
{ [ "$LOAD_ONLY" = true ] && [ "$GEN_LOAD_ORDER" = true ]; }; then
echo "Error: Cannot use these conflicting parameters simultaneously. --gen-config-only, --load-only, --gen-load-order ."
exit 1
fi

@@ -111,6 +119,29 @@ if [ "$GENERATE_ONLY" = true ]; then
exit 0
fi

if [ "$GEN_LOAD_ORDER" = true ]; then
if [ -z "$CONFIG" ]; then
echo "Error: --config parameter is required with --gen-load-order."
display_help
fi
# Create a temporary file
TEMP_CONFIG=$(mktemp)
# Copy the content of the original CONFIG file to the temporary file
cp "$CONFIG" "$TEMP_CONFIG"

# Append the gen-db-load-order flag to the temporary config file
echo >> "$TEMP_CONFIG"
echo "gen-db-load-order: true" >> "$TEMP_CONFIG"
echo "Generating load order based on the provided config file"
java -jar "$JAR_PATH" -b perf-dataloader -c "$TEMP_CONFIG" -p tableName="dummy" -p rows="1" --load=True

# Clean up: delete the temporary config file
rm -f "$TEMP_CONFIG"

exit 0
fi


# Check if required parameters are provided for the default operation
if [ -z "$CONFIG" ] || [ -z "$TABLE_NAME" ] || [ -z "$ROWS" ]; then
echo "Error: Missing required parameters."
@@ -3,6 +3,7 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import com.oltpbenchmark.api.Loader;
import com.oltpbenchmark.api.LoaderThread;
import com.oltpbenchmark.util.JSONUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yaml.snakeyaml.DumperOptions;
@@ -275,12 +276,6 @@ public static List<ForeignKey> getForeignKeys(String tableName, Connection conn)
return foreignKeyList;
}

public static List<String> getParentTableHierarchy(String tableName, Connection conn) {
List<String> tableHierarchy = new ArrayList<>();
findParentTables(tableName, conn, tableHierarchy);
return tableHierarchy;
}

public static void buildDependencyDAGForTable(Map<String, List<Dependency>> graph, List<ForeignKey> foreignKeyList, Connection conn) {
// Build the adjacency list
for (ForeignKey fk : foreignKeyList) {
@@ -292,83 +287,116 @@ public static void buildDependencyDAGForTable(Map<String, List<Dependency>> graph
}
}

private static void findParentTables(String tableName, Connection conn, List<String> tableHierarchy) {
List<ForeignKey> foreignKeys = getForeignKeys(tableName, conn);

if (foreignKeys.isEmpty()) {
tableHierarchy.add(tableName);
} else {
for (ForeignKey foreignKey : foreignKeys) {
findParentTables(foreignKey.getForeignTableName(), conn, tableHierarchy);
}
tableHierarchy.add(tableName);
}
}

@Override
public List<LoaderThread> createLoaderThreads() throws SQLException {
Connection conn = benchmark.makeConnection();
String tableName = workConf.getXmlConfig().getString("tablename");
int rows = workConf.getXmlConfig().getInt("rows");
boolean genLoadOrderOnly = workConf.getXmlConfig().getBoolean("gen-db-load-order", false);
if (genLoadOrderOnly) {
Set<String> processedTables = new HashSet<>();
Map<String, List<Dependency>> graph = new HashMap<>();
Set<String> visited = new HashSet<>();
StringBuilder loadOrder = new StringBuilder();
Map<Integer, List<String>> depth = new TreeMap<>();
Map<Integer, List<String>> levelAndTables = new LinkedHashMap<>();
buildGraph(conn, processedTables, graph);

List<String> independentTables = new ArrayList<>();
List<String> allDbTables = getAllTables(conn);
for(String table: allDbTables) {
if (!processedTables.contains(table.toLowerCase())) {
independentTables.add(table);
}
}

// check if the table exists in the database
checkIfTableExists(tableName, conn);
// get the table schema
List<Column> tableSchema = getTableSchema(tableName, conn);
if (!graph.isEmpty()) {
String startTable = graph.keySet().iterator().next();
levelAndTables.putAll(getOrderOfImport(startTable, loadOrder, graph, depth, visited));
levelAndTables.get(0).addAll(independentTables);
} else {
levelAndTables.put(0, independentTables);
}

// get primary key details
List<PrimaryKey> primaryKeys = getPrimaryKeys(tableName, conn);
int totalTables = 0;
for (Map.Entry<Integer, List<String>> entry : levelAndTables.entrySet()) {
int level = entry.getKey();
List<String> tablesAtLevel = entry.getValue();
totalTables += tablesAtLevel.size();
System.out.println("Level " + level + ": " + String.join(", ", tablesAtLevel));
}

// get all unique constraints from the indexes
List<String> uniqueConstraintColumns = getUniqueConstrains(tableName, conn);
System.out.println("Total number of tables: " + totalTables);
try {
FileWriter writer = new FileWriter("load_order.json");
writer.write(JSONUtil.format(JSONUtil.toJSONString(levelAndTables)));
writer.close();
} catch (IOException e) {
throw new RuntimeException(e);
}

} else {
String tableName = workConf.getXmlConfig().getString("tablename");
int rows = workConf.getXmlConfig().getInt("rows");

// check if the table exists in the database
checkIfTableExists(tableName, conn);
// get the table schema
List<Column> tableSchema = getTableSchema(tableName, conn);

// get primary key details
List<PrimaryKey> primaryKeys = getPrimaryKeys(tableName, conn);

// get all unique constraints from the indexes
List<String> uniqueConstraintColumns = getUniqueConstrains(tableName, conn);

// get all columns with respective user defined ENUM data type
Map<String, List<Object>> udColumns = getUserDefinedEnumDataTypes(tableName, "public", conn);
// get all columns with respective user defined ENUM data type
Map<String, List<Object>> udColumns = getUserDefinedEnumDataTypes(tableName, "public", conn);

// get all foreign keys of the table
List<ForeignKey> foreignKeys = getForeignKeys(tableName, conn);
// get all foreign keys of the table
List<ForeignKey> foreignKeys = getForeignKeys(tableName, conn);
// System.out.println(foreignKeys);


int limit = Math.min(10000, rows);
List<String> fkColNames = new ArrayList<>();
int limit = Math.min(10000, rows);
List<String> fkColNames = new ArrayList<>();

// if a foreign key's parent table is the same as the current table, don't treat it as a foreign key; treat it as a normal column
List<ForeignKey> fkToRemove = new ArrayList<>();
foreignKeys.forEach(fk -> {
if (fk.getForeignTableName().equalsIgnoreCase(tableName)) {
fkToRemove.add(fk);
} else {
fkColNames.add(fk.getColumnName());
}
});
foreignKeys.removeAll(fkToRemove);
// if a foreign key's parent table is the same as the current table, don't treat it as a foreign key; treat it as a normal column
List<ForeignKey> fkToRemove = new ArrayList<>();
foreignKeys.forEach(fk -> {
if (fk.getForeignTableName().equalsIgnoreCase(tableName)) {
fkToRemove.add(fk);
} else {
fkColNames.add(fk.getColumnName());
}
});
foreignKeys.removeAll(fkToRemove);

// remove all fks from unique constraints
uniqueConstraintColumns.removeAll(fkColNames);
// remove all fks from unique constraints
uniqueConstraintColumns.removeAll(fkColNames);

// remove all fks from primary keys
List<PrimaryKey> pkToRemove = new ArrayList<>();
primaryKeys.forEach(pk -> {
if (fkColNames.contains(pk.getColumnName()))
pkToRemove.add(pk);
});
primaryKeys.removeAll(pkToRemove);
// remove all fks from primary keys
List<PrimaryKey> pkToRemove = new ArrayList<>();
primaryKeys.forEach(pk -> {
if (fkColNames.contains(pk.getColumnName()))
pkToRemove.add(pk);
});
primaryKeys.removeAll(pkToRemove);

if (!foreignKeys.isEmpty()) {
// fetch the distinct values from parent table. This could take some time
getDistinctValuesFromParentTable(conn, foreignKeys, limit);
}
// create mapping of utility function to the columns in the table
Map<String, PropertyMapping> columnToUtilsMapping =
utilsMapping(tableSchema, primaryKeys, foreignKeys, limit, rows, uniqueConstraintColumns, udColumns);
if (!foreignKeys.isEmpty()) {
// fetch the distinct values from parent table. This could take some time
getDistinctValuesFromParentTable(conn, foreignKeys, limit);
}
// create mapping of utility function to the columns in the table
Map<String, PropertyMapping> columnToUtilsMapping =
utilsMapping(tableSchema, primaryKeys, foreignKeys, limit, rows, uniqueConstraintColumns, udColumns);

// generate the mapping object which can be used to create the output yaml file
Root root = generateMappingObject(tableName, rows, columnToUtilsMapping, fkColNames, udColumns);
// generate the mapping object which can be used to create the output yaml file
Root root = generateMappingObject(tableName, rows, columnToUtilsMapping, fkColNames, udColumns);

// create output yaml file
writeToFile(tableName, rows, root);
LOG.info("Generated loader file: {}_loader.yaml", tableName);
}

// create output yaml file
writeToFile(tableName, rows, root);
LOG.info("Generated loader file: {}_loader.yaml", tableName);
return new ArrayList<>();
}
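
Conceptually, the level assignment printed above is a topological ordering of the FK graph grouped by depth: a table with no parents loads at level 0, and every other table loads one level after its deepest parent. A minimal standalone sketch of that grouping, assuming an acyclic FK graph and hypothetical table names (this is not code from the patch):

```java
import java.util.*;

// Standalone illustration of level-wise load ordering; names are hypothetical.
public class FkLevelsSketch {
    // parentsOf: child table -> parent tables it references via foreign keys.
    // Assumes the graph is acyclic (self-references are filtered out upstream).
    static Map<Integer, List<String>> levels(Map<String, Set<String>> parentsOf,
                                             Collection<String> allTables) {
        Map<String, Integer> level = new HashMap<>();
        boolean changed = true;
        while (changed) {                       // relax until levels stabilize
            changed = false;
            for (String t : allTables) {
                int lvl = 0;                    // no parents -> level 0
                for (String p : parentsOf.getOrDefault(t, Set.of())) {
                    lvl = Math.max(lvl, level.getOrDefault(p, 0) + 1);
                }
                if (!Objects.equals(level.get(t), lvl)) {
                    level.put(t, lvl);
                    changed = true;
                }
            }
        }
        Map<Integer, List<String>> byLevel = new TreeMap<>();
        level.forEach((t, l) -> byLevel.computeIfAbsent(l, k -> new ArrayList<>()).add(t));
        return byLevel;
    }

    public static void main(String[] args) {
        Map<String, Set<String>> parents = Map.of(
                "orders", Set.of("customers"),
                "order_items", Set.of("orders", "products"));
        // Prints {0=[customers, products], 1=[orders], 2=[order_items]}
        // (order within a level may vary).
        System.out.println(levels(parents, List.of("customers", "products", "orders", "order_items")));
    }
}
```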

@@ -522,12 +550,9 @@ public void getDistinctValuesFromParentTable(Connection conn, List<ForeignKey> f
"following order/Levels(tables from `Level 0` first, then `Level 1` and so on: ", foreignKey.getForeignTableName(),
foreignKey.getForeignColumnName()));

for (String table : graph.keySet()) {
if (!visited.contains(table)) {
getOrderOfImport(table, loadOrder, graph, depth, visited);
}
}
throw new RuntimeException(loadOrder.toString());
String startTable = graph.keySet().iterator().next();
Map<Integer, List<String>> levelsAndTables = getOrderOfImport(startTable, loadOrder, graph, depth, visited);
throw new RuntimeException(loadOrder.append(generateLoadOrder(levelsAndTables)).toString());
}
foreignKey.setDistinctValues(distinctValues);
} catch (SQLException e) {
@@ -630,9 +655,10 @@ public static class Dependency {
}

// Function to group tables by level for import ordering
public void getOrderOfImport(String startTable, StringBuilder loadOrder,
public Map<Integer, List<String>> getOrderOfImport(String startTable, StringBuilder loadOrder,
Map<String, List<Dependency>> graph , Map<Integer, List<String>> depth,
Set<String> visited) {
Map<Integer, List<String>> levelsAndTables = new LinkedHashMap<>();
dfs(startTable, 0, graph, depth, visited);

// Adjust levels to start from 0
@@ -646,9 +672,20 @@ public void getOrderOfImport(String startTable, StringBuilder loadOrder,
for (Map.Entry<Integer, List<String>> entry : adjustedDepth.entrySet()) {
int level = entry.getKey();
List<String> tablesAtLevel = entry.getValue();
loadOrder.append("\n").append("Level ").append(level).append(": ").append(String.join(", ", tablesAtLevel));
levelsAndTables.put(level, tablesAtLevel);
}

return levelsAndTables;
}

public StringBuilder generateLoadOrder(Map<Integer, List<String>> levelAndTables) {
StringBuilder loadOrder = new StringBuilder();
for (Map.Entry<Integer, List<String>> entry : levelAndTables.entrySet()) {
int level = entry.getKey();
loadOrder.append("\n").append("Level ").append(level).append(": ").append(String.join(", ", levelAndTables.get(level)));
}

return loadOrder;
}
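
For reference, running the same formatting loop over a toy two-level map shows the shape of the printed order (a self-contained demo; the table names are invented):

```java
import java.util.*;

// Mirrors generateLoadOrder's formatting loop on canned input.
public class LoadOrderFormatDemo {
    public static void main(String[] args) {
        Map<Integer, List<String>> byLevel = new LinkedHashMap<>();
        byLevel.put(0, List.of("customers", "products"));
        byLevel.put(1, List.of("orders"));
        StringBuilder out = new StringBuilder();
        for (Map.Entry<Integer, List<String>> e : byLevel.entrySet()) {
            out.append("\n").append("Level ").append(e.getKey())
               .append(": ").append(String.join(", ", e.getValue()));
        }
        System.out.println(out);
        // Level 0: customers, products
        // Level 1: orders
    }
}
```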

// DFS function to populate the levels map
@@ -670,4 +707,43 @@ public void dfs(String table, int level, Map<String, List<Dependency>> graph , M
}
}
}

public void buildGraph(Connection conn, Set<String> processedTables, Map<String, List<Dependency>> graph) {
String query = "SELECT tc.table_name AS child_table, ccu.table_name AS parent_table "
+ "FROM information_schema.table_constraints AS tc "
+ "JOIN information_schema.key_column_usage AS kcu "
+ "ON tc.constraint_name = kcu.constraint_name AND tc.table_schema = kcu.table_schema "
+ "JOIN information_schema.constraint_column_usage AS ccu "
+ "ON ccu.constraint_name = tc.constraint_name AND ccu.table_schema = tc.table_schema "
+ "WHERE tc.constraint_type = 'FOREIGN KEY'";

try (Statement stmt = conn.createStatement();
ResultSet rs = stmt.executeQuery(query)) {
while (rs.next()) {
String childTable = rs.getString("child_table");
String parentTable = rs.getString("parent_table");

processedTables.add(parentTable.toLowerCase());
processedTables.add(childTable.toLowerCase());
// Build the adjacency list: from each table, +1 edges lead to child tables and -1 edges to parent tables (the dfs uses these weights to compute relative levels)
graph.computeIfAbsent(parentTable, k -> new ArrayList<>()).add(new Dependency(childTable, 1));
graph.computeIfAbsent(childTable, k -> new ArrayList<>()).add(new Dependency(parentTable, -1));
}
} catch (SQLException e) {
throw new RuntimeException(e);
}
}
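
The information_schema query above works on PostgreSQL-compatible databases (including YugabyteDB). A driver-portable way to collect the same child/parent FK pairs, sketched here as an alternative (an assumption, not what this patch uses; class and method names are made up), is JDBC's DatabaseMetaData.getImportedKeys:

```java
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.*;

// Hypothetical alternative FK discovery via standard JDBC metadata.
public class FkEdges {
    // Returns child -> parent-table edges for every table in the database.
    public static Map<String, Set<String>> importedKeyEdges(Connection conn) throws SQLException {
        Map<String, Set<String>> parentsOf = new HashMap<>();
        DatabaseMetaData md = conn.getMetaData();
        try (ResultSet tables = md.getTables(null, null, "%", new String[]{"TABLE"})) {
            while (tables.next()) {
                String child = tables.getString("TABLE_NAME");
                try (ResultSet fks = md.getImportedKeys(null, null, child)) {
                    while (fks.next()) {
                        String parent = fks.getString("PKTABLE_NAME");
                        if (!parent.equalsIgnoreCase(child)) { // ignore self-references, as the loader does
                            parentsOf.computeIfAbsent(child, k -> new HashSet<>()).add(parent);
                        }
                    }
                }
            }
        }
        return parentsOf;
    }
}
```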

public static List<String> getAllTables(Connection connection) throws SQLException {
List<String> tables = new ArrayList<>();
DatabaseMetaData metaData = connection.getMetaData();
String[] types = {"TABLE"};
try (ResultSet rs = metaData.getTables(null, null, "%", types)) {
while (rs.next()) {
String tableName = rs.getString("TABLE_NAME");
tables.add(tableName);
}
}
return tables;
}
}
