From 803d606d37b813a3840b9ad99480b3cde86d42c4 Mon Sep 17 00:00:00 2001 From: Thomas <78592830+Thomas-de-Bock@users.noreply.github.com> Date: Sun, 18 Feb 2024 21:24:58 +0100 Subject: [PATCH 1/7] Add license file --- LICENSE.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 LICENSE.md diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..b1fac4f --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Thomas de Bock + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. From c8410f42dba1b64ff9c70c143da611d0439ba1d9 Mon Sep 17 00:00:00 2001 From: Thomas-de-Bock Date: Mon, 19 Feb 2024 00:29:20 +0100 Subject: [PATCH 2/7] Basic error system, along with minor things: removed boost dependance, size_t instead of int for loops etc. --- Makefile | 2 +- src/construct.cpp | 14 ++-- src/construct_debug.cpp | 4 +- src/construct_debug.h | 9 ++- src/construct_flags.cpp | 16 +++-- src/construct_flags.h | 19 ++++-- src/construct_types.h | 18 ++++-- src/deconstruct.cpp | 138 +++++++++++++++++++++++++--------------- src/deconstruct.h | 35 +++++----- src/reconstruct.cpp | 17 +++-- src/reconstruct.h | 13 ++-- 11 files changed, 174 insertions(+), 111 deletions(-) diff --git a/Makefile b/Makefile index f8d850c..9cea6ed 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,3 @@ main: mkdir -p bin/ - g++ src/construct.cpp src/deconstruct.cpp src/construct_debug.cpp src/reconstruct.cpp src/construct_flags.cpp -o bin/construct + g++ src/construct.cpp src/deconstruct.cpp src/construct_debug.cpp src/reconstruct.cpp src/construct_flags.cpp src/construct_error.cpp -o bin/construct diff --git a/src/construct.cpp b/src/construct.cpp index f771c5c..9d087e0 100644 --- a/src/construct.cpp +++ b/src/construct.cpp @@ -1,22 +1,20 @@ #include "deconstruct.h" #include "reconstruct.h" #include "construct_flags.h" -#include -#include +#include +#include int main(int argc, char** argv) { - std::string path; - std::string outpath; - if(handle_flags(argc, argv, &path, &outpath) != 0) { + if(handle_flags(argc, argv) != 0) { std::cout << "Some flag(s) not set" << std::endl; return 0; } - if(path.empty()) { + if(inputfile.empty()) { std::cout << "No input file specified" << std::endl; return 0; } - std::ifstream inpfile(path); + std::ifstream inpfile(inputfile); std::stringstream buffer; buffer << inpfile.rdbuf(); std::vector tokens = parse_construct(buffer.str()); @@ -39,7 +37,7 @@ int main(int argc, char** argv) { linearize_tokens(tokens); std::ofstream outfile; - outfile.open(outpath); + outfile.open(outputfile); outfile << tokens_to_nasm(tokens); outfile.close(); } diff --git a/src/construct_debug.cpp b/src/construct_debug.cpp index a3368cb..694d15c 100644 --- a/src/construct_debug.cpp +++ b/src/construct_debug.cpp @@ -37,7 +37,7 @@ std::string token_to_string(con_token token) { break; case FUNCTION: tokstring += ", function: " + token.tok_function->name + ", arguments: "; - for(int i = 0; i < token.tok_function->arguments.size(); i++) { + for(size_t i = 0; i < token.tok_function->arguments.size(); i++) { if(i != 0) { tokstring += ", "; } @@ -61,7 +61,7 @@ std::string token_to_string(con_token token) { } if(token.tokens.size() > 0) { tokstring += ", tokens: {\n"; - for(int i = 0; i < token.tokens.size(); i++) { + for(size_t i = 0; i < token.tokens.size(); i++) { tokstring += token_to_string(*token.tokens[i]) + "\n"; } tokstring += "}"; diff --git a/src/construct_debug.h b/src/construct_debug.h index d9b605a..61facab 100644 --- a/src/construct_debug.h +++ b/src/construct_debug.h @@ -1,7 +1,12 @@ -#include -#include +#ifndef CON_DEBUG_H +#define CON_DEBUG_H + +#include +#include #include "construct_types.h" #include "reconstruct.h" std::string tokentype_to_string(CON_TOKENTYPE type); std::string token_to_string(con_token token); + +#endif diff --git a/src/construct_flags.cpp b/src/construct_flags.cpp index 741dbc7..f33f117 100644 --- a/src/construct_flags.cpp +++ b/src/construct_flags.cpp @@ -3,6 +3,10 @@ using namespace std; +CON_BITWIDTH bitwidth = BIT64; +std::string inputfile; +std::string outputfile; + int set_bitwidth(char* argv) { if(strcmp(argv, "elf64") == 0) { bitwidth = BIT64; @@ -24,11 +28,11 @@ int set_bitwidth(char* argv) { return -1; } -int handle_flags(int argc, char** argv, string* path, string* outpath) { +int handle_flags(int argc, char** argv) { bool bitwidth_set = false; bool path_set = false; bool outpath_set = false; - for(int i = 1; i < argc; i++) { + for(size_t i = 1; i < argc; i++) { if(strcmp(argv[i], "-f") == 0 && set_bitwidth(argv[i+1]) == 0) { bitwidth_set = true; i++; @@ -37,18 +41,18 @@ int handle_flags(int argc, char** argv, string* path, string* outpath) { if(strcmp(argv[i], "-i") == 0) { path_set = true; i++; - (*path) = argv[i]; + inputfile = argv[i]; continue; } if(strcmp(argv[i], "-o") == 0) { outpath_set = true; i++; - (*outpath) = argv[i]; + outputfile = argv[i]; continue; } - if(path != NULL) { + if(!inputfile.empty()) { path_set = true; - (*path) = argv[i]; + inputfile = argv[i]; } } if(!bitwidth_set) { diff --git a/src/construct_flags.h b/src/construct_flags.h index 9777d64..b4db374 100644 --- a/src/construct_flags.h +++ b/src/construct_flags.h @@ -1,7 +1,16 @@ -#include -#include -#include -#include "reconstruct.h" +#ifndef CON_FLAGS_H +#define CON_FLAGS_H + +#include +#include +#include +#include "construct_types.h" + +extern CON_BITWIDTH bitwidth; +extern std::string inputfile; +extern std::string outputfile; int set_bitwidth(char* argv); -int handle_flags(int argc, char** argv, std::string* path, std::string* outpath); +int handle_flags(int argc, char** argv); + +#endif diff --git a/src/construct_types.h b/src/construct_types.h index 489d9da..807c853 100644 --- a/src/construct_types.h +++ b/src/construct_types.h @@ -1,8 +1,8 @@ #ifndef CON_TYPES_H #define CON_TYPES_H -#include -#include +#include +#include enum CON_BITWIDTH { BIT8, @@ -17,7 +17,8 @@ enum CON_COMPARISON { L, G, LE, - GE + GE, + COMPARISON_ERROR }; enum CON_TOKENTYPE { @@ -28,7 +29,7 @@ enum CON_TOKENTYPE { FUNCTION, CMD, MACRO, - FUNCALL + FUNCALL, }; @@ -86,7 +87,14 @@ struct con_cmd { struct con_funcall { std::string funcname; - std:: vector arguments; + std::vector arguments; +}; + +struct linedata { + std::string* line; + std::vector* line_split; + std::string* filename; + size_t line_num; }; #endif diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index 87143c6..b00182e 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -1,11 +1,21 @@ +#include "construct_error.h" +#include "construct_flags.h" #include "construct_types.h" #include "deconstruct.h" -#include +void string_split(std::string& input, std::vector& output, std::string delimiter, bool token_compress_on) { + std::string token; + std::istringstream tokenStream(input); + while (std::getline(tokenStream, token, delimiter[0])) { + if (!token.empty() || !token_compress_on) { + output.push_back(token); + } + } +} using namespace std; int get_line_indentation(string line) { int indentation = 0; - for(int i = 0; i < line.size(); i++) { + for(size_t i = 0; i < line.size(); i++) { if(line[i] == '\t') { indentation++; continue; @@ -16,20 +26,20 @@ int get_line_indentation(string line) { } // Expects formatted line -CON_TOKENTYPE get_token_type(string line) { - if(line[0] == '!') +CON_TOKENTYPE get_token_type(linedata* linedata) { + if((*linedata->line)[0] == '!') return MACRO; - if(line.substr(0, 3) == "if ") + if(linedata->line->substr(0, 3) == "if ") return IF; - if(line.substr(0, 6) == "while ") + if(linedata->line->substr(0, 6) == "while ") return WHILE; - if(line.substr(0, 9) == "function ") + if(linedata->line->substr(0, 9) == "function ") return FUNCTION; - if(line.substr(0, 8) == "section ") + if(linedata->line->substr(0, 8) == "section ") return SECTION; - if(line.substr(0, 5) == "call " && line.find('(') != string::npos && line.find(')') != string::npos) + if(linedata->line->substr(0, 5) == "call " && linedata->line->find('(') != string::npos && linedata->line->find(')') != string::npos) return FUNCALL; - if(line.find(' ') == string::npos && line[line.size()-1] == ':') + if(linedata->line->find(' ') == string::npos && (*linedata->line)[linedata->line->size()-1] == ':') return TAG; return CMD; } @@ -47,7 +57,7 @@ CON_COMPARISON str_to_comparison(string comp) { return LE; if(comp == "ge") return GE; - //ERROR + return COMPARISON_ERROR; } @@ -72,7 +82,7 @@ vector delinearize_tokens(std::vector tokens) { // If token is while, if or function it is pushed to stack and becomes new parent. // if indentation goes up, new token is pushed to stack, when indentation goes down, // tops of stack are popped off by how much it decreased. - for(int i = 0; i < tokens.size(); i++) { + for(size_t i = 0; i < tokens.size(); i++) { if(parent_stack.top()->indentation - tokens[i]->indentation >= 0) { int indentation_diff = parent_stack.top()->indentation - tokens[i]->indentation+1; for(int j = 0; j < indentation_diff; j++) { @@ -95,48 +105,59 @@ vector delinearize_tokens(std::vector tokens) { return delinearized_tokens; } -con_macro* parse_macro(string line) { +con_macro* parse_macro(linedata* linedata) { + linedata->line_split = NULL; con_macro* tok_macro = new con_macro(); - int spacepos = line.find(' '); - tok_macro->macro = line.substr(1, spacepos-1); - tok_macro->value = line.substr(spacepos+1, line.size()-spacepos-1); + int spacepos = linedata->line->find(' '); + tok_macro->macro = linedata->line->substr(1, spacepos-1); + tok_macro->value = linedata->line->substr(spacepos+1, linedata->line->size()-spacepos-1); return tok_macro; } -con_if* parse_if(string line) { +con_if* parse_if(linedata* linedata) { con_if* tok_if = new con_if(); vector line_split; - boost::split(line_split, line, boost::is_any_of(" ")); + string_split(*linedata->line, line_split, " ", false); + linedata->line_split = &line_split; tok_if->condition.arg1 = line_split[1]; tok_if->condition.op = str_to_comparison(line_split[2]); + if(tok_if->condition.op == COMPARISON_ERROR) { + throw_parse_error(linedata, IF_ERROR, 2); + } tok_if->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); return tok_if; } -con_while* parse_while(string line) { +con_while* parse_while(linedata* linedata) { con_while* tok_while = new con_while(); vector line_split; - boost::split(line_split, line, boost::is_any_of(" ")); + string_split(*linedata->line, line_split, " ", false); + linedata->line_split = &line_split; tok_while->condition.arg1 = line_split[1]; tok_while->condition.op = str_to_comparison(line_split[2]); + if(tok_while->condition.op == COMPARISON_ERROR) { + throw_parse_error(linedata, WHILE_ERROR, 2); + } tok_while->condition.arg2 = line_split[3].substr(0, line_split[3].size()-1); // to remove : return tok_while; } -con_section* parse_section(string line) { +con_section* parse_section(linedata* linedata) { con_section* tok_section = new con_section(); vector line_split; - boost::split(line_split, line, boost::is_any_of(" ")); + string_split(*linedata->line, line_split, " ", false); + linedata->line_split = &line_split; tok_section->name = line_split[1]; return tok_section; } -con_tag* parse_tag(string line) { +con_tag* parse_tag(linedata* linedata) { con_tag* tok_tag = new con_tag(); - tok_tag->name = line.substr(0, line.size()-1); + tok_tag->name = linedata->line->substr(0, linedata->line->size()-1); return tok_tag; } -con_cmd* parse_cmd(string line) { +con_cmd* parse_cmd(linedata* linedata) { con_cmd* tok_cmd = new con_cmd(); vector line_split; - boost::split(line_split, line, boost::is_any_of(" ,")); + string_split(*linedata->line, line_split, " ,", false); + linedata->line_split = &line_split; tok_cmd->command = line_split[0]; if(line_split.size() > 1) tok_cmd->arg1 = line_split[1]; @@ -144,12 +165,13 @@ con_cmd* parse_cmd(string line) { tok_cmd->arg2 = line_split[3]; return tok_cmd; } -con_function* parse_function(string line) { +con_function* parse_function(linedata* linedata) { con_function* tok_function = new con_function(); vector line_split; - boost::split(line_split, line, boost::is_any_of("():,")); + string_split(*linedata->line, line_split, "():,", false); + linedata->line_split = &line_split; tok_function->name = line_split[0].substr(9, line_split[0].size()-9); - for(int i = 1; i < line_split.size()-2; i++) { + for(size_t i = 1; i < line_split.size()-2; i++) { if(line_split[i].empty()) { continue; } @@ -157,12 +179,13 @@ con_function* parse_function(string line) { } return tok_function; } -con_funcall* parse_funcall(string line) { +con_funcall* parse_funcall(linedata* linedata) { con_funcall* tok_funcall = new con_funcall(); vector line_split; - boost::split(line_split, line, boost::is_any_of("(),")); + string_split(*linedata->line, line_split, "(),", false); + linedata->line_split = &line_split; tok_funcall->funcname = line_split[0].substr(5, line_split[0].size()-5); - for(int i = 1; i < line_split.size()-1; i++) { + for(size_t i = 1; i < line_split.size()-1; i++) { if(line_split[i].empty()) { continue; } @@ -172,64 +195,74 @@ con_funcall* parse_funcall(string line) { } // Does not expect formatted line, only lowercase -con_token* parse_line(string line) { +con_token* parse_line(linedata* linedata) { con_token* token = new con_token; //remove multiple spaces from line string f_line = ""; bool caught_space = false; - for(int i = 0; i < line.size(); i++) { - if(line[i] == ' ') { + for(size_t i = 0; i < linedata->line->size(); i++) { + if((*linedata->line)[i] == ' ') { if(!caught_space) { - f_line += line[i]; + f_line += (*linedata->line)[i]; caught_space = true; } } else { - if(line[i] != '\t') { - f_line += line[i]; + if((*linedata->line)[i] != '\t') { + f_line += (*linedata->line)[i]; } caught_space = false; - } + } } - token->tok_type = get_token_type(f_line); + linedata->line = &f_line; + token->tok_type = get_token_type(linedata); switch(token->tok_type) { case MACRO: - token->tok_macro = parse_macro(f_line); + token->tok_macro = parse_macro(linedata); break; case IF: - token->tok_if = parse_if(f_line); + token->tok_if = parse_if(linedata); break; case WHILE: - token->tok_while = parse_while(f_line); + token->tok_while = parse_while(linedata); break; case FUNCTION: - token->tok_function = parse_function(f_line); + token->tok_function = parse_function(linedata); break; case FUNCALL: - token->tok_funcall = parse_funcall(f_line); + token->tok_funcall = parse_funcall(linedata); case SECTION: - token->tok_section = parse_section(f_line); + token->tok_section = parse_section(linedata); break; case TAG: - token->tok_tag = parse_tag(f_line); + token->tok_tag = parse_tag(linedata); break; case CMD: - token->tok_cmd = parse_cmd(f_line); + token->tok_cmd = parse_cmd(linedata); break; } return token; } vector parse_construct(string code) { vector code_split; - boost::split(code_split, code, boost::is_any_of("\n"), boost::token_compress_on); - boost::to_lower(code); + string_split(code, code_split, "\n", true); vector tokens; bool in_data = false; - for(int i = 0; i < code_split.size(); i++) { + linedata* currentlinedata = NULL; + for(size_t i = 0; i < code_split.size(); i++) { + if(currentlinedata != NULL) { + free(currentlinedata); + } // Check if it contains any alphabet chars if(code_split[i].find_first_of("abcdefghijklmnopqrstuvwxyz!") == std::string::npos) { continue; } - con_token* new_token = parse_line(code_split[i]); + currentlinedata = new linedata; + currentlinedata->line = &code_split[i]; + currentlinedata->filename = &inputfile; + currentlinedata->line_num = i+1; + + + con_token* new_token = parse_line(currentlinedata); new_token->indentation = get_line_indentation(code_split[i]); tokens.push_back(new_token); if(new_token->tok_type == SECTION && (new_token->tok_section->name == ".data" || new_token->tok_section->name == ".bss")) { @@ -237,7 +270,6 @@ vector parse_construct(string code) { } else if(new_token->tok_type == SECTION && new_token->tok_section->name == ".text") { in_data = false; } else if(in_data) { - //TODO free original con_x con_cmd* data_cmd = new con_cmd; data_cmd->command = code_split[i]; new_token->tok_type = CMD; diff --git a/src/deconstruct.h b/src/deconstruct.h index be95614..d05cf7a 100644 --- a/src/deconstruct.h +++ b/src/deconstruct.h @@ -1,11 +1,12 @@ +#ifndef CON_DECONSTRUCT_H +#define CON_DECONSTRUCT_H + #include "construct_debug.h" -#include -#include -#include -#include -#include -#include -#include +#include "construct_error.h" +#include +#include +#include +#include int get_line_indentation(std::string line); CON_TOKENTYPE get_token_type(std::string line); @@ -13,13 +14,15 @@ CON_COMPARISON str_to_comparison(std::string comp); std::vector delinearize_tokens(std::vector tokens); -con_macro* parse_macro(std::string line); -con_if* parse_if(std::string line); -con_while* parse_while(std::string line); -con_section* parse_section(std::string line); -con_tag* parse_tag(std::string line); -con_cmd* parse_cmd(std::string line); -con_function* parse_function(std::string line); -con_funcall* parse_funcall(std::string line); -con_token* parse_line(std::string line); +con_macro* parse_macro(linedata* linedata); +con_if* parse_if(linedata* linedata); +con_while* parse_while(linedata* linedata); +con_section* parse_section(linedata* linedata); +con_tag* parse_tag(linedata* linedata); +con_cmd* parse_cmd(linedata* linedata); +con_function* parse_function(linedata* linedata); +con_funcall* parse_funcall(linedata* linedata); +con_token* parse_line(linedata* linedata); std::vector parse_construct(std::string code); + +#endif diff --git a/src/reconstruct.cpp b/src/reconstruct.cpp index d8bfaa5..942f70d 100644 --- a/src/reconstruct.cpp +++ b/src/reconstruct.cpp @@ -4,7 +4,6 @@ using namespace std; int if_amnt = 0; int while_amnt = 0; -CON_BITWIDTH bitwidth = BIT64; string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth) { switch(bitwidth) { @@ -139,7 +138,7 @@ void apply_macro_to_token(con_token& token, vector macros) { return; } // Unoptimal, but more clear imo - for(int i = 0; i < macros.size(); i++) { + for(size_t i = 0; i < macros.size(); i++) { con_macro* crntmacro = ¯os[i]; size_t pos; switch(token.tok_type) { @@ -195,7 +194,7 @@ void apply_macro_to_token(con_token& token, vector macros) { } } void apply_funcalls(std::vector& tokens) { - for(int i = 0; i < tokens.size(); i++) { + for(size_t i = 0; i < tokens.size(); i++) { apply_funcalls(tokens[i]->tokens); if(tokens[i]->tok_type != FUNCALL) { continue; @@ -226,7 +225,7 @@ void apply_funcalls(std::vector& tokens) { void apply_functions(std::vector& tokens) { vector* subtokens = &tokens; - for(int i = 0; i < subtokens->size(); i++) { + for(size_t i = 0; i < subtokens->size(); i++) { if((*subtokens)[i]->tok_type != FUNCTION) { continue; } @@ -260,7 +259,7 @@ void apply_functions(std::vector& tokens) { } } void apply_macros(vector& tokens, vector knownmacros) { - for(int i = 0; i < tokens.size(); i++) { + for(size_t i = 0; i < tokens.size(); i++) { if(tokens[i]->tok_type == MACRO) { // Filter spaces from macro and value pair con_macro* f_macro = new con_macro(); @@ -286,7 +285,7 @@ void apply_macros(vector& tokens, vector knownmacros) { } } void apply_whiles(vector& tokens) { - for(int i = 0; i< tokens.size(); i++) { + for(size_t i = 0; i< tokens.size(); i++) { apply_whiles(tokens[i]->tokens); if(tokens[i]->tok_type != WHILE) { continue; @@ -338,7 +337,7 @@ void apply_whiles(vector& tokens) { } } void apply_ifs(vector& tokens) { - for(int i = 0; i< tokens.size(); i++) { + for(size_t i = 0; i< tokens.size(); i++) { apply_ifs(tokens[i]->tokens); if(tokens[i]->tok_type != IF) { continue; @@ -373,7 +372,7 @@ void apply_ifs(vector& tokens) { } } void linearize_tokens(vector& tokens) { - for(int i = 0; i < tokens.size(); i++) { + for(size_t i = 0; i < tokens.size(); i++) { if(tokens[i]->tok_type != IF && tokens[i]->tok_type != WHILE && tokens[i]->tok_type != FUNCTION) { continue; } @@ -386,7 +385,7 @@ void linearize_tokens(vector& tokens) { std::string tokens_to_nasm(std::vector& tokens) { string output = ""; - for(int i = 0; i < tokens.size(); i++) { + for(size_t i = 0; i < tokens.size(); i++) { if(tokens[i]->tok_type == IF || tokens[i]->tok_type == WHILE || tokens[i]->tok_type == FUNCTION || tokens[i]->tok_type == MACRO || tokens[i]->tok_type == FUNCALL) { continue; } diff --git a/src/reconstruct.h b/src/reconstruct.h index 3147859..9f6986c 100644 --- a/src/reconstruct.h +++ b/src/reconstruct.h @@ -1,12 +1,15 @@ -#include -#include -#include +#ifndef CON_RECONSTRUCT_H +#define CON_RECONSTRUCT_H + +#include +#include +#include #include "construct_types.h" +#include "construct_flags.h" // Used for naming tags extern int if_amnt; extern int while_amnt; -extern CON_BITWIDTH bitwidth; std::string reg_to_str(uint8_t call_num, CON_BITWIDTH bitwidth); std::string comparison_to_string(CON_COMPARISON condition); @@ -27,3 +30,5 @@ void apply_ifs(std::vector& tokens); void apply_macros(std::vector& tokens, std::vector macros); std::string tokens_to_nasm(std::vector& tokens); + +#endif From 43ecc2c3bdb402041411648805f87e7990c5940b Mon Sep 17 00:00:00 2001 From: Thomas-de-Bock Date: Mon, 19 Feb 2024 00:30:57 +0100 Subject: [PATCH 3/7] Basic error system, along with minor things: removed boost dependance, size_t instead of int for loops etc. --- src/construct_error.cpp | 20 ++++++++++++++++++++ src/construct_error.h | 16 ++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 src/construct_error.cpp create mode 100644 src/construct_error.h diff --git a/src/construct_error.cpp b/src/construct_error.cpp new file mode 100644 index 0000000..06aacef --- /dev/null +++ b/src/construct_error.cpp @@ -0,0 +1,20 @@ +#include "construct_error.h" +#include "construct_flags.h" +#include + +const char* parse_error_strings[2] = {"Failed to parse if-statement", "Failed to parse while-loop"}; + +void throw_parse_error(linedata* linedata, PARSE_ERROR error_type, size_t tok_index) { + std::cerr << inputfile << ": " << parse_error_strings[error_type] << "\n" + << linedata->line_num << " | "; + size_t space_count = 3 + std::to_string(linedata->line_num).size(); + for(size_t i = 0; i < linedata->line_split->size(); i++) { + std::cerr << (*linedata->line_split)[i] << " "; + if(i >= tok_index) { + continue; + } + space_count += (*linedata->line_split)[i].size() + 1; + } + std::cerr << "\n" << std::string(space_count, ' ') << "^\n"; + exit(EXIT_FAILURE); +} diff --git a/src/construct_error.h b/src/construct_error.h new file mode 100644 index 0000000..7f6552b --- /dev/null +++ b/src/construct_error.h @@ -0,0 +1,16 @@ +#ifndef CON_ERROR_H +#define CON_ERROR_H + +#include "construct_types.h" +#include "construct_flags.h" +#include + +enum PARSE_ERROR { + IF_ERROR, + WHILE_ERROR +}; + +void throw_parse_error(linedata* linedata, PARSE_ERROR error_type, size_t tok_index); + + +#endif From 747695b5232482ca83a362e42631bb889bcca7f6 Mon Sep 17 00:00:00 2001 From: Thomas-de-Bock Date: Sun, 3 Mar 2024 13:12:49 +0100 Subject: [PATCH 4/7] Fixed double free and added help message, found by: BlueFalconHD --- src/construct.cpp | 1 - src/construct_flags.cpp | 28 +++++++++++++++++++++++++--- src/deconstruct.cpp | 1 + 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/construct.cpp b/src/construct.cpp index 9d087e0..d1880ad 100644 --- a/src/construct.cpp +++ b/src/construct.cpp @@ -18,7 +18,6 @@ int main(int argc, char** argv) { std::stringstream buffer; buffer << inpfile.rdbuf(); std::vector tokens = parse_construct(buffer.str()); - // Make _start global con_token* glob_tok = new con_token(); glob_tok->tok_type = CMD; diff --git a/src/construct_flags.cpp b/src/construct_flags.cpp index f33f117..94492da 100644 --- a/src/construct_flags.cpp +++ b/src/construct_flags.cpp @@ -32,6 +32,7 @@ int handle_flags(int argc, char** argv) { bool bitwidth_set = false; bool path_set = false; bool outpath_set = false; + bool help_set = false; for(size_t i = 1; i < argc; i++) { if(strcmp(argv[i], "-f") == 0 && set_bitwidth(argv[i+1]) == 0) { bitwidth_set = true; @@ -50,10 +51,31 @@ int handle_flags(int argc, char** argv) { outputfile = argv[i]; continue; } - if(!inputfile.empty()) { - path_set = true; - inputfile = argv[i]; + if(strcmp(argv[i], "-h") == 0) { + help_set = true; + continue; } + + path_set = true; + inputfile = argv[i]; + } + if(!bitwidth_set && !path_set && !outpath_set) { + help_set = true; + } + if(help_set) { + cout << + "Construct (version 1.1.0)\n" + "An abstraction over x86 assembly providing useful shortcuts and syntax.\n\n" + + "Usage:\n" + "-i Input file. Should have valid construct syntax.\n" + "-f Output format. Can be either elf64, elf32, elf16, or elf8.\n" + "-o Output file. Specifies where to put the resulting NASM assembly file.\n\n" + + "2024 Thomas de Bock\n" + "MIT LICENSE\n" + "https://github.com/Thomas-de-Bock/construct/\n"; + return -1; } if(!bitwidth_set) { cout << "flag -f (format) not set" << endl; diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index b00182e..8d1d95e 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -251,6 +251,7 @@ vector parse_construct(string code) { for(size_t i = 0; i < code_split.size(); i++) { if(currentlinedata != NULL) { free(currentlinedata); + currentlinedata = NULL; } // Check if it contains any alphabet chars if(code_split[i].find_first_of("abcdefghijklmnopqrstuvwxyz!") == std::string::npos) { From c0ca5c367fa53090be6747d07b60622b9d29e5b9 Mon Sep 17 00:00:00 2001 From: Thomas-de-Bock Date: Sun, 3 Mar 2024 13:13:24 +0100 Subject: [PATCH 5/7] Fixed double free and added help message, found by: BlueFalconHD --- src/construct.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/construct.cpp b/src/construct.cpp index d1880ad..9d087e0 100644 --- a/src/construct.cpp +++ b/src/construct.cpp @@ -18,6 +18,7 @@ int main(int argc, char** argv) { std::stringstream buffer; buffer << inpfile.rdbuf(); std::vector tokens = parse_construct(buffer.str()); + // Make _start global con_token* glob_tok = new con_token(); glob_tok->tok_type = CMD; From 3d908997a75aeb58c66e7920e2049da60e1940b1 Mon Sep 17 00:00:00 2001 From: Thomas <78592830+Thomas-de-Bock@users.noreply.github.com> Date: Sun, 12 May 2024 11:32:42 +0200 Subject: [PATCH 6/7] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index a3d1036..03d5391 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,7 @@ Using the naming of the conditional jump instructions, construct supports the fo - `g: greater` - `le: less or equal` - `ge: greater or equal` + Neither side of the comparison can contains whitespaces. # Use From 521fd7127924acbdeaf379fa9d57a66be3f95c6b Mon Sep 17 00:00:00 2001 From: Thomas-de-Bock Date: Mon, 24 Jun 2024 17:30:06 +0200 Subject: [PATCH 7/7] Fixed string split function and adjusted parsing functions accordingly --- src/deconstruct.cpp | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/deconstruct.cpp b/src/deconstruct.cpp index 8d1d95e..fa52ed6 100644 --- a/src/deconstruct.cpp +++ b/src/deconstruct.cpp @@ -5,10 +5,21 @@ void string_split(std::string& input, std::vector& output, std::string delimiter, bool token_compress_on) { std::string token; - std::istringstream tokenStream(input); - while (std::getline(tokenStream, token, delimiter[0])) { - if (!token.empty() || !token_compress_on) { - output.push_back(token); + size_t start = 0, end = 0; + while (start < input.length()) { + end = input.find_first_of(delimiter, start); + if (end == std::string::npos) { + token = input.substr(start); + if (!token.empty() || !token_compress_on) { + output.push_back(token); + } + break; + } else { + token = input.substr(start, end - start); + if (!token.empty() || !token_compress_on) { + output.push_back(token); + } + start = end + 1; } } } @@ -171,7 +182,7 @@ con_function* parse_function(linedata* linedata) { string_split(*linedata->line, line_split, "():,", false); linedata->line_split = &line_split; tok_function->name = line_split[0].substr(9, line_split[0].size()-9); - for(size_t i = 1; i < line_split.size()-2; i++) { + for(size_t i = 1; i < line_split.size()-1; i++) { if(line_split[i].empty()) { continue; } @@ -185,7 +196,7 @@ con_funcall* parse_funcall(linedata* linedata) { string_split(*linedata->line, line_split, "(),", false); linedata->line_split = &line_split; tok_funcall->funcname = line_split[0].substr(5, line_split[0].size()-5); - for(size_t i = 1; i < line_split.size()-1; i++) { + for(size_t i = 1; i < line_split.size(); i++) { if(line_split[i].empty()) { continue; }