diff options
-rw-r--r-- | bench/bench_window.cpp | 2 | ||||
-rw-r--r-- | include/lib94/lib94.hpp | 4 | ||||
-rw-r--r-- | lib94/warrior.cpp | 1216 | ||||
-rw-r--r-- | readme.txt | 38 | ||||
-rw-r--r-- | tabulator-mpi/main.cpp | 2 | ||||
-rw-r--r-- | warriors/epson.red | 6 |
6 files changed, 573 insertions, 695 deletions
diff --git a/bench/bench_window.cpp b/bench/bench_window.cpp index cb9f252..70cbbbf 100644 --- a/bench/bench_window.cpp +++ b/bench/bench_window.cpp @@ -254,7 +254,7 @@ void bench_window::on_add_warrior_dialog_response(int response_id, Gtk::FileChoo on_click_new_round(); } catch (const lib94::compiler_exception &ex) { - Gtk::MessageDialog *md = new Gtk::MessageDialog(std::string("failed to compile: ") + ex.message + " on line " + std::to_string(ex.line_number)); + Gtk::MessageDialog *md = new Gtk::MessageDialog(std::string("failed to compile: ") + ex.message + " on line " + std::to_string(ex.source_line_number)); md->set_transient_for(*this); md->set_modal(); md->signal_response().connect([md](int) {delete md;}); diff --git a/include/lib94/lib94.hpp b/include/lib94/lib94.hpp index 18f90b4..b3898d2 100644 --- a/include/lib94/lib94.hpp +++ b/include/lib94/lib94.hpp @@ -60,13 +60,11 @@ namespace lib94 { std::string instruction_to_string(const instruction &instr); struct compiler_exception : public std::exception { - unsigned line_number; + unsigned source_line_number; std::string message; }; warrior *compile_warrior(std::string source); - bool save_warrior(const warrior &w, const std::filesystem::path &to); - std::optional<warrior *> load_warrior(const std::filesystem::path &from); void clear_core(const instruction &background); void clear_core_random(); diff --git a/lib94/warrior.cpp b/lib94/warrior.cpp index 48a5b57..7c4d67e 100644 --- a/lib94/warrior.cpp +++ b/lib94/warrior.cpp @@ -7,6 +7,26 @@ #include <memory> #include <map> +//warrior compilation takes place in four stages: +// stage 1: preprocessing +// in this stage, comments and blank lines are extracted, +// and inline macros (equ's) are found and are processed. +// special comments are also found and processed (although +// assertion comments aren't actually checked until stage 3). 
+// stage 2: parsing +// in this step, the cleaned lines from stage 1 are parsed +// into opcodes, modifiers, addressing modes, and field expressions. +// the field expressions are a tree that can have labels and such +// in them. the expressions aren't evaluated until stage 4. +// labels are also found and stored at this stage. +// stage 3: assertion checking +// now that we have the values of the labels, we can check all +// of the assertions that were found in stage 1. note that all +// assertions run as though they were on the first line for label +// purposes, although this does not affect differences of labels +// stage 4: field expression evaluation +// now the field expressions are evaluated, using the label information. + namespace lib94 { static const std::string opcode_strings[] = { @@ -27,893 +47,715 @@ namespace lib94 { mode_chars[instr.bmode] + std::to_string(instr.bnumber); } - static number_t real_mod(number_t input) { - return (input % LIB94_CORE_SIZE + LIB94_CORE_SIZE) % LIB94_CORE_SIZE; + [[noreturn]] static void throw_compiler_exception(unsigned source_line_number, std::string message) { + compiler_exception ex; + ex.source_line_number = source_line_number; + ex.message = message; + throw ex; } - typedef std::map<std::string, number_t> label_set; - typedef std::map<std::string, number_t> inline_macro_set; + typedef long intermediate_t; + + typedef std::map<std::string, number_t> label_offset_set; + + //this abstract class represents expression fields extracted in stage 2 and evaluated in stage 4. 
+ class expr { + public: + unsigned source_line_number; + number_t offset; + virtual intermediate_t evaluate(const label_offset_set &label_offsets) const = 0; + }; + + //this abstract class represents assertion fields extracted in stage 1 and evaluated in stage 3 + class assertion { + public: + unsigned source_line_number; + virtual bool check(const label_offset_set &label_offsets) const = 0; + }; - struct number_expr_exception : public std::exception { - std::string message; + struct string_with_line_number { + std::string string; + unsigned source_line_number; }; - struct number_expr { - virtual number_t to_number(number_t our_offset, const inline_macro_set &inline_macros, const label_set &labels) = 0; + //internal state used by stage 1. also records the name and author outputs. + struct preprocessor_state { + std::optional<std::string> name; + std::optional<std::string> author; + std::map<std::string, std::string> macros; + std::vector<std::unique_ptr<assertion>> assertions; + unsigned current_source_line_number = 1; }; - struct identifier_number_expr : public number_expr { - std::string identifier; + static std::string remove_spaces(std::string from) { + size_t first_nonspace_pos = from.find_first_not_of(' '); + if (first_nonspace_pos == std::string::npos) + return ""; + size_t last_nonspace_pos = from.find_last_not_of(' '); + return from.substr(first_nonspace_pos, last_nonspace_pos + 1 - first_nonspace_pos); + } - virtual number_t to_number(number_t our_offset, const inline_macro_set &inline_macros, const label_set &labels) { - auto result = inline_macros.find(identifier); - if (result != inline_macros.end()) - return result->second; + static std::string to_lower_case(std::string from) { + std::string new_string(from); + for (char &ch : new_string) + ch = tolower(ch); + return new_string; + } - result = labels.find(identifier); - if (result != labels.end()) - return real_mod(result->second - our_offset); + //must be non-empty, first character must be letter 
or underscore, + //every other character must be letter, number, or underscore + static bool is_valid_identifier(std::string candidate) { + if (candidate.size() == 0 || isdigit(candidate[0])) + return false; + for (const char &ch : candidate) + if (!isalnum(ch) && ch != '_') + return false; + return true; + } - number_expr_exception ex; - ex.message = "unknown label or inline macro"; - throw ex; + class binop_expr : public expr { + public: + std::unique_ptr<expr> left_expression; + std::unique_ptr<expr> right_expression; + std::function<intermediate_t (intermediate_t, intermediate_t)> operation; + bool division_or_mod; + + intermediate_t evaluate(const label_offset_set &label_offsets) const override { + intermediate_t left = left_expression->evaluate(label_offsets); + intermediate_t right = right_expression->evaluate(label_offsets); + if (division_or_mod && right == 0) + throw_compiler_exception(source_line_number, "division or modulo by zero"); + return operation(left, right); } }; - struct number_number_expr : public number_expr { - number_t value; + class unop_expr : public expr { + public: + std::unique_ptr<expr> child_expression; + std::function<intermediate_t (intermediate_t)> operation; - virtual number_t to_number(number_t, const inline_macro_set &, const label_set &) { - return value; + intermediate_t evaluate(const label_offset_set &label_offsets) const override { + intermediate_t child = child_expression->evaluate(label_offsets); + return operation(child); } }; - struct op_number_expr : public number_expr { - std::unique_ptr<number_expr> left; - std::unique_ptr<number_expr> right; - std::function<number_t (number_t, number_t)> op; + class label_expr : public expr { + public: + std::string the_label; - virtual number_t to_number(number_t our_offset, const inline_macro_set &inline_macros, const label_set &labels) { - number_t left_result = left->to_number(our_offset, inline_macros, labels); - number_t right_result = right->to_number(our_offset, 
inline_macros, labels); - return op(left_result, right_result); + intermediate_t evaluate(const label_offset_set &label_offsets) const override { + auto result = label_offsets.find(the_label); + if (result == label_offsets.end()) + throw_compiler_exception(source_line_number, "unknown label"); + return (intermediate_t)result->second - (intermediate_t)offset; } }; - struct negative_number_expr : public number_expr { - std::unique_ptr<number_expr> inner; + class literal_expr : public expr { + public: + intermediate_t value; - virtual number_t to_number(number_t our_offset, const inline_macro_set &inline_macros, const label_set &labels) { - return LIB94_CORE_SIZE - inner->to_number(our_offset, inline_macros, labels); + intermediate_t evaluate(const label_offset_set &) const override { + return value; } }; - static bool valid_identifier(std::string candidate) { - if (candidate == "") - return false; + static std::unique_ptr<expr> parse_expression(number_t offset, std::string from, unsigned source_line_number); - if (!isalpha(candidate[0]) && candidate[0] != '_') - return false; + static const std::string plus_minus_scan_left_special = "+-*/%("; - for (char ch : candidate) - if (!isalnum(ch) && ch != '_') - return false; + static const std::map<char, std::function<intermediate_t (intermediate_t, intermediate_t)>> binary_operator_conversion = { + {'+', [](intermediate_t a, intermediate_t b) {return a + b;}}, + {'-', [](intermediate_t a, intermediate_t b) {return a - b;}}, + {'*', [](intermediate_t a, intermediate_t b) {return a * b;}}, + {'/', [](intermediate_t a, intermediate_t b) {return a / b;}}, + {'%', [](intermediate_t a, intermediate_t b) {return a % b;}} + }; - return true; - } + //searched right to left outside parentheses for any character in connectives. + //on the first one found, returns a new expression split there. + //if none is found, returns an empty unique_ptr. + //there is some special handling on + and - to make sure they aren't unary operators. 
+ static std::unique_ptr<expr> maybe_parse_binop_expression(number_t offset, std::string from, const char *connectives, unsigned source_line_number) { - static size_t find_respecting_parentheses(std::string part, std::string candidates) { - size_t layers = 0; + unsigned parenthesis_layers = 0; - for (int i = part.size() - 1; i >= 0; --i) - if (layers == 0 && candidates.find(part[i]) != std::string::npos) { + for (int i = from.size() - 1; i >= 0; --i) { - if (part[i] == '-' || part[i] == '+') { - for (int j = i - 1; j >= 0; --j) { - if (isalnum(part[j]) || part[j] == '_' || part[j] == ')') - return i; - if (part[j] != ' ') - break; - } - continue; - } + if (from[i] == ')') + ++parenthesis_layers; - return i; - } + else if (from[i] == '(') + --parenthesis_layers; - else if (part[i] == ')') - ++layers; - else if (part[i] == '(') - --layers; + else if (parenthesis_layers == 0) + for (const char *ch = connectives; *ch; ++ch) + if (from[i] == *ch) { - return std::string::npos; - } + if (*ch == '+' || *ch == '-') { + bool okay = true; + + //scan left - if we hit a binary connective, an open parenthesis, or the start of the string, + //then this is probably supposed to be a unary operator, not a binary one. + //if we hit something else, then this is probably indeed binary. + for (int j = i - 1; ; --j) + if (j < 0 || plus_minus_scan_left_special.find(from[j]) != std::string::npos) { + okay = false; + break; + } + else if (from[j] != ' ') + break; + + if (!okay) + continue; + } + + //this is our connective! 
+ + auto expression = std::make_unique<binop_expr>(); + expression->left_expression = parse_expression(offset, from.substr(0, i), source_line_number); + expression->right_expression = parse_expression(offset, from.substr(i + 1), source_line_number); + expression->operation = binary_operator_conversion.find(*ch)->second; + expression->division_or_mod = *ch == '/' || *ch == '%'; + expression->source_line_number = source_line_number; + expression->offset = offset; + return expression; - static std::unique_ptr<number_expr> to_number_expr(std::string part); - - static std::unique_ptr<number_expr> make_op_expr(std::string part, size_t split) { - std::string left = part.substr(0, split); - std::string right = part.substr(split + 1); - - auto left_expr = to_number_expr(left); - auto right_expr = to_number_expr(right); - - auto expr = std::make_unique<op_number_expr>(); - expr->left = std::move(left_expr); - expr->right = std::move(right_expr); - - switch (part[split]) { - - case '+': - expr->op = [](number_t a, number_t b) { - return real_mod(a + b); - }; - break; - - case '-': - expr->op = [](number_t a, number_t b) { - return real_mod(a - b); - }; - break; - - case '*': - expr->op = [](number_t a, number_t b) { - return real_mod(a * b); - }; - break; - - case '/': - expr->op = [](number_t a, number_t b) { - if (b == 0) { - number_expr_exception ex; - ex.message = "division by zero"; - throw ex; - } - return a / b; - }; - break; - - case '%': - expr->op = [](number_t a, number_t b) { - if (b == 0) { - number_expr_exception ex; - ex.message = "modulo by zero"; - throw ex; } - return a % b; - }; - break; } - return expr; + return {}; } - static std::string trim_spaces(std::string str) { - size_t start = str.find_first_not_of(' '); - size_t end = str.find_last_not_of(' ') + 1; - if (start == std::string::npos) - return ""; - return str.substr(start, end - start); - } + //parses an expression in stage 1 or 2 to be evaluated in stage 3 or 4. 
+ static std::unique_ptr<expr> parse_expression(number_t offset, std::string from, unsigned source_line_number) { - static std::unique_ptr<number_expr> to_number_expr(std::string part) { - part = trim_spaces(part); - if (part == "") { - number_expr_exception ex; - ex.message = "empty expression"; - throw ex; - } + auto binop_expression = maybe_parse_binop_expression(offset, from, "+-", source_line_number); + if (binop_expression) + return binop_expression; - size_t split = find_respecting_parentheses(part, "+-"); - if (split == std::string::npos) - split = find_respecting_parentheses(part, "*/%"); - if (split != std::string::npos) - return make_op_expr(part, split); + binop_expression = maybe_parse_binop_expression(offset, from, "*/%", source_line_number); + if (binop_expression) + return binop_expression; - if (part[0] == '(' && part[part.size() - 1] == ')') - return to_number_expr(part.substr(1, part.size() - 2)); + from = remove_spaces(from); - if (part[0] == '+') - return to_number_expr(part.substr(1)); + if (from.starts_with('(') && from.ends_with(')')) + return parse_expression(offset, from.substr(1, from.size() - 2), source_line_number); + + if (from.starts_with('+')) + return parse_expression(offset, from.substr(1), source_line_number); + + if (from.starts_with('-')) { + auto unop_expression = std::make_unique<unop_expr>(); + unop_expression->child_expression = parse_expression(offset, from.substr(1), source_line_number); + unop_expression->operation = [](intermediate_t x) {return -x;}; + unop_expression->source_line_number = source_line_number; + unop_expression->offset = offset; + return unop_expression; + } - if (part[0] == '-') { - std::unique_ptr<number_expr> inner = to_number_expr(part.substr(1)); - std::unique_ptr<negative_number_expr> expr = std::make_unique<negative_number_expr>(); - expr->inner = std::move(inner); - return expr; + if (is_valid_identifier(from)) { + auto label_expression = std::make_unique<label_expr>(); + 
label_expression->the_label = from; + label_expression->source_line_number = source_line_number; + label_expression->offset = offset; + return label_expression; } - size_t count; - number_t number = 0; + size_t value_length = 0; + unsigned value = 0; try { - number = real_mod(std::stoul(part, &count)); + value = std::stoul(from, &value_length); } - catch (const std::exception &e) { - count = 0; + catch (std::invalid_argument &ex) {} + catch (std::out_of_range &ex) {} + + if (value_length == from.size() && value_length) { + auto literal_expression = std::make_unique<literal_expr>(); + literal_expression->value = value; + literal_expression->source_line_number = source_line_number; + literal_expression->offset = offset; + return literal_expression; } - if (count == part.size()) { - std::unique_ptr<number_number_expr> expr = std::make_unique<number_number_expr>(); - expr->value = number; - return expr; - } - - if (valid_identifier(part)) { - std::unique_ptr<identifier_number_expr> expr = std::make_unique<identifier_number_expr>(); - expr->identifier = part; - return expr; - } + throw_compiler_exception(source_line_number, "unknown expression form"); - number_expr_exception ex; - ex.message = "unknown expression form"; - throw ex; } - //using unqiue_ptr to refer to this in case i add more types in the future - struct assert_expr { - std::unique_ptr<number_expr> left; - std::unique_ptr<number_expr> right; - std::function<bool (number_t, number_t)> comparison; + class comparison_assertion : public assertion { + public: + std::unique_ptr<expr> left_expression; + std::unique_ptr<expr> right_expression; + std::function<bool (intermediate_t, intermediate_t)> f; - bool is_true(number_t our_offset, const inline_macro_set &inline_macros, const label_set &labels) { - number_t left_result = left->to_number(our_offset, inline_macros, labels); - number_t right_result = right->to_number(our_offset, inline_macros, labels); - return comparison(left_result, right_result); + bool 
check(const label_offset_set &label_offsets) const override { + intermediate_t left = left_expression->evaluate(label_offsets); + intermediate_t right = right_expression->evaluate(label_offsets); + return f(left, right); } }; - struct assert_expr_exception : public std::exception { - std::string message; + static const std::map<std::string, std::function<bool (intermediate_t, intermediate_t)>> comparison_conversion = { + {"==", [](intermediate_t l, intermediate_t r) {return l == r;}}, + {">=", [](intermediate_t l, intermediate_t r) {return l >= r;}}, + {"<=", [](intermediate_t l, intermediate_t r) {return l <= r;}}, + {"!=", [](intermediate_t l, intermediate_t r) {return l != r;}}, + {">" , [](intermediate_t l, intermediate_t r) {return l > r;}}, + {"<", [](intermediate_t l, intermediate_t r) {return l < r;}} }; - static std::optional<std::unique_ptr<assert_expr>> try_make_assert_expr(std::string part, std::string sep, std::function<bool (number_t, number_t)> comparison) { - size_t pos = part.find(sep); - if (pos == std::string::npos) - return {}; + //parses an assertion in stage 1 to be evaluated in stage 3. 
+ static std::unique_ptr<assertion> parse_assertion(std::string from, unsigned source_line_number) { + + for (const auto &pair : comparison_conversion) { + size_t pos = from.find(pair.first); + if (pos != std::string::npos) { + + std::string left = from.substr(0, pos); + std::string right = from.substr(pos + pair.first.size()); - std::unique_ptr<number_expr> left, right; + auto a = std::make_unique<comparison_assertion>(); + a->left_expression = parse_expression(0, left, source_line_number); + a->right_expression = parse_expression(0, right, source_line_number); + a->f = pair.second; + a->source_line_number = source_line_number; + return a; - left = to_number_expr(part.substr(0, pos)); - right = to_number_expr(part.substr(pos + sep.size())); + } + } - auto expr = std::make_unique<assert_expr>(); - expr->left = std::move(left); - expr->right = std::move(right); - expr->comparison = comparison; + throw_compiler_exception(source_line_number, "unknown assertion expression form"); - return expr; } - static std::unique_ptr<assert_expr> to_assert_expr(std::string part) { - try { - auto result = try_make_assert_expr(part, "==", [](number_t a, number_t b) {return a == b;}); - if (result) - return std::move(result.value()); + //this is the driver for stage 1. if stop_at is empty, it processes all of the remaining lines. if stop_at contains a string, + //it processes lines until it hits a line that looks like that after processing, and returns with the state pointing to that line. + //this function also stores information found from special comments, and finds equs. the processing done by this function is roughly: + // 1. find and replace previous equs in this line. + // 2. remove (and process special) comments. + // 3. if this line is blank, go to the next one. + // 4. if this line is an equ, store it and then go to the next one. + // 5. if this line is a for, recurse starting at the next line with stop_at set to rof, + // then go to the line after the rof. + // 6. 
finally, if we reach this step, store the processed line in output. + static void preprocess(const std::vector<std::string> &source_lines, preprocessor_state &state, std::vector<string_with_line_number> &output, std::optional<std::string> stop_at = {}) { + --state.current_source_line_number; + while (true) { + ++state.current_source_line_number; + + if (state.current_source_line_number == source_lines.size() + 1) { + if (stop_at) + throw_compiler_exception(state.current_source_line_number, "end of source where " + *stop_at + " expected"); + return; + } - result = try_make_assert_expr(part, "<=", [](number_t a, number_t b) {return a <= b;}); - if (result) - return std::move(result.value()); + std::string line = source_lines[state.current_source_line_number - 1]; - result = try_make_assert_expr(part, ">=", [](number_t a, number_t b) {return a >= b;}); - if (result) - return std::move(result.value()); + //replace macros: - result = try_make_assert_expr(part, "!=", [](number_t a, number_t b) {return a != b;}); - if (result) - return std::move(result.value()); + for (const auto &macro_def : state.macros) { + size_t from = 0; + while (true) { + size_t pos = line.find(macro_def.first, from); - result = try_make_assert_expr(part, "<", [](number_t a, number_t b) {return a < b;}); - if (result) - return std::move(result.value()); + if (pos == std::string::npos) + break; - result = try_make_assert_expr(part, ">", [](number_t a, number_t b) {return a > b;}); - if (result) - return std::move(result.value()); - } - catch (const number_expr_exception &iex) { - assert_expr_exception ex; - ex.message = iex.message; - throw ex; - } + line.replace(pos, macro_def.first.size(), macro_def.second); + from = pos + macro_def.second.size(); - assert_expr_exception ex; - ex.message = "unknown assert operation"; - throw ex; - } + } + } - struct future_instruction { - unsigned source_line; - opcode op; - modifier mod; - mode amode; - mode bmode; - std::unique_ptr<number_expr> anumber; - 
std::unique_ptr<number_expr> bnumber; - }; + //check for comment: - struct assertion { - size_t source_line; - std::unique_ptr<assert_expr> expr; - number_t offset; - }; + size_t semicolon_pos = line.find(';'); + if (semicolon_pos != std::string::npos) { - struct preprocessed_line { - unsigned source_line; - std::string the_line; - }; + std::string comment = remove_spaces(line.substr(semicolon_pos + 1)); + std::string lower_case_comment = to_lower_case(comment); + line = line.substr(0, semicolon_pos); - struct info_from_preprocessor { - std::vector<assertion> assertions; - std::optional<std::string> name; - std::optional<std::string> author; - }; + if (lower_case_comment.starts_with("name ")) { + if (state.name) + throw_compiler_exception(state.current_source_line_number, "duplicate name comment"); + state.name = remove_spaces(comment.substr(5)); + } - static void preprocess_until_end_block(info_from_preprocessor &info, std::vector<preprocessed_line> &into, unsigned &next_line_number, std::string &source, std::optional<std::string> block_ender) { - while (source != "") { + else if (lower_case_comment.starts_with("author ")) { + if (state.author) + throw_compiler_exception(state.current_source_line_number, "duplicate author comment"); + state.author = remove_spaces(comment.substr(7)); + } - size_t newline = source.find('\n'); - std::string line; + else if (lower_case_comment.starts_with("assert ")) + state.assertions.push_back(parse_assertion(comment.substr(7), state.current_source_line_number)); - if (newline == std::string::npos) { - line = source; - source = ""; - } - else { - line = source.substr(0, newline); - source = source.substr(newline + 1); } - unsigned line_number = next_line_number; - ++next_line_number; + //if it's blank, go to the next one: - size_t semicolon = line.find(';'); - if (semicolon != std::string::npos) { - std::string comment = trim_spaces(line.substr(semicolon + 1)); - line = line.substr(0, semicolon); + line = remove_spaces(line); + if 
(line == "") + continue; + std::string lower_case_line = to_lower_case(line); - if (comment.starts_with("assert ")) { - std::unique_ptr<assert_expr> expr; + //if we have a stop_at and this is that, then consume and stop: - try { - expr = std::move(to_assert_expr(comment.substr(7))); - } - catch (const assert_expr_exception &iex) { - compiler_exception ex; - ex.line_number = line_number; - ex.message = iex.message; - } + if (stop_at && line == *stop_at) + return; - info.assertions.push_back((assertion){ - .source_line = line_number, - .expr = std::move(expr), - .offset = (number_t)into.size() - }); - } + //check for equ: - else if (comment.starts_with("name ")) { + size_t equ_pos = lower_case_line.find(" equ "); + if (equ_pos != std::string::npos) { - if (info.name.has_value()) { - compiler_exception ex; - ex.line_number = line_number; - ex.message = "duplicate name comment"; - throw ex; - } + std::string macro_name = line.substr(0, equ_pos); + std::string macro_content = line.substr(equ_pos + 5); - info.name = trim_spaces(comment.substr(5)); - } + if (!is_valid_identifier(macro_name)) + throw_compiler_exception(state.current_source_line_number, "bad macro name"); - else if (comment.starts_with("author ")) { + if (!state.macros.insert({macro_name, macro_content}).second) + throw_compiler_exception(state.current_source_line_number, "duplicate macro"); - if (info.author.has_value()) { - compiler_exception ex; - ex.line_number = line_number; - ex.message = "duplicate author comment"; - throw ex; - } + continue; - info.author = trim_spaces(comment.substr(7)); - } } - line = trim_spaces(line); + //check for for: - if (block_ender.has_value() && line == block_ender.value()) - return; + if (lower_case_line.starts_with("for ")) { + + std::string for_arg = remove_spaces(line.substr(4)); - if (line.starts_with("for ")) { + size_t count_length = 0; + unsigned count = 0; - number_t repeats; - size_t count; try { - repeats = real_mod(std::stoul(line.substr(4), &count)); - } - 
catch (const std::exception &e) { - count = 0; + count = std::stoul(for_arg, &count_length); } + catch (std::invalid_argument &ex) {} + catch (std::out_of_range &ex) {} - if (!count || count != line.size() - 4) { - compiler_exception ex; - ex.line_number = line_number; - ex.message = "bad for argument"; - throw ex; - } + if (count_length != for_arg.size() || !count_length) + throw_compiler_exception(state.current_source_line_number, "bad for argument"); - std::vector<preprocessed_line> inside_for; - preprocess_until_end_block(info, inside_for, next_line_number, source, "rof"); + std::vector<string_with_line_number> for_contents; + ++state.current_source_line_number; + preprocess(source_lines, state, for_contents, "rof"); - for (number_t i = 0; i < repeats; ++i) - for (const preprocessed_line &l : inside_for) - into.push_back(l); + for (unsigned i = 0; i < count; ++i) + for (const auto &piece : for_contents) + output.push_back(piece); continue; + } - into.push_back((preprocessed_line){.source_line = line_number, .the_line = line}); - } + //just a normal line: + + output.push_back({.string = line, .source_line_number = state.current_source_line_number}); - if (block_ender.has_value()) { - compiler_exception ex; - ex.line_number = next_line_number; - ex.message = "end of file encountered where " + block_ender.value() + " expected"; - throw ex; } } - struct future_inline_macro { - std::string name; - unsigned source_line; - std::unique_ptr<number_expr> definition; - number_t offset; + struct parsed_line { + opcode op; + modifier mod; + mode amode; + mode bmode; + std::unique_ptr<expr> aexpr; + std::unique_ptr<expr> bexpr; }; - typedef std::vector<future_inline_macro> future_inline_macro_set; - - struct org_info { - std::unique_ptr<number_expr> expr; - number_t offset; - unsigned source_line; + static const std::map<std::string, opcode> opcode_conversion = { + {"dat", DAT}, {"mov", MOV}, {"add", ADD}, {"sub", SUB}, + {"mul", MUL}, {"div", DIV}, {"mod", MOD}, {"jmp", 
JMP}, + {"jmz", JMZ}, {"jmn", JMN}, {"djn", DJN}, {"seq", SEQ}, + {"sne", SNE}, {"slt", SLT}, {"spl", SPL}, {"nop", NOP}, + {"cmp", SEQ} }; - static const std::map<char, mode> mode_symbols = { - {'#', IMMEDIATE}, {'$', DIRECT}, - {'*', A_INDIRECT}, {'@', B_INDIRECT}, - {'{', A_DECREMENT}, {'<', B_DECREMENT}, - {'}', A_INCREMENT}, {'>', B_INCREMENT} + static const std::map<std::string, modifier> modifier_conversion = { + {"a", A}, {"b", B}, {"ab", AB}, {"ba", BA}, {"f", F}, {"x", X}, {"i", I} }; - typedef std::pair<mode, std::unique_ptr<number_expr>> field; - - static field make_empty_field() { - auto expr = std::make_unique<number_number_expr>(); - expr->value = 0; - return std::make_pair<>(DIRECT, std::move(expr)); - } - - static field to_field(std::string part) { - if (part == "") - return {}; + static const std::map<char, mode> mode_conversion = { + {'#', IMMEDIATE}, {'$', DIRECT}, {'*', A_INDIRECT}, {'@', B_INDIRECT}, + {'{', A_DECREMENT}, {'<', B_DECREMENT}, {'}', A_INCREMENT}, {'>', B_INCREMENT} + }; - mode m = DIRECT; + static void parse_field(number_t offset, std::string from, mode &mode, std::unique_ptr<expr> &expr, unsigned source_line_number) { - auto result = mode_symbols.find(part[0]); - if (result != mode_symbols.end()) { - m = result->second; - part = trim_spaces(part.substr(1)); + if (from == "") { + mode = DIRECT; + expr = parse_expression(offset, "0", source_line_number); + return; } - return std::make_pair<>(m, to_number_expr(part)); - } + auto mode_result = mode_conversion.find(from[0]); - static modifier get_default_modifier(opcode op, mode amode, mode bmode) { - switch (op) { - - case DAT: - return F; - - case MOV: - case SEQ: - case SNE: - if (amode == IMMEDIATE) - return AB; - if (bmode == IMMEDIATE) - return B; - return I; - - case ADD: - case SUB: - case MUL: - case DIV: - case MOD: - if (amode == IMMEDIATE) - return AB; - if (bmode == IMMEDIATE) - return B; - return F; - - case SLT: - if (amode == IMMEDIATE) - return AB; - return B; - - 
case JMP: - case JMZ: - case JMN: - case DJN: - case SPL: - case NOP: - return B; + if (mode_result == mode_conversion.end()) { + mode = DIRECT; + expr = parse_expression(offset, from, source_line_number); + } + else { + mode = mode_result->second; + if (from.size() == 1) + expr = parse_expression(offset, "0", source_line_number); + else + expr = parse_expression(offset, from.substr(1), source_line_number); } - assert(false); } - static const std::map<std::string, opcode> opcode_names = { - {"dat", DAT}, {"mov", MOV}, {"add", ADD}, {"sub", SUB}, - {"mul", MUL}, {"div", DIV}, {"mod", MOD}, {"jmp", JMP}, - {"jmz", JMZ}, {"jmn", JMN}, {"djn", DJN}, {"seq", SEQ}, - {"sne", SNE}, {"slt", SLT}, {"spl", SPL}, {"nop", NOP}, - {"cmp", SEQ}, {"jnz", JMN} + struct parser_state { + label_offset_set label_offsets; + std::unique_ptr<expr> org_expr; }; - static void process_line(std::vector<future_instruction> &into, const preprocessed_line &line, future_inline_macro_set &future_inline_macros, label_set &labels, std::optional<org_info> &org) { - assert(into.size() < LIB94_CORE_SIZE); - - if (line.the_line == "") - return; - - size_t opcode_len = line.the_line.find_first_of(" ."); - if (opcode_len == std::string::npos) - opcode_len = line.the_line.size(); - - std::string opcode_name = line.the_line.substr(0, opcode_len); - std::string rest = trim_spaces(line.the_line.substr(opcode_len)); + //the driver for stage 2. + //if the line given in from has an instruction, that is put into into, and true is returned. + //otherwise, false is returned. either way, any labels that are found are also stored. + //additionally, any orgs/ends are processed and stored. 
+ static bool maybe_parse_line(const string_with_line_number &from, parsed_line &into, number_t current_offset, parser_state &state) { - if (opcode_name == "org" || opcode_name == "end") { + std::string remainder = from.string; - if (org.has_value()) { - compiler_exception ex; - ex.line_number = line.source_line; - ex.message = "duplicate org"; - throw ex; - } + while (true) { - try { - org = std::move((org_info){ - .expr = to_number_expr(rest), - .offset = (number_t)into.size(), - .source_line = line.source_line - }); - } + if (remainder == "") + return false; - catch (const number_expr_exception &iex) { - compiler_exception ex; - ex.line_number = line.source_line; - ex.message = iex.message; - throw ex; - } + size_t potential_opcode_end = remainder.find_first_of(" ."); + if (potential_opcode_end == std::string::npos) + potential_opcode_end = remainder.size(); - return; + std::string potential_opcode = remainder.substr(0, potential_opcode_end); + std::string lower_case_potential_opcode = to_lower_case(potential_opcode); + remainder = remove_spaces(remainder.substr(potential_opcode_end)); - } + if (lower_case_potential_opcode == "org" || lower_case_potential_opcode == "end") { - auto opcode_result = opcode_names.find(opcode_name); + if (state.org_expr) + throw_compiler_exception(from.source_line_number, "duplicate org/end"); - if (opcode_result == opcode_names.end() && valid_identifier(opcode_name)) { + state.org_expr = parse_expression(current_offset, remainder, from.source_line_number); + return false; - if (rest.starts_with("equ ")) { + } - try { - future_inline_macros.push_back((future_inline_macro){ - .name = opcode_name, - .source_line = line.source_line, - .definition = to_number_expr(rest.substr(4)), - .offset = (number_t)into.size() - }); - } + auto opcode_result = opcode_conversion.find(lower_case_potential_opcode); + if (opcode_result == opcode_conversion.end()) { + //maybe we're a label - catch (const number_expr_exception &iex) { - compiler_exception 
ex; - ex.line_number = line.source_line; - ex.message = iex.message; - throw ex; - } + if (!is_valid_identifier(potential_opcode)) + throw_compiler_exception(from.source_line_number, "bad label or opcode"); - return; + if (!state.label_offsets.insert({potential_opcode, current_offset}).second) + throw_compiler_exception(from.source_line_number, "duplicate label"); - } + continue; - if (labels.contains(opcode_name)) { - compiler_exception ex; - ex.line_number = line.source_line; - ex.message = "duplicate label"; - throw ex; } - labels[opcode_name] = into.size(); - preprocessed_line new_line = {.source_line = line.source_line, .the_line = rest}; - return process_line(into, new_line, future_inline_macros, labels, org); + into.op = opcode_result->second; + break; } - opcode real_opcode = opcode_result->second; - std::optional<modifier> real_modifier; + //got an opcode :) + //now check for a modifier - if (rest != "" && rest[0] == '.') { + bool have_modifier = false; - if (rest.starts_with(".ab")) { - real_modifier = AB; - rest = trim_spaces(rest.substr(3)); - } + if (remainder.size() > 0 && remainder[0] == '.') { - if (rest.starts_with(".ba")) { - real_modifier = BA; - rest = trim_spaces(rest.substr(3)); - } + remainder = remove_spaces(remainder.substr(1)); + have_modifier = true; - if (rest.starts_with(".a")) { - real_modifier = A; - rest = trim_spaces(rest.substr(2)); - } + size_t modifier_end = remainder.find(' '); + if (modifier_end == std::string::npos) + modifier_end = remainder.size(); - if (rest.starts_with(".b")) { - real_modifier = B; - rest = trim_spaces(rest.substr(2)); - } + std::string modifier = to_lower_case(remainder.substr(0, modifier_end)); + remainder = remove_spaces(remainder.substr(modifier_end)); - if (rest.starts_with(".f")) { - real_modifier = F; - rest = trim_spaces(rest.substr(2)); - } + auto modifier_result = modifier_conversion.find(modifier); - if (rest.starts_with(".x")) { - real_modifier = X; - rest = trim_spaces(rest.substr(2)); - } + 
if (modifier_result == modifier_conversion.end()) + throw_compiler_exception(from.source_line_number, "bad modifier"); - if (rest.starts_with(".i")) { - real_modifier = I; - rest = trim_spaces(rest.substr(2)); - } + into.mod = modifier_result->second; } - field a_field, b_field; + //field time + + size_t comma_pos = remainder.find(','); + std::string a_field = comma_pos == std::string::npos ? remainder : remove_spaces(remainder.substr(0, comma_pos)); + std::string b_field = comma_pos == std::string::npos ? "" : remove_spaces(remainder.substr(comma_pos + 1)); + + parse_field(current_offset, a_field, into.amode, into.aexpr, from.source_line_number); + parse_field(current_offset, b_field, into.bmode, into.bexpr, from.source_line_number); + + //if we didn't get a modifier before, determine default + + if (!have_modifier) + + switch (into.op) { + + case DAT: + into.mod = F; + break; + + case MOV: + case SEQ: + case SNE: + if (into.amode == IMMEDIATE) + into.mod = AB; + else if (into.bmode == IMMEDIATE) + into.mod = B; + else + into.mod = I; + break; + + case ADD: + case SUB: + case MUL: + case DIV: + case MOD: + if (into.amode == IMMEDIATE) + into.mod = AB; + else if (into.bmode == IMMEDIATE) + into.mod = B; + else + into.mod = F; + break; + + case SLT: + if (into.amode == IMMEDIATE) + into.mod = AB; + else + into.mod = B; + break; + + case JMP: + case JMZ: + case JMN: + case DJN: + case SPL: + case NOP: + into.mod = B; + break; - try { - - if (rest == "") { - a_field = make_empty_field(); - b_field = make_empty_field(); - } - - else { - size_t comma = rest.find(','); - - if (comma == std::string::npos) { - a_field = to_field(rest); - b_field = make_empty_field(); - } - - else { - a_field = to_field(rest.substr(0, comma)); - b_field = to_field(trim_spaces(rest.substr(comma + 1))); - } } - } - - catch (const number_expr_exception &iex) { - compiler_exception ex; - ex.line_number = line.source_line; - ex.message = iex.message; - throw ex; - } + //we got an instruction :) 
+ return true; - if (!real_modifier.has_value()) - real_modifier = get_default_modifier(real_opcode, a_field.first, b_field.first); - - into.push_back((future_instruction){ - .source_line = line.source_line, - .op = real_opcode, - .mod = real_modifier.value(), - .amode = a_field.first, - .bmode = b_field.first, - .anumber = std::move(a_field.second), - .bnumber = std::move(b_field.second) - }); } - warrior * compile_warrior(std::string source) { - for (char &ch : source) - if (ch == '\t' || ch == '\r') - ch = ' '; - - info_from_preprocessor info; - std::vector<preprocessed_line> lines; - unsigned line_number = 1; - preprocess_until_end_block(info, lines, line_number, source, {}); - - if (!info.name.has_value() || info.name == "") { - compiler_exception ex; - ex.line_number = line_number; - ex.message = "no warrior name"; - throw ex; - } - - if (!info.author.has_value() || info.author == "") { - compiler_exception ex; - ex.line_number = line_number; - ex.message = "no warrior author"; - throw ex; - } + warrior *compile_warrior(std::string source) { - std::vector<future_instruction> future_instructions; - future_inline_macro_set future_inline_macros; - label_set labels; - std::optional<org_info> org; + std::vector<std::string> source_lines; - for (const preprocessed_line &line : lines) - process_line(future_instructions, line, future_inline_macros, labels, org); - - inline_macro_set inline_macros; - for (const auto &fim : future_inline_macros) { - - if (inline_macros.contains(fim.name)) { - compiler_exception ex; - ex.line_number = fim.source_line; - ex.message = "duplicate inline macro"; - throw ex; - } + while (source != "") { - if (labels.contains(fim.name)) { - compiler_exception ex; - ex.line_number = fim.source_line; - ex.message = "inline macro with same name as label"; - throw ex; - } + std::string line; + size_t line_end = source.find('\n'); - try { - inline_macros[fim.name] = fim.definition->to_number(fim.offset, inline_macros, labels); + if (line_end == 
std::string::npos) { + line = source; + source = ""; } - - catch (const number_expr_exception &iex) { - compiler_exception ex; - ex.line_number = fim.source_line; - ex.message = iex.message; - throw ex; + else { + line = source.substr(0, line_end); + source = source.substr(line_end + 1); } - } - - std::vector<instruction> actual_instructions; + for (char &ch : line) + if (ch == '\t' || ch == '\r') + ch = ' '; - for (number_t offset = 0; offset < (number_t)future_instructions.size(); ++offset) { - const future_instruction &fi = future_instructions[offset]; + source_lines.push_back(line); - try { - actual_instructions.push_back((instruction){ - .op = fi.op, - .mod = fi.mod, - .amode = fi.amode, - .bmode = fi.bmode, - .anumber = fi.anumber->to_number(offset, inline_macros, labels), - .bnumber = fi.bnumber->to_number(offset, inline_macros, labels) - }); - } - - catch (const number_expr_exception &iex) { - compiler_exception ex; - ex.line_number = fi.source_line; - ex.message = iex.message; - throw ex; - } } - number_t org_result; + //got lines, time to preprocess - if (!org.has_value()) - org_result = 0; + preprocessor_state pp_state; + pp_state.macros.insert({"CORESIZE", std::to_string(LIB94_CORE_SIZE)}); - else { - try { - org_result = real_mod(org.value().expr->to_number(org.value().offset, inline_macros, labels) + org.value().offset); - } - catch (const number_expr_exception &iex) { - compiler_exception ex; - ex.line_number = org.value().source_line; - ex.message = iex.message; - throw ex; - } - } + std::vector<string_with_line_number> preprocessed_lines; + preprocess(source_lines, pp_state, preprocessed_lines); - for (const assertion &a : info.assertions) { + if (!pp_state.name) + throw_compiler_exception(pp_state.current_source_line_number, "no name comment"); - bool success; + if (!pp_state.author) + throw_compiler_exception(pp_state.current_source_line_number, "no author comment"); - try { - success = a.expr->is_true(a.offset, inline_macros, labels); - } + 
//now line parsing - catch (const number_expr_exception &iex) { - compiler_exception ex; - ex.line_number = a.source_line; - ex.message = iex.message; - throw ex; - } + parser_state p_state; + std::vector<parsed_line> parsed_lines; + unsigned offset = 0; - if (!success) { - compiler_exception ex; - ex.line_number = a.source_line; - ex.message = "failed assertion"; - throw ex; + for (const string_with_line_number &line : preprocessed_lines) { + parsed_line p_line; + if (maybe_parse_line(line, p_line, offset, p_state)) { + parsed_lines.push_back(std::move(p_line)); + ++offset; } - } - return new warrior { - .name = info.name.value(), - .author = info.author.value(), - .org = org_result, - .instructions = actual_instructions - }; - } + //stage 3: check assertions - struct wheader { - size_t name_size; - size_t author_size; - size_t instructions_size; - number_t org; - }; + for (const auto &assertion : pp_state.assertions) + if (!assertion->check(p_state.label_offsets)) + throw_compiler_exception(assertion->source_line_number, "assertion failed"); - bool save_warrior(const warrior &w, const std::filesystem::path &to) { - FILE *f = fopen(to.c_str(), "wb"); - if (!f) - return false; + //stage 4: evaluate expressions - wheader wh = { - .name_size = w.name.size(), .author_size = w.author.size(), - .instructions_size = w.instructions.size(), .org = w.org - }; + std::unique_ptr<warrior> w = std::make_unique<warrior>(); - fwrite(&wh, sizeof(wheader), 1, f); - fwrite(w.name.c_str(), w.name.size(), 1, f); - fwrite(w.author.c_str(), w.author.size(), 1, f); - fwrite(w.instructions.data(), sizeof(instruction) * w.instructions.size(), 1, f); + for (const auto &line : parsed_lines) { - fclose(f); - return true; - } + instruction i; + i.op = line.op; + i.mod = line.mod; + i.amode = line.amode; + i.bmode = line.bmode; - std::optional<warrior *> load_warrior(const std::filesystem::path &from) { - FILE *f = fopen(from.c_str(), "rb"); - if (!f) - return {}; + i.anumber = 
line.aexpr->evaluate(p_state.label_offsets); + i.bnumber = line.bexpr->evaluate(p_state.label_offsets); - std::unique_ptr<warrior> w = std::make_unique<warrior>(); + i.anumber = (i.anumber % LIB94_CORE_SIZE + LIB94_CORE_SIZE) % LIB94_CORE_SIZE; + i.bnumber = (i.bnumber % LIB94_CORE_SIZE + LIB94_CORE_SIZE) % LIB94_CORE_SIZE; - wheader wh; - fread(&wh, sizeof(wheader), 1, f); + w->instructions.push_back(i); - w->name.resize(wh.name_size); - w->author.resize(wh.author_size); - w->instructions.resize(wh.instructions_size); - w->org = wh.org; + } - fread(w->name.data(), wh.name_size, 1, f); - fread(w->author.data(), wh.author_size, 1, f); - fread(w->instructions.data(), wh.instructions_size, 1, f); + //stage 5 ;) - fclose(f); + if (p_state.org_expr) { + w->org = p_state.org_expr->evaluate(p_state.label_offsets) + p_state.org_expr->offset; + w->org = (w->org % LIB94_CORE_SIZE + LIB94_CORE_SIZE) % LIB94_CORE_SIZE; + } - for (const instruction &i : w->instructions) - if (i.op > NOP || i.mod > I || i.amode > B_INCREMENT || i.bmode > B_INCREMENT || - i.anumber >= LIB94_CORE_SIZE || i.bnumber >= LIB94_CORE_SIZE) - return {}; + w->name = *pp_state.name; + w->author = *pp_state.author; return w.release(); + } } diff --git a/readme.txt b/readme.txt new file mode 100644 index 0000000..1691370 --- /dev/null +++ b/readme.txt @@ -0,0 +1,38 @@ +=== building === + +In order to compile this, you will need GNU Make, GCC, GNU LD, pkg-config, Open MPI, and gtkmm 4. + +On Debian, you can install all of these with + apt install make gcc binutils pkg-config openmpi-bin libgtkmm-4.0-dev + +On macOS with Homebrew, you can install all of these with + brew install make gcc binutils pkg-config open-mpi gtkmm4 + +Then, to build all of the software, just run make. + +=== core war standard === + +lib94 attempts to follow the draft standard at <https://corewar.co.uk/standards/icws94.txt>, minus P-space. There are no read/write +limits (or if you prefer, they are the same as the core size). 
The minimum separation is always 0, and the core size is set at 8000. +To change the core size, change LIB94_CORE_SIZE in include/lib94/lib94.hpp, run make clean, and then run make. + +=== bench === + +The "bench" program (short for test bench) is intended for testing out warriors and seeing exactly what they do. It allows you to +single-step, or run at a variety of rates. It has a large display which shows core reads/writes/executions as they happen, and a +listing of all of the instructions in the core. It also shows the "alive" warriors, each one's instruction pointer for the next step, and +how many processes each one has. + +To open bench, just run bin/bench after building as above. + +=== tabulator === + +The "tabulator" program runs every possible pairing of warriors from a selection against each other a number of times, and then shows +the number of wins of each warrior against each other warrior in a table format. This program uses MPI to run batches of these rounds +in different processes, and communicate the results back to a head process. + +To run all of the included warriors against each other, run + mpirun bin/tabulator-mpi warriors/*.red + +Note that tabulator expects at least two processes (one head process and at least one worker).
If you only have one core, you may run + mpirun -np 2 --oversubscribe bin/tabulator-mpi warriors/*.red diff --git a/tabulator-mpi/main.cpp b/tabulator-mpi/main.cpp index cb60ecb..e6e1a27 100644 --- a/tabulator-mpi/main.cpp +++ b/tabulator-mpi/main.cpp @@ -21,7 +21,7 @@ const lib94::warrior *load_warrior(const char *file) { } catch (const lib94::compiler_exception &ex) { - fprintf(stderr, "error in %s on line %u: %s\n", file, ex.line_number, ex.message.c_str()); + fprintf(stderr, "error in %s on line %u: %s\n", file, ex.source_line_number, ex.message.c_str()); exit(1); } } diff --git a/warriors/epson.red b/warriors/epson.red index 688ad8d..2bc05ad 100644 --- a/warriors/epson.red +++ b/warriors/epson.red @@ -4,10 +4,10 @@ intrascan_period equ 10 interscan_period equ 2 -;interscan period must divide intrascan period -;intrascan period must divide 8000 +;assert intrascan_period % interscan_period == 0 +;assert CORESIZE % intrascan_period == 0 -scan_init equ the_end - scan - (the_end - scan) % intrascan_period + intrascan_period +scan_init equ (the_end - scan - (the_end - scan) % intrascan_period + intrascan_period) scan seq.i -intrascan_period, scan_init |