From f87403e4a71cbbb52f1c658bf0aadd06c4bb2264 Mon Sep 17 00:00:00 2001 From: Elijah Cohen Date: Fri, 26 Apr 2024 15:53:47 -0500 Subject: [PATCH] parser functioning minimally, gonna work on evaluation &c now --- Makefile | 11 ++++ ideas.org | 53 +++++++++++++++++ src/Makefile | 52 ++++++++++++++++ src/parser.c | 165 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/parser.h | 12 ++++ src/sexpr.c | 122 +++++++++++++++++++++++++++++++++++++ src/sexpr.h | 23 +++++++ src/test.c | 79 ++++++++++++++++++++++++ src/types.h | 38 ++++++++++++ 9 files changed, 555 insertions(+) create mode 100644 Makefile create mode 100644 ideas.org create mode 100644 src/Makefile create mode 100644 src/parser.c create mode 100644 src/parser.h create mode 100644 src/sexpr.c create mode 100644 src/sexpr.h create mode 100644 src/test.c create mode 100644 src/types.h diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..db9b141 --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ + + + +all: + cd src; $(MAKE) +.PHONY: test +test: + cd src; $(MAKE) test +.PHONY: clean +clean: + cd src; $(MAKE) clean diff --git a/ideas.org b/ideas.org new file mode 100644 index 0000000..2915181 --- /dev/null +++ b/ideas.org @@ -0,0 +1,53 @@ + +So this is a poorly-named directory, since what I'm making will end up being a lisp-1... + +thinking about taking ideas from concatenative languages, tacit programming, and the such. +priority 1 is an s-expression parser. + + +MAJOR PROB: how do i represent builtins? not as functions, since function pointers are kinda screwy, might wanna make some sort of machine language for my basic atoms and such, so it'll be easier to implement in size-efficient ways. gonna end up maybe doing some sort of secd type thing +also i should have a stack for arguments, so when there's like a plus or something it stashes the value, gets the next value, and adds em together + +data type: +cons cell + +sexpr is either +cons cell +symbol +number +string +function? + +let's just start with that I guess + +x => x +(x) => x +(f x) => f(x) +(f x y) => (f(x) y) +(f x y z ...) => (f(x) y z ...) + + +OH MY GOD I CAN USE NUMBERS AS CHURCH NUMERALS! +(i.e. (5 f x) => f(f(f(f(f(x)))))) +dunno how much else I can do with it but i def wanna employ church encodings + +def: defines things globally +(def x 7) +then +x => 7 + +lambda: works like a lambda normally does +((\ x (+ x 1)) 6) => 7 +macrolambda: like lambda but without evaluating args +(example to come) + + + +okay, I think the idea is to have a sorta forth with closures? +where any number of stack elements can be grouped into a function? yeah that feels right-ish +or I could just make a fully-functional forth of sorts + + + +how do i implement closures? how do i make sure things actually evaluate once they've got enough arguments? +maybe have a closure as a struct containing the builtin's opcode, plus a list of args, plus a down-counter for how many args until execution triggers? diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..9d52008 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,52 @@ + +CC:=llvm-gcc + +# okay what the fuck am i doing with lex yacc generated stuff? just do it once? okay + +SRCS:= $(wildcard *.c) +BUILD:= ../build +BIN:=repl +TEST_BIN:=test +OBJS:= $(patsubst %.c,$(BUILD)/%.o,$(SRCS)) +DEPS:= $(OBJS:%.o=%.d) + +BIN_OBJS:= $(filter-out $(BUILD)/test.o,$(OBJS)) +TEST_OBJS:= $(filter-out $(BUILD)/repl.o,$(OBJS)) + + +#LIBRARIES = poppler-glib MagickWand libexif taglib_c libmagic libavcodec libavformat sqlite3 uuid fuse3 libbsd-overlay + +LIBRARIES = uuid libedit + +CFLAGS:= -g -Wall `pkg-config $(LIBRARIES) --cflags` +LDFLAGS:= -g -Wall `pkg-config $(LIBRARIES) --libs` + + +#$(BUILD)/lex.yy.o: lisp.l +#lex -o $(BUILD)/lex.yy.c lisp.l + +#$(BUILD)/y.tab.o: lisp.y +# yacc -defines lisp.y + +#y.tab.h: lisp.y +# yacc -d lisp.y + + +$(BUILD)/$(BIN): $(OBJS) + $(CC) $(BIN_OBJS) $(LDFLAGS) -o $(BUILD)/$(BIN) + +-include $(DEPS) + + +$(OBJS): $(BUILD)/%.o:%.c Makefile + $(CC) -c $< -MMD -o $@ $(CFLAGS) + + +.PHONY: test +test: $(TEST_OBJS) + $(CC) $(TEST_OBJS) $(LDFLAGS) -o $(BUILD)/$(TEST_BIN) + +.PHONY: clean +clean: + -rm $(BUILD)/$(BIN) $(BUILD)/$(TEST_BIN) $(OBJS) $(DEPS) + diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..cdf5826 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,165 @@ +#include +#include +#include +#include +#include + +#include "types.h" +#include "sexpr.h" + + +Sexpr* append_fragment(Sexpr* tokens, char* tok_start, size_t currlen) { + // helper so that i dont repeat code + char* newsym = malloc(sizeof(char)*currlen); + strncpy(newsym, tok_start, currlen); + Sexpr* newtok = from_sym(newsym); + free(newsym); + return cons(newtok, tokens); +} + +Sexpr* tokenize(char* s) { + // note: also reverses + Sexpr* openparen = from_sym("("); + Sexpr* closeparen = from_sym(")"); + Sexpr* tokens = from_nil(); + char* tok_start = NULL; + // returns a list of every token + size_t currlen = 0; + bool already_in_sym = false; + while(*s != '\0') { + if(*s=='(' || *s==')') { + if(already_in_sym) { + tokens = append_fragment(tokens, tok_start, currlen); + } + if(*s=='(') { + tokens = cons(openparen, tokens); + } + else { + tokens = cons(closeparen, tokens); + } + already_in_sym = false; + currlen = 0; + } // close () + else if(isspace(*s)) { + if(already_in_sym) { + tokens = append_fragment(tokens, tok_start, currlen); + already_in_sym = false; + currlen = 0; + } + // don't need to do anything otherwise + } // close isspc + else { + if(!already_in_sym) { + tok_start = s; + } + currlen++; + already_in_sym = true; + } + s++; + } + if(already_in_sym) { + tokens = append_fragment(tokens, tok_start, currlen); + } + return tokens; +} + +bool is_u64(char* s) { + while(*s!='\0') { + if(!isdigit(*s)) return false; + s++; + } + return true; +} + + +Sexpr* vals_parse(Sexpr* tokens) { + // should only accept a singly-linked list of symbols + // note: may also reverse + Sexpr* out = from_nil(); + Sexpr* next = NULL; + while(tokens->type != NIL) { + char* s = car(*tokens)->value.s; + if(is_u64(s)) { + uint64_t u; + sscanf(s, "%" SCNu64, &u); + next = from_uint(u); + } + else { + next = from_sym(s); + } + out = cons(next, out); + tokens = cdr(*tokens); + } + return out; +} + +Sexpr* cons_parse_bork(Sexpr* tokens) { + // dunno if it reverses yet + //Sexpr* out = from_nil(); + Sexpr* curr = from_nil(); + Sexpr* nesting_stack = from_nil(); + while(tokens->type != NIL) { + if(tokens->type == SYM) { + printf("issym\n"); + } + if(tokens->type == SYM && strcmp("(", car(*tokens)->value.s)==0) { + printf("open\n"); + nesting_stack = cons(curr, nesting_stack); + curr = from_nil(); + } + else if(tokens->type == SYM && strcmp(")", car(*tokens)->value.s)==0) { + printf("close\n"); + if(nesting_stack->type ==NIL) { + printf("unbalanced parens...?\n"); + return NULL; + } + curr = cons(curr, car(*nesting_stack)); + nesting_stack = cdr(*nesting_stack); + } + else { + // uhh just append to front? yeah ig + curr = cons(car(*tokens), curr); + tokens = cdr(*tokens); + } + } + // todo free nesting stack here + return curr; +} + +Sexpr* cons_parse(Sexpr* tokens) { + Sexpr* reversed = reverse(tokens); + // takes results from previous parsing ops, aka the forward-facing? + Sexpr* heads_stack = from_nil(); + Sexpr* curr_head = from_nil(); + Sexpr* curr_car; + while(reversed->type != NIL) { + curr_car = car(*reversed); + Sexpr_Type cartype = curr_car->type; + if(cartype == SYM && strcmp(")", curr_car->value.s)==0) { + heads_stack = cons(curr_head, heads_stack); + curr_head = from_nil(); + } + else if(cartype == SYM && strcmp("(", curr_car->value.s)==0) { + Sexpr* prev_head = car(*heads_stack); + curr_head = cons(curr_head, prev_head); + heads_stack = cdr(*heads_stack); + } + else { + curr_head = cons(curr_car, curr_head); + } + reversed = cdr(*reversed); + } + return curr_head; +} + + +Sexpr* parse(char* s) { + //printf("s: %s\n", s); + Sexpr* tokens = tokenize(s); + //printf("t: %s\n", sprint_sexpr(*tokens)); + Sexpr* vals = vals_parse(tokens); + printf("v: %s\n", sprint_sexpr(*vals)); + Sexpr* done = cons_parse(vals); + //printf("c: %s\n", sprint_sexpr(*done)); + return done; +} diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..073548e --- /dev/null +++ b/src/parser.h @@ -0,0 +1,12 @@ +#ifndef _PARSER_H +#define _PARSER_H + +#include "types.h" + + +Sexpr* tokenize(char* s); +Sexpr* cons_parse(Sexpr* tokens); +Sexpr* vals_parse(Sexpr* tokens); +Sexpr* parse(char* s); + +#endif diff --git a/src/sexpr.c b/src/sexpr.c new file mode 100644 index 0000000..6e3a896 --- /dev/null +++ b/src/sexpr.c @@ -0,0 +1,122 @@ + +#include +#include +#include + +#include "types.h" +#include "sexpr.h" + + +Sexpr* from_nil() { + Sexpr* ret = malloc(sizeof(Sexpr)); + ret->type = NIL; + ret->value.n = NULL; + return ret; +} + +Sexpr* from_sym(char* s) { + Sexpr* ret = malloc(sizeof(Sexpr)); + ret->type = SYM; + ret->value.s = malloc(strlen(s)+1); + strcpy(ret->value.s, s); + return ret; +} + +Sexpr* from_uint(uint64_t u) { + Sexpr* ret = malloc(sizeof(Sexpr)); + ret->type = UINT; + ret->value.u = u; + return ret; +} + +Sexpr* cons(Sexpr* car, Sexpr* cdr) { + Cons_t* c = malloc(sizeof(Cons_t)); + Sexpr* s = malloc(sizeof(Sexpr)); + c->car = car; + c->cdr = cdr; + s->type = CONS; + s->value.c = c; + return s; +} + +Sexpr* car(Sexpr s) { + return s.value.c->car; +} +Sexpr* cdr(Sexpr s) { + return s.value.c->cdr; +} + + +Sexpr* reverse(Sexpr* s) { + if(s->type != CONS) { + // uhh this probably should never happen... + return s; + } + Sexpr* out = from_nil(); + while(s->type != NIL) { + out = cons(car(*s), out); + s = cdr(*s); + } + return out; +} + +char* sprint_sexpr(Sexpr s) { + size_t nbytes; + char* out; + if(s.type == NIL) { + out = malloc(4*sizeof(char)); + strcpy(out, "nil"); + return out; + } + else if(s.type == T) { + out = malloc(2*sizeof(char)); + strcpy(out, "t"); + return out; + } + else if(s.type == SYM) { + out = malloc(sizeof(char)*(strlen(s.value.s)+1)); + strcpy(out, s.value.s); + return out; + } + else if(s.type == BUILTIN) { + out = malloc(6*sizeof(char)); + strcpy(out, ""); + return out; + } + else if(s.type == UINT) { + nbytes = snprintf(NULL, 0, "%" PRIu64 "", s.value.u) + 1; + out = malloc(nbytes*sizeof(char)); + snprintf(out, nbytes, "%" PRIu64 "", s.value.u); + return out; + } + else if(s.type == CONS) { + Sexpr curr_cell = s; + size_t currsize = 2; + out = malloc(currsize*sizeof(char)); + out[0] = '('; + out[1] = '\0'; + while(curr_cell.type == CONS) { + char* carstr = sprint_sexpr(*car(curr_cell)); + currsize += strlen(carstr) + 1; // trailing space/close paren + out = realloc(out, currsize); + strcat(out, carstr); + strcat(out, " "); + curr_cell = *cdr(curr_cell); + } + if(curr_cell.type == NIL) { + out[currsize-2] = ')'; + out[currsize-1] = '\0'; + return out; + } + else { // non-nil + char* cdrstr = sprint_sexpr(curr_cell); + currsize += strlen(cdrstr) + 4; // dot, space, close, null-terminator + strcat(out, ". "); + strcat(out, cdrstr); + strcat(out, ")"); + out[currsize-1] = '\0'; + return out; + } + } + return NULL; +} diff --git a/src/sexpr.h b/src/sexpr.h new file mode 100644 index 0000000..cfeca9a --- /dev/null +++ b/src/sexpr.h @@ -0,0 +1,23 @@ +#ifndef _SEXPR_H +#define _SEXPR_H + +#include "types.h" + + +Sexpr* from_nil(); + +Sexpr* from_sym(char* s); + +Sexpr* from_uint(uint64_t u); + +Sexpr* cons(Sexpr* car, Sexpr* cdr); + +Sexpr* car(Sexpr s); + +Sexpr* cdr(Sexpr s); + +Sexpr* reverse(Sexpr* s); + +char* sprint_sexpr(Sexpr s); + +#endif diff --git a/src/test.c b/src/test.c new file mode 100644 index 0000000..5e747ed --- /dev/null +++ b/src/test.c @@ -0,0 +1,79 @@ + +#include +#include + +#include "types.h" +#include "sexpr.h" +#include "parser.h" + + + + +void test_basics() { + printf("testing basics...\n"); + Sexpr* nil = from_nil(); + printf("nil: %s\n", sprint_sexpr(*nil)); + Sexpr* sym = from_sym("testsym"); + printf("sym: %s\n", sprint_sexpr(*sym)); + Sexpr* cons1 = cons(sym, nil); + Sexpr* cons2 = cons(nil, sym); + printf("cons1: %s\n", sprint_sexpr(*cons1)); + printf("cons2: %s\n", sprint_sexpr(*cons2)); + Sexpr* sym2 = from_sym("nothersym"); + printf("sym2: %s\n", sprint_sexpr(*sym2)); + Sexpr* cons3 = cons(sym2, cons2); + printf("cons2: %s\n", sprint_sexpr(*cons2)); + printf("cons3: %s\n", sprint_sexpr(*cons3)); + Sexpr* cons4 = cons(sym2, cons1); + printf("cons4: %s\n", sprint_sexpr(*cons4)); + Sexpr* cons5 = cons(cons4, cons4); + printf("cons5: %s\n", sprint_sexpr(*cons5)); + Sexpr* ui = from_uint(54362); + printf("ui: %s", sprint_sexpr(*ui)); +} + +void test2() { + Sexpr* a = from_sym("a"); + Sexpr* b = from_sym("b"); + Sexpr* c = from_sym("c"); + Sexpr* d = from_sym("d"); + Sexpr* e = from_sym("e"); + Sexpr* nil = from_nil(); + Sexpr* cc = cons(e, nil); + cc = cons(d, cc); + cc = cons(c, cc); + cc = cons(b, cc); + cc = cons(a, cc); + printf("all: %s\n", sprint_sexpr(*cc)); +} + +void test_parsing() { + printf("testing parsing...\n"); + printf("basic: %s\n", sprint_sexpr(*from_sym("hello"))); + Sexpr* t1 = tokenize("a b c d e f g"); + printf("t1: %s\n", sprint_sexpr(*reverse(t1))); + Sexpr* t2 = tokenize("(a b (c d) e)"); + printf("t1: %s\n", sprint_sexpr(*reverse(t2))); + + printf("%s\n", sprint_sexpr(*reverse(tokenize("a b c d ef g")))); +} + +void test_parser() { + printf("parsing again...\n"); + + printf("_: %s\n", sprint_sexpr(*parse("457"))); + printf("_: %s\n", sprint_sexpr(*parse("a b c d e"))); + printf("_: %s\n", sprint_sexpr(*parse("(a b (3 2 (5) c) d (e f) g)"))); +} + +void run_tests(){ + //test_basics(); + //test2(); + //test_parsing(); + test_parser(); +} + +int main() { + run_tests(); + return 0; +} diff --git a/src/types.h b/src/types.h new file mode 100644 index 0000000..50845fe --- /dev/null +++ b/src/types.h @@ -0,0 +1,38 @@ +#ifndef _TYPES_H +#define _TYPES_H + +// here is where i define the most relevant types for this project + + +#include +#include +#include + +typedef uint64_t Builtin; +typedef char* Symbol_t; +typedef void* Nil_t; +typedef void* Truth_t; + +typedef enum Sexpr_Type { + UINT, SYM, BUILTIN, NIL, T, CONS +} Sexpr_Type; // to be used rarely, mainly only in eval i think + +typedef struct Cons { + struct Sexpr* car; + struct Sexpr* cdr; +} Cons_t; + +typedef struct Sexpr { + Sexpr_Type type; + union { + Builtin b; + uint64_t u; + Symbol_t s; + Cons_t* c; + Nil_t n; + Truth_t t; + } value; +} Sexpr; + + +#endif -- 2.39.2