From bffff8701efac3271d67b89b9afbde030185f04b Mon Sep 17 00:00:00 2001
From: Elijah Cohen <eli@eli173.com>
Date: Wed, 14 Aug 2024 22:43:26 -0500
Subject: [PATCH] work on memory safety in parser

---
 ideas.org    |  9 +++++++++
 src/Makefile |  2 +-
 src/parser.c | 34 ++++++++++++++++++++++------------
 src/repl.c   |  4 +++-
 src/sexpr.c  |  4 ++--
 src/test.c   | 10 +++++++++-
 6 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/ideas.org b/ideas.org
index 1f8dd50..d27dcdd 100644
--- a/ideas.org
+++ b/ideas.org
@@ -1,6 +1,15 @@
 MEMORY MANAGEMENT
 okay gonna start with making sure the parser is good... how?
 
+parser checklist:
+tokenize ok
+vals_parse ok
+balance_checker ok
+what if balance_checker reports unbalanced input?
+cons_parse ?
+
+cons_parse is tricky and needs a lot more thought, but after that I think it's good
+
 let's think about every single allocation needed for some statements:
 (+ 4 6)
 eval structure:
diff --git a/src/Makefile b/src/Makefile
index 7117825..325e039 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,5 +1,5 @@
 
-CC:=llvm-gcc
+#CC:=llvm-gcc
 
 # okay what the fuck am i doing with lex yacc generated stuff? just do it once? okay
 
diff --git a/src/parser.c b/src/parser.c
index edd166a..277796e 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -10,17 +10,16 @@
 
 Sexpr* append_fragment(Sexpr* tokens, char* tok_start, size_t currlen) {
 	// helper so that i dont repeat code
-					char* newsym = malloc(sizeof(char)*currlen);
-				strncpy(newsym, tok_start, currlen);
-				Sexpr* newtok = from_sym(newsym);
-				free(newsym);
-				return cons(newtok, tokens);
+	char* newsym = calloc(currlen + 1, sizeof(char)); // one extra zeroed byte: strncpy below adds no terminator when the source is longer than currlen
+	strncpy(newsym, tok_start, currlen); // copies at most currlen chars; the final calloc'd byte stays '\0'
+	Sexpr* newtok = from_sym(newsym); // from_sym is handed no length, so newsym has to be NUL-terminated
+	free(newsym); // NOTE(review): assumes from_sym keeps its own copy of the string — confirm
+	return cons(newtok, tokens);
 }
 
+
 Sexpr* tokenize(char* s) {
 	// note: also reverses
-	Sexpr* openparen = from_sym("(");
-	Sexpr* closeparen = from_sym(")");
 	Sexpr* tokens = from_nil();
 	char* tok_start = NULL;
 	// returns a list of every token
@@ -32,10 +31,10 @@ Sexpr* tokenize(char* s) {
 				tokens = append_fragment(tokens, tok_start, currlen);
 			}
 			if(*s=='(') {
-				tokens = cons(openparen, tokens);
+				tokens = cons(from_sym("("), tokens);
 			}
 			else {
-				tokens = cons(closeparen, tokens);
+				tokens = cons(from_sym(")"), tokens);
 			}
 			already_in_sym = false;
 			currlen = 0;
@@ -113,13 +112,14 @@ bool balance_checker(Sexpr* tokens) {
 }
 
 Sexpr* cons_parse(Sexpr* tokens) {
-	Sexpr* reversed = reverse(tokens);
+	Sexpr* reversedptr = reverse(tokens); // head of the reversed list, kept so it can be freed after the walk
+	Sexpr* reversed = reversedptr; // cursor that the loop advances with cdr()
 	// takes results from previous parsing ops, aka the forward-facing?
 	Sexpr* heads_stack = from_nil();
 	Sexpr* curr_head = from_nil();
 	Sexpr* curr_car;
 	while(reversed->type != NIL) {
-		curr_car = car(reversed);
+		curr_car = clone(car(reversed)); // work on a copy of the element rather than the node inside reversed
 		Sexpr_Type cartype = curr_car->type;
 		if(cartype == SYM && strcmp(")", curr_car->value.s)==0) {
 			heads_stack = cons(curr_head, heads_stack);
@@ -135,6 +135,9 @@ Sexpr* cons_parse(Sexpr* tokens) {
 		}
 		reversed = cdr(reversed);
 	}
+	sexpr_free(reversedptr); // frees from the saved head; reversed itself now points at the NIL tail
+	sexpr_free(heads_stack);
+	sexpr_free(curr_car); // NOTE(review): curr_car is declared without an initializer and only assigned inside the loop, so for an empty token list this frees an indeterminate pointer; also confirm the last clone is not already owned by curr_head
 	return curr_head;
 }
 
 
@@ -142,14 +145,21 @@ Sexpr* cons_parse(Sexpr* tokens) {
 Sexpr* parse(char* s) {
 	//printf("s: %s\n", s);
 	Sexpr* tokens = tokenize(s);
-	//printf("t: %s\n", sprint_sexpr(*tokens));
+	//printf("t: %s\n", sprint_sexpr(tokens));
 	Sexpr* vals = vals_parse(tokens);
+	sexpr_free(vals); // NOTE(review): this free plus the re-parse on the next line repeat the call above; looks like leftover leak-test scaffolding — confirm and drop
+	vals = vals_parse(tokens);
+	sexpr_free(tokens); // token list is released here; nothing below reads it
 	//printf("v: %s\n", sprint_sexpr(vals));
 	if(!balance_checker(vals)) {
 		printf("unbalanced parenthesis\n");
+		sexpr_free(vals); // release vals before the early NULL return
 		return NULL;
 	}
+	//printf("v1: %s\n", sprint_sexpr(vals));
 	Sexpr* done = cons_parse(vals);
+	//printf("v2: %s\n", sprint_sexpr(vals));
+	sexpr_free(vals); // vals is freed once cons_parse has produced done, which is what gets returned
 	//printf("c: %s\n", sprint_sexpr(*done));
 	return done;
 }
diff --git a/src/repl.c b/src/repl.c
index 5123b07..94420d8 100644
--- a/src/repl.c
+++ b/src/repl.c
@@ -34,7 +34,9 @@ int main(int argc, char** argv) {
 		else {
 			//printf("- -%s\n", sprint_sexpr(in));
 			Sexpr* out = eval(car(in), env);
-			printf(" - %s\n", sprint_sexpr(out));
+			char* outstr = sprint_sexpr(out);
+			printf(" - %s\n", outstr);
+			free(outstr);
 		}
 		free(input);
 	}
diff --git a/src/sexpr.c b/src/sexpr.c
index 593283b..69c37c5 100644
--- a/src/sexpr.c
+++ b/src/sexpr.c
@@ -159,11 +159,11 @@ Sexpr* clone(Sexpr* s) {
 Sexpr* reverse(Sexpr* s) {
 	if(s->type != CONS) {
 		// uhh this probably should never happen...
-		return s;
+		return clone(s);
 	}
 	Sexpr* out = from_nil();
 	while(s->type != NIL) {
-		out = cons(car(s), out);
+		out = cons(clone(car(s)), out);
 		s = cdr(s);
 	}
 	return out;
diff --git a/src/test.c b/src/test.c
index 268d79e..f77eca6 100644
--- a/src/test.c
+++ b/src/test.c
@@ -88,11 +88,19 @@ void test_dict() {
 void mem_parser() {
 	printf("starting parser memory testing\n");
 	char* toparse = "(car (cons 1 (cons 2 nil)))";
+	char* out; // holds the string returned by sprint_sexpr for each iteration
 	Sexpr* parsed;
-	while(1) {
+	unsigned long l = 0; // iteration counter
+	while(l < 10000) { // fixed iteration count so the test terminates
 		parsed = parse(toparse);
+		out = sprint_sexpr(parsed); // also exercises the printer; the returned string is freed below
 		sexpr_free(parsed);
+		//printf("%s\n", out);
+		free(out);
+		//getchar();
+		l++;
 	}
+	//sexpr_free(parsed);
 }
 
 void mem_hammer() {
-- 
2.39.5