From 306e3e1fa95a7b9a09f994d0c1ba20aac305ab89 Mon Sep 17 00:00:00 2001 From: Elijah Cohen Date: Wed, 6 Nov 2024 20:25:46 -0600 Subject: [PATCH] very basic file io, support for escape characters --- src/builtins/io.c | 75 ++++++++++++++++++++++++++++++ src/builtins/io.h | 6 +++ src/builtins/strings.c | 36 +++++++++++++++ src/builtins/strings.h | 3 ++ src/parser.c | 101 ++++++++++++++++++++++++++++++++++++++++- 5 files changed, 219 insertions(+), 2 deletions(-) diff --git a/src/builtins/io.c b/src/builtins/io.c index 25ce07b..8da2589 100644 --- a/src/builtins/io.c +++ b/src/builtins/io.c @@ -52,6 +52,75 @@ Sexpr* io_print_b(Sexpr* b, Sexpr* rest, Sexpr* env) { return cons(from_t(), rest); } +Sexpr* io_readfile(Sexpr* b, Sexpr* rest, Sexpr* env) { + if(IO_READFILE_ARGS != u64_get_num_args(b)) { + return cons(b, rest); + } + Sexpr* firstarg = eval(clone(car(b->value.b.args)), env); +#ifdef TYPECHECK + if(firstarg->type != STR) { + ERR("readfile: ", "argument not a string"); + sexpr_free(firstarg); + sexpr_free(b); + return cons(from_nil(), rest); + } +#endif + FILE* thefile = fopen(firstarg->value.str, "r"); + if(!thefile) { + ERR("readfile: file not found: ", firstarg->value.str); + sexpr_free(firstarg); + sexpr_free(b); + return cons(from_nil(), rest); + } + fseek(thefile, 0, SEEK_END); + size_t fsz = ftell(thefile); + fseek(thefile, 0, SEEK_SET); + char* buf = malloc(sizeof(char)*fsz); + if(!buf) { + ERR("readfile: ", "unable to allocate memory for file"); + fclose(thefile); + return cons(from_nil(), rest); + } + fread(buf, 1, fsz, thefile); + fclose(thefile); + Sexpr* f = from_string(buf); + free(buf); + return cons(f, rest); +} + +Sexpr* io_writefile(Sexpr* b, Sexpr* rest, Sexpr* env) { + if(IO_WRITEFILE_ARGS != u64_get_num_args(b)) { + return cons(b, rest); + } + Sexpr* args = b->value.b.args; + Sexpr* filearg = eval(clone(car(cdr(args))), env); + Sexpr* strarg = eval(clone(car(args)), env); +#ifdef TYPECHECK + if(filearg->type != STR || strarg->type != STR) { + ERR("writefile: ", "arguments not strings"); + sexpr_free(filearg); + sexpr_free(strarg); + sexpr_free(b); + return cons(from_nil(), rest); + } +#endif + char* filestring = filearg->value.str; + FILE* thefile = fopen(filestring, "w"); + if(!thefile) { + ERR("writefile: file not found: ", filestring); + sexpr_free(filearg); + sexpr_free(strarg); + sexpr_free(b); + return cons(from_nil(), rest); + } + size_t num_written = fwrite(strarg->value.str, sizeof(char), strlen(strarg->value.str), thefile); + fclose(thefile); + sexpr_free(filearg); + sexpr_free(strarg); + sexpr_free(b); + return cons(from_uint(num_written), rest); +} + Sexpr* x_io_dispatch(Sexpr* b, Sexpr* rest, Sexpr* env) { uint64_t code = b->value.b.opcode & 0xff; @@ -63,6 +132,10 @@ Sexpr* x_io_dispatch(Sexpr* b, Sexpr* rest, Sexpr* env) { return io_printstr(b, rest, env); case IO_PB: return io_print_b(b, rest, env); + case IO_READFILE: + return io_readfile(b, rest, env); + case IO_WRITEFILE: + return io_writefile(b, rest, env); default: return from_nil(); } @@ -73,6 +146,8 @@ Sexpr* load_io_env(Sexpr* env) { load_builtin(IO_PRINT_STR, (IO_PREFIX << 8) | IO_PRINT, env); load_builtin(IO_PRINTSTR_STR, (IO_PREFIX << 8) | IO_PRINTSTR, env); load_builtin(IO_PB_STR, (IO_PREFIX << 8) | IO_PB, env); + load_builtin(IO_READFILE_STR, (IO_PREFIX << 8) | IO_READFILE, env); + load_builtin(IO_WRITEFILE_STR, (IO_PREFIX << 8) | IO_WRITEFILE, env); return env; } diff --git a/src/builtins/io.h b/src/builtins/io.h index 7b805fe..f62db66 100644 --- a/src/builtins/io.h +++ b/src/builtins/io.h @@ -14,6 +14,12 @@ #define IO_PB 0x02 #define IO_PB_ARGS 1 #define IO_PB_STR "pb" +#define IO_READFILE 0x03 +#define IO_READFILE_ARGS 1 +#define IO_READFILE_STR "readfile" +#define IO_WRITEFILE 0x04 +#define IO_WRITEFILE_ARGS 2 +#define IO_WRITEFILE_STR "writefile" Sexpr* x_io_dispatch(Sexpr* s, Sexpr* rest, Sexpr* env); Sexpr* load_io_env(Sexpr* env); diff --git a/src/builtins/strings.c b/src/builtins/strings.c index 8d2660b..a99e546 100644 --- a/src/builtins/strings.c +++ b/src/builtins/strings.c @@ -152,6 +152,39 @@ Sexpr* s_substr(Sexpr* b, Sexpr* rest, Sexpr* env) { return cons(from_quote(newres), rest); } +Sexpr* s_strtok(Sexpr* b, Sexpr* rest, Sexpr* env) { + if(STRINGS_STRTOK_ARGS != u64_get_num_args(b)) { + return cons(b, rest); + } + Sexpr* args = b->value.b.args; + Sexpr* delimex = eval(clone(car(cdr(args))), env); + Sexpr* strex = eval(clone(car(args)), env); +#ifdef TYPECHECK + if(delimex->type != STR || strex->type != STR) { + ERR("strtok: ", "arguments not strings"); + sexpr_free(delimex); + sexpr_free(strex); + sexpr_free(b); + return cons(from_nil(), rest); + } +#endif + Sexpr* result = from_nil(); + char* delim = delimex->value.str; + char* str = strex->value.str; + char* out = strtok(str, delim); + while(out != NULL) { + result = cons(from_string(out), result); + out = strtok(NULL, delim); + } + Sexpr* newres = reverse(result); + sexpr_free(b); + sexpr_free(delimex); + sexpr_free(strex); + sexpr_free(result); + return cons(from_quote(newres), rest); +} + + Sexpr* x_strings_dispatch(Sexpr* b, Sexpr* rest, Sexpr* env) { uint64_t code = b->value.b.opcode & 0xff; switch(code) { @@ -165,6 +198,8 @@ Sexpr* x_strings_dispatch(Sexpr* b, Sexpr* rest, Sexpr* env) { return s_strexpand(b, rest, env); case STRINGS_SUBSTR: return s_substr(b, rest, env); + case STRINGS_STRTOK: + return s_strtok(b, rest, env); default: return from_nil(); } @@ -178,5 +213,6 @@ Sexpr* load_strings_env(Sexpr* env) { load_builtin(STRINGS_STRAT_STR, (STRINGS_PREFIX << 8) | STRINGS_STRAT, env); load_builtin(STRINGS_STREXPAND_STR, (STRINGS_PREFIX << 8) | STRINGS_STREXPAND, env); load_builtin(STRINGS_SUBSTR_STR, (STRINGS_PREFIX << 8) | STRINGS_SUBSTR, env); + load_builtin(STRINGS_STRTOK_STR, (STRINGS_PREFIX << 8) | STRINGS_STRTOK, env); return env; } diff --git a/src/builtins/strings.h b/src/builtins/strings.h index 76c4e07..f5d2def 100644 --- a/src/builtins/strings.h +++ b/src/builtins/strings.h @@ -22,6 +22,9 @@ #define STRINGS_SUBSTR 0x04 #define STRINGS_SUBSTR_ARGS 2 #define STRINGS_SUBSTR_STR "substr" +#define STRINGS_STRTOK 0x05 +#define STRINGS_STRTOK_ARGS 2 +#define STRINGS_STRTOK_STR "strtok" Sexpr* x_strings_dispatch(Sexpr* s, Sexpr* rest, Sexpr* env); Sexpr* load_strings_env(Sexpr* env); diff --git a/src/parser.c b/src/parser.c index c35f9c7..fc06b25 100644 --- a/src/parser.c +++ b/src/parser.c @@ -19,6 +19,96 @@ Sexpr* append_fragment(Sexpr* tokens, char* tok_start, size_t currlen) { return cons(newtok, tokens); } +char* escapify(char* s) { + // converts strings that are indeed supposed to be 'strings' + // into properly escaped versions of themselves + // i.e. the two-character "\n" gets converted to the single-character newline + char* out = malloc(sizeof(char)*strlen(s)); + char* outidx = out; + char* sidx = s; // index on s + char tmp[3] = {0, 0, 0}; + while(*sidx != '\0') { + if(*sidx == '\\') { + switch(*(sidx+1)) { + case '\\': + *outidx = '\\'; + outidx++; + sidx++; + break; + case '?': + *outidx = '?'; + outidx++; + sidx++; + break; + case '"': + *outidx = '"'; + outidx++; + sidx++; + break; + case 'n': + *outidx = '\n'; + outidx++; + sidx++; + break; + case 'a': + *outidx = '\a'; + outidx++; + sidx++; + break; + case 'b': + *outidx = '\b'; + outidx++; + sidx++; + break; + case 'e': + *outidx = '\e'; + outidx++; + sidx++; + break; + case 'f': + *outidx = '\f'; + outidx++; + sidx++; + break; + case 'r': + *outidx = '\r'; + outidx++; + sidx++; + break; + case 't': + *outidx = '\t'; + outidx++; + sidx++; + break; + case 'v': + *outidx = '\v'; + outidx++; + sidx++; + break; + case '\'': + *outidx = '\''; + outidx++; + sidx++; + break; + case 'x': + tmp[0] = *(sidx+2); + tmp[1] = *(sidx+3); + *outidx = strtol(tmp, NULL, 0x10); + outidx++; + sidx+=3; + break; + default: + ERR("string literal escape: ", "not supported"); + } + } + else { + *outidx = *sidx; + outidx++; + } + sidx++; + } + return out; +} Sexpr* tokenize(char* s) { // note: also reverses @@ -46,7 +136,12 @@ Sexpr* tokenize(char* s) { // just mark the start, then iterate til close quote, then append? // but what if uncompleted quote? tok_start = s; - do {s++;} while(*s != '"' && *s != '\0'); + do { + s++; + if(*s == '\\') { + s += 2; + } + } while(*s != '"' && *s != '\0'); //s++; tokens = append_fragment(tokens, tok_start, (s + 1 - tok_start)); } @@ -97,7 +192,9 @@ Sexpr* vals_parse(Sexpr* tokens) { } else if (*s == '"') { // issa string s[strlen(s) - 1] = '\0'; // replaces close quote with - next = from_string(s + 1); + char* escaped = escapify(s+1); + next = from_string(escaped); + free(escaped); } else { next = from_sym(s); -- 2.39.2