Post Scarcity 0.0.6
A prototype for a post scarcity programming environment
Loading...
Searching...
No Matches
read.c
Go to the documentation of this file.
1/*
2 * read.c
3 *
4 * First pass at a reader, for bootstrapping.
5 *
6 *
7 * (c) 2017 Simon Brooke <simon@journeyman.cc>
8 * Licensed under GPL version 2.0, or, at your option, any later version.
9 */
10
11#include <math.h>
12#include <stdbool.h>
13#include <stdio.h>
14/*
15 * wide characters
16 */
17#include <wchar.h>
18#include <wctype.h>
19
21#include "debug.h"
22#include "memory/dump.h"
23#include "memory/hashmap.h"
24#include "arith/integer.h"
25#include "ops/intern.h"
26#include "io/io.h"
27#include "ops/lispops.h"
28#include "arith/peano.h"
29#include "io/print.h"
30#include "arith/ratio.h"
31#include "io/read.h"
32#include "arith/real.h"
33#include "memory/vectorspace.h"
34
35// We can't, I think, use libreadline, because we read character by character,
36// not line by line, and because we use wide characters. So we're going to have
37// to reimplement it. So we're going to have to maintain history of the forms
38// (or strings, but I currently think forms). So we're going to have to be able
39// to detact special keys, particularly, at this stage, the uparrow and down-
40// arrow keys
41// #include <readline/readline.h>
42// #include <readline/history.h>
43
44
45/*
46 * for the time being things which may be read are:
47 * * strings
48 * * numbers - either integer, ratio or real
49 * * lists
50 * * maps
51 * * keywords
52 * * atoms
53 */
54
55struct cons_pointer read_number( struct stack_frame *frame,
56 struct cons_pointer frame_pointer,
57 URL_FILE * input, wint_t initial,
58 bool seen_period );
59struct cons_pointer read_list( struct stack_frame *frame,
60 struct cons_pointer frame_pointer,
61 struct cons_pointer env,
62 URL_FILE * input, wint_t initial );
63struct cons_pointer read_map( struct stack_frame *frame,
64 struct cons_pointer frame_pointer,
65 struct cons_pointer env,
66 URL_FILE * input, wint_t initial );
67struct cons_pointer read_string( URL_FILE * input, wint_t initial );
68struct cons_pointer read_symbol_or_key( URL_FILE * input, uint32_t tag,
69 wint_t initial );
70
71/**
72 * quote reader macro in C (!)
73 */
74struct cons_pointer c_quote( struct cons_pointer arg ) {
75 return make_cons( c_string_to_lisp_symbol( L"quote" ),
76 make_cons( arg, NIL ) );
77}
78
79/**
80 * Read a path macro from the stream. A path macro is expected to be
81 * 1. optionally a leading character such as '/' or '$', followed by
82 * 2. one or more keywords with leading colons (':') but no intervening spaces; or
83 * 3. one or more symbols separated by slashes; or
84 * 4. keywords (with leading colons) interspersed with symbols (prefixed by slashes).
85 */
86struct cons_pointer read_path( URL_FILE *input, wint_t initial,
87 struct cons_pointer q ) {
88 bool done = false;
89 struct cons_pointer prefix = NIL;
90
91 switch ( initial ) {
92 case '/':
93 prefix = make_cons( c_string_to_lisp_symbol( L"oblist" ), NIL );
94 break;
95 case '$':
96 case LSESSION:
97 prefix = c_string_to_lisp_symbol( L"session" );
98 break;
99 }
100
101 while ( !done ) {
102 wint_t c = url_fgetwc( input );
103 if ( iswblank( c ) || iswcntrl( c ) ) {
104 done = true;
105 } else if ( url_feof( input ) ) {
106 done = true;
107 } else {
108 switch ( c ) {
109 case ':':
111 ( input, KEYTV, url_fgetwc( input ) ), q );
112 break;
113 case '/':
115 ( c_string_to_lisp_symbol( L"quote" ),
117 ( input, SYMBOLTV,
118 url_fgetwc( input ) ),
119 NIL ) ), q );
120 break;
121 default:
122 if ( iswalpha( c ) ) {
124 ( input, SYMBOLTV, c ), q );
125 } else {
126 // TODO: it's really an error. Exception?
127 url_ungetwc( c, input );
128 done = true;
129 }
130 }
131 }
132 }
133
134 // right, we now have the path we want (reversed) in q.
135 struct cons_pointer r = NIL;
136
137 for ( struct cons_pointer p = q; !nilp( p ); p = c_cdr( p ) ) {
138 r = make_cons( c_car( p ), r );
139 }
140
141 dec_ref( q );
142
143 if ( !nilp( prefix ) ) {
144 r = make_cons( prefix, r );
145 }
146
147 return make_cons( c_string_to_lisp_symbol( L"->" ), r );
148}
149
150/**
151 * Read the next object on this input stream and return a cons_pointer to it,
152 * treating this initial character as the first character of the object
153 * representation.
154 */
156 struct cons_pointer frame_pointer,
157 struct cons_pointer env,
158 URL_FILE *input, wint_t initial ) {
159 debug_print( L"entering read_continuation\n", DEBUG_IO );
160 struct cons_pointer result = NIL;
161
162 wint_t c;
163
164 for ( c = initial;
165 c == '\0' || iswblank( c ) || iswcntrl( c );
166 c = url_fgetwc( input ) );
167
168 if ( url_feof( input ) ) {
169 result =
172 ( L"End of file while reading" ), frame_pointer );
173 } else {
174 switch ( c ) {
175 case ';':
176 for ( c = url_fgetwc( input ); c != '\n';
177 c = url_fgetwc( input ) );
178 /* skip all characters from semi-colon to the end of the line */
179 break;
180 case EOF:
181 result = throw_exception( c_string_to_lisp_symbol( L"read" ),
183 ( L"End of input while reading" ),
184 frame_pointer );
185 break;
186 case '\'':
187 result =
189 ( frame, frame_pointer, env, input,
190 url_fgetwc( input ) ) );
191 break;
192 case '(':
193 result =
194 read_list( frame, frame_pointer, env, input,
195 url_fgetwc( input ) );
196 break;
197 case '{':
198 result = read_map( frame, frame_pointer, env, input,
199 url_fgetwc( input ) );
200 break;
201 case '"':
202 result = read_string( input, url_fgetwc( input ) );
203 break;
204 case '-':{
205 wint_t next = url_fgetwc( input );
206 url_ungetwc( next, input );
207 if ( iswdigit( next ) ) {
208 result =
209 read_number( frame, frame_pointer, input, c,
210 false );
211 } else {
212 result = read_symbol_or_key( input, SYMBOLTV, c );
213 }
214 }
215 break;
216 case '.':
217 {
218 wint_t next = url_fgetwc( input );
219 if ( iswdigit( next ) ) {
220 url_ungetwc( next, input );
221 result =
222 read_number( frame, frame_pointer, input, c,
223 true );
224 } else if ( iswblank( next ) ) {
225 /* dotted pair. \todo this isn't right, we
226 * really need to backtrack up a level. */
227 result =
228 read_continuation( frame, frame_pointer, env,
229 input, url_fgetwc( input ) );
231 ( L"read_continuation: dotted pair; read cdr ",
232 DEBUG_IO );
233 } else {
234 read_symbol_or_key( input, SYMBOLTV, c );
235 }
236 }
237 break;
238 case ':':
239 result =
240 read_symbol_or_key( input, KEYTV, url_fgetwc( input ) );
241 break;
242 case '/':
243 {
244 /* slash followed by whitespace is legit provided it's not
245 * preceded by anything - it's the division operator. Otherwise,
246 * it's terminal, probably part of a path, and needs pushed back.
247 */
248 wint_t cn = url_fgetwc( input );
249 if ( nilp( result )
250 && ( iswblank( cn ) || iswcntrl( cn ) ) ) {
251 url_ungetwc( cn, input );
252 result = make_symbol_or_key( c, NIL, SYMBOLTV );
253 } else {
254 url_ungetwc( cn, input );
255 result = read_path( input, c, NIL );
256 }
257 }
258 break;
259 case '$':
260 case LSESSION:
261 result = read_path( input, c, NIL );
262 break;
263 default:
264 if ( iswdigit( c ) ) {
265 result =
266 read_number( frame, frame_pointer, input, c, false );
267 } else if ( iswprint( c ) ) {
268 result = read_symbol_or_key( input, SYMBOLTV, c );
269 } else {
270 result =
273 ( L"Unrecognised start of input character" ),
274 make_string( c, NIL ) ),
275 frame_pointer );
276 }
277 break;
278 }
279 }
280 debug_print( L"read_continuation returning\n", DEBUG_IO );
281 debug_dump_object( result, DEBUG_IO );
282
283 return result;
284}
285
286/**
287 * read a number from this input stream, given this initial character.
288 * \todo Need to do a lot of inc_ref and dec_ref, to make sure the
289 * garbage is collected.
290 */
292 struct cons_pointer frame_pointer,
293 URL_FILE *input,
294 wint_t initial, bool seen_period ) {
295 debug_print( L"entering read_number\n", DEBUG_IO );
296
297 struct cons_pointer result = acquire_integer( 0, NIL );
298 /* \todo we really need to be getting `base` from a privileged Lisp name -
299 * and it should be the same privileged name we use when writing numbers */
300 struct cons_pointer base = acquire_integer( 10, NIL );
301 struct cons_pointer dividend = NIL;
302 int places_of_decimals = 0;
303 wint_t c;
304 bool neg = initial == btowc( '-' );
305
306 if ( neg ) {
307 initial = url_fgetwc( input );
308 }
309
310 debug_printf( DEBUG_IO, L"read_number starting '%c' (%d)\n", initial,
311 initial );
312
313 for ( c = initial; iswdigit( c )
314 || c == LPERIOD || c == LSLASH || c == LCOMMA;
315 c = url_fgetwc( input ) ) {
316 switch ( c ) {
317 case LPERIOD:
318 if ( seen_period || !nilp( dividend ) ) {
319 return throw_exception( c_string_to_lisp_symbol( L"read" ),
321 ( L"Malformed number: too many periods" ),
322 frame_pointer );
323 } else {
324 debug_print( L"read_number: decimal point seen\n",
325 DEBUG_IO );
326 seen_period = true;
327 }
328 break;
329 case LSLASH:
330 if ( seen_period || !nilp( dividend ) ) {
331 return throw_exception( c_string_to_lisp_symbol( L"read" ),
333 ( L"Malformed number: dividend of rational must be integer" ),
334 frame_pointer );
335 } else {
336 debug_print( L"read_number: ratio slash seen\n",
337 DEBUG_IO );
338 dividend = result;
339
340 result = acquire_integer( 0, NIL );
341 // If I do replace_integer_p here instead of acquire_integer,
342 // and thus reclaim the garbage, I get a regression. Dom't yet
343 // know why.
344 }
345 break;
346 case LCOMMA:
347 // silently ignore comma.
348 break;
349 default:
350 result = add_integers( multiply_integers( result, base ),
351 acquire_integer( ( int ) c -
352 ( int ) '0', NIL ) );
353
355 L"read_number: added character %c, result now ",
356 c );
357 debug_print_object( result, DEBUG_IO );
358 debug_print( L"\n", DEBUG_IO );
359
360 if ( seen_period ) {
361 places_of_decimals++;
362 }
363 }
364 }
365
366 /*
367 * push back the character read which was not a digit
368 */
369 url_ungetwc( c, input );
370
371 if ( seen_period ) {
372 debug_print( L"read_number: converting result to real\n", DEBUG_IO );
373 struct cons_pointer div = make_ratio( result,
374 acquire_integer( powl
376 ( base ),
377 places_of_decimals ),
378 NIL ), true );
379 inc_ref( div );
380
381 result = make_real( to_long_double( div ) );
382
383 dec_ref( div );
384 } else if ( integerp( dividend ) ) {
385 debug_print( L"read_number: converting result to ratio\n", DEBUG_IO );
386 result = make_ratio( dividend, result, true );
387 }
388
389 if ( neg ) {
390 debug_print( L"read_number: converting result to negative\n",
391 DEBUG_IO );
392
393 result = negative( result );
394 }
395
396 debug_print( L"read_number returning\n", DEBUG_IO );
397 debug_dump_object( result, DEBUG_IO );
398
399 return result;
400}
401
402/**
403 * Read a list from this input stream, which no longer contains the opening
404 * left parenthesis.
405 */
406struct cons_pointer read_list( struct stack_frame *frame,
407 struct cons_pointer frame_pointer,
408 struct cons_pointer env,
409 URL_FILE *input, wint_t initial ) {
410 struct cons_pointer result = NIL;
411 wint_t c;
412
413 if ( initial != ')' ) {
415 L"read_list starting '%C' (%d)\n", initial, initial );
416 struct cons_pointer car =
417 read_continuation( frame, frame_pointer, env, input,
418 initial );
419
420 /* skip whitespace */
421 for ( c = url_fgetwc( input );
422 iswblank( c ) || iswcntrl( c ); c = url_fgetwc( input ) );
423
424 if ( c == LPERIOD ) {
425 /* might be a dotted pair; indeed, if we rule out numbers with
426 * initial periods, it must be a dotted pair. \todo Ought to check,
427 * howerver, that there's only one form after the period. */
428 result =
429 make_cons( car,
430 c_car( read_list( frame,
431 frame_pointer,
432 env,
433 input, url_fgetwc( input ) ) ) );
434 } else {
435 result =
436 make_cons( car,
437 read_list( frame, frame_pointer, env, input, c ) );
438 }
439 } else {
440 debug_print( L"End of list detected\n", DEBUG_IO );
441 }
442
443 return result;
444}
445
446struct cons_pointer read_map( struct stack_frame *frame,
447 struct cons_pointer frame_pointer,
448 struct cons_pointer env,
449 URL_FILE *input, wint_t initial ) {
450 // set write ACL to true whilst creating to prevent GC churn
451 struct cons_pointer result =
453 wint_t c = initial;
454
455 while ( c != LCBRACE ) {
456 struct cons_pointer key =
457 read_continuation( frame, frame_pointer, env, input, c );
458
459 /* skip whitespace */
460 for ( c = url_fgetwc( input ); iswblank( c ) || iswcntrl( c );
461 c = url_fgetwc( input ) );
462
463 struct cons_pointer value =
464 read_continuation( frame, frame_pointer, env, input, c );
465
466 /* skip commaa and whitespace at this point. */
467 for ( c = url_fgetwc( input );
468 c == LCOMMA || iswblank( c ) || iswcntrl( c );
469 c = url_fgetwc( input ) );
470
471 result =
472 hashmap_put( result, key,
473 eval_form( frame, frame_pointer, value, env ) );
474 }
475
476 // default write ACL for maps should be NIL.
477 pointer_to_vso( result )->payload.hashmap.write_acl = NIL;
478
479 return result;
480}
481
482/**
483 * Read a string. This means either a string delimited by double quotes
484 * (is_quoted == true), in which case it may contain whitespace but may
485 * not contain a double quote character (unless escaped), or one not
486 * so delimited in which case it may not contain whitespace (unless escaped)
487 * but may contain a double quote character (probably not a good idea!)
488 */
489struct cons_pointer read_string( URL_FILE *input, wint_t initial ) {
490 struct cons_pointer cdr = NIL;
491 struct cons_pointer result;
492 switch ( initial ) {
493 case '\0':
494 result = NIL;
495 break;
496 case '"':
497 /* making a string of the null character means we can have an empty
498 * string. Just returning NIL here would make an empty string
499 * impossible. */
500 result = make_string( '\0', NIL );
501 break;
502 default:
503 result =
504 make_string( initial,
505 read_string( input, url_fgetwc( input ) ) );
506 break;
507 }
508
509 return result;
510}
511
512struct cons_pointer read_symbol_or_key( URL_FILE *input, uint32_t tag,
513 wint_t initial ) {
514 struct cons_pointer cdr = NIL;
515 struct cons_pointer result;
516 switch ( initial ) {
517 case '\0':
518 result = make_symbol_or_key( initial, NIL, tag );
519 break;
520 case '"':
521 case '\'':
522 /* unwise to allow embedded quotation marks in symbols */
523 case ')':
524 case ':':
525 case '/':
526 /*
527 * symbols and keywords may not include right-parenthesis,
528 * slashes or colons.
529 */
530 result = NIL;
531 /*
532 * push back the character read
533 */
534 url_ungetwc( initial, input );
535 break;
536 default:
537 if ( iswprint( initial )
538 && !iswblank( initial ) ) {
539 result =
540 make_symbol_or_key( initial,
541 read_symbol_or_key( input,
542 tag,
544 ( input ) ), tag );
545 } else {
546 result = NIL;
547 /*
548 * push back the character read
549 */
550 url_ungetwc( initial, input );
551 }
552 break;
553 }
554
555 debug_print( L"read_symbol_or_key returning\n", DEBUG_IO );
556 debug_dump_object( result, DEBUG_IO );
557
558 return result;
559}
560
561/**
562 * Read the next object on this input stream and return a cons_pointer to it.
563 */
564struct cons_pointer read( struct
566 *frame, struct cons_pointer frame_pointer,
567 struct cons_pointer env, URL_FILE *input ) {
568 return read_continuation( frame, frame_pointer, env, input,
569 url_fgetwc( input ) );
570}
struct cons_pointer make_symbol_or_key(wint_t c, struct cons_pointer tail, uint32_t tag)
Construct a symbol or keyword from the character c and this tail.
struct cons_pointer make_string(wint_t c, struct cons_pointer tail)
Construct a string from the character c and this tail.
struct cons_pointer c_string_to_lisp_string(wchar_t *string)
Return a lisp string representation of this wide character string.
struct cons_pointer inc_ref(struct cons_pointer pointer)
increment the reference count of the object at this cons pointer.
struct cons_pointer c_string_to_lisp_symbol(wchar_t *symbol)
Return a lisp symbol representation of this wide character string.
struct cons_pointer c_car(struct cons_pointer arg)
Implementation of car in C.
struct cons_pointer dec_ref(struct cons_pointer pointer)
Decrement the reference count of the object at this cons pointer.
struct cons_pointer make_cons(struct cons_pointer car, struct cons_pointer cdr)
Construct a cons cell from this pair of pointers.
#define KEYTV
The string KEYW, considered as an unsigned int.
#define SYMBOLTV
The string SYMB, considered as an unsigned int.
#define NIL
a cons pointer which points to the special NIL cell
struct cons_pointer c_cdr(struct cons_pointer arg)
Implementation of cdr in C.
#define nilp(conspoint)
true if conspoint points to the special cell NIL, else false (there should only be one of these so it...
#define TRUE
a cons pointer which points to the special T cell
struct cons_pointer c_string_to_lisp_symbol(wchar_t *symbol)
Return a lisp symbol representation of this wide character string.
struct cons_pointer c_car(struct cons_pointer arg)
Implementation of car in C.
struct cons_pointer dec_ref(struct cons_pointer pointer)
Decrement the reference count of the object at this cons pointer.
#define integerp(conspoint)
true if conspoint points to an integer cell, else false
struct cons_pointer make_cons(struct cons_pointer car, struct cons_pointer cdr)
Construct a cons cell from this pair of pointers.
An indirect pointer to a cons cell.
A stack frame.
void debug_dump_object(struct cons_pointer pointer, int level)
Like dump_object, q.v., but protected by the verbosity mechanism.
Definition debug.c:155
void debug_printf(int level, wchar_t *format,...)
wprintf adapted for the debug logging system.
Definition debug.c:120
void debug_print(wchar_t *message, int level)
print this debug message to stderr, if verbosity matches level.
Definition debug.c:60
void debug_print_object(struct cons_pointer pointer, int level)
print the object indicated by this pointer to stderr, if verbosity matches level.
Definition debug.c:138
#define DEBUG_IO
Print messages debugging input/output operations.
Definition debug.h:59
int url_feof(URL_FILE *file)
Definition fopen.c:286
#define DFLT_HASHMAP_BUCKETS
Definition hashmap.h:18
struct cons_pointer multiply_integers(struct cons_pointer a, struct cons_pointer b)
Return a pointer to an integer representing the product of the integers pointed to by a and b.
Definition integer.c:328
struct cons_pointer add_integers(struct cons_pointer a, struct cons_pointer b)
Return a pointer to an integer representing the sum of the integers pointed to by a and b.
Definition integer.c:224
struct cons_pointer acquire_integer(int64_t value, struct cons_pointer more)
Supply small valued integers from the small integer cache, if available.
Definition integer.c:129
struct cons_pointer hashmap_put(struct cons_pointer mapp, struct cons_pointer key, struct cons_pointer val)
Store this val as the value of this key in this hashmap mapp.
Definition intern.c:488
struct cons_pointer make_hashmap(uint32_t n_buckets, struct cons_pointer hash_fn, struct cons_pointer write_acl)
Make a hashmap with this number of buckets, using this hash_fn.
Definition intern.c:138
wint_t url_ungetwc(wint_t wc, URL_FILE *input)
Definition io.c:220
wint_t url_fgetwc(URL_FILE *input)
get one wide character from the buffer.
Definition io.c:151
struct cons_pointer eval_form(struct stack_frame *parent, struct cons_pointer parent_pointer, struct cons_pointer form, struct cons_pointer env)
Useful building block; evaluate this single form in the context of this parent stack frame and this e...
Definition lispops.c:65
struct cons_pointer throw_exception(struct cons_pointer location, struct cons_pointer payload, struct cons_pointer frame_pointer)
Throw an exception.
Definition lispops.c:1396
struct cons_pointer negative(struct cons_pointer arg)
return a cons_pointer indicating a number which is the 0 - the number indicated by arg.
Definition peano.c:538
long double to_long_double(struct cons_pointer arg)
Return the closest possible binary64 representation to the value of this arg, expected to be an integ...
Definition peano.c:165
struct cons_pointer make_ratio(struct cons_pointer dividend, struct cons_pointer divisor, bool simplify)
Construct a ratio frame from this dividend and divisor, expected to be integers, in the context of th...
Definition ratio.c:317
struct cons_pointer c_quote(struct cons_pointer arg)
quote reader macro in C (!)
Definition read.c:74
struct cons_pointer read_string(URL_FILE *input, wint_t initial)
Read a string.
Definition read.c:489
struct cons_pointer read_number(struct stack_frame *frame, struct cons_pointer frame_pointer, URL_FILE *input, wint_t initial, bool seen_period)
read a number from this input stream, given this initial character.
Definition read.c:291
struct cons_pointer read_path(URL_FILE *input, wint_t initial, struct cons_pointer q)
Read a path macro from the stream.
Definition read.c:86
struct cons_pointer read_continuation(struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer env, URL_FILE *input, wint_t initial)
Read the next object on this input stream and return a cons_pointer to it, treating this initial char...
Definition read.c:155
struct cons_pointer read_list(struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer env, URL_FILE *input, wint_t initial)
Read a list from this input stream, which no longer contains the opening left parenthesis.
Definition read.c:406
struct cons_pointer read(struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer env, URL_FILE *input)
Read the next object on this input stream and return a cons_pointer to it.
Definition read.c:564
struct cons_pointer read_symbol_or_key(URL_FILE *input, uint32_t tag, wint_t initial)
Definition read.c:512
struct cons_pointer read_map(struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer env, URL_FILE *input, wint_t initial)
Definition read.c:446
#define LCOMMA
read.c
Definition read.h:17
#define LCBRACE
Definition read.h:21
#define LPERIOD
Definition read.h:18
#define LSLASH
Definition read.h:19
#define LSESSION
Definition read.h:23
struct cons_pointer make_real(long double value)
Allocate a real number cell representing this value and return a cons pointer to it.
Definition real.c:21
#define pointer_to_vso(pointer)
given a pointer to a vector space object, return the object.
Definition vectorspace.h:55