Post Scarcity
A prototype for a post scarcity programming environment
Loading...
Searching...
No Matches
read.c
Go to the documentation of this file.
1/*
2 * read.c
3 *
4 * First pass at a reader, for bootstrapping.
5 *
6 *
7 * (c) 2017 Simon Brooke <simon@journeyman.cc>
8 * Licensed under GPL version 2.0, or, at your option, any later version.
9 */
10
11#include <math.h>
12#include <stdbool.h>
13#include <stdio.h>
14/*
15 * wide characters
16 */
17#include <wchar.h>
18#include <wctype.h>
19
21#include "debug.h"
22#include "memory/dump.h"
23#include "memory/hashmap.h"
24#include "arith/integer.h"
25#include "ops/intern.h"
26#include "io/io.h"
27#include "ops/lispops.h"
28#include "arith/peano.h"
29#include "io/print.h"
30#include "arith/ratio.h"
31#include "io/read.h"
32#include "arith/real.h"
33#include "memory/vectorspace.h"
34
35// We can't, I think, use libreadline, because we read character by character,
36// not line by line, and because we use wide characters. So we're going to have
37// to reimplement it. So we're going to have to maintain history of the forms
38// (or strings, but I currently think forms). So we're going to have to be able
39// to detact special keys, particularly, at this stage, the uparrow and down-
40// arrow keys
41// #include <readline/readline.h>
42// #include <readline/history.h>
43
44
45/*
46 * for the time being things which may be read are:
47 * * strings
48 * * numbers - either integer, ratio or real
49 * * lists
50 * * maps
51 * * keywords
52 * * atoms
53 */
54
55struct cons_pointer read_number( struct stack_frame *frame,
56 struct cons_pointer frame_pointer,
57 URL_FILE * input, wint_t initial,
58 bool seen_period );
59struct cons_pointer read_list( struct stack_frame *frame,
60 struct cons_pointer frame_pointer,
61 struct cons_pointer env,
62 URL_FILE * input, wint_t initial );
63struct cons_pointer read_map( struct stack_frame *frame,
64 struct cons_pointer frame_pointer,
65 struct cons_pointer env,
66 URL_FILE * input, wint_t initial );
67struct cons_pointer read_string( URL_FILE * input, wint_t initial );
68struct cons_pointer read_symbol_or_key( URL_FILE * input, uint32_t tag,
69 wint_t initial );
70
71/**
72 * quote reader macro in C (!)
73 */
74struct cons_pointer c_quote( struct cons_pointer arg ) {
75 return make_cons( c_string_to_lisp_symbol( L"quote" ),
76 make_cons( arg, NIL ) );
77}
78
79/**
80 * Read a path macro from the stream. A path macro is expected to be
81 * 1. optionally a leading character such as '/' or '$', followed by
82 * 2. one or more keywords with leading colons (':') but no intervening spaces; or
83 * 3. one or more symbols separated by slashes; or
84 * 4. keywords (with leading colons) interspersed with symbols (prefixed by slashes).
85 */
86struct cons_pointer read_path( URL_FILE *input, wint_t initial,
87 struct cons_pointer q ) {
88 bool done = false;
89 struct cons_pointer prefix = NIL;
90
91 switch ( initial ) {
92 case '/':
93 prefix = make_cons( c_string_to_lisp_symbol( L"oblist" ), NIL );
94 break;
95 case '$':
96 case LSESSION:
97 prefix = c_string_to_lisp_symbol( L"session" );
98 break;
99 }
100
101 while ( !done ) {
102 wint_t c = url_fgetwc( input );
103 if ( iswblank( c ) || iswcntrl( c ) ) {
104 done = true;
105 } else if ( url_feof( input ) ) {
106 done = true;
107 } else {
108 switch ( c ) {
109 case ':':
111 ( input, KEYTV, url_fgetwc( input ) ), q );
112 break;
113 case '/':
115 ( c_string_to_lisp_symbol( L"quote" ),
117 ( input, SYMBOLTV,
118 url_fgetwc( input ) ),
119 NIL ) ), q );
120 break;
121 default:
122 if ( iswalpha( c ) ) {
124 ( input, SYMBOLTV, c ), q );
125 } else {
126 // TODO: it's really an error. Exception?
127 url_ungetwc( c, input );
128 done = true;
129 }
130 }
131 }
132 }
133
134 // right, we now have the path we want (reversed) in q.
135 struct cons_pointer r = NIL;
136
137 for ( struct cons_pointer p = q; !nilp( p ); p = c_cdr( p ) ) {
138 r = make_cons( c_car( p ), r );
139 }
140
141 dec_ref( q );
142
143 if ( !nilp( prefix ) ) {
144 r = make_cons( prefix, r );
145 }
146
147 return make_cons( c_string_to_lisp_symbol( L"->" ), r );
148}
149
150/**
151 * Read the next object on this input stream and return a cons_pointer to it,
152 * treating this initial character as the first character of the object
153 * representation.
154 */
156 struct cons_pointer frame_pointer,
157 struct cons_pointer env,
158 URL_FILE *input, wint_t initial ) {
159 debug_print( L"entering read_continuation\n", DEBUG_IO );
160 struct cons_pointer result = NIL;
161
162 wint_t c;
163
164 for ( c = initial;
165 c == '\0' || iswblank( c ) || iswcntrl( c );
166 c = url_fgetwc( input ) );
167
168 if ( url_feof( input ) ) {
169 result =
171 ( L"End of file while reading" ), frame_pointer );
172 } else {
173 switch ( c ) {
174 case ';':
175 for ( c = url_fgetwc( input ); c != '\n';
176 c = url_fgetwc( input ) );
177 /* skip all characters from semi-colon to the end of the line */
178 break;
179 case EOF:
181 ( L"End of input while reading" ),
182 frame_pointer );
183 break;
184 case '\'':
185 result =
187 ( frame, frame_pointer, env, input,
188 url_fgetwc( input ) ) );
189 break;
190 case '(':
191 result =
192 read_list( frame, frame_pointer, env, input,
193 url_fgetwc( input ) );
194 break;
195 case '{':
196 result = read_map( frame, frame_pointer, env, input,
197 url_fgetwc( input ) );
198 break;
199 case '"':
200 result = read_string( input, url_fgetwc( input ) );
201 break;
202 case '-':{
203 wint_t next = url_fgetwc( input );
204 url_ungetwc( next, input );
205 if ( iswdigit( next ) ) {
206 result =
207 read_number( frame, frame_pointer, input, c,
208 false );
209 } else {
210 result = read_symbol_or_key( input, SYMBOLTV, c );
211 }
212 }
213 break;
214 case '.':
215 {
216 wint_t next = url_fgetwc( input );
217 if ( iswdigit( next ) ) {
218 url_ungetwc( next, input );
219 result =
220 read_number( frame, frame_pointer, input, c,
221 true );
222 } else if ( iswblank( next ) ) {
223 /* dotted pair. \todo this isn't right, we
224 * really need to backtrack up a level. */
225 result =
226 read_continuation( frame, frame_pointer, env,
227 input, url_fgetwc( input ) );
229 ( L"read_continuation: dotted pair; read cdr ",
230 DEBUG_IO );
231 } else {
232 read_symbol_or_key( input, SYMBOLTV, c );
233 }
234 }
235 break;
236 case ':':
237 result =
238 read_symbol_or_key( input, KEYTV, url_fgetwc( input ) );
239 break;
240 case '/':
241 {
242 /* slash followed by whitespace is legit provided it's not
243 * preceded by anything - it's the division operator. Otherwise,
244 * it's terminal, probably part of a path, and needs pushed back.
245 */
246 wint_t cn = url_fgetwc( input );
247 if ( nilp( result )
248 && ( iswblank( cn ) || iswcntrl( cn ) ) ) {
249 url_ungetwc( cn, input );
250 result = make_symbol_or_key( c, NIL, SYMBOLTV );
251 } else {
252 url_ungetwc( cn, input );
253 result = read_path( input, c, NIL );
254 }
255 }
256 break;
257 case '$':
258 case LSESSION:
259 result = read_path( input, c, NIL );
260 break;
261 default:
262 if ( iswdigit( c ) ) {
263 result =
264 read_number( frame, frame_pointer, input, c, false );
265 } else if ( iswprint( c ) ) {
266 result = read_symbol_or_key( input, SYMBOLTV, c );
267 } else {
268 result =
270 ( L"Unrecognised start of input character" ),
271 make_string( c, NIL ) ),
272 frame_pointer );
273 }
274 break;
275 }
276 }
277 debug_print( L"read_continuation returning\n", DEBUG_IO );
278 debug_dump_object( result, DEBUG_IO );
279
280 return result;
281}
282
283/**
284 * read a number from this input stream, given this initial character.
285 * \todo Need to do a lot of inc_ref and dec_ref, to make sure the
286 * garbage is collected.
287 */
289 struct cons_pointer frame_pointer,
290 URL_FILE *input,
291 wint_t initial, bool seen_period ) {
292 debug_print( L"entering read_number\n", DEBUG_IO );
293
294 struct cons_pointer result = acquire_integer( 0, NIL );
295 /* \todo we really need to be getting `base` from a privileged Lisp name -
296 * and it should be the same privileged name we use when writing numbers */
297 struct cons_pointer base = acquire_integer( 10, NIL );
298 struct cons_pointer dividend = NIL;
299 int places_of_decimals = 0;
300 wint_t c;
301 bool neg = initial == btowc( '-' );
302
303 if ( neg ) {
304 initial = url_fgetwc( input );
305 }
306
307 debug_printf( DEBUG_IO, L"read_number starting '%c' (%d)\n", initial,
308 initial );
309
310 for ( c = initial; iswdigit( c )
311 || c == LPERIOD || c == LSLASH || c == LCOMMA;
312 c = url_fgetwc( input ) ) {
313 switch ( c ) {
314 case LPERIOD:
315 if ( seen_period || !nilp( dividend ) ) {
317 ( L"Malformed number: too many periods" ),
318 frame_pointer );
319 } else {
320 debug_print( L"read_number: decimal point seen\n",
321 DEBUG_IO );
322 seen_period = true;
323 }
324 break;
325 case LSLASH:
326 if ( seen_period || !nilp( dividend ) ) {
328 ( L"Malformed number: dividend of rational must be integer" ),
329 frame_pointer );
330 } else {
331 debug_print( L"read_number: ratio slash seen\n",
332 DEBUG_IO );
333 dividend = result;
334
335 result = acquire_integer( 0, NIL );
336 // If I do replace_integer_p here instead of acquire_integer,
337 // and thus reclaim the garbage, I get a regression. Dom't yet
338 // know why.
339 }
340 break;
341 case LCOMMA:
342 // silently ignore comma.
343 break;
344 default:
345 result = add_integers( multiply_integers( result, base ),
346 acquire_integer( ( int ) c -
347 ( int ) '0', NIL ) );
348
350 L"read_number: added character %c, result now ",
351 c );
352 debug_print_object( result, DEBUG_IO );
353 debug_print( L"\n", DEBUG_IO );
354
355 if ( seen_period ) {
356 places_of_decimals++;
357 }
358 }
359 }
360
361 /*
362 * push back the character read which was not a digit
363 */
364 url_ungetwc( c, input );
365
366 if ( seen_period ) {
367 debug_print( L"read_number: converting result to real\n", DEBUG_IO );
368 struct cons_pointer div = make_ratio( result,
369 acquire_integer( powl
371 ( base ),
372 places_of_decimals ),
373 NIL ), true);
374 inc_ref( div );
375
376 result = make_real( to_long_double( div ) );
377
378 dec_ref( div );
379 } else if ( integerp( dividend ) ) {
380 debug_print( L"read_number: converting result to ratio\n", DEBUG_IO );
381 result = make_ratio( dividend, result, true );
382 }
383
384 if ( neg ) {
385 debug_print( L"read_number: converting result to negative\n",
386 DEBUG_IO );
387
388 result = negative( result );
389 }
390
391 debug_print( L"read_number returning\n", DEBUG_IO );
392 debug_dump_object( result, DEBUG_IO );
393
394 return result;
395}
396
397/**
398 * Read a list from this input stream, which no longer contains the opening
399 * left parenthesis.
400 */
401struct cons_pointer read_list( struct stack_frame *frame,
402 struct cons_pointer frame_pointer,
403 struct cons_pointer env,
404 URL_FILE *input, wint_t initial ) {
405 struct cons_pointer result = NIL;
406 wint_t c;
407
408 if ( initial != ')' ) {
410 L"read_list starting '%C' (%d)\n", initial, initial );
411 struct cons_pointer car =
412 read_continuation( frame, frame_pointer, env, input,
413 initial );
414
415 /* skip whitespace */
416 for ( c = url_fgetwc( input );
417 iswblank( c ) || iswcntrl( c ); c = url_fgetwc( input ) );
418
419 if ( c == LPERIOD ) {
420 /* might be a dotted pair; indeed, if we rule out numbers with
421 * initial periods, it must be a dotted pair. \todo Ought to check,
422 * howerver, that there's only one form after the period. */
423 result =
424 make_cons( car,
425 c_car( read_list( frame,
426 frame_pointer,
427 env,
428 input, url_fgetwc( input ) ) ) );
429 } else {
430 result =
431 make_cons( car,
432 read_list( frame, frame_pointer, env, input, c ) );
433 }
434 } else {
435 debug_print( L"End of list detected\n", DEBUG_IO );
436 }
437
438 return result;
439}
440
441struct cons_pointer read_map( struct stack_frame *frame,
442 struct cons_pointer frame_pointer,
443 struct cons_pointer env,
444 URL_FILE *input, wint_t initial ) {
445 // set write ACL to true whilst creating to prevent GC churn
446 struct cons_pointer result =
448 wint_t c = initial;
449
450 while ( c != LCBRACE ) {
451 struct cons_pointer key =
452 read_continuation( frame, frame_pointer, env, input, c );
453
454 /* skip whitespace */
455 for ( c = url_fgetwc( input ); iswblank( c ) || iswcntrl( c );
456 c = url_fgetwc( input ) );
457
458 struct cons_pointer value =
459 read_continuation( frame, frame_pointer, env, input, c );
460
461 /* skip commaa and whitespace at this point. */
462 for ( c = url_fgetwc( input );
463 c == LCOMMA || iswblank( c ) || iswcntrl( c );
464 c = url_fgetwc( input ) );
465
466 result =
467 hashmap_put( result, key,
468 eval_form( frame, frame_pointer, value, env ) );
469 }
470
471 // default write ACL for maps should be NIL.
472 pointer_to_vso( result )->payload.hashmap.write_acl = NIL;
473
474 return result;
475}
476
477/**
478 * Read a string. This means either a string delimited by double quotes
479 * (is_quoted == true), in which case it may contain whitespace but may
480 * not contain a double quote character (unless escaped), or one not
481 * so delimited in which case it may not contain whitespace (unless escaped)
482 * but may contain a double quote character (probably not a good idea!)
483 */
484struct cons_pointer read_string( URL_FILE *input, wint_t initial ) {
485 struct cons_pointer cdr = NIL;
486 struct cons_pointer result;
487 switch ( initial ) {
488 case '\0':
489 result = NIL;
490 break;
491 case '"':
492 /* making a string of the null character means we can have an empty
493 * string. Just returning NIL here would make an empty string
494 * impossible. */
495 result = make_string( '\0', NIL );
496 break;
497 default:
498 result =
499 make_string( initial,
500 read_string( input, url_fgetwc( input ) ) );
501 break;
502 }
503
504 return result;
505}
506
507struct cons_pointer read_symbol_or_key( URL_FILE *input, uint32_t tag,
508 wint_t initial ) {
509 struct cons_pointer cdr = NIL;
510 struct cons_pointer result;
511 switch ( initial ) {
512 case '\0':
513 result = make_symbol_or_key( initial, NIL, tag );
514 break;
515 case '"':
516 case '\'':
517 /* unwise to allow embedded quotation marks in symbols */
518 case ')':
519 case ':':
520 case '/':
521 /*
522 * symbols and keywords may not include right-parenthesis,
523 * slashes or colons.
524 */
525 result = NIL;
526 /*
527 * push back the character read
528 */
529 url_ungetwc( initial, input );
530 break;
531 default:
532 if ( iswprint( initial )
533 && !iswblank( initial ) ) {
534 result =
535 make_symbol_or_key( initial,
536 read_symbol_or_key( input,
537 tag,
539 ( input ) ), tag );
540 } else {
541 result = NIL;
542 /*
543 * push back the character read
544 */
545 url_ungetwc( initial, input );
546 }
547 break;
548 }
549
550 debug_print( L"read_symbol_or_key returning\n", DEBUG_IO );
551 debug_dump_object( result, DEBUG_IO );
552
553 return result;
554}
555
556/**
557 * Read the next object on this input stream and return a cons_pointer to it.
558 */
559struct cons_pointer read( struct
561 *frame, struct cons_pointer frame_pointer,
562 struct cons_pointer env, URL_FILE *input ) {
563 return read_continuation( frame, frame_pointer, env, input,
564 url_fgetwc( input ) );
565}
struct cons_pointer make_symbol_or_key(wint_t c, struct cons_pointer tail, uint32_t tag)
Construct a symbol or keyword from the character c and this tail.
struct cons_pointer make_string(wint_t c, struct cons_pointer tail)
Construct a string from the character c and this tail.
struct cons_pointer c_string_to_lisp_string(wchar_t *string)
Return a lisp string representation of this wide character string.
struct cons_pointer inc_ref(struct cons_pointer pointer)
increment the reference count of the object at this cons pointer.
struct cons_pointer c_car(struct cons_pointer arg)
Implementation of car in C.
struct cons_pointer dec_ref(struct cons_pointer pointer)
Decrement the reference count of the object at this cons pointer.
struct cons_pointer make_cons(struct cons_pointer car, struct cons_pointer cdr)
Construct a cons cell from this pair of pointers.
#define KEYTV
The string KEYW, considered as an unsigned int.
#define SYMBOLTV
The string SYMB, considered as an unsigned int.
#define NIL
a cons pointer which points to the special NIL cell
struct cons_pointer c_cdr(struct cons_pointer arg)
Implementation of cdr in C.
#define nilp(conspoint)
true if conspoint points to the special cell NIL, else false (there should only be one of these so it...
#define TRUE
a cons pointer which points to the special T cell
struct cons_pointer c_string_to_lisp_symbol(wchar_t *symbol)
Return a lisp symbol representation of this wide character string.
struct cons_pointer c_car(struct cons_pointer arg)
Implementation of car in C.
struct cons_pointer dec_ref(struct cons_pointer pointer)
Decrement the reference count of the object at this cons pointer.
#define integerp(conspoint)
true if conspoint points to an integer cell, else false
struct cons_pointer make_cons(struct cons_pointer car, struct cons_pointer cdr)
Construct a cons cell from this pair of pointers.
An indirect pointer to a cons cell.
A stack frame.
void debug_dump_object(struct cons_pointer pointer, int level)
Like dump_object, q.v., but protected by the verbosity mechanism.
Definition debug.c:136
void debug_printf(int level, wchar_t *format,...)
wprintf adapted for the debug logging system.
Definition debug.c:101
void debug_print(wchar_t *message, int level)
print this debug message to stderr, if verbosity matches level.
Definition debug.c:41
void debug_print_object(struct cons_pointer pointer, int level)
print the object indicated by this pointer to stderr, if verbosity matches level.
Definition debug.c:119
#define DEBUG_IO
Print messages debugging input/output operations.
Definition debug.h:56
int url_feof(URL_FILE *file)
Definition fopen.c:286
#define DFLT_HASHMAP_BUCKETS
Definition hashmap.h:18
struct cons_pointer multiply_integers(struct cons_pointer a, struct cons_pointer b)
Return a pointer to an integer representing the product of the integers pointed to by a and b.
Definition integer.c:343
struct cons_pointer add_integers(struct cons_pointer a, struct cons_pointer b)
Return a pointer to an integer representing the sum of the integers pointed to by a and b.
Definition integer.c:224
struct cons_pointer acquire_integer(int64_t value, struct cons_pointer more)
Supply small valued integers from the small integer cache, if available.
Definition integer.c:129
struct cons_pointer hashmap_put(struct cons_pointer mapp, struct cons_pointer key, struct cons_pointer val)
Store this val as the value of this key in this hashmap mapp.
Definition intern.c:385
struct cons_pointer make_hashmap(uint32_t n_buckets, struct cons_pointer hash_fn, struct cons_pointer write_acl)
Make a hashmap with this number of buckets, using this hash_fn.
Definition intern.c:137
wint_t url_ungetwc(wint_t wc, URL_FILE *input)
Definition io.c:220
wint_t url_fgetwc(URL_FILE *input)
get one wide character from the buffer.
Definition io.c:151
struct cons_pointer throw_exception(struct cons_pointer message, struct cons_pointer frame_pointer)
Throw an exception.
Definition lispops.c:1208
struct cons_pointer eval_form(struct stack_frame *parent, struct cons_pointer parent_pointer, struct cons_pointer form, struct cons_pointer env)
Useful building block; evaluate this single form in the context of this parent stack frame and this e...
Definition lispops.c:64
struct cons_pointer negative(struct cons_pointer arg)
return a cons_pointer indicating a number which is the 0 - the number indicated by arg.
Definition peano.c:489
long double to_long_double(struct cons_pointer arg)
Return the closest possible binary64 representation to the value of this arg, expected to be an integ...
Definition peano.c:124
struct cons_pointer make_ratio(struct cons_pointer dividend, struct cons_pointer divisor, bool simplify)
Construct a ratio frame from this dividend and divisor, expected to be integers, in the context of th...
Definition ratio.c:312
struct cons_pointer c_quote(struct cons_pointer arg)
quote reader macro in C (!)
Definition read.c:74
struct cons_pointer read_string(URL_FILE *input, wint_t initial)
Read a string.
Definition read.c:484
struct cons_pointer read_number(struct stack_frame *frame, struct cons_pointer frame_pointer, URL_FILE *input, wint_t initial, bool seen_period)
read a number from this input stream, given this initial character.
Definition read.c:288
struct cons_pointer read_path(URL_FILE *input, wint_t initial, struct cons_pointer q)
Read a path macro from the stream.
Definition read.c:86
struct cons_pointer read_continuation(struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer env, URL_FILE *input, wint_t initial)
Read the next object on this input stream and return a cons_pointer to it, treating this initial char...
Definition read.c:155
struct cons_pointer read_list(struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer env, URL_FILE *input, wint_t initial)
Read a list from this input stream, which no longer contains the opening left parenthesis.
Definition read.c:401
struct cons_pointer read(struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer env, URL_FILE *input)
Read the next object on this input stream and return a cons_pointer to it.
Definition read.c:559
struct cons_pointer read_symbol_or_key(URL_FILE *input, uint32_t tag, wint_t initial)
Definition read.c:507
struct cons_pointer read_map(struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer env, URL_FILE *input, wint_t initial)
Definition read.c:441
#define LCOMMA
read.c
Definition read.h:17
#define LCBRACE
Definition read.h:21
#define LPERIOD
Definition read.h:18
#define LSLASH
Definition read.h:19
#define LSESSION
Definition read.h:23
struct cons_pointer make_real(long double value)
Allocate a real number cell representing this value and return a cons pointer to it.
Definition real.c:21
#define pointer_to_vso(pointer)
given a pointer to a vector space object, return the object.
Definition vectorspace.h:55