read.c 19.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/*
 * Cherry programming language
 * Copyright (C) 2013 Christoph Mueller
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
18
#include "cherry.h"
19

20
#include "cherry/unicode.h"
21
#include "cherry/array.h"
22
23
24
25
26
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
27
#include <gc.h>
28

29
struct org_cherry_context*   
30
org_cherry_context(const cy_byte_t* source, const char* filename, cy_flags_t flags)
31
{
32
	struct org_cherry_context* context = GC_MALLOC(sizeof(struct org_cherry_context));
33
34
	context->filename = filename;
	context->begin = source;
35
	context->src = (cy_byte_t*) source;
36
	context->line = 1;
37
	context->buffer = org_cherry_array_new(sizeof(cy_byte_t));
38
	context->flags = flags;
39
40
41
42
43

	return context;
}


44
struct org_cherry_context*   
45
org_cherry_context_repl(const cy_byte_t* source)
46
{
47
	return org_cherry_context(source, 0, CY_SUPRESS_COMMENTS);
48
49
50
}


51
52
53
54
55
56
57
58
59
60
void
org_cherry_context_repl_set_source(struct org_cherry_context* c, const cy_byte_t* source)
{
	assert(c != 0);
	c->begin = source;
	c->src = source;
	c->line++;
}


61
void                
62
org_cherry_error(struct org_cherry_context* context, const char* format, ...)
63
64
65
66
{
	va_list args;
	va_start(args, format);

67
	if(context->filename != 0)
68
		fprintf(stderr, "ERROR (%s:%d)", context->filename, context->line); 
69
	else
70
		fprintf(stderr, "ERROR (console:%d)", context->line);
71

72
	fprintf(stderr, " --- ");
73
	vfprintf(stderr, format, args);
74
	fprintf(stderr, "\n");
75
76
77
78

	va_end(args);
}

Chris Müller's avatar
Chris Müller committed
79
80

struct Mapping {
81
	cy_byte_t* string;
82
	enum org_cherry_tok value;
Chris Müller's avatar
Chris Müller committed
83
84
85
86
};



87
const cy_byte_t*      
88
org_cherry_tok_to_string(enum org_cherry_tok token)
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
{
	switch(token) {
		case TOK_EOF:
			return "EOF";
		case TOK_COMMENT:
			return "COMMENT";
		case TOK_ROUNDLEFTBRACE:
			return "(";
		case TOK_ROUNDRIGHTBRACE:
			return ")";
		case TOK_SQUARELEFTBRACE:
			return "[";
		case TOK_SQUARERIGHTBRACE:
			return "]";
		case TOK_STRING:
			return "STRING";
		case TOK_DOT:
			return ".";
		case TOK_CHAR:
			return "CHAR";
		case TOK_HEX:
			return "HEX";
		case TOK_DEC:
			return "DEC";
		case TOK_OCT:
			return "OCT";
		case TOK_BIN:
			return "BIN";
		case TOK_FLOAT:
			return "FLOAT";
Chris Müller's avatar
Chris Müller committed
119
120
		case TOK_SYMBOL:
			return "SYMBOL";
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
	}

	return "UNKNOWN";
}


enum FloatState {
	FP_START, 
	FP_EXPONENT, 
	FP_DOT, 
	FP_DECIMAL, 
	FP_MINUSPLUS, 
	FP_FINAL
};


137
138
static enum org_cherry_tok
lex_float(struct org_cherry_context* context)
139
{
140
	struct org_cherry_array* buffer = context->buffer;
141
142
	const cy_byte_t* p = context->src;
	cy_unicode_t ch = org_cherry_utf8_get(p);
143
144
145
146
147
148
149
150
151
152

	enum FloatState state = FP_START;

	do {
		switch(state) {
			case FP_START:
				if(ch == 'e' || ch == 'E')
					state = FP_EXPONENT;
				else if(ch == '.')
					state = FP_DOT;
153
154
                else
                    assert(0);
155
156
157
158
159
160
				break;

			case FP_EXPONENT:
				if(ch == '+' || ch == '-')
					state = FP_MINUSPLUS;
				else if('0' > ch || ch > '9') {
161
					org_cherry_error(context, "Unexpected character found in float literal after +/-");
162
					org_cherry_array_append(buffer, "0", 1);
163
164
165
166
167
168
169
170
					goto RETURN_TOKEN;
				} else
					state = FP_FINAL;
				break;

			case FP_DOT:
				state = FP_DECIMAL;
				if('0' > ch || ch > '9') {
171
					org_cherry_error(context, "Unexpected character found in float literal after dot");
172
					org_cherry_array_append(buffer, "0", 1);
173
174
175
176
177
178
179
180
181
182
183
184
185
186
					goto RETURN_TOKEN;
				}
				break;

			case FP_DECIMAL:
				if(ch == 'e' || ch == 'E') 
					state = FP_EXPONENT;
				else if('0' > ch || ch > '9')
					goto RETURN_TOKEN;
				break;

			case FP_MINUSPLUS:
				state = FP_FINAL;
				if('0' > ch || ch > '9') {
187
					org_cherry_error(context, "Unexpected character found in float literal");
188
					org_cherry_array_append(buffer, "0", 1);
189
190
191
192
193
194
195
196
197
198
					goto RETURN_TOKEN;
				}
				break;

			case FP_FINAL:
				if('0' > ch || ch > '9')
					goto RETURN_TOKEN;
				break;
		}

199
		org_cherry_array_append(buffer, p, 1);
200
201

NO_APPEND_BUFFER:
202
203
		p = org_cherry_utf8_next(p);
		ch = org_cherry_utf8_get(p);
204
205
206
207
208
209

	} while (ch != '\0');

RETURN_TOKEN:
	context->src = p;

210
	org_cherry_array_append(buffer, "\0", 1);
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228

	return TOK_FLOAT;
}


enum NumberState {
	INT_START, 
	INT_BASE, 
	INT_BIN_WAIT, 
	INT_HEX_WAIT, 
	INT_OCT_WAIT, 
	INT_BIN_READ, 
	INT_HEX_READ, 
	INT_OCT_READ, 
	INT_DEC_READ
};


229
230
static enum org_cherry_tok
lex_number(struct org_cherry_context* context)
231
{
232
	struct org_cherry_array* buffer = context->buffer;
233
234
	const cy_byte_t* p = context->src;
	cy_unicode_t ch = org_cherry_utf8_get(p);
235

236
	enum org_cherry_tok token = TOK_DEC;
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
	enum NumberState state = INT_START;

	do {
		switch(state) {
			case INT_START:
				if(ch == '0') 
					state = INT_BASE;
				else
					state = INT_DEC_READ;
				break;

			case INT_BASE:
				if(ch == 'x') {
					state = INT_HEX_WAIT;
				} else if(ch == 'b') {
					state = INT_BIN_WAIT;
				} else if('0' <= ch && ch <= '7') {
					state = INT_OCT_READ;
					break;
				} else if(ch == '.' || ch == 'e' || ch == 'E') {
					context->src = p;
					return lex_float(context);
				} else {
					token = TOK_DEC;
					goto RETURN_TOKEN;
				}
				break;

			case INT_HEX_WAIT:
				if(('0' > ch || ch > '9') && ('A' > ch || ch > 'F')) {
267
					org_cherry_error(context, "Unexpected character found in hex literal");
268
					org_cherry_array_append(buffer, "0", 1);
269
270
271
272
273
274
275
276
					token = TOK_HEX;
					goto RETURN_TOKEN;
				}
				state = INT_HEX_READ;
				break;

			case INT_BIN_WAIT:
				if(ch != '0' && ch != '1') {
277
					org_cherry_error(context, "Unexpected character found in binary literal");
278
					org_cherry_array_append(buffer, "0", 1);
279
280
281
282
283
284
285
286
					token = TOK_BIN;
					goto RETURN_TOKEN;
				}
				state = INT_BIN_READ;
				break;

			case INT_BIN_READ:
				token = TOK_BIN;
Chris Müller's avatar
Chris Müller committed
287
				if(ch != '0' && ch != '1')
288
289
290
291
292
					goto RETURN_TOKEN;
				break;

			case INT_OCT_READ:
				token = TOK_OCT;
Chris Müller's avatar
Chris Müller committed
293
				if('0' > ch || ch > '7')
294
295
296
297
298
299
300
301
					goto RETURN_TOKEN;
				break;

			case INT_DEC_READ:
				token = TOK_DEC;
				if(ch == '.' || ch == 'e' || ch == 'E') {
					context->src = p;
					return lex_float(context);
Chris Müller's avatar
Chris Müller committed
302
				} else if('0' > ch || ch > '9')
303
304
305
306
307
					goto RETURN_TOKEN;
				break;

			case INT_HEX_READ:
				token = TOK_HEX;
Chris Müller's avatar
Chris Müller committed
308
				if(('0' > ch || ch > '9') && ('A' > ch || ch > 'F'))
309
310
311
312
313
314
315
					goto RETURN_TOKEN;
				break;

			default:
				break;
		}

316
		org_cherry_array_append(buffer, p, 1);
317
318

NO_APPEND_BUFFER:
319
320
		p = org_cherry_utf8_next(p);
		ch = org_cherry_utf8_get(p);
321
322
323
324
	} while(ch != '\0');

RETURN_TOKEN:
	context->src = p;
325
	org_cherry_array_append(buffer, "\0", 1);
326
327
328
329
330

	return token;
}


Chris Müller's avatar
Chris Müller committed
331
332
333
334
335
336
enum CharState {
	CHAR_EAT,
	CHAR_ESCAPE,
	CHAR_UNICODE
};

337
338
static enum org_cherry_tok
lex_character(struct org_cherry_context* context)
Chris Müller's avatar
Chris Müller committed
339
340
341
{
	assert(context != 0);

342
	struct org_cherry_array* buffer = context->buffer;
343
344
	cy_byte_t* p = org_cherry_utf8_next(context->src);
	cy_unicode_t ch = org_cherry_utf8_get(p);
Chris Müller's avatar
Chris Müller committed
345
346
347
	enum CharState state = CHAR_EAT;
	int unicount = 0;

348
    org_cherry_array_append(buffer, "\\", 1);
Chris Müller's avatar
Chris Müller committed
349

350
	while(!org_cherry_unicode_isspace(ch) && ch != '\0') {
Chris Müller's avatar
Chris Müller committed
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
		switch(state) {
			case CHAR_EAT:
				if(ch == 'u') {
					state = CHAR_UNICODE;
					unicount = 4;
				} else if(ch == 'U') {
					state = CHAR_UNICODE;
					unicount = 6;					
				} else {
					state = CHAR_ESCAPE;
				}
				break;
			case CHAR_UNICODE:
				if(unicount-- == 0)
					goto RETURN_TOKEN;

				if(('0' > ch || ch > '9') && ('A' > ch || ch > 'F')) {
368
					org_cherry_error(context, "Unexpected hex sequence in unicode escape sequence");
369
					org_cherry_array_append(buffer, "0", 1);
Chris Müller's avatar
Chris Müller committed
370
371
372
373
374
375
376
377
					goto NO_BUFFER_APPEND;
				}
				break;

			case CHAR_ESCAPE:
				break;
		}

378
		org_cherry_array_append(buffer, p, org_cherry_utf8_codepoints(p));
Chris Müller's avatar
Chris Müller committed
379
380
NO_BUFFER_APPEND:

381
382
		p = org_cherry_utf8_next(p);
		ch = org_cherry_utf8_get(p);
Chris Müller's avatar
Chris Müller committed
383
384
385
386
	}

RETURN_TOKEN:
	if(state == CHAR_UNICODE && unicount > 0) {
387
		org_cherry_error(context, "Improper unicode escape sequence found in character literal");
Chris Müller's avatar
Chris Müller committed
388
389

		while(unicount-- > 0)
390
391
			org_cherry_array_append(buffer, "0", 1);
	} else if(org_cherry_array_size(buffer) == 1) {
392
		org_cherry_error(context, "No character symbol is given in character literal");
393
		org_cherry_array_append(buffer, "0", 1);
Chris Müller's avatar
Chris Müller committed
394
395
	}

396
	org_cherry_array_append(buffer, "\0", 1);
Chris Müller's avatar
Chris Müller committed
397
398
399
400
401
402
403

	context->src = p;

	return TOK_CHAR;
}


404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437

static enum org_cherry_tok
lex_raw_string(struct org_cherry_context* context)
{

	struct org_cherry_array* buffer = context->buffer;
	cy_byte_t* p = org_cherry_utf8_next(context->src);
	cy_unicode_t ch = org_cherry_utf8_get(p);
	int unicount = 0;

	while(ch != '\0') {
		if(ch == '~') {
			p++;
			goto RETURN_TOKEN;
		} else if(ch == '\r' || ch == '\n') {
			org_cherry_error(context, "Unexpected newline/carriage return found in raw string literal");
			goto RETURN_TOKEN;
		}

		org_cherry_array_append(buffer, p, org_cherry_utf8_codepoints(p));

		p = org_cherry_utf8_next(p);
		ch = org_cherry_utf8_get(p);
	}

RETURN_TOKEN:
	context->src = p;

	org_cherry_array_append(buffer, "\0", 1);

	return TOK_STRING;

}

Chris Müller's avatar
Chris Müller committed
438
439
enum StringState {
	STR_EAT, 
440
441
	STR_ESCAPE,
	STR_UNICODE,
Chris Müller's avatar
Chris Müller committed
442
443
444
445
	STR_FINAL
};


446
447
static enum org_cherry_tok
lex_string(struct org_cherry_context* context)
Chris Müller's avatar
Chris Müller committed
448
449
450
{
	assert(context != 0);

451
	struct org_cherry_array* buffer = context->buffer;
Chris Müller's avatar
Chris Müller committed
452
	enum StringState state = STR_EAT;
453
454
	cy_byte_t* p = org_cherry_utf8_next(context->src);
	cy_unicode_t ch = org_cherry_utf8_get(p);
Chris Müller's avatar
Chris Müller committed
455
456
457
458
459
460
461
462
463
464
465
	int unicount = 0;

	while(ch != '\0') {
		switch(state) {
			case STR_EAT:
				if(ch == '\\') 
					state = STR_ESCAPE;
				else if(ch == '\"') {
					state = STR_FINAL;
					goto NO_BUFFER_APPEND;
				} else if(ch == '\r' || ch == '\n') {
466
					org_cherry_error(context, "Unexpected newline/carriage return found in string literal");
Chris Müller's avatar
Chris Müller committed
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
					state = STR_FINAL;
					goto RETURN_TOKEN;
				}
				break;

			case STR_ESCAPE:
				switch(ch) {
					case 'a': case 'b': case 'f': case 'n': case 'r':
					case 't': case 'v': case '0': case '\"':
					case '\\':
						state = STR_EAT;
						break;
					case 'u':
						unicount = 4;
						state = STR_UNICODE;
						break;
					case 'U':
						unicount = 6;
						state = STR_UNICODE;
						break;
					default:
488
						org_cherry_error(context, "Unknown escape sequence found in this string literal");
Chris Müller's avatar
Chris Müller committed
489
						state = STR_EAT;
490
						org_cherry_array_append(buffer, "t", 1);
Chris Müller's avatar
Chris Müller committed
491
492
493
494
495
496
497
498
499
500
						goto NO_BUFFER_APPEND;

				}
				break;

			case STR_UNICODE:
				if(--unicount == 0)
					state = STR_EAT;

				if(('0' > ch || ch > '9') && ('A' > ch || ch > 'F')) {
501
					org_cherry_error(context, "Unexpected hex number in unicode escape sequence found");
502
					org_cherry_array_append(buffer, "0", 1);
Chris Müller's avatar
Chris Müller committed
503
504
505
506
507
508
509
510
					goto NO_BUFFER_APPEND;
				}
				break;

			case STR_FINAL:
				goto RETURN_TOKEN;
		}

511
		org_cherry_array_append(buffer, p, org_cherry_utf8_codepoints(p));
Chris Müller's avatar
Chris Müller committed
512
513

NO_BUFFER_APPEND:
514
515
		p = org_cherry_utf8_next(p);
		ch = org_cherry_utf8_get(p);
Chris Müller's avatar
Chris Müller committed
516
517
518
519
	}

RETURN_TOKEN:
	if(state != STR_FINAL) {
520
		org_cherry_error(context, "Unexpected end of file found in unclosed string");
Chris Müller's avatar
Chris Müller committed
521
522

		while(unicount-- > 0)
523
			org_cherry_array_append(buffer, "0", 1);
Chris Müller's avatar
Chris Müller committed
524
525

		if(state == STR_ESCAPE)
526
			org_cherry_array_append(buffer, "0", 1);
Chris Müller's avatar
Chris Müller committed
527
528
529
530
	}

	context->src = p;

531
	org_cherry_array_append(buffer, "\0", 1);
Chris Müller's avatar
Chris Müller committed
532
533
534
535
536

	return TOK_STRING;
}


537
538
static enum org_cherry_tok
lex_comment(struct org_cherry_context* context)
Chris Müller's avatar
Chris Müller committed
539
{
540
	struct org_cherry_array* buffer = context->buffer;
541
542
	const cy_byte_t* p = context->src;
	cy_unicode_t ch = org_cherry_utf8_get(p);
Chris Müller's avatar
Chris Müller committed
543
544

	while(ch != '\0' && ch != '\r' && ch != '\n') {
545
		org_cherry_array_append(buffer, p, org_cherry_utf8_codepoints(p));
Chris Müller's avatar
Chris Müller committed
546

547
548
		p = org_cherry_utf8_next(p);
		ch = org_cherry_utf8_get(p);
Chris Müller's avatar
Chris Müller committed
549
550
	}

551
	org_cherry_array_append(buffer, "\0", 1);
Chris Müller's avatar
Chris Müller committed
552

553
554
	context->src = p;

Chris Müller's avatar
Chris Müller committed
555
556
557
558
559
	return TOK_COMMENT;
}


static int 
560
is_symbol_character(cy_unicode_t ch)
Chris Müller's avatar
Chris Müller committed
561
{
562
	return org_cherry_unicode_isalnum(ch) || 
Chris Müller's avatar
Chris Müller committed
563
564
565
566
567
		ch == '+' || ch == '-' || ch == '*' || ch == '/' || ch == '%' ||
		ch == '<' || ch == '>' || ch == '=' || ch == '!' || ch == '?' ||
		ch == '#' || ch == ':' || ch == '.' || ch == '~' || ch == '_';
}

568
569
static enum org_cherry_tok
lex_symbol(struct org_cherry_context* context)
Chris Müller's avatar
Chris Müller committed
570
{
571
	struct org_cherry_array* buffer = context->buffer;
572
573
	const cy_byte_t* p = context->src;
	cy_unicode_t ch = org_cherry_utf8_get(p);
Chris Müller's avatar
Chris Müller committed
574
575

	while(is_symbol_character(ch)) {
576
		org_cherry_array_append(buffer, p, org_cherry_utf8_codepoints(p));
Chris Müller's avatar
Chris Müller committed
577

578
579
		p = org_cherry_utf8_next(p);
		ch = org_cherry_utf8_get(p);
Chris Müller's avatar
Chris Müller committed
580
581
582
583
	}

	context->src = p;

584
	org_cherry_array_append(buffer, "\0", 1);
Chris Müller's avatar
Chris Müller committed
585

586
	cy_byte_t* sym = org_cherry_array_get(buffer, 0);
Chris Müller's avatar
Chris Müller committed
587
588
589
590
591
592
593
594
595

	if (strcmp(sym, "true") == 0)
		return TOK_TRUE;
	else if(strcmp(sym, "false") == 0)
		return TOK_FALSE;
	else
		return TOK_SYMBOL;
}

596
#define LEX_RETURN(tok) \
597
    p = org_cherry_utf8_next(p); \
598
599
600
601
    context->src = p; \
    return tok;
    

602
603
enum org_cherry_tok      
org_cherry_lex(struct org_cherry_context* context)
604
605
606
{
	assert(context != 0);

Chris Müller's avatar
Chris Müller committed
607
	while(TRUE) {
608
		const cy_byte_t* p = context->src;
609

610
		org_cherry_array_clear(context->buffer);
611

612
		cy_unicode_t ch = org_cherry_utf8_get(p);
613
		
614
		if(org_cherry_unicode_isspace(ch) && (ch != '\n' || ch != '\r')) {
615
			p = org_cherry_utf8_next(p);
616
617
618
619
620
621
622
			context->src = p;
			continue;
		}

		switch(ch) {
			case '\0':
				return TOK_EOF;
623
624
625
626
627
628
629
630
631
632

			case '\r':
				if(*org_cherry_utf8_next(p) == '\n')
					p++;
				break;

			case '\n':
				context->line++;
				break;

633
			case '(':
634
				LEX_RETURN(TOK_ROUNDLEFTBRACE);
635
			case ')':
636
				LEX_RETURN(TOK_ROUNDRIGHTBRACE);
637
			case '[':
638
				LEX_RETURN(TOK_SQUARELEFTBRACE);
639
			case ']':
640
				LEX_RETURN(TOK_SQUARERIGHTBRACE);
641
			case '.':
642
643
644
645
				LEX_RETURN(TOK_DOT);
			case '\'':
				LEX_RETURN(TOK_QUOTE);
			case ';':
646
647
648
649
				if(context->flags & CY_SUPRESS_COMMENTS) {
					lex_comment(context);
					continue;
				}
650
				return lex_comment(context);
651
652

			case '0': case '1': case '2': case '3': case '4':
653
654
655
			case '5': case '6': case '7': case '8': case '9':
				return lex_number(context);
			case '\\':
Chris Müller's avatar
Chris Müller committed
656
657
658
				return lex_character(context);
			case '"':
				return lex_string(context);
659
660
661
			case '~':
				return lex_raw_string(context);

Chris Müller's avatar
Chris Müller committed
662
663
			case '+': case '-': case '*': case '/': case '^':
			case '<': case '>': case '=': case '?': case '!':
664
			case ':': case '_': case '%': case '#':
Chris Müller's avatar
Chris Müller committed
665
666
				return lex_symbol(context);
			default:
667
				if(org_cherry_unicode_isalpha(ch))
Chris Müller's avatar
Chris Müller committed
668
669
					return lex_symbol(context);
				else {
670
671
					org_cherry_error(context, "Unknown character found in input scanning");
					p = org_cherry_utf8_next(p);
Chris Müller's avatar
Chris Müller committed
672
				}
673
674
675
676
677
678
		}
	}

	return TOK_EOF;
}

679
const cy_byte_t*               
680
681
682
683
684
685
686
org_cherry_pos(struct org_cherry_context* context)
{
	assert(context != 0);
	return context->src;
}

void                        
687
org_cherry_rewind(struct org_cherry_context* context, const cy_byte_t* pos)
688
689
{
	assert(context != 0);
690
	assert(context->begin <= pos && pos <= context->src);
691
692
693
694
695

	context->src = pos;
}


696
const cy_byte_t*
697
org_cherry_token_string(struct org_cherry_context* context)
698
699
700
{
	assert(context->buffer != 0);

701
	return (const cy_byte_t*) org_cherry_array_get(context->buffer, 0);
702
703
704
705
}


size_t
706
org_cherry_token_length(struct org_cherry_context* context)
707
708
709
{
	assert(context->buffer != 0);

710
	return org_cherry_array_size(context->buffer);
711
}
712

713

714
715
716
static struct org_cherry_value*
org_cherry_read_pair(struct org_cherry_context* context)
{
717
	const cy_byte_t* pos;
718
719
720
721
722
723
	enum org_cherry_tok tok;
	struct org_cherry_value* head;
	struct org_cherry_value* tail;
	
	pos = org_cherry_pos(context);
	tok = org_cherry_lex(context);
724

725
	if(tok == TOK_ROUNDRIGHTBRACE || tok == TOK_EOF)
726
727
728
729
730
731
732
733
		return org_cherry_emptylist;

	org_cherry_rewind(context, pos);

	head = org_cherry_read(context);
	tail = org_cherry_read_pair(context);

	return (struct org_cherry_value*) org_cherry_list_cons(head, tail);
734
735
}

736
static struct org_cherry_value*
737
org_cherry_transform_try(struct org_cherry_value* value)
738
{
739
	struct org_cherry_value* code = org_cherry_list(org_cherry_symbol_begin, 0);
740
741
	struct org_cherry_value* handler = org_cherry_emptylist; 

742
743
744
745
746
	while(IS_NULL(handler) && !IS_NULL(value)) {
		if(IS_CATCH(HEAD(value)))
			handler = org_cherry_list_cons(org_cherry_symbol_lambda, TAIL(HEAD(value)));
		else {
			code = org_cherry_list_cons(HEAD(value), code);
747
748
749
750
751
		}

		value = TAIL(value);
	}

752
753
754
755
756
757
758
759
760
761
762
763
764
765
	return org_cherry_list(org_cherry_symbol_try, org_cherry_list_reverse(code), handler, 0);
}


static struct org_cherry_value*
org_cherry_transform_define(struct org_cherry_value* value)
{
	struct org_cherry_value* name = HEAD(HEAD(value));
	struct org_cherry_value* args = TAIL(HEAD(value));
	struct org_cherry_value* body = TAIL(value);

	return org_cherry_list(org_cherry_symbol_let, name,
			org_cherry_list_cons(org_cherry_symbol_lambda,
				org_cherry_list_cons(args, body)), 0);
766
767
768
}


769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
static struct org_cherry_value*
org_cherry_transform_if(struct org_cherry_value* value)
{
	struct org_cherry_value* pred = org_cherry_false;
	struct org_cherry_value* tru = org_cherry_false;
	struct org_cherry_value* fal = org_cherry_false;

	// check predicate
	if(IS_NULL(TAIL(value)))
		pred = TAIL(value) = org_cherry_list_cons(pred, org_cherry_emptylist);
	else 
		pred = TAIL(value);
	
	// check true case
	if(IS_NULL(TAIL(pred)))
		tru = TAIL(pred) = org_cherry_list_cons(tru, org_cherry_emptylist);
	else
		tru = TAIL(pred);

	// check false case
	if(IS_NULL(TAIL(tru)))
		fal = TAIL(tru) = org_cherry_list_cons(fal, org_cherry_emptylist);
	else
		fal = TAIL(tru);

	return value;
}


798
799
800
static struct org_cherry_value* 
org_cherry_transform(struct org_cherry_value* value)
{
801
802
	if(IS_TRY(value))
		return org_cherry_transform_try(TAIL(value));
803
	else if(IS_DEFINE(value))
804
		return org_cherry_transform_define(TAIL(value));
805
806
807
	else if(IS_IF(value))
		return org_cherry_transform_if(value);
	else
808
809
810
811
812
		return value;
}



813
814
815
816
817
struct org_cherry_value*
org_cherry_read(struct org_cherry_context* context)
{
	assert(context != 0);

818
819
	struct org_cherry_value* value = org_cherry_false;

820
821
822
823
824
825
	enum org_cherry_tok tok = org_cherry_lex(context);

	while(tok != TOK_EOF) {
		switch(tok) {
			case TOK_FALSE:
				return org_cherry_false;
826
827
828
829

			case TOK_TRUE:
				return org_cherry_true;

830
831
832
			case TOK_DOT:
				return org_cherry_dot;

833
834
			case TOK_HEX:
				return org_cherry_fixnum_from_string(org_cherry_token_string(context), 16);
835

836
837
			case TOK_DEC:
				return org_cherry_fixnum_from_string(org_cherry_token_string(context), 10);
838

839
840
			case TOK_OCT:
				return org_cherry_fixnum_from_string(org_cherry_token_string(context), 8);
841

842
843
			case TOK_BIN:
				return org_cherry_fixnum_from_string(org_cherry_token_string(context), 2);
844

845
846
			case TOK_FLOAT:
				return org_cherry_float_from_string(org_cherry_token_string(context));
847

848
849
			case TOK_STRING:
				return org_cherry_string_from_string(org_cherry_token_string(context));
850

851
852
            case TOK_CHAR:
                return org_cherry_char_from_string(org_cherry_token_string(context));
853

854
855
            case TOK_SYMBOL:
                return org_cherry_symbol_from_string(org_cherry_token_string(context));
856

857
            case TOK_ROUNDLEFTBRACE:
858
859
860
				value = org_cherry_read_pair(context);
				goto RETURN_VALUE;

861
			case TOK_QUOTE:
862
				return org_cherry_list_cons(org_cherry_symbol_quote, org_cherry_list_cons(org_cherry_read(context), org_cherry_emptylist));
863
864
865
866
867
868
869

			default:
				org_cherry_error(context, "bad input with token %s", 
						org_cherry_tok_to_string(tok));
		}
	}

870
871
	return 0;

872
873
RETURN_VALUE:
	return org_cherry_transform(value);
874
}