Commit 33a0cdbf authored by Chris Müller
Browse files

move unicode routines from crystal to cherry project

parent 5b270621
......@@ -17,6 +17,7 @@
*/
#pragma once
#include "cherry/read.h"
#include "cherry/runtime.h"
/*
* Cherry programming language
* Copyright (C) 2013 Christoph Mueller
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "cherry/runtime.h"
#include <crystal/standard.h>
#include <stdlib.h>
#include <stdint.h>
/* Forward declaration of the buffer type referenced by the
 * `struct CryArray* buffer` member of struct org_cherry_context.
 * The tag has to be spelled CryArray to name that same type. */
struct CryArray;
/* Describes the source text a context reads from. */
struct org_cherry_file {
const char* filename; /* name given to org_cherry_context(); 0 when created via org_cherry_context_repl() */
const byte_t* begin; /* the `source` pointer given to org_cherry_context() */
};
/* Lexer/reader state that is passed to every org_cherry_* lexing routine. */
struct org_cherry_context {
struct org_cherry_file* file; /* source description; its filename is used by org_cherry_error() */
byte_t* src; /* position in the source text from which the lexer routines start reading */
struct CryArray* buffer; /* array to which the lexer routines append the bytes of the current token */
};
/* Creates a context for `source`. The implementation allocates the context
 * and its org_cherry_file with malloc and stores `filename` and `source` in
 * the file record. */
struct org_cherry_context* org_cherry_context(const byte_t* source, const char* filename);
/* Creates a context without a file name: forwards to
 * org_cherry_context(source, 0). */
struct org_cherry_context* org_cherry_context_repl(const byte_t* source);
/* The implementation asserts that `context` is not 0.
 * NOTE(review): presumably releases everything org_cherry_context()
 * allocated - the body is not visible here, confirm. */
void org_cherry_context_free(struct org_cherry_context* context);
/* Writes an error report to stderr: an "ERROR ..." prefix, then ": ", then
 * the printf-style message built from `format` and the variadic arguments,
 * then a newline. */
void org_cherry_error(struct org_cherry_context* context, const char* format, ...);
/* Token kinds returned by org_cherry_lex(). The enumerators carry no
 * explicit values, so their order defines their numeric values. */
enum org_cherry_tok {
TOK_EOF,
TOK_ROUNDLEFTBRACE,
TOK_ROUNDRIGHTBRACE,
TOK_SQUARELEFTBRACE,
TOK_SQUARERIGHTBRACE,
TOK_COMMENT,
TOK_DOT,
TOK_STRING,
TOK_CHAR,
/* Integer literal kinds; the number lexer starts from TOK_DEC and reports
 * TOK_HEX / TOK_BIN for hex / binary literals. */
TOK_HEX,
TOK_DEC,
TOK_OCT,
TOK_BIN,
TOK_FLOAT,
TOK_SYMBOL,
TOK_TRUE,
TOK_FALSE,
TOK_QUOTE
};
/* Returns a string for `token`; the implementation switches over the
 * enumerators of enum org_cherry_tok. */
const byte_t* org_cherry_tok_to_string(enum org_cherry_tok token);
/* Returns the kind of a token read through `context`.
 * NOTE(review): the implementation is not visible here; presumably it
 * dispatches to the static lex_* routines and fills context->buffer. */
enum org_cherry_tok org_cherry_lex(struct org_cherry_context* context);
/* NOTE(review): presumably the text and the length of the most recently
 * lexed token (taken from context->buffer) - confirm against lexer.c. */
byte_t* org_cherry_token_string(struct org_cherry_context* context);
size_t org_cherry_token_length(struct org_cherry_context* context);
/* NOTE(review): presumably reads one value from `context`; the
 * implementation is not visible here. */
struct org_cherry_value* org_cherry_read(struct org_cherry_context* context);
......@@ -18,25 +18,9 @@
#pragma once
#include <stdint.h>
#include "cherry/standard.h"
#include <stdio.h>
#define TRUE 1
#define FALSE 0
typedef void* cy_pointer_t;
typedef void* cy_const_pointer_t;
typedef uint8_t cy_byte_t;
typedef uint16_t cy_word_t;
typedef uint32_t cy_double_t;
typedef uint64_t cy_quad_t;
typedef int64_t cy_fixnum_t;
typedef double cy_float_t;
typedef char cy_boolean_t;
typedef uint32_t cy_unicode_t;
#include <string.h>
enum org_cherry_value_type {
CY_EMPTYLIST,
......@@ -65,10 +49,10 @@ struct org_cherry_value {
cy_float_t float_value;
cy_byte_t* string_value;
const cy_byte_t* string_value;
cy_unicode_t char_value;
cy_byte_t* symbol_value;
const cy_byte_t* symbol_value;
};
};
......@@ -81,11 +65,21 @@ struct org_cherry_value {
/* Type predicates: non-zero when the meta.type tag of `value` matches.
 * The argument is parenthesized so that expression arguments such as
 * `p + 1` or `*pp` are grouped before `->` is applied. */
#define IS_CHAR(value) ((value)->meta.type == CY_CHAR)
#define IS_PAIR(value) ((value)->meta.type == CY_PAIR)
/* Number of bytes needed to store STR including its terminating NUL.
 * STR is cast to const char* because strlen() takes a char pointer while the
 * strings declared in this header are cy_byte_t (uint8_t) based; without the
 * cast every use triggers a pointer-signedness diagnostic. */
#define org_cherry_string_size(STR) (strlen((const char*)(STR)) + 1)
/* NOTE(review): the implementations of the declarations below are not part
 * of this header; the remarks are inferred from names and signatures only
 * and should be confirmed against the implementation. */
/* Presumably returns a copy of `str`. */
const cy_byte_t* org_cherry_string_dup(const cy_byte_t* str);
/* Each *_from_string function takes a byte string and returns a value
 * object; org_cherry_fixnum_from_string additionally takes the numeric
 * `base`. */
struct org_cherry_value* org_cherry_fixnum_from_string(const cy_byte_t* str, int base);
struct org_cherry_value* org_cherry_float_from_string(const cy_byte_t* str);
struct org_cherry_value* org_cherry_char_from_string(const cy_byte_t* str);
struct org_cherry_value* org_cherry_symbol_from_string(const cy_byte_t* str);
struct org_cherry_value* org_cherry_string_from_string(const cy_byte_t* str);
/* Takes no arguments and returns a value object. */
struct org_cherry_value* org_cherry_value_alloc(void);
struct org_cherry_value* org_cherry_symbol(cy_byte_t* symbol_value);
struct org_cherry_value* org_cherry_symbol(const cy_byte_t* symbol_value);
struct org_cherry_value* org_cherry_fixnum(cy_fixnum_t value);
struct org_cherry_value* org_cherry_float(cy_float_t float_value);
struct org_cherry_value* org_cherry_string(cy_byte_t* string_value);
struct org_cherry_value* org_cherry_string(const cy_byte_t* string_value);
struct org_cherry_value* org_cherry_char(cy_unicode_t char_value);
struct org_cherry_pair {
......@@ -113,7 +107,7 @@ struct org_cherry_value* org_cherry_primitive_add(struct org_cherry_pair* pai
struct org_cherry_symbollist;
struct org_cherry_symbollist* org_cherry_symbollist(void);
struct org_cherry_value* org_cherry_symbollist_get(struct org_cherry_symbollist* table, cy_byte_t* name);
struct org_cherry_value* org_cherry_symbollist_get(struct org_cherry_symbollist* table, const cy_byte_t* name);
struct org_cherry_symbollist* org_cherry_env_push(struct org_cherry_symbollist* env);
struct org_cherry_symbollist* org_cherry_env_pop(struct org_cherry_symbollist* env);
......@@ -132,17 +126,52 @@ extern struct org_cherry_value* org_cherry_symbol_if;
extern struct org_cherry_value* org_cherry_symbol_cond;
extern struct org_cherry_value* org_cherry_symbol_else;
// ----------------------------------------------------------------------------
// Structure
// ----------------------------------------------------------------------------
struct org_cherry_pair* org_cherry_application(struct org_cherry_value* operator, struct org_cherry_value* operands);
// ----------------------------------------------------------------------------
// Evaluation
// ----------------------------------------------------------------------------
#define IS_SELF_EVALUATING(value) \
(IS_BOOLEAN(value) || IS_FIXNUM(value) || IS_CHAR(value) || IS_STRING(value))
(IS_BOOLEAN(value) || IS_FIXNUM(value) || IS_CHAR(value) || IS_STRING(value) || IS_FLOAT(value))
// A variable is any value for which IS_SYMBOL holds.
#define IS_VARIABLE(value) \
IS_SYMBOL(value)
// True when `value` is a pair whose head is a symbol and compares equal (==)
// to `symbol`. `symbol` is parenthesized so that an expression argument
// cannot be regrouped by the surrounding `==` / `&&` operators.
#define IS_TAGGED(value, symbol) \
(IS_PAIR(value) && IS_SYMBOL(HEAD(value)) && HEAD(value) == (symbol))
// Special-form predicates: each one applies IS_TAGGED with the matching
// org_cherry_symbol_* object.
#define IS_QUOTE(value) \
IS_TAGGED(value, org_cherry_symbol_quote)
#define IS_DEFINE(value) \
IS_TAGGED(value, org_cherry_symbol_define)
#define IS_IF(value) \
IS_TAGGED(value, org_cherry_symbol_if)
#define IS_COND(value) \
IS_TAGGED(value, org_cherry_symbol_cond)
#define IS_ELSE(value) \
IS_TAGGED(value, org_cherry_symbol_else)
#define IS_LAMBDA(value) \
IS_TAGGED(value, org_cherry_symbol_lambda)
// Every pair satisfies this predicate, including the special forms above;
// NOTE(review): callers presumably test the special forms first - confirm.
#define IS_APPLICATION(value) \
IS_PAIR(value)
struct org_cherry_value* org_cherry_eval(struct org_cherry_symbollist* env, struct org_cherry_value* exp);
// ----------------------------------------------------------------------------
// Print
// ----------------------------------------------------------------------------
......
/*
* Cherry programming language
* Copyright (C) 2013 Christoph Mueller
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <stdint.h>
/* Boolean constants; see cy_boolean_t below. */
#define TRUE 1
#define FALSE 0
/* Basic type aliases shared by the cherry headers. */
typedef void* cy_pointer_t;
/* NOTE(review): despite its name this alias is a plain `void*`, identical to
 * cy_pointer_t; it does not add a const qualifier. Confirm whether
 * `const void*` was intended before relying on it. */
typedef void* cy_const_pointer_t;
typedef uint8_t cy_byte_t; /* 8-bit unsigned; used for string bytes */
typedef uint16_t cy_word_t; /* 16-bit unsigned */
typedef uint32_t cy_double_t; /* 32-bit unsigned integer (not a floating-point type) */
typedef uint64_t cy_quad_t; /* 64-bit unsigned */
typedef int64_t cy_fixnum_t; /* 64-bit signed integer */
typedef double cy_float_t;
typedef char cy_boolean_t;
typedef uint32_t cy_unicode_t; /* 32-bit value taken/returned by the org_cherry_unicode_* and org_cherry_utf8_* routines */
/*
* Copyright (c) 2012 Christoph Mueller <ruunhb@googlemail.com>
*
* Crystal is free software: you can redistribute it and/or modify
* it under the terms of the Lesser GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Crystal is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Lesser GNU General Public License for more details.
*
* You should have received a copy of the Lesser GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#pragma once
#include "cherry/standard.h"
#include <stdlib.h>
/* Character classification on a cy_unicode_t value. Each function returns an
 * int; the lexer uses the result as a truth value (e.g.
 * !org_cherry_unicode_isspace(ch)). */
int org_cherry_unicode_isblank(cy_unicode_t ch);
int org_cherry_unicode_isspace(cy_unicode_t ch);
int org_cherry_unicode_isalpha(cy_unicode_t ch);
int org_cherry_unicode_isalnum(cy_unicode_t ch);
int org_cherry_unicode_isdigit(cy_unicode_t ch);
int org_cherry_unicode_ishex(cy_unicode_t ch);
int org_cherry_unicode_isoct(cy_unicode_t ch);
int org_cherry_unicode_isprint(cy_unicode_t ch);
int org_cherry_unicode_isgraph(cy_unicode_t ch);
int org_cherry_unicode_ispunct(cy_unicode_t ch);
int org_cherry_unicode_iscntrl(cy_unicode_t ch);
/* Routines operating on cy_byte_t strings.
 * NOTE(review): implementations are not visible here; the names suggest
 * UTF-8 counterparts of the <string.h> functions (strchr, strrchr, strstr,
 * strlen, strcmp) - confirm. */
int org_cherry_utf8_validate(const cy_byte_t* str);
int org_cherry_utf8_compare(const cy_byte_t* str1, const cy_byte_t* str2);
cy_byte_t* org_cherry_utf8_chr(const cy_byte_t* str, cy_unicode_t character);
cy_byte_t* org_cherry_utf8_rchr(const cy_byte_t* str, cy_unicode_t character);
cy_byte_t* org_cherry_utf8_str(const cy_byte_t* str1, const cy_byte_t* str2);
size_t org_cherry_utf8_size(const cy_byte_t* str);
size_t org_cherry_utf8_len(const cy_byte_t* str);
/* The lexer reads the current character with org_cherry_utf8_get(p) and
 * moves on with p = org_cherry_utf8_next(p). */
cy_unicode_t org_cherry_utf8_get(const cy_byte_t* str);
/* The lexer passes this result as the number of bytes to append for the
 * character at `str`. NOTE(review): so it presumably returns the byte
 * length of that character's encoding - confirm. */
size_t org_cherry_utf8_codepoints(const cy_byte_t* str);
cy_byte_t* org_cherry_utf8_next(const cy_byte_t* str);
cy_byte_t* org_cherry_utf8_prev(const cy_byte_t* str);
......@@ -15,9 +15,9 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "cherry/lexer.h"
#include "cherry.h"
#include <crystal/unicode.h>
#include "cherry/unicode.h"
#include <crystal/array.h>
#include <stdarg.h>
#include <stdlib.h>
......@@ -26,11 +26,11 @@
#include <assert.h>
struct CyContext*
cy_context_new(const byte_t* source, const char* filename)
struct org_cherry_context*
org_cherry_context(const byte_t* source, const char* filename)
{
struct CyContext* context = malloc(sizeof(struct CyContext));
struct CyFile* file = malloc(sizeof(struct CyFile));
struct org_cherry_context* context = malloc(sizeof(struct org_cherry_context));
struct org_cherry_file* file = malloc(sizeof(struct org_cherry_file));
file->filename = filename;
file->begin = source;
......@@ -42,15 +42,15 @@ cy_context_new(const byte_t* source, const char* filename)
}
struct CyContext*
cy_context_repl_new(const byte_t* source)
struct org_cherry_context*
org_cherry_context_repl(const byte_t* source)
{
return cy_context_new(source, 0);
return org_cherry_context(source, 0);
}
void
cy_context_free(struct CyContext* context)
org_cherry_context_free(struct org_cherry_context* context)
{
assert(context != 0);
......@@ -61,18 +61,19 @@ cy_context_free(struct CyContext* context)
}
void
cy_error(struct CyContext* context, const char* format, ...)
org_cherry_error(struct org_cherry_context* context, const char* format, ...)
{
va_list args;
va_start(args, format);
if(context->file != 0)
if(context->file->filename != 0)
fprintf(stderr, "ERROR %s", context->file->filename);
else
fprintf(stderr, "ERROR");
fprintf(stderr, "ERROR (console)");
fprintf(stderr, ": ");
vfprintf(stderr, format, args);
fprintf(stderr, "\n");
va_end(args);
}
......@@ -80,13 +81,13 @@ cy_error(struct CyContext* context, const char* format, ...)
struct Mapping {
byte_t* string;
enum CyTOK value;
enum org_cherry_tok value;
};
const byte_t*
cy_tok_to_string(enum CyTOK token)
org_cherry_tok_to_string(enum org_cherry_tok token)
{
switch(token) {
case TOK_EOF:
......@@ -135,12 +136,12 @@ enum FloatState {
};
static enum CyTOK
lex_float(struct CyContext* context)
static enum org_cherry_tok
lex_float(struct org_cherry_context* context)
{
struct CryArray* buffer = context->buffer;
byte_t* p = context->src;
unicode_t ch = cry_utf8_get(p);
unicode_t ch = org_cherry_utf8_get(p);
enum FloatState state = FP_START;
......@@ -159,7 +160,7 @@ lex_float(struct CyContext* context)
if(ch == '+' || ch == '-')
state = FP_MINUSPLUS;
else if('0' > ch || ch > '9') {
cy_error(context, "Unexpected character found in float literal after +/-");
org_cherry_error(context, "Unexpected character found in float literal after +/-");
cry_array_append(buffer, "0", 1);
goto RETURN_TOKEN;
} else
......@@ -169,7 +170,7 @@ lex_float(struct CyContext* context)
case FP_DOT:
state = FP_DECIMAL;
if('0' > ch || ch > '9') {
cy_error(context, "Unexpected character found in float literal after dot");
org_cherry_error(context, "Unexpected character found in float literal after dot");
cry_array_append(buffer, "0", 1);
goto RETURN_TOKEN;
}
......@@ -185,7 +186,7 @@ lex_float(struct CyContext* context)
case FP_MINUSPLUS:
state = FP_FINAL;
if('0' > ch || ch > '9') {
cy_error(context, "Unexpected character found in float literal");
org_cherry_error(context, "Unexpected character found in float literal");
cry_array_append(buffer, "0", 1);
goto RETURN_TOKEN;
}
......@@ -200,8 +201,8 @@ lex_float(struct CyContext* context)
cry_array_append(buffer, p, 1);
NO_APPEND_BUFFER:
p = cry_utf8_next(p);
ch = cry_utf8_get(p);
p = org_cherry_utf8_next(p);
ch = org_cherry_utf8_get(p);
} while (ch != '\0');
......@@ -227,14 +228,14 @@ enum NumberState {
};
static enum CyTOK
lex_number(struct CyContext* context)
static enum org_cherry_tok
lex_number(struct org_cherry_context* context)
{
struct CryArray* buffer = context->buffer;
byte_t* p = context->src;
unicode_t ch = cry_utf8_get(p);
unicode_t ch = org_cherry_utf8_get(p);
enum CyTOK token;
enum org_cherry_tok token = TOK_DEC;
enum NumberState state = INT_START;
do {
......@@ -265,7 +266,7 @@ lex_number(struct CyContext* context)
case INT_HEX_WAIT:
if(('0' > ch || ch > '9') && ('A' > ch || ch > 'F')) {
cy_error(context, "Unexpected character found in hex literal");
org_cherry_error(context, "Unexpected character found in hex literal");
cry_array_append(buffer, "0", 1);
token = TOK_HEX;
goto RETURN_TOKEN;
......@@ -275,7 +276,7 @@ lex_number(struct CyContext* context)
case INT_BIN_WAIT:
if(ch != '0' && ch != '1') {
cy_error(context, "Unexpected character found in binary literal");
org_cherry_error(context, "Unexpected character found in binary literal");
cry_array_append(buffer, "0", 1);
token = TOK_BIN;
goto RETURN_TOKEN;
......@@ -317,8 +318,8 @@ lex_number(struct CyContext* context)
cry_array_append(buffer, p, 1);
NO_APPEND_BUFFER:
p = cry_utf8_next(p);
ch = cry_utf8_get(p);
p = org_cherry_utf8_next(p);
ch = org_cherry_utf8_get(p);
} while(ch != '\0');
RETURN_TOKEN:
......@@ -335,20 +336,20 @@ enum CharState {
CHAR_UNICODE
};
static enum CyTOK
lex_character(struct CyContext* context)
static enum org_cherry_tok
lex_character(struct org_cherry_context* context)
{
assert(context != 0);
struct CryArray* buffer = context->buffer;
byte_t* p = cry_utf8_next(context->src);
unicode_t ch = cry_utf8_get(p);
byte_t* p = org_cherry_utf8_next(context->src);
unicode_t ch = org_cherry_utf8_get(p);
enum CharState state = CHAR_EAT;
int unicount = 0;
cry_array_append(buffer, "\\", 1);
while(!cry_unicode_isspace(ch) && ch != '\0') {
while(!org_cherry_unicode_isspace(ch) && ch != '\0') {
switch(state) {
case CHAR_EAT:
if(ch == 'u') {
......@@ -366,7 +367,7 @@ lex_character(struct CyContext* context)
goto RETURN_TOKEN;
if(('0' > ch || ch > '9') && ('A' > ch || ch > 'F')) {
cy_error(context, "Unexpected hex sequence in unicode escape sequence");
org_cherry_error(context, "Unexpected hex sequence in unicode escape sequence");
cry_array_append(buffer, "0", 1);
goto NO_BUFFER_APPEND;
}
......@@ -376,21 +377,21 @@ lex_character(struct CyContext* context)
break;
}
cry_array_append(buffer, p, cry_utf8_codepoints(p));
cry_array_append(buffer, p, org_cherry_utf8_codepoints(p));
NO_BUFFER_APPEND:
p = cry_utf8_next(p);
ch = cry_utf8_get(p);
p = org_cherry_utf8_next(p);
ch = org_cherry_utf8_get(p);
}
RETURN_TOKEN:
if(state == CHAR_UNICODE && unicount > 0) {
cy_error(context, "Improper unicode escape sequence found in character literal");
org_cherry_error(context, "Improper unicode escape sequence found in character literal");
while(unicount-- > 0)
cry_array_append(buffer, "0", 1);
} else if(cry_array_size(buffer) == 1) {
cy_error(context, "No character symbol is given in character literal");
org_cherry_error(context, "No character symbol is given in character literal");
cry_array_append(buffer, "0", 1);
}
......@@ -410,15 +411,15 @@ enum StringState {
};
static enum CyTOK
lex_string(struct CyContext* context)
static enum org_cherry_tok
lex_string(struct org_cherry_context* context)
{
assert(context != 0);
struct CryArray* buffer = context->buffer;
enum StringState state = STR_EAT;
byte_t* p = cry_utf8_next(context->src);
unicode_t ch = cry_utf8_get(p);
byte_t* p = org_cherry_utf8_next(context->src);
unicode_t ch = org_cherry_utf8_get(p);
int unicount = 0;
while(ch != '\0') {
......@@ -430,7 +431,7 @@ lex_string(struct CyContext* context)
state = STR_FINAL;
goto NO_BUFFER_APPEND;
} else if(ch == '\r' || ch == '\n') {
cy_error(context, "Unexpected newline/carriage return found in string literal");
org_cherry_error(context, "Unexpected newline/carriage return found in string literal");
state = STR_FINAL;
goto RETURN_TOKEN;
}
......@@ -452,7 +453,7 @@ lex_string(struct CyContext* context)
state = STR_UNICODE;
break;
default:
cy_error(context, "Unknown escape sequence found in this string literal");
org_cherry_error(context, "Unknown escape sequence found in this string literal");
state = STR_EAT;
cry_array_append(buffer, "t", 1);
goto NO_BUFFER_APPEND;
......@@ -465,7 +466,7 @@ lex_string(struct CyContext* context)
state = STR_EAT;
if(('0' > ch || ch > '9') && ('A' > ch || ch > 'F')) {
cy_error(context, "Unexpected hex number in unicode escape sequence found");
org_cherry_error(context, "Unexpected hex number in unicode escape sequence found");
cry_array_append(buffer, "0", 1);
goto NO_BUFFER_APPEND;
}
......@@ -475,16 +476,16 @@ lex_string(struct CyContext* context)
goto RETURN_TOKEN;
}
cry_array_append(buffer, p, cry_utf8_codepoints(p));
cry_array_append(buffer, p, org_cherry_utf8_codepoints(p));
NO_BUFFER_APPEND:
p = cry_utf8_next(p);
ch = cry_utf8_get(p);
p = org_cherry_utf8_next(p);
ch = org_cherry_utf8_get(p);
}
RETURN_TOKEN:
if(state != STR_FINAL) {
cy_error(context, "Unexpected end of file found in unclosed string");
org_cherry_error(context, "Unexpected end of file found in unclosed string");
while(unicount-- > 0)
cry_array_append(buffer, "0", 1);
......@@ -501,18 +502,18 @@ RETURN_TOKEN:
}
static enum CyTOK
lex_comment(struct CyContext* context)
static enum org_cherry_tok
lex_comment(struct org_cherry_context* context)
{
struct CryArray* buffer = context->buffer;
byte_t* p = context->src;
unicode_t ch = cry_utf8_get(p);
unicode_t ch = org_cherry_utf8_get(p);
while(ch != '\0' && ch != '\r' && ch != '\n') {
cry_array_append(buffer, p, cry_utf8_codepoints(p));
cry_array_append(buffer, p, org_cherry_utf8_codepoints(p));
p = cry_utf8_next(p);
ch = cry_utf8_get(p);
p = org_cherry_utf8_next(p);
ch = org_cherry_utf8_get(p);
}
cry_array_append(buffer, "\0", 1);
......@@ -524,24 +525,24 @@ lex_comment(struct CyContext* context)
static int