aplikasi
editor teks (src/kernel/apps/editor.c)
nano-like text editor yang jalan di kernel mode.
text buffer
static char text_buf[4096]; // text buffer
static uint32_t buf_used = 0; // number of bytes of text_buf in use
static uint32_t cursor_pos = 0; // cursor position inside the buffer
key handling
| key | aksi |
|---|---|
| ctrl+x | exit (simpan dulu kalau ada perubahan) |
| ctrl+s | save ke file via vfs_write |
| arrow up/down | pindah baris |
| arrow left/right | pindah karakter |
| home | ke awal baris |
| end | ke akhir baris |
| backspace | delete karakter sebelumnya |
| delete | delete karakter di cursor |
| enter | insert newline |
| tab | insert 4 spasi |
| printable chars | insert karakter |
rendering
tampilkan banyak baris dari posisi scroll. setiap baris di-render langsung ke vga buffer. status bar di baris 24 (terakhir) menampilkan filename, line, column.
save
- open file dengan VFS_O_WRITE | VFS_O_CREATE | VFS_O_TRUNC
- vfs_write(fd, text_buf, buf_used)
- close fd
built-in assembler (src/kernel/apps/asm.c)
assembler x86 32-bit lengkap ~banyak baris. menghasilkan machine code yang langsung di-eksekusi.
alur assembler
asm_assemble(code, &exec_addr):
1. copy code ke input_buf[]
2. proses baris per baris:
- kalau ada ':' -> register label
- kalau ada instruksi -> parse mnemonic + operand -> emit byte
3. apply patches (forward reference)
4. alloc physical page di CODE_VIRT (0x40000000)
5. copy code_buf ke CODE_VIRT
6. return exec_addr = CODE_VIRT
tabel label
/* Label table: name and code position of each label. */
static struct {
char name[32]; // label name
int pos; // byte position in code_buf
} labels[MAX_LABELS]; // MAX_LABELS = 32
tabel patch
/* Patch table: references to labels that still have to be written into the emitted code. */
static struct {
int pos; /* first byte that needs to be patched */
int from; /* position after the instruction (used to compute the relative offset) */
char target[32]; /* name of the destination label */
int type; /* 0 = rel8, 1 = rel32, 2 = abs32 */
} patches[MAX_PATCHES]; // MAX_PATCHES = 128
tabel external symbol
/* One external symbol: a label name paired with the address of a kernel function. */
typedef struct {
const char *name; /* label name (e.g. "_printf") */
void *addr; /* address of the function in the kernel */
} extern_sym_t;
assembler mencari external symbol di tabel ini kalau label tidak ketemu di labels[].
instruksi didukung
- mov: reg/reg, reg/mem, mem/reg, imm, seg reg
- add/sub/cmp/xor/and/or: reg/reg, mem, imm
- jmp/je/jne/jg/jl/jge/jle: short (rel8) atau near (rel32)
- call: rel32 (termasuk ke external symbol)
- push: reg, imm (8/32-bit), label
- pop/inc/dec: reg
- imul: reg/reg
- idiv/cdq: reg
- div: unsigned divide
- neg: negate
- test: logical and, set flags
- movzx: mov zero-extend
- setcc: set byte on condition (sete, setne, setl, setg, setle, setge, setb, setbe, seta, setae)
- cmovcc: conditional move (cmove, cmovne, cmovl, cmovle, cmovg, cmovge)
- int: software interrupt
- nop/hlt/sti/cli: single byte
- pusha/popa: push/pop all registers
- db: data bytes (mixed format: string + angka)
times directive
/*
 * `times <count> <instr>` directive: emit one instruction <count> times.
 * Only `db` and `nop` are handled as the repeated instruction in this excerpt.
 *
 * NOTE(review): the push handler calls parse_int(ops, &imm) and tests its
 * result as a success flag, while here the result is used as the count —
 * confirm which calling convention parse_int really has.
 */
if (streq(mnem, "times")) {
    uint32_t count = parse_int(ops, ...);
    char *instr = skip_count(ops);

    /* parse the repeated instruction once */
    char sub_mnem[16];
    parse_mnemonic(instr, sub_mnem, &sub_ops);

    if (streq(sub_mnem, "db")) {
        /* parse the arguments once, emit them `count` times */
        uint8_t values[128];
        int nv = parse_db_args(sub_ops, values);
        for (uint32_t rep = 0; rep < count; rep++) {
            for (int vi = 0; vi < nv; vi++) {
                emit(values[vi]);
            }
        }
        return 0;
    }

    if (streq(sub_mnem, "nop")) {
        for (uint32_t rep = 0; rep < count; rep++) {
            emit(0x90); /* one-byte nop */
        }
        /* finished: return like the db branch instead of falling out of the `times` block */
        return 0;
    }
}
gen_push dengan label
sebelumnya push hanya mendukung register dan immediate. sekarang mendukung label:
uint32_t imm;
if (!parse_int(ops, &imm)) {
/* operand is not a number, so it may be a label */
int target = find_label(ops);
/* label already known: emit byte 0x68 followed by the 32-bit value CODE_VIRT + target */
if (target >= 0) { emit(0x68); emit32(CODE_VIRT + target); return 0; }
/* label not known yet: record a patch at code_len + 1 (the byte right after the 0x68 about to be emitted).
   NOTE(review): the last argument 2 presumably selects patch type abs32 — confirm add_patch's parameter order */
add_patch(code_len + 1, 0, ops, 2); /* forward reference */
/* placeholder operand of 0, to be overwritten when patches are applied */
emit(0x68); emit32(0);
return 0;
}
occ compiler (src/kernel/lib/)
compiler subset c yang terdiri dari beberapa komponen:
- lexer.c -> tokenizer
- parser.c -> ast builder
- codegen.c -> x86 assembly generator
lexer (lexer.c)
tokenizer yang membaca input karakter per karakter.
/*
 * Token kinds produced by the lexer.
 * Enumerator order defines the numeric values, so it must not change.
 */
typedef enum {
    /* end of input and generic tokens */
    TOKEN_EOF,
    TOKEN_IDENTIFIER,
    TOKEN_NUMBER,
    TOKEN_STRING,

    /* type keywords */
    TOKEN_INT,
    TOKEN_CHAR_TYPE,
    TOKEN_VOID,

    /* statement keywords */
    TOKEN_IF,
    TOKEN_ELSE,
    TOKEN_WHILE,
    TOKEN_FOR,
    TOKEN_RETURN,
    TOKEN_BREAK,
    TOKEN_CONTINUE,

    /* arithmetic operators */
    TOKEN_PLUS,
    TOKEN_MINUS,
    TOKEN_STAR,
    TOKEN_SLASH,
    TOKEN_PERCENT,

    /* assignment and comparison */
    TOKEN_ASSIGN,
    TOKEN_EQUAL,
    TOKEN_NOT_EQUAL,
    TOKEN_LESS,
    TOKEN_LESS_EQUAL,
    TOKEN_GREATER,
    TOKEN_GREATER_EQUAL,

    /* brackets and separators */
    TOKEN_LPAREN,
    TOKEN_RPAREN,
    TOKEN_LBRACE,
    TOKEN_RBRACE,
    TOKEN_LBRACKET,
    TOKEN_RBRACKET,
    TOKEN_SEMICOLON,
    TOKEN_COMMA,

    /* literal tokens */
    TOKEN_INTEGER_LITERAL,
    TOKEN_STRING_LITERAL,
    TOKEN_CHAR_LITERAL,
} token_type_t;
parser (parser.c)
recursive descent parser yang menghasilkan ast.
tipe ast node:
/*
 * Kinds of AST node built by the parser.
 * Enumerator order defines the numeric values, so it must not change.
 */
typedef enum {
    AST_PROGRAM,
    AST_FUNCTION,
    AST_DECLARATION,
    AST_ASSIGNMENT,
    AST_RETURN,
    AST_IF,
    AST_WHILE,
    AST_FOR,
    AST_CALL,
    AST_COMPOUND,
    AST_INTEGER_LITERAL,
    AST_STRING_LITERAL,
    AST_IDENTIFIER,
    AST_BINARY_OP,
    AST_ARRAY_SUBSCRIPT
} ast_type_t;
struktur node:
/* A single AST node; which fields are meaningful depends on `type`. */
typedef struct ast_node {
ast_type_t type;
struct ast_node *left; /* left operand / first child */
struct ast_node *right; /* right operand / sibling */
struct ast_node *condition;/* for if/while */
struct ast_node *body; /* for function, if, while, for */
struct ast_node *else_body;/* for if-else */
struct ast_node *params; /* for function parameters */
int int_value; /* NOTE(review): presumably the value of an integer literal — confirm */
char *string_value; /* name or text attached to the node (e.g. parameter name, array name) */
token_type_t op; /* NOTE(review): presumably the operator token of a binary op — confirm */
} ast_node_t;
parsing function dengan parameter
/*
 * Sketch of function parsing: return type, function name, parameter list,
 * then the compound body.
 * NOTE(review): this excerpt elides the declarations of `node` and `params`,
 * the step that consumes the parameter name, and the final return — confirm
 * against parser.c before relying on it.
 */
static ast_node_t *parse_function(parser_t *p) {
parser_eat(p, TOKEN_INT); /* or TOKEN_CHAR_TYPE or TOKEN_VOID */
/* optional * for a pointer type */
/* ... function name ... */
parser_eat(p, TOKEN_LPAREN);
/* parse parameters: type name (, type name)* */
while (p->current_token.type != TOKEN_RPAREN) {
parser_eat(p, TOKEN_INT); /* type */
if (p->current_token.type == TOKEN_STAR) parser_advance(p);
/* parameter name */
ast_node_t *param = ast_new_node(AST_DECLARATION);
param->string_value = p->current_token.value;
/* link into the parameter list */
if (p->current_token.type == TOKEN_COMMA) parser_advance(p);
}
parser_eat(p, TOKEN_RPAREN);
node->params = params;
node->body = parse_compound(p);
}
parsing for loop
/*
 * Parse `for (init; condition; increment) statement` into an AST_FOR node.
 *
 * Each of the three header clauses is optional. A clause that is present is
 * stored in left (init), condition, or right (increment); a clause that is
 * absent is not assigned by this function. The loop statement goes in body.
 */
static ast_node_t *parse_for(parser_t *p) {
    ast_node_t *loop = ast_new_node(AST_FOR);

    parser_eat(p, TOKEN_FOR);
    parser_eat(p, TOKEN_LPAREN);

    /* init clause: present unless the header starts with ';' */
    int has_init = (p->current_token.type != TOKEN_SEMICOLON);
    if (has_init) {
        loop->left = parse_expression(p);
    }
    parser_eat(p, TOKEN_SEMICOLON);

    /* condition clause: a missing condition means "always true" */
    int has_condition = (p->current_token.type != TOKEN_SEMICOLON);
    if (has_condition) {
        loop->condition = parse_expression(p);
    }
    parser_eat(p, TOKEN_SEMICOLON);

    /* increment clause: present unless ')' follows directly */
    int has_increment = (p->current_token.type != TOKEN_RPAREN);
    if (has_increment) {
        loop->right = parse_expression(p);
    }
    parser_eat(p, TOKEN_RPAREN);

    loop->body = parse_statement(p);
    return loop;
}
parsing array subscript
di parse_factor, setelah identifier, kalau ditemukan [:
/* the current token is '[': build an array-subscript node and return it */
if (p->current_token.type == TOKEN_LBRACKET) {
ast_node_t *sub = ast_new_node(AST_ARRAY_SUBSCRIPT);
sub->string_value = node->string_value; /* array name */
parser_advance(p); /* move past '[' */
sub->left = parse_expression(p); /* index */
parser_eat(p, TOKEN_RBRACKET);
return sub;
}
codegen (codegen.c)
walk ast node by node, generate x86 assembly.
gen_expression untuk AST_STRING_LITERAL
case AST_STRING_LITERAL:
    /*
     * Emit the string bytes inline in the generated code, guarded by a
     * jump, then load the address of the data into eax.
     */

    /* jump over the data so it is never executed */
    emit_str(cg, " jmp "); emit_line(cg, end_label);

    /* data label */
    emit_str(cg, str_label); emit_line(cg, ":");

    /* db 72, 101, 108, ..., 0 (each byte written as a number) */
    emit_str(cg, " db ");
    for (int si = 0; string_value[si]; si++) {
        char buf[16];
        /* go through unsigned char so bytes >= 0x80 are not passed as negative values */
        int_to_str((unsigned char)string_value[si], buf);
        emit_str(cg, buf);
        /* every data byte is followed by a separator; the terminator below closes the list */
        emit_str(cg, ", ");
    }
    /* terminating NUL; for an empty string this yields " db 0" instead of " db , 0" */
    emit_line(cg, "0");

    /* end label + load the address of the string */
    emit_str(cg, end_label); emit_line(cg, ":");
    emit_str(cg, " mov eax, "); emit_line(cg, str_label);
    break;
gen_function untuk parameter
/* for each parameter: allocate a variable and store the incoming stack argument into it */
/* NOTE(review): param_offset is declared outside this excerpt; presumably it starts at 8 — confirm */
ast_node_t *param = func->params;
while (param) {
int pv = alloc_var(param->string_value);
/* params are at [ebp + 8], [ebp + 12], ... */
char tmp_str[16]; int_to_str(param_offset, tmp_str);
emit_str(cg, " mov eax, [ebp + "); emit_str(cg, tmp_str); emit_line(cg, "]");
emit_store_var(cg, pv);
param = param->right; /* next parameter in the list */
param_offset += 4; /* advance to the next 4-byte argument slot */
}
gen_for
/* init: emitted once, before the loop label */
if (fornode->left) gen_expression(cg, fornode->left);
/* loop label: the jump at the bottom returns here, so the condition runs every iteration */
emit_str(cg, loop_label); emit_line(cg, ":");
/* condition; a missing condition is replaced by the constant 1 (always true) */
if (fornode->condition) gen_expression(cg, fornode->condition);
else emit_line(cg, " mov eax, 1");
/* exit the loop when eax is 0 */
emit_line(cg, " cmp eax, 0");
emit_str(cg, " je "); emit_line(cg, end_label);
/* body */
gen_statement(cg, fornode->body);
/* increment, then jump back to the loop label */
if (fornode->right) gen_expression(cg, fornode->right);
emit_str(cg, " jmp "); emit_line(cg, loop_label);
emit_str(cg, end_label); emit_line(cg, ":");
codegen untuk array subscript
case AST_ARRAY_SUBSCRIPT:
gen_expression(cg, expr->left); /* index */
emit_line(cg, " shl eax, 2"); /* index * 4 (sizeof int) */
int v = find_var(expr->string_value);
if (v >= 0) {
/* array as a local variable */
char b[16]; int_to_str(v, b);
/* ebx = address [ebp - v], then load the element at ebx + scaled index into eax */
emit_str(cg, " lea ebx, [ebp - "); emit_str(cg, b); emit_line(cg, "]");
emit_line(cg, " mov eax, [ebx + eax]");
}
/* NOTE(review): when find_var returns a negative value no load is emitted here, so eax keeps the scaled index — confirm this is intended */
break;
gen_function_call
push args dari kanan ke kiri (cdecl calling convention):
/* evaluate the arguments from last to first and push each result (taken from eax) */
/* NOTE(review): `n` and `arg_count` are declared outside this excerpt; presumably both hold the argument count — confirm */
for (int i = n - 1; i >= 0; i--) {
gen_expression(cg, args[i]);
emit_line(cg, " push eax");
}
/* the call target is the function name prefixed with an underscore */
emit_str(cg, " call _"); emit_str(cg, call->string_value); emit_line(cg, "");
/* caller removes the arguments afterwards: 4 bytes per argument */
if (arg_count > 0) {
char cleanup[32];
int_to_str(arg_count * 4, cleanup);
emit_str(cg, " add esp, "); emit_line(cg, cleanup);
}