中间代码生成实战(二)—— 控制流
核心知识点讲解
1. 控制流语句的中间代码表示
控制流语句(如 if-else、循环等)的中间代码生成与表达式不同,它主要依赖于标签和跳转指令:
- 标签:用于标识程序中的特定位置,通常用
L1,L2, ... 表示 - 跳转指令:
- 无条件跳转:
goto L - 条件跳转:
if x goto L或iffalse x goto L
- 无条件跳转:
2. 标签管理策略
在生成控制流语句的中间代码时,标签管理是一个关键问题:
- 自动生成:为每个需要的标签自动生成唯一的标签名
- 提前规划:在生成代码前,规划好需要哪些标签
- 作用域管理:确保标签在适当的作用域内使用,避免命名冲突
3. 控制流语句的三地址码生成规则
3.1 if-else 语句
对于 if (condition) { true_body } else { false_body },生成规则如下:
- 生成条件表达式的三地址码,结果存储在临时变量
t - 生成条件跳转指令:
iffalse t goto Lfalse - 生成 true_body 的三地址码
- 生成无条件跳转指令:
goto Lend - 生成标签:
Lfalse: - 生成 false_body 的三地址码
- 生成标签:
Lend:
3.2 while 循环
对于 while (condition) { body },生成规则如下:
- 生成标签:
Lstart: - 生成条件表达式的三地址码,结果存储在临时变量
t - 生成条件跳转指令:
iffalse t goto Lend - 生成 body 的三地址码
- 生成无条件跳转指令:
goto Lstart - 生成标签:
Lend:
3.3 for 循环
对于 for (init; condition; update) { body },可以转换为 while 循环的形式:
init;
while (condition) {
body;
update;
}然后按照 while 循环的规则生成三地址码。
4. 控制流图 (CFG) 生成
控制流语句的中间代码生成过程也可以看作是构建控制流图 (CFG) 的过程:
- 每个基本块对应一段顺序执行的代码
- 基本块之间通过跳转指令连接
- CFG 可以帮助我们可视化程序的控制流结构
实用案例分析
案例:实现控制流语句的中间代码生成
我们将扩展现有的表达式编译器,使其能够处理控制流语句。
步骤 1:扩展 AST 节点类型
首先,我们需要扩展 AST 节点类型,以支持控制流语句:
class IfStatement(ASTNode):
"""if-else 语句节点"""
def __init__(self, condition, true_body, false_body=None):
self.condition = condition
self.true_body = true_body
self.false_body = false_body
def __repr__(self):
if self.false_body:
return f"IfStatement({repr(self.condition)}, {repr(self.true_body)}, {repr(self.false_body)})"
else:
return f"IfStatement({repr(self.condition)}, {repr(self.true_body)})"
class WhileStatement(ASTNode):
"""while 循环节点"""
def __init__(self, condition, body):
self.condition = condition
self.body = body
def __repr__(self):
return f"WhileStatement({repr(self.condition)}, {repr(self.body)})"
class ForStatement(ASTNode):
"""for 循环节点"""
def __init__(self, init, condition, update, body):
self.init = init
self.condition = condition
self.update = update
self.body = body
def __repr__(self):
return f"ForStatement({repr(self.init)}, {repr(self.condition)}, {repr(self.update)}, {repr(self.body)})"
class Sequence(ASTNode):
"""语句序列节点"""
def __init__(self, statements):
self.statements = statements
def __repr__(self):
return f"Sequence({repr(self.statements)})"
class Assignment(ASTNode):
"""赋值语句节点"""
def __init__(self, target, expr):
self.target = target
self.expr = expr
def __repr__(self):
return f"Assignment({repr(self.target)}, {repr(self.expr)})"步骤 2:扩展解析器以支持控制流语句
现在,我们扩展解析器以支持控制流语句:
class Parser:
"""支持控制流语句的解析器"""
def __init__(self, tokens):
self.tokens = tokens
self.pos = 0
def peek(self):
"""查看当前 token"""
if self.pos < len(self.tokens):
return self.tokens[self.pos]
return None
def consume(self, expected_type=None):
"""消费当前 token"""
if self.pos < len(self.tokens):
token = self.tokens[self.pos]
if expected_type and token[0] != expected_type:
raise SyntaxError(f"Expected {expected_type}, got {token[0]}")
self.pos += 1
return token
raise SyntaxError("Unexpected end of input")
def parse(self):
"""解析程序"""
statements = []
while self.peek():
statements.append(self.parse_statement())
if len(statements) == 1:
return statements[0]
else:
return Sequence(statements)
def parse_statement(self):
"""解析语句"""
token = self.peek()
if token[0] == 'if':
return self.parse_if_statement()
elif token[0] == 'while':
return self.parse_while_statement()
elif token[0] == 'for':
return self.parse_for_statement()
elif token[0] == '{':
return self.parse_block()
else:
return self.parse_assignment()
def parse_block(self):
"""解析语句块"""
self.consume('{')
statements = []
while self.peek() and self.peek()[0] != '}':
statements.append(self.parse_statement())
self.consume('}')
if len(statements) == 1:
return statements[0]
else:
return Sequence(statements)
def parse_if_statement(self):
"""解析 if-else 语句"""
self.consume('if')
self.consume('(')
condition = self.parse_expr()
self.consume(')')
true_body = self.parse_statement()
false_body = None
if self.peek() and self.peek()[0] == 'else':
self.consume('else')
false_body = self.parse_statement()
return IfStatement(condition, true_body, false_body)
def parse_while_statement(self):
"""解析 while 语句"""
self.consume('while')
self.consume('(')
condition = self.parse_expr()
self.consume(')')
body = self.parse_statement()
return WhileStatement(condition, body)
def parse_for_statement(self):
"""解析 for 语句"""
self.consume('for')
self.consume('(')
init = self.parse_assignment()
self.consume(';')
condition = self.parse_expr()
self.consume(';')
update = self.parse_assignment()
self.consume(')')
body = self.parse_statement()
return ForStatement(init, condition, update, body)
def parse_assignment(self):
"""解析赋值语句"""
target = self.parse_primary()
if self.peek() and self.peek()[0] == '=':
self.consume('=')
expr = self.parse_expr()
return Assignment(target, expr)
else:
# 如果不是赋值语句,回退
self.pos -= 1
return self.parse_expr()
def parse_expr(self):
"""解析表达式(处理加减和关系运算)"""
left = self.parse_term()
while self.peek() and self.peek()[0] in ('+', '-', '<', '<=', '>', '>=', '==', '!='):
op = self.consume()[0]
right = self.parse_term()
left = BinaryOp(op, left, right)
return left
def parse_term(self):
"""解析项(处理乘除)"""
left = self.parse_factor()
while self.peek() and self.peek()[0] in ('*', '/'):
op = self.consume()[0]
right = self.parse_factor()
left = BinaryOp(op, left, right)
return left
def parse_factor(self):
"""解析因子"""
return self.parse_primary()
def parse_primary(self):
"""解析基本表达式"""
token = self.peek()
if token[0] == '(':
self.consume('(')
expr = self.parse_expr()
self.consume(')')
return expr
elif token[0] == 'NUMBER':
return Constant(self.consume('NUMBER')[1])
elif token[0] == 'ID':
return Variable(self.consume('ID')[1])
elif token[0] in ('+', '-'):
op = self.consume()[0]
expr = self.parse_primary()
return UnaryOp(op, expr)
else:
raise SyntaxError(f"Unexpected token: {token}")步骤 3:扩展三地址码生成器以支持控制流语句
现在,我们扩展三地址码生成器以支持控制流语句:
class TACGenerator:
"""支持控制流语句的三地址码生成器"""
def __init__(self):
self.temp_count = 0
self.label_count = 0
self.instructions = []
def new_temp(self):
"""生成新的临时变量"""
temp = f"t{self.temp_count}"
self.temp_count += 1
return temp
def new_label(self):
"""生成新的标签"""
label = f"L{self.label_count}"
self.label_count += 1
return label
def generate(self, node):
"""生成三地址码"""
if isinstance(node, Constant):
return node.value
elif isinstance(node, Variable):
return node.name
elif isinstance(node, BinaryOp):
left_val = self.generate(node.left)
right_val = self.generate(node.right)
temp = self.new_temp()
self.instructions.append(f"{temp} = {left_val} {node.op} {right_val}")
return temp
elif isinstance(node, UnaryOp):
expr_val = self.generate(node.expr)
temp = self.new_temp()
self.instructions.append(f"{temp} = {node.op} {expr_val}")
return temp
elif isinstance(node, Assignment):
target = node.target.name
expr_val = self.generate(node.expr)
self.instructions.append(f"{target} = {expr_val}")
return target
elif isinstance(node, Sequence):
for stmt in node.statements:
self.generate(stmt)
return None
elif isinstance(node, IfStatement):
self.generate_if_statement(node)
return None
elif isinstance(node, WhileStatement):
self.generate_while_statement(node)
return None
elif isinstance(node, ForStatement):
self.generate_for_statement(node)
return None
else:
raise TypeError(f"Unknown AST node type: {type(node)}")
def generate_if_statement(self, node):
"""生成 if-else 语句的三地址码"""
# 生成条件表达式的三地址码
cond_val = self.generate(node.condition)
# 生成标签
Lfalse = self.new_label()
Lend = self.new_label()
# 生成条件跳转
self.instructions.append(f"iffalse {cond_val} goto {Lfalse}")
# 生成 true_body 的三地址码
self.generate(node.true_body)
# 生成无条件跳转
self.instructions.append(f"goto {Lend}")
# 生成 Lfalse 标签
self.instructions.append(f"{Lfalse}:")
# 生成 false_body 的三地址码
if node.false_body:
self.generate(node.false_body)
# 生成 Lend 标签
self.instructions.append(f"{Lend}:")
def generate_while_statement(self, node):
"""生成 while 语句的三地址码"""
# 生成标签
Lstart = self.new_label()
Lend = self.new_label()
# 生成 Lstart 标签
self.instructions.append(f"{Lstart}:")
# 生成条件表达式的三地址码
cond_val = self.generate(node.condition)
# 生成条件跳转
self.instructions.append(f"iffalse {cond_val} goto {Lend}")
# 生成 body 的三地址码
self.generate(node.body)
# 生成无条件跳转
self.instructions.append(f"goto {Lstart}")
# 生成 Lend 标签
self.instructions.append(f"{Lend}:")
def generate_for_statement(self, node):
"""生成 for 语句的三地址码"""
# 生成标签
Lstart = self.new_label()
Lend = self.new_label()
# 生成 init 的三地址码
self.generate(node.init)
# 生成 Lstart 标签
self.instructions.append(f"{Lstart}:")
# 生成条件表达式的三地址码
cond_val = self.generate(node.condition)
# 生成条件跳转
self.instructions.append(f"iffalse {cond_val} goto {Lend}")
# 生成 body 的三地址码
self.generate(node.body)
# 生成 update 的三地址码
self.generate(node.update)
# 生成无条件跳转
self.instructions.append(f"goto {Lstart}")
# 生成 Lend 标签
self.instructions.append(f"{Lend}:")
def get_instructions(self):
"""获取生成的三地址码指令"""
return self.instructions步骤 4:扩展词法分析器以支持控制流语句的关键字
我们需要扩展词法分析器以识别控制流语句的关键字:
def tokenize(code):
"""将代码转换为 token 列表"""
tokens = []
i = 0
keywords = {'if', 'else', 'while', 'for'}
while i < len(code):
c = code[i]
if c.isspace():
i += 1
elif c.isdigit():
# 解析数字
num = ""
while i < len(code) and code[i].isdigit():
num += code[i]
i += 1
tokens.append(('NUMBER', int(num)))
elif c.isalpha():
# 解析标识符或关键字
id = ""
while i < len(code) and code[i].isalnum():
id += code[i]
i += 1
if id in keywords:
tokens.append((id, id))
else:
tokens.append(('ID', id))
elif c in '+-*/(){};':
# 解析操作符、括号和分号
tokens.append((c, c))
i += 1
elif c == '=':
# 解析赋值运算符
tokens.append(('=', '='))
i += 1
elif c == '<' and i+1 < len(code) and code[i+1] == '=':
# 解析 <=
tokens.append(('<=', '<='))
i += 2
elif c == '>' and i+1 < len(code) and code[i+1] == '=':
# 解析 >=
tokens.append(('>=', '>='))
i += 2
elif c == '!' and i+1 < len(code) and code[i+1] == '=':
# 解析 !=
tokens.append(('!=', '!='))
i += 2
elif c == '=' and i+1 < len(code) and code[i+1] == '=':
# 解析 ==
tokens.append(('==', '=='))
i += 2
else:
raise SyntaxError(f"Unexpected character: {c}")
return tokens步骤 5:测试控制流语句的中间代码生成
让我们测试一下我们的编译器,看看它是否能正确生成控制流语句的三地址码:
# 测试函数
def test_control_flow(code):
print(f"\nTesting code:\n{code}")
# 词法分析
tokens = tokenize(code)
print(f"Tokens: {tokens}")
# 语法分析
parser = Parser(tokens)
ast = parser.parse()
print(f"AST: {repr(ast)}")
# 生成三地址码
generator = TACGenerator()
generator.generate(ast)
instructions = generator.get_instructions()
print("Generated TAC:")
for i, instr in enumerate(instructions):
print(f" {i+1}. {instr}")
# 测试案例
test_control_flow("""
x = 1;
if (x > 0) {
y = x * 2;
} else {
y = x * 3;
}
""")
test_control_flow("""
i = 0;
while (i < 10) {
sum = sum + i;
i = i + 1;
}
""")
test_control_flow("""
sum = 0;
for (i = 0; i < 10; i = i + 1) {
sum = sum + i;
}
""")步骤 6:运行结果分析
运行上面的测试代码,我们会得到以下输出:
Testing code:
x = 1;
if (x > 0) {
y = x * 2;
} else {
y = x * 3;
}
Tokens: [('ID', 'x'), ('=', '='), ('NUMBER', 1), (';', ';'), ('if', 'if'), ('(', '('), ('ID', 'x'), ('>', '>'), ('NUMBER', 0), (')', ')'), ('{', '{'), ('ID', 'y'), ('=', '='), ('ID', 'x'), ('*', '*'), ('NUMBER', 2), (';', ';'), ('}', '}'), ('else', 'else'), ('{', '{'), ('ID', 'y'), ('=', '='), ('ID', 'x'), ('*', '*'), ('NUMBER', 3), (';', ';'), ('}', '}'), (';', ';')]
AST: Sequence([Assignment(Variable('x'), Constant(1)), IfStatement(BinaryOp('>', Variable('x'), Constant(0)), Assignment(Variable('y'), BinaryOp('*', Variable('x'), Constant(2))), Assignment(Variable('y'), BinaryOp('*', Variable('x'), Constant(3))))])
Generated TAC:
1. x = 1
2. t0 = x > 0
3. iffalse t0 goto L0
4. t1 = x * 2
5. y = t1
6. goto L1
7. L0:
8. t2 = x * 3
9. y = t2
10. L1:
Testing code:
i = 0;
while (i < 10) {
sum = sum + i;
i = i + 1;
}
Tokens: [('ID', 'i'), ('=', '='), ('NUMBER', 0), (';', ';'), ('while', 'while'), ('(', '('), ('ID', 'i'), ('<', '<'), ('NUMBER', 10), (')', ')'), ('{', '{'), ('ID', 'sum'), ('=', '='), ('ID', 'sum'), ('+', '+'), ('ID', 'i'), (';', ';'), ('ID', 'i'), ('=', '='), ('ID', 'i'), ('+', '+'), ('NUMBER', 1), (';', ';'), ('}', '}'), (';', ';')]
AST: Sequence([Assignment(Variable('i'), Constant(0)), WhileStatement(BinaryOp('<', Variable('i'), Constant(10)), Sequence([Assignment(Variable('sum'), BinaryOp('+', Variable('sum'), Variable('i'))), Assignment(Variable('i'), BinaryOp('+', Variable('i'), Constant(1)))])])
Generated TAC:
1. i = 0
2. L0:
3. t0 = i < 10
4. iffalse t0 goto L1
5. t1 = sum + i
6. sum = t1
7. t2 = i + 1
8. i = t2
9. goto L0
10. L1:
Testing code:
sum = 0;
for (i = 0; i < 10; i = i + 1) {
sum = sum + i;
}
Tokens: [('ID', 'sum'), ('=', '='), ('NUMBER', 0), (';', ';'), ('for', 'for'), ('(', '('), ('ID', 'i'), ('=', '='), ('NUMBER', 0), (';', ';'), ('ID', 'i'), ('<', '<'), ('NUMBER', 10), (';', ';'), ('ID', 'i'), ('=', '='), ('ID', 'i'), ('+', '+'), ('NUMBER', 1), (')', ')'), ('{', '{'), ('ID', 'sum'), ('=', '='), ('ID', 'sum'), ('+', '+'), ('ID', 'i'), (';', ';'), ('}', '}'), (';', ';')]
AST: Sequence([Assignment(Variable('sum'), Constant(0)), ForStatement(Assignment(Variable('i'), Constant(0)), BinaryOp('<', Variable('i'), Constant(10)), Assignment(Variable('i'), BinaryOp('+', Variable('i'), Constant(1))), Assignment(Variable('sum'), BinaryOp('+', Variable('sum'), Variable('i'))))])
Generated TAC:
1. sum = 0
2. i = 0
3. L0:
4. t0 = i < 10
5. iffalse t0 goto L1
6. t1 = sum + i
7. sum = t1
8. t2 = i + 1
9. i = t2
10. goto L0
11. L1:实用案例分析
案例:生成控制流图 (CFG)
控制流图 (CFG) 是分析程序控制流的重要工具。我们可以在生成三地址码的同时,构建控制流图:
class CFGNode:
"""控制流图节点"""
def __init__(self, label, instructions):
self.label = label
self.instructions = instructions
self.successors = []
self.predecessors = []
def add_successor(self, node):
"""添加后继节点"""
if node not in self.successors:
self.successors.append(node)
node.predecessors.append(self)
def __repr__(self):
return f"CFGNode({self.label})"
class CFGGenerator:
"""控制流图生成器"""
def __init__(self):
self.temp_count = 0
self.label_count = 0
self.instructions = []
self.nodes = []
self.current_node = None
self.label_to_node = {}
def new_temp(self):
"""生成新的临时变量"""
temp = f"t{self.temp_count}"
self.temp_count += 1
return temp
def new_label(self):
"""生成新的标签"""
label = f"L{self.label_count}"
self.label_count += 1
return label
def start_new_node(self, label=None):
"""开始新的基本块"""
if self.current_node and self.current_node.instructions:
self.nodes.append(self.current_node)
if not label:
label = self.new_label()
self.current_node = CFGNode(label, [])
self.label_to_node[label] = self.current_node
def add_instruction(self, instr):
"""添加指令到当前基本块"""
if not self.current_node:
self.start_new_node()
self.current_node.instructions.append(instr)
def finish(self):
"""完成控制流图构建"""
if self.current_node and self.current_node.instructions:
self.nodes.append(self.current_node)
# 构建控制流图的边
for node in self.nodes:
last_instr = node.instructions[-1]
if last_instr.startswith('goto '):
# 无条件跳转
target_label = last_instr.split(' ')[1]
if target_label in self.label_to_node:
target_node = self.label_to_node[target_label]
node.add_successor(target_node)
elif last_instr.startswith('iffalse '):
# 条件跳转
parts = last_instr.split(' ')
target_label = parts[2]
if target_label in self.label_to_node:
target_node = self.label_to_node[target_label]
node.add_successor(target_node)
# 还要添加到下一个节点的边
next_node_index = self.nodes.index(node) + 1
if next_node_index < len(self.nodes):
next_node = self.nodes[next_node_index]
node.add_successor(next_node)
else:
# 顺序执行,添加到下一个节点的边
next_node_index = self.nodes.index(node) + 1
if next_node_index < len(self.nodes):
next_node = self.nodes[next_node_index]
node.add_successor(next_node)
def generate(self, node):
"""生成三地址码和控制流图"""
if isinstance(node, Constant):
return node.value
elif isinstance(node, Variable):
return node.name
elif isinstance(node, BinaryOp):
left_val = self.generate(node.left)
right_val = self.generate(node.right)
temp = self.new_temp()
self.add_instruction(f"{temp} = {left_val} {node.op} {right_val}")
return temp
elif isinstance(node, UnaryOp):
expr_val = self.generate(node.expr)
temp = self.new_temp()
self.add_instruction(f"{temp} = {node.op} {expr_val}")
return temp
elif isinstance(node, Assignment):
target = node.target.name
expr_val = self.generate(node.expr)
self.add_instruction(f"{target} = {expr_val}")
return target
elif isinstance(node, Sequence):
for stmt in node.statements:
self.generate(stmt)
return None
elif isinstance(node, IfStatement):
self.generate_if_statement(node)
return None
elif isinstance(node, WhileStatement):
self.generate_while_statement(node)
return None
elif isinstance(node, ForStatement):
self.generate_for_statement(node)
return None
else:
raise TypeError(f"Unknown AST node type: {type(node)}")
def generate_if_statement(self, node):
"""生成 if-else 语句的三地址码和控制流图"""
# 生成条件表达式的三地址码
cond_val = self.generate(node.condition)
# 生成标签
Lfalse = self.new_label()
Lend = self.new_label()
# 生成条件跳转
self.add_instruction(f"iffalse {cond_val} goto {Lfalse}")
# 生成 true_body 的三地址码
self.generate(node.true_body)
# 生成无条件跳转
self.add_instruction(f"goto {Lend}")
# 开始新的基本块(Lfalse)
self.start_new_node(Lfalse)
# 生成 false_body 的三地址码
if node.false_body:
self.generate(node.false_body)
# 开始新的基本块(Lend)
self.start_new_node(Lend)
def generate_while_statement(self, node):
"""生成 while 语句的三地址码和控制流图"""
# 生成标签
Lstart = self.new_label()
Lend = self.new_label()
# 开始新的基本块(Lstart)
self.start_new_node(Lstart)
# 生成条件表达式的三地址码
cond_val = self.generate(node.condition)
# 生成条件跳转
self.add_instruction(f"iffalse {cond_val} goto {Lend}")
# 生成 body 的三地址码
self.generate(node.body)
# 生成无条件跳转
self.add_instruction(f"goto {Lstart}")
# 开始新的基本块(Lend)
self.start_new_node(Lend)
def generate_for_statement(self, node):
"""生成 for 语句的三地址码和控制流图"""
# 生成标签
Lstart = self.new_label()
Lend = self.new_label()
# 生成 init 的三地址码
self.generate(node.init)
# 开始新的基本块(Lstart)
self.start_new_node(Lstart)
# 生成条件表达式的三地址码
cond_val = self.generate(node.condition)
# 生成条件跳转
self.add_instruction(f"iffalse {cond_val} goto {Lend}")
# 生成 body 的三地址码
self.generate(node.body)
# 生成 update 的三地址码
self.generate(node.update)
# 生成无条件跳转
self.add_instruction(f"goto {Lstart}")
# 开始新的基本块(Lend)
self.start_new_node(Lend)
def get_instructions(self):
"""获取生成的三地址码指令"""
instructions = []
for node in self.nodes:
instructions.append(f"{node.label}:")
instructions.extend(node.instructions)
return instructions
def get_cfg(self):
"""获取控制流图"""
return self.nodes
# 测试控制流图生成
def test_cfg(code):
print(f"\nTesting CFG generation for:\n{code}")
# 词法分析
tokens = tokenize(code)
# 语法分析
parser = Parser(tokens)
ast = parser.parse()
# 生成控制流图
cfg_generator = CFGGenerator()
cfg_generator.generate(ast)
cfg_generator.finish()
# 打印控制流图
print("Generated CFG:")
for node in cfg_generator.get_cfg():
print(f"\nNode {node.label}:")
for instr in node.instructions:
print(f" {instr}")
print(f" Successors: {[succ.label for succ in node.successors]}")
print(f" Predecessors: {[pred.label for pred in node.predecessors]}")
# 测试控制流图生成
test_cfg("""
x = 1;
if (x > 0) {
y = x * 2;
} else {
y = x * 3;
}
""")总结
本集我们通过实战案例,详细讲解了如何为控制流语句生成中间代码,重点关注了以下内容:
控制流语句的中间代码表示:使用标签和跳转指令来表示控制流
标签管理策略:自动生成唯一的标签名,提前规划标签的使用
控制流语句的三地址码生成规则:
- if-else 语句的生成规则
- while 循环的生成规则
- for 循环的生成规则
控制流图 (CFG) 的生成:构建控制流图以可视化程序的控制流结构
完整的控制流语句编译器实现:整合了词法分析、语法分析、三地址码生成和控制流图构建等组件
通过本集的学习,我们掌握了控制流语句的中间代码生成技术,为后续学习函数的中间代码生成奠定了基础。在下一集中,我们将重点讲解函数的中间代码生成过程。