CodeGPT/repl.py at main · xlisp/CodeGPT · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
#!/usr/bin/env python3
"""
CodeGPT REPL — 代码补全交互式命令行

用法:
    python repl.py                        # 加载默认检查点
    python repl.py --out_dir=out-codegpt  # 指定检查点目录
    python repl.py --temperature=0.3      # 调整温度

REPL 内命令:
    /help               显示帮助
    /fim                进入 FIM 填空模式
    /complete           回到补全模式（默认）
    /context            显示当前对话上下文
    /reset              清空上下文，重新开始
    /temp <float>       调整采样温度（如 /temp 0.3）
    /tokens <int>       调整最大生成 token 数
    /topk <int>         调整 top-k 参数
    /lang <str>         设置语言（python/javascript/go 等）
    /quit               退出
"""

import os
import sys
import argparse
import textwrap
from contextlib import nullcontext

import torch

from model import CodeGPT, CodeGPTConfig
from tokenizer import CodeTokenizer, SPECIAL_TOKENS

# ─────────────────────────── ANSI 颜色 ────────────────────────────
# ANSI SGR escape sequences used to style terminal output.
RESET = "\033[0m"   # appended by c() after every styled string
BOLD = "\033[1m"
DIM = "\033[2m"
RED = "\033[31m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
BLUE = "\033[34m"
CYAN = "\033[36m"
GRAY = "\033[90m"


def c(text, color):
    """Return *text* prefixed with the *color* escape code and followed by RESET."""
    return "{}{}{}".format(color, text, RESET)

# ─────────────────────────── 参数解析 ─────────────────────────────
def parse_args():
    """Parse the REPL's command-line options from ``sys.argv``.

    Returns:
        An ``argparse.Namespace`` with ``out_dir``, ``temperature``,
        ``top_k``, ``top_p``, ``max_tokens``, ``rep_penalty``, ``lang`` and
        ``device`` (``None`` unless given explicitly).
    """
    parser = argparse.ArgumentParser(description="CodeGPT REPL")
    # (flag, keyword arguments for add_argument), registered in this order.
    options = (
        ("--out_dir", {"default": "out-codegpt"}),
        ("--temperature", {"type": float, "default": 0.3}),
        ("--top_k", {"type": int, "default": 50}),
        ("--top_p", {"type": float, "default": 0.95}),
        ("--max_tokens", {"type": int, "default": 200}),
        ("--rep_penalty", {"type": float, "default": 1.1}),
        ("--lang", {"default": "python"}),
        ("--device", {"default": None}),
    )
    for flag, spec in options:
        parser.add_argument(flag, **spec)
    return parser.parse_args()

# ─────────────────────────── 模型加载 ─────────────────────────────
def load_model(out_dir, device):
    """Load the checkpoint ``<out_dir>/ckpt.pt`` and build the model for inference.

    Args:
        out_dir: Directory expected to contain ``ckpt.pt``.
        device: Device passed to ``torch.load`` as ``map_location`` and to
            ``model.to``.

    Returns:
        ``(model, config)``: the model in eval mode on *device*, and the
        ``CodeGPTConfig`` rebuilt from the checkpoint's ``model_args``.

    Prints an error and exits with status 1 when the checkpoint is missing.
    """
    ckpt_path = os.path.join(out_dir, "ckpt.pt")
    if not os.path.exists(ckpt_path):
        print(c(f"错误：找不到检查点 {ckpt_path}", RED))
        sys.exit(1)

    print(c("  加载模型中...", DIM), end="", flush=True)
    # SECURITY: weights_only=False runs the full pickle machinery, which can
    # execute arbitrary code embedded in the file. Only load checkpoints you
    # produced yourself or otherwise trust.
    ckpt = torch.load(ckpt_path, map_location=device, weights_only=False)
    config = CodeGPTConfig(**ckpt["model_args"])
    model = CodeGPT(config)

    # Strip the "_orig_mod." key prefix (presumably left by a torch.compile
    # wrapper at training time) so the keys match the plain module.
    compiled_prefix = "_orig_mod."
    state_dict = {
        (key[len(compiled_prefix):] if key.startswith(compiled_prefix) else key): value
        for key, value in ckpt["model"].items()
    }
    model.load_state_dict(state_dict)
    model.eval().to(device)

    iter_num = ckpt.get("iter_num", "?")
    val_loss = ckpt.get("best_val_loss", "?")
    # The loss may be stored as a tensor or a plain float; show both the same way.
    if isinstance(val_loss, (torch.Tensor, float)):
        val_loss = f"{float(val_loss):.4f}"
    print(c(f" 完成（iter={iter_num}, val_loss={val_loss}）", GREEN))
    return model, config

# ─────────────────────────── 编码工具 ─────────────────────────────
def encode_complete(tokenizer, text, lang):
    """Build the token ids for a plain completion prompt.

    Layout: the ``<|code_start|>`` id, then the ``<|lang:{lang}|>`` id when
    SPECIAL_TOKENS contains that key, then ``tokenizer.encode_raw(text)``.
    """
    lang_key = f"<|lang:{lang}|>"
    prompt_ids = [SPECIAL_TOKENS["<|code_start|>"]]
    if lang_key in SPECIAL_TOKENS:
        prompt_ids += [SPECIAL_TOKENS[lang_key]]
    prompt_ids += tokenizer.encode_raw(text)
    return prompt_ids

def encode_fim(tokenizer, prefix, suffix):
    """Build the token ids for a fill-in-the-middle prompt.

    Layout: ``<|fim_prefix|>`` + encoded prefix + ``<|fim_suffix|>`` +
    encoded suffix + ``<|fim_middle|>``.
    """
    return [
        SPECIAL_TOKENS["<|fim_prefix|>"],
        *tokenizer.encode_raw(prefix),
        SPECIAL_TOKENS["<|fim_suffix|>"],
        *tokenizer.encode_raw(suffix),
        SPECIAL_TOKENS["<|fim_middle|>"],
    ]

# ─────────────────────────── 生成 ─────────────────────────────────
def generate(model, tokenizer, input_ids, ctx, device,
             max_tokens, temperature, top_k, top_p, rep_penalty):
    """Sample a continuation of *input_ids* and decode it.

    The prompt is turned into a batch of one on *device* and passed to
    ``model.generate`` under ``torch.no_grad()`` and the *ctx* context
    manager. Sampling stops on ``<|endoftext|>`` or ``<|code_end|>``.

    Returns:
        ``(text, ids)``: the decoded continuation and its token ids, i.e.
        everything in the output after the first ``len(input_ids)``
        positions.
    """
    sampling = {
        "max_new_tokens": max_tokens,
        "temperature": temperature,
        "top_k": top_k,
        "top_p": top_p,
        "stop_tokens": [
            SPECIAL_TOKENS["<|endoftext|>"],
            SPECIAL_TOKENS["<|code_end|>"],
        ],
        "repetition_penalty": rep_penalty,
    }
    prompt = torch.tensor(input_ids, dtype=torch.long, device=device).unsqueeze(0)
    with torch.no_grad(), ctx:
        output = model.generate(prompt, **sampling)
    # Drop the echoed prompt; keep only the newly produced ids.
    new_ids = output[0].tolist()[len(input_ids):]
    return tokenizer.decode(new_ids), new_ids

# ─────────────────────────── 输出渲染 ─────────────────────────────
def print_completion(prompt_text, completion_text):
    """Print the prompt (dim) followed by the completion (green).

    When the completion is empty or whitespace only, a yellow hint is printed
    in its place. Output is framed by one blank line before and after.
    """
    rendered = [c("  " + row, DIM) for row in prompt_text.splitlines()]
    if completion_text.strip():
        rendered.extend(c("  " + row, GREEN) for row in completion_text.splitlines())
    else:
        rendered.append(c("  （模型未生成有意义内容，尝试调低 temperature）", YELLOW))

    print()
    for row in rendered:
        print(row)
    print()

def print_header(config, args, device):
    """Print the start-up banner: model shape, device and sampling settings.

    Args:
        config: Model config; ``n_layer``, ``n_head`` and ``n_embd`` are read,
            plus ``vocab_size`` when present.
        args: Parsed CLI options; ``temperature``, ``top_k``, ``max_tokens``
            and ``lang`` are shown.
        device: Device string to display.
    """
    # The model itself is not available here, so the parameter count is an
    # estimate from the config instead of a hard-coded figure.
    # NOTE(review): assumes a standard GPT block (about 12 * n_embd^2 weights
    # per layer) plus one token-embedding matrix -- confirm against model.py.
    approx_params = (12 * config.n_layer * config.n_embd ** 2
                     + getattr(config, "vocab_size", 0) * config.n_embd)

    print()
    print(c("╔══════════════════════════════════════════════════╗", CYAN))
    print(c("║           CodeGPT  交互式代码补全 REPL           ║", CYAN))
    # Bottom border uses the same colour as the rest of the box.
    print(c("╚══════════════════════════════════════════════════╝", CYAN))
    print(f"  模型参数:  {config.n_layer}层 × {config.n_head}头 × {config.n_embd}维  "
          f"(约 {approx_params / 1e6:.2f}M params)")
    print(f"  设备:      {device}")
    print(f"  温度:      {args.temperature}    top_k: {args.top_k}    最大 tokens: {args.max_tokens}")
    print(f"  语言:      {args.lang}")
    print()
    print(c("  输入代码片段按 Enter 补全。空行结束多行输入。", GRAY))
    print(c("  输入 /help 查看所有命令。", GRAY))
    print()

def print_help(temp=None):
    """Print the command reference box.

    Args:
        temp: Current sampling temperature to show next to ``/temp``. When
            omitted, an example value is shown instead.
    """
    temp_hint = f"当前: {temp}" if temp is not None else "如 0.3"
    # The text is a str.format template: ``{{`` renders as a literal ``{`` and
    # ``{temp_hint}`` is substituted below.
    help_text = textwrap.dedent("""
    ┌─ 命令 ──────────────────────────────────────────────┐
    │  /help              显示此帮助                       │
    │  /fim               进入 FIM 填空模式                │
    │  /complete          回到普通补全模式                  │
    │  /context           显示当前累积上下文               │
    │  /reset             清空上下文                       │
    │  /temp <n>          设置温度  ({temp_hint})            │
    │  /tokens <n>        设置最大 token 数                │
    │  /topk <n>          设置 top-k                       │
    │  /lang <name>       设置语言 (python/go/js/...)      │
    │  /quit  /exit  q    退出                             │
    └──────────────────────────────────────────────────────┘
    多行输入：以 : {{ \\ , ( [ 结尾自动进入多行，空行结束。
    """).strip().format(temp_hint=temp_hint)
    print(c(help_text, CYAN))
    print()

# ─────────────────────────── 多行输入 ────────────────────────────
def read_multiline(first_line):
    """Read a possibly multi-line snippet that starts with *first_line*.

    If *first_line* (ignoring trailing whitespace) ends with a colon, an
    opening brace, a backslash, a comma, an opening parenthesis or an opening
    bracket, further lines are read from stdin until an empty line, EOF or
    Ctrl-C. A slash command (``/name``) or a quit alias (``q``, ``quit``,
    ``exit``) also ends the input; that line is not added to the snippet but
    handed back to the caller.

    Returns:
        ``(text, pending_cmd)``: the collected lines joined with newlines, and
        the command line that interrupted the input, or ``None``.
    """
    continuation_endings = (":", "{", "\\", ",", "(", "[")
    if not first_line.rstrip().endswith(continuation_endings):
        return first_line, None

    print(c("  (多行模式，空行结束输入)", GRAY))
    lines = [first_line]
    pending_cmd = None
    while True:
        try:
            line = input(c("  ... ", BLUE))
        except (EOFError, KeyboardInterrupt):
            break
        if line == "":
            break
        # Only "/<letter>..." counts as a command, so code lines such as
        # "// comment" or "/* ... */" stay part of the snippet.
        after_slash = line[1:2]
        is_slash_command = (line.startswith("/")
                            and after_slash.isascii()
                            and after_slash.isalpha())
        if is_slash_command or line.lower() in ("q", "quit", "exit"):
            pending_cmd = line
            break
        lines.append(line)
    return "\n".join(lines), pending_cmd

# ─────────────────────────── 主 REPL ─────────────────────────────
# Bare words that quit the REPL (in addition to /quit, /exit and /q).
_QUIT_ALIASES = ("q", "quit", "exit")

# Numeric settings adjustable at runtime:
# command -> (state key, parser, confirmation template, usage message).
_NUMERIC_SETTINGS = {
    "/temp": ("temp", float, "  温度设为 {}", "  用法：/temp 0.3"),
    "/tokens": ("max_tokens", int, "  最大 token 数设为 {}", "  用法：/tokens 200"),
    "/topk": ("top_k", int, "  top-k 设为 {}", "  用法：/topk 50"),
}


def _is_command(line):
    """Return True when *line* looks like a slash command (``/name``)."""
    # Require an ASCII letter after the slash so that code such as
    # "// comment" or "/* ... */" is completed instead of being rejected.
    after_slash = line[1:2]
    return line.startswith("/") and after_slash.isascii() and after_slash.isalpha()


def _run_command(raw, state):
    """Execute the slash command *raw* on the mutable *state*; return True to exit."""
    cmd = raw.split()[0].lower()
    rest = raw[len(cmd):].strip()

    if cmd in ("/quit", "/exit", "/q"):
        print(c("再见！", GREEN))
        return True

    if cmd == "/help":
        print_help()
    elif cmd == "/fim":
        state["mode"] = "fim"
        print(c("  已切换到 FIM 填空模式。先输入 prefix，再输入 suffix。", YELLOW))
    elif cmd == "/complete":
        state["mode"] = "complete"
        state["context"] = ""
        print(c("  已切换到补全模式，上下文已清空。", GREEN))
    elif cmd == "/reset":
        state["context"] = ""
        print(c("  上下文已清空。", GREEN))
    elif cmd == "/context":
        if state["context"]:
            print(c("  当前上下文：", GRAY))
            for line in state["context"].splitlines():
                print(c("    " + line, DIM))
        else:
            print(c("  上下文为空。", GRAY))
        print()
    elif cmd in _NUMERIC_SETTINGS:
        key, parse, confirmation, usage = _NUMERIC_SETTINGS[cmd]
        try:
            state[key] = parse(rest)
        except ValueError:
            print(c(usage, RED))
        else:
            print(c(confirmation.format(state[key]), GREEN))
    elif cmd == "/lang":
        if rest:
            state["lang"] = rest
            print(c(f"  语言设为 {rest}", GREEN))
        else:
            print(c("  用法：/lang python", RED))
    else:
        print(c(f"  未知命令：{cmd}，输入 /help 查看帮助", RED))
    return False


def _run_pending(pending, state):
    """Run a command line captured during multi-line input; return True to exit."""
    pending = (pending or "").strip()
    if not pending:
        return False
    if pending.lower() in _QUIT_ALIASES:
        print(c("再见！", GREEN))
        return True
    return _run_command(pending, state)


def repl(model, tokenizer, config, args, device, ctx):
    """Run the interactive loop until the user quits (command, EOF or Ctrl-C).

    Two modes exist. In "complete" mode each input is appended to an
    accumulated context, the whole context is completed, and prompt plus
    completion become the new context. In "fim" mode a prefix and a suffix
    are read and the model fills in the middle; no context is kept.

    Args:
        model, tokenizer: Passed through to ``generate`` / the encoders.
        config: Model config; ``block_size`` bounds the kept context.
        args: Parsed CLI options providing the initial sampling settings;
            ``top_p`` and ``rep_penalty`` stay fixed for the session.
        device: Device string passed to ``generate``.
        ctx: Context manager under which generation runs.
    """
    state = {
        "mode": "complete",          # "complete" | "fim"
        "context": "",               # accumulated text (complete mode only)
        "temp": args.temperature,
        "top_k": args.top_k,
        "max_tokens": args.max_tokens,
        "lang": args.lang,
    }

    def run_model(input_ids):
        """Generate from *input_ids* with the current settings; return the text."""
        print(c("  生成中...", DIM), end="\r", flush=True)
        text, _ = generate(
            model, tokenizer, input_ids, ctx, device,
            state["max_tokens"], state["temp"], state["top_k"],
            top_p=args.top_p, rep_penalty=args.rep_penalty,
        )
        print(" " * 20, end="\r")  # wipe the "generating" status line
        return text

    print_header(config, args, device)

    while True:
        # -- prompt --
        if state["mode"] == "fim":
            prompt_prefix = c("[FIM] prefix> ", YELLOW)
        else:
            context = state["context"]
            ctx_indicator = c(f"[+{len(context)}字符]", GRAY) if context else ""
            prompt_prefix = c(">>> ", CYAN) + ctx_indicator + " "

        try:
            raw = input(prompt_prefix).rstrip()
        except (EOFError, KeyboardInterrupt):
            print(c("\n再见！", GREEN))
            break

        if not raw:
            continue

        # -- slash commands --
        if _is_command(raw):
            if _run_command(raw, state):
                break
            continue

        # -- bare quit aliases --
        if raw.lower() in _QUIT_ALIASES:
            print(c("再见！", GREEN))
            break

        # -- FIM mode: prefix, then suffix --
        if state["mode"] == "fim":
            prefix_text, prefix_cmd = read_multiline(raw)
            print(c("[FIM] suffix> ", YELLOW), end="", flush=True)
            try:
                suffix_raw = input().rstrip()
            except (EOFError, KeyboardInterrupt):
                print()
                # The turn is abandoned, but a command typed while entering
                # the prefix is still honoured.
                if _run_pending(prefix_cmd, state):
                    break
                continue
            suffix_text, suffix_cmd = read_multiline(suffix_raw) if suffix_raw else ("", None)

            completion = run_model(encode_fim(tokenizer, prefix_text, suffix_text))
            print()
            print(c("  ┌─ FIM 填空结果 ─────────────────────────────", YELLOW))
            print(c("  │ prefix:  ", DIM) + prefix_text.replace("\n", "↵ "))
            print(c("  │ infill:  ", GREEN) + completion.replace("\n", "↵ "))
            print(c("  │ suffix:  ", DIM) + suffix_text.replace("\n", "↵ "))
            print(c("  └───────────────────────────────────────────", YELLOW))
            print()

            # Commands typed during multi-line input run after the result.
            if _run_pending(prefix_cmd, state) or _run_pending(suffix_cmd, state):
                break
            continue

        # -- completion mode --
        user_input, pending_cmd = read_multiline(raw)

        # Append the new input to whatever has accumulated so far.
        context = state["context"]
        full_prompt = context + "\n" + user_input if context else user_input

        completion = run_model(encode_complete(tokenizer, full_prompt, state["lang"]))
        print_completion(full_prompt, completion)

        # Prompt + completion become the context for the next turn.
        context = full_prompt + completion
        # Keep the context within a rough block_size budget by truncating on
        # characters (tail kept), estimating about 3 characters per token.
        max_ctx_chars = config.block_size * 3
        if len(context) > max_ctx_chars:
            context = context[-max_ctx_chars:]
            print(c(f"  （上下文过长，已裁剪保留最后 {max_ctx_chars} 字符）", GRAY))
        state["context"] = context

        # A command captured during multi-line input runs right away, through
        # the same dispatcher as commands typed at the prompt.
        if _run_pending(pending_cmd, state):
            break


# ─────────────────────────── 入口 ─────────────────────────────────
def main():
    """Entry point: pick a device, load the checkpoint and start the REPL."""
    args = parse_args()

    # Device: explicit --device wins, then CUDA, then MPS, then CPU.
    if args.device:
        device = args.device
    elif torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"

    # Autocast needs the bare device type, not an indexed string such as
    # "cuda:0"; deriving it also makes the CUDA-only setup below apply to
    # indexed devices.
    on_cuda = torch.device(device).type == "cuda"

    # Mixed precision only on CUDA, in float16 (original note: a GTX 1080 has
    # to use float16). Everything else runs in plain float32, where an
    # autocast context would have nothing to cast.
    if on_cuda:
        ctx = torch.amp.autocast(device_type="cuda", dtype=torch.float16)
    else:
        ctx = nullcontext()

    torch.manual_seed(42)
    if on_cuda:
        torch.cuda.manual_seed(42)
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

    model, config = load_model(args.out_dir, device)
    tokenizer = CodeTokenizer()

    repl(model, tokenizer, config, args, device, ctx)


if __name__ == "__main__":
    main()