# reference_lora.py
import os

import torch
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM

# A CUDA GPU is assumed throughout; fail fast if none is visible.
assert torch.cuda.is_available(), "This script requires a CUDA-capable GPU"
def load_lora_model(base_model_name, lora_path, device="cuda"):
    """
    Load the base model, apply the LoRA adapter, and merge the weights.

    :param base_model_name: name or path of the base model
    :param lora_path: path to the LoRA adapter
    :param device: device to run on ("cuda" or "cpu")
    :return: merged model and tokenizer
    """
    # Load the tokenizer; fall back to EOS as the pad token if none is set.
    tokenizer = AutoTokenizer.from_pretrained(
        base_model_name,
        trust_remote_code=True,
        padding_side="right",
    )
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Load the base model in bfloat16 directly on the requested device.
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map={"": device},
    )

    # Apply the LoRA adapter, then merge it into the base weights so the
    # result is a plain, adapter-free model.
    model = PeftModel.from_pretrained(base_model, lora_path)
    model = model.merge_and_unload()

    # Persist the merged model and tokenizer for later standalone use.
    model.save_pretrained("./output/DeepSeek_full")
    tokenizer.save_pretrained("./output/DeepSeek_full")

    model.to(device)
    model.eval()
    return model, tokenizer
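
# A minimal usage sketch for load_lora_model. The model name and adapter
# path below are placeholders, not values shipped with this script:
#
#   model, tokenizer = load_lora_model(
#       "deepseek-ai/deepseek-llm-7b-base",  # hypothetical base model
#       "./lora_adapter",                    # hypothetical adapter directory
#   )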
def build_prompt(instruction, input_context=None):
    """
    Build a User/Assistant style prompt.

    :param instruction: the instruction text
    :param input_context: optional extra context appended after the instruction
    :return: the formatted prompt string
    """
    if input_context:
        return f"User: {instruction}\n{input_context}\n\nAssistant: "
    return f"User: {instruction}\n\nAssistant: "
def generate_code(model, tokenizer, prompt, generation_config=None):
    """
    Generate a completion for the prompt and return the Assistant's reply.

    :param generation_config: optional dict of overrides for the defaults below
    """
    default_config = {
        "max_new_tokens": 1024,
        "temperature": 0.8,
        "top_p": 0.95,
        "do_sample": True,
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    if generation_config:
        default_config.update(generation_config)

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=2048,
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(**inputs, **default_config)

    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Strip everything up to and including the "Assistant: " marker; if the
    # marker is missing, return the whole decoded response instead of a
    # garbage slice (str.find returns -1 on a miss).
    marker = "Assistant: "
    marker_pos = full_response.find(marker)
    if marker_pos == -1:
        return full_response.strip()
    return full_response[marker_pos + len(marker):].strip()
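
# A minimal usage sketch (assumes model/tokenizer came from load_lora_model):
#
#   code = generate_code(model, tokenizer, build_prompt("Reverse a string"),
#                        {"do_sample": False, "max_new_tokens": 128})
#
# Passing do_sample=False switches from nucleus sampling to greedy decoding.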
def main(load_8bit: bool = False,
         base_model: str = "",
         lora_weights: str = "",
         ):
    # NOTE: load_8bit is accepted for interface compatibility but is not
    # used anywhere below.
    DEVICE = "cuda"
    BASE_MODEL = base_model or os.environ.get("BASE_MODEL", "")
    assert (
        BASE_MODEL
    ), "Please specify a --base_model, e.g. --base_model='huggyllama/llama-7b'"
    LORA_PATH = lora_weights or os.environ.get("LORA_PATH", "")
    assert LORA_PATH, "Please specify LoRA weights via --lora_weights or LORA_PATH"

    # Load the merged model and tokenizer.
    model, tokenizer = load_lora_model(BASE_MODEL, LORA_PATH, DEVICE)

    # Build the prompt: a function stub for the model to complete.
    instruction = '''
from typing import List
def mean_absolute_deviation(numbers: List[float]) -> float:
    """ For a given list of input numbers, calculate Mean Absolute Deviation
    around the mean of this dataset.
    Mean Absolute Deviation is the average absolute difference between each
    element and a centerpoint (mean in this case):
    MAD = average | x - x_mean |
    >>> mean_absolute_deviation([1.0, 2.0, 3.0, 4.0])
    1.0
    """
'''
    prompt = build_prompt(instruction)

    generation_config = {
        "max_new_tokens": 256,
        "temperature": 0.7,
        "top_p": 0.9,
        "repetition_penalty": 1.1,
    }
    generated_code = generate_code(model, tokenizer, prompt, generation_config)

    # Show the generated code.
    print("=" * 50)
    print("Code:")
    print("=" * 50)
    print(generated_code)


if __name__ == "__main__":
    main()
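
# Since main() is called without arguments, the model and adapter are picked
# up from the environment. A hypothetical invocation (paths are placeholders):
#
#   BASE_MODEL="huggyllama/llama-7b" LORA_PATH="./lora_adapter" python reference_lora.py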