|
| 1 | +/* |
| 2 | + * Copyright (C) 2018-2023 Chatopera Inc, <https://www.chatopera.com> |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
| 17 | +package com.chatopera.bot.sdk; |
| 18 | + |
| 19 | +import com.chatopera.bot.exception.ChatbotException; |
| 20 | +import com.chatopera.bot.exception.ResourceInvalidException; |
| 21 | +import com.chatopera.bot.exception.ResourceOperationException; |
| 22 | +import com.chatopera.bot.sdk.basics.Response; |
| 23 | +import com.chatopera.bot.sdk.models.Status; |
| 24 | +import com.chatopera.bot.utils.Logger; |
| 25 | +import org.apache.commons.lang3.StringUtils; |
| 26 | +import org.json.JSONObject; |
| 27 | + |
| 28 | +import java.net.MalformedURLException; |
| 29 | + |
| 30 | +/** |
| 31 | + * 训练管理 |
| 32 | + */ |
| 33 | +public class TrainMgr { |
| 34 | + |
| 35 | + private Chatbot chatbot; |
| 36 | + |
| 37 | + // 不支持定义空实例 |
| 38 | + private TrainMgr() { |
| 39 | + } |
| 40 | + |
| 41 | + public TrainMgr(final String clientId, final String clientSecret, final String baseUrl) throws MalformedURLException, ChatbotException, MalformedURLException, ChatbotException { |
| 42 | + this.chatbot = new Chatbot(clientId, clientSecret, baseUrl); |
| 43 | + } |
| 44 | + |
| 45 | + public TrainMgr(final String clientId, final String clientSecret) throws MalformedURLException, ChatbotException { |
| 46 | + this.chatbot = new Chatbot(clientId, clientSecret); |
| 47 | + } |
| 48 | + |
| 49 | + /** |
| 50 | + * Get Chatbot Status |
| 51 | + * |
| 52 | + * @return |
| 53 | + */ |
| 54 | + public Status getStatus() throws ChatbotException, ResourceInvalidException { |
| 55 | + Response resp = this.chatbot.command("GET", "/clause/devver/build"); |
| 56 | + if (resp.getRc() == 0) { |
| 57 | + Status status = new Status(resp.getStatus()); |
| 58 | + return status; |
| 59 | + } else { |
| 60 | + throw new ChatbotException("Unexpected status result. " + (StringUtils.isNotBlank(resp.getError()) ? resp.getError() : "")); |
| 61 | + } |
| 62 | + } |
| 63 | + |
| 64 | + /** |
| 65 | + * 训练多轮对话脚本 |
| 66 | + * |
| 67 | + * @return |
| 68 | + */ |
| 69 | + protected boolean trainConversations() throws ChatbotException, ResourceInvalidException { |
| 70 | + Status currentStatus = getStatus(); |
| 71 | + |
| 72 | + if (currentStatus.getReparse() != 0) { |
| 73 | + // 多轮对话待同步 |
| 74 | + Response resp = this.chatbot.command("POST", "/conversation/sync/customdicts"); |
| 75 | + return resp.getRc() == 0; |
| 76 | + } else { |
| 77 | + // 服务器端机器人多轮对话已经同步最新词典 |
| 78 | + return true; |
| 79 | + } |
| 80 | + } |
| 81 | + |
| 82 | + /** |
| 83 | + * 训练意图识别,返回提交结果,该任务提交成功后,异步执行。 |
| 84 | + * |
| 85 | + * @return true: 开始执行训练(此时服务器在执行任务,训练时间取决于数据量);false:不能正常执行训练 |
| 86 | + * @throws ChatbotException |
| 87 | + * @throws ResourceInvalidException |
| 88 | + */ |
| 89 | + protected boolean trainIntents() throws ChatbotException, ResourceInvalidException, ResourceOperationException { |
| 90 | + Status currentStatus = getStatus(); |
| 91 | + |
| 92 | + if (currentStatus.getRetrain() != 0) { |
| 93 | + // 意图识别模型待训练 |
| 94 | + Response resp = this.chatbot.command("POST", "/clause/devver/train"); |
| 95 | + if (resp.getRc() == 0) { |
| 96 | + // 提交并开始执行训练 |
| 97 | + return true; |
| 98 | + } else if (resp.getRc() == 21 || resp.getRc() == 22 || resp.getRc() == 24) { |
| 99 | + Logger.warn("[trainIntents] 没有意图或意图没有说法,此时不需要训练。"); |
| 100 | + return true; |
| 101 | + } else if (resp.getRc() == 25) { |
| 102 | + Logger.warn("[trainIntents] 存在不合法的词典信息,无法开始训练"); |
| 103 | + return false; |
| 104 | + } else { |
| 105 | + throw new ResourceOperationException("[trainIntents] Unexpected operation results."); |
| 106 | + } |
| 107 | + } else { |
| 108 | + // 训练任务未开始,因为服务器端意图识别模型已经同步最新的训练数据 |
| 109 | + // 不需要重新训练 |
| 110 | + return true; |
| 111 | + } |
| 112 | + } |
| 113 | + |
| 114 | + |
| 115 | + /** |
| 116 | + * 训练知识库,该任务提交成功后,异步执行。 |
| 117 | + * |
| 118 | + * @return 训练任务是否启动。 |
| 119 | + */ |
| 120 | + protected boolean trainFAQs() throws ResourceInvalidException, ChatbotException { |
| 121 | + Status currentStatus = getStatus(); |
| 122 | + |
| 123 | + if (currentStatus.getReindex() != 0) { |
| 124 | + // 多轮对话待同步 |
| 125 | + Response resp = this.chatbot.command("POST", "/faq/sync/customdicts"); |
| 126 | + return resp.getRc() == 0; |
| 127 | + } else { |
| 128 | + // 服务器端机器人知识库索引已经与最新词典信息一致,不需要重新训练 |
| 129 | + return true; |
| 130 | + } |
| 131 | + } |
| 132 | + |
| 133 | + /** |
| 134 | + * Start in parallel, train all domains |
| 135 | + */ |
| 136 | + public void trainAll() { |
| 137 | + |
| 138 | + try { |
| 139 | + this.trainConversations(); |
| 140 | + } catch (Exception e) { |
| 141 | + Logger.trace("[trainAll] unexpected result to start train conversation."); |
| 142 | + e.printStackTrace(); |
| 143 | + } |
| 144 | + |
| 145 | + try { |
| 146 | + this.trainFAQs(); |
| 147 | + } catch (Exception e) { |
| 148 | + Logger.trace("[trainAll] unexpected result to start train faqs."); |
| 149 | + e.printStackTrace(); |
| 150 | + } |
| 151 | + |
| 152 | + try { |
| 153 | + this.trainIntents(); |
| 154 | + } catch (Exception e) { |
| 155 | + Logger.trace("[trainAll] unexpected result to start train intents."); |
| 156 | + e.printStackTrace(); |
| 157 | + } |
| 158 | + } |
| 159 | + |
| 160 | + /** |
| 161 | + * 等待训练任务结束 |
| 162 | + */ |
| 163 | + public void waitForJobsDone() throws InterruptedException, ResourceInvalidException, ChatbotException { |
| 164 | + while (!isUpdated()) { |
| 165 | + Logger.warn("TrainMgr [waitForJobsDone] still in progress ..."); |
| 166 | + Thread.sleep(10000); |
| 167 | + } |
| 168 | + } |
| 169 | + |
| 170 | + |
| 171 | + /** |
| 172 | + * 检查机器人的训练任务是否已经结束 |
| 173 | + * |
| 174 | + * @return |
| 175 | + */ |
| 176 | + private boolean isUpdated() throws ResourceInvalidException, ChatbotException { |
| 177 | + final Status status = getStatus(); |
| 178 | + |
| 179 | + boolean isUpdatedIntents = true; |
| 180 | + boolean isUpdatedFAQs = true; |
| 181 | + |
| 182 | + /** |
| 183 | + * 检查多轮对话状态 |
| 184 | + * 多轮对话的重新训练不是异步的,在提交了训练任务后,是立即返回结果的 |
| 185 | + */ |
| 186 | + boolean isUpdatedConversations = true; |
| 187 | + |
| 188 | + /** |
| 189 | + * 检查意图识别状态 |
| 190 | + */ |
| 191 | + if (status.getRetrain() == 1) { |
| 192 | + // 同步中 |
| 193 | + isUpdatedIntents = false; |
| 194 | + } else { |
| 195 | + // 以下情况,返回 true |
| 196 | + // * 意图识别完成同步 |
| 197 | + // * 训练过程中,意图识别数据发生变化,本次训练不有意义 |
| 198 | + isUpdatedIntents = true; |
| 199 | + } |
| 200 | + |
| 201 | + /** |
| 202 | + * 检查 FAQs 状态 |
| 203 | + */ |
| 204 | + if (status.getReindex() == 1) { |
| 205 | + // 正在更新 |
| 206 | + isUpdatedFAQs = false; |
| 207 | + } else { |
| 208 | + // 以下情况,返回 true |
| 209 | + // * 已经同步 |
| 210 | + // * 知识库或自定义词典变更,无法继续同步,可返回后重新提交。 |
| 211 | + isUpdatedFAQs = true; |
| 212 | + } |
| 213 | + |
| 214 | + |
| 215 | + return isUpdatedIntents && isUpdatedFAQs && isUpdatedConversations; |
| 216 | + } |
| 217 | + |
| 218 | + |
| 219 | +} |
0 commit comments