feat(sj_1.0.0): 1、 定时任务告警新增失败原因 2、完善失败场景告警

This commit is contained in:
opensnail 2024-05-07 18:44:03 +08:00
parent 30221e8339
commit cd3ec5f781
10 changed files with 58 additions and 23 deletions

View File

@ -13,13 +13,10 @@ public enum JobNotifySceneEnum {
/********************************Job****************************************/
JOB_TASK_ERROR(1, "JOB任务执行失败", NodeTypeEnum.SERVER),
JOB_CLIENT_ERROR(2, "客户端执行失败", NodeTypeEnum.CLIENT),
/********************************Workflow****************************************/
WORKFLOW_TASK_ERROR(100, "Workflow任务执行失败", NodeTypeEnum.SERVER),
WORKFLOW_TASK_CALLBACK_ERROR(101, "回调节点任务执行失败", NodeTypeEnum.SERVER),
WORKFLOW_TASK_DECISION_ERROR(102, "判定节点任务执行失败", NodeTypeEnum.SERVER),
WORKFLOW_TASK_ERROR(100, "Workflow任务执行失败", NodeTypeEnum.SERVER)
;
/**

View File

@ -6,6 +6,7 @@ import lombok.Getter;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
/**
* 标识某个操作的具体原因
@ -47,4 +48,10 @@ public enum JobOperationReasonEnum {
WORKFLOW_NODE_NO_REQUIRED.getReason(), WORKFLOW_DECISION_FAILED.getReason(),
WORKFLOW_CONDITION_NODE_EXECUTION_ERROR.getReason());
/**
 * Looks up the enum constant whose reason code equals the given value.
 *
 * @param reason the reason code to resolve; may be {@code null}
 * @return the first constant (in declaration order) whose {@code reason} equals the argument,
 *         or {@code NONE} when the argument is {@code null} or no constant matches
 */
public static JobOperationReasonEnum getByReason(Integer reason) {
    // A missing code is treated the same way as an unknown one.
    if (reason == null) {
        return NONE;
    }

    // Declaration-order scan; the first hit wins.
    for (JobOperationReasonEnum candidate : values()) {
        if (reason.equals(candidate.reason)) {
            return candidate;
        }
    }
    return NONE;
}
}

View File

@ -25,11 +25,14 @@ import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.RateLimiter;
import lombok.extern.slf4j.Slf4j;
import org.jetbrains.annotations.NotNull;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.ApplicationEvent;
import org.springframework.context.ApplicationListener;
import org.springframework.scheduling.TaskScheduler;
import org.springframework.transaction.event.TransactionPhase;
import org.springframework.transaction.event.TransactionalApplicationListener;
import org.springframework.util.CollectionUtils;
import java.time.Duration;
@ -43,7 +46,8 @@ import java.util.stream.Collectors;
* @since 2.5.0
*/
@Slf4j
public abstract class AbstractAlarm<E extends ApplicationEvent, A extends AlarmInfo> implements ApplicationListener<E>,
public abstract class AbstractAlarm<E extends ApplicationEvent, A extends AlarmInfo> implements
TransactionalApplicationListener<E>,
Runnable,
Lifecycle {
@ -218,6 +222,12 @@ public abstract class AbstractAlarm<E extends ApplicationEvent, A extends AlarmI
}
protected abstract int getNotifyScene();
/**
 * Reports the transaction phase this listener declares itself bound to.
 *
 * <p>NOTE(review): presumably chosen so that alarm events are handled only after the publishing
 * transaction has finished — confirm against the Spring {@code TransactionalApplicationListener}
 * contract and against how {@code onApplicationEvent} is implemented in this class, since
 * returning a phase here does not by itself show that the event delivery is deferred.
 *
 * @return always {@link TransactionPhase#AFTER_COMPLETION}
 */
@NotNull
@Override
public TransactionPhase getTransactionPhase() {
return TransactionPhase.AFTER_COMPLETION;
}
}

View File

@ -27,7 +27,14 @@ public class JobAlarmInfo extends AlarmInfo {
*/
private String executorInfo;
/**
* 执行参数
*/
private String argsStr;
/**
* 操作原因
*/
private Integer operationReason;
}

View File

@ -1,7 +1,9 @@
package com.aizuda.snailjob.server.job.task.support.alarm.listener;
import cn.hutool.core.util.StrUtil;
import com.aizuda.snailjob.common.core.alarm.AlarmContext;
import com.aizuda.snailjob.common.core.enums.JobNotifySceneEnum;
import com.aizuda.snailjob.common.core.enums.JobOperationReasonEnum;
import com.aizuda.snailjob.common.core.util.EnvironmentUtils;
import com.aizuda.snailjob.common.log.SnailJobLog;
import com.aizuda.snailjob.server.common.AlarmInfoConverter;
@ -49,6 +51,7 @@ public class JobTaskFailAlarmListener extends AbstractJobAlarm<JobTaskFailAlarmE
> 组名称:{} \s
> 任务名称:{} \s
> 执行器名称:{} \s
> 失败原因:{} \s
> 方法参数:{} \s
> 时间:{};
""";
@ -72,17 +75,19 @@ public class JobTaskFailAlarmListener extends AbstractJobAlarm<JobTaskFailAlarmE
/**
 * Builds the alarm content for a failed job task.
 *
 * <p>The operation-reason code carried by {@code alarmDTO} is converted to its description via
 * {@link JobOperationReasonEnum#getByReason(Integer)}. The message text is rendered from
 * {@code MESSAGES_FORMATTER} using the active profile, namespace id, group name, job name,
 * executor info, argument string and the current time; the title carries the active profile.
 *
 * @param alarmDTO     alarm data supplying namespace, group, job, executor, arguments and
 *                     operation reason
 * @param notifyConfig notification configuration; not read in the visible body
 * @return the populated {@link AlarmContext}
 */
@Override
protected AlarmContext buildAlarmContext(JobAlarmInfo alarmDTO, NotifyConfigInfo notifyConfig) {
// Resolve the numeric operation reason to its description for the alarm text.
String desc = JobOperationReasonEnum.getByReason(alarmDTO.getOperationReason()).getDesc();
// Alert
return AlarmContext.build()
// NOTE(review): two .text(...)/.title(...) chains follow. This first one passes no `desc`
// argument and looks like the pre-change version retained by the diff view — confirm that
// only the second chain is present in the actual file.
.text(MESSAGES_FORMATTER,
EnvironmentUtils.getActiveProfile(),
alarmDTO.getNamespaceId(),
alarmDTO.getGroupName(),
alarmDTO.getJobName(),
alarmDTO.getExecutorInfo(),
alarmDTO.getArgsStr(),
DateUtils.toNowFormat(DateUtils.NORM_DATETIME_PATTERN))
.title("{}环境 JOB任务失败", EnvironmentUtils.getActiveProfile());
// Second chain: same arguments with `desc` (the failure reason) inserted after the executor info.
.text(MESSAGES_FORMATTER,
EnvironmentUtils.getActiveProfile(),
alarmDTO.getNamespaceId(),
alarmDTO.getGroupName(),
alarmDTO.getJobName(),
alarmDTO.getExecutorInfo(),
desc,
alarmDTO.getArgsStr(),
DateUtils.toNowFormat(DateUtils.NORM_DATETIME_PATTERN))
.title("{}环境 JOB任务失败", EnvironmentUtils.getActiveProfile());
}
@Override

View File

@ -6,8 +6,6 @@ import com.aizuda.snailjob.server.job.task.support.JobTaskConverter;
import com.aizuda.snailjob.server.job.task.enums.BlockStrategyEnum;
import com.aizuda.snailjob.server.job.task.support.generator.batch.JobTaskBatchGenerator;
import com.aizuda.snailjob.server.job.task.support.generator.batch.JobTaskBatchGeneratorContext;
import com.aizuda.snailjob.server.job.task.support.generator.batch.JobTaskBatchGenerator;
import com.aizuda.snailjob.server.job.task.support.generator.batch.JobTaskBatchGeneratorContext;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Component;

View File

@ -182,6 +182,10 @@ public class JobExecutorActor extends AbstractActor {
Assert.isTrue(1 == jobTaskBatchMapper.updateById(jobTaskBatch),
() -> new SnailJobServerException("更新任务失败"));
if (JobTaskBatchStatusEnum.NOT_SUCCESS.contains(taskStatus)) {
SpringContext.getContext().publishEvent(new JobTaskFailAlarmEvent(taskExecute.getTaskBatchId()));
}
}
private void doHandlerResidentTask(Job job, TaskExecuteDTO taskExecuteDTO) {

View File

@ -4,6 +4,7 @@ import akka.actor.AbstractActor;
import akka.actor.ActorRef;
import com.aizuda.snailjob.client.model.ExecuteResult;
import com.aizuda.snailjob.client.model.request.DispatchJobRequest;
import com.aizuda.snailjob.common.core.context.SpringContext;
import com.aizuda.snailjob.common.core.enums.JobTaskStatusEnum;
import com.aizuda.snailjob.common.core.enums.StatusEnum;
import com.aizuda.snailjob.common.log.SnailJobLog;
@ -19,6 +20,7 @@ import com.aizuda.snailjob.server.common.dto.JobLogMetaDTO;
import com.aizuda.snailjob.server.job.task.dto.RealJobExecutorDTO;
import com.aizuda.snailjob.server.job.task.support.ClientCallbackHandler;
import com.aizuda.snailjob.server.job.task.support.JobTaskConverter;
import com.aizuda.snailjob.server.job.task.support.alarm.event.JobTaskFailAlarmEvent;
import com.aizuda.snailjob.server.job.task.support.callback.ClientCallbackContext;
import com.aizuda.snailjob.server.job.task.support.callback.ClientCallbackFactory;
import com.github.rholder.retry.Attempt;
@ -92,15 +94,15 @@ public class RequestClientActor extends AbstractActor {
}
} catch (Exception e) {
Throwable throwable = e;
Throwable throwable;
if (e.getClass().isAssignableFrom(RetryException.class)) {
RetryException re = (RetryException) e;
throwable = re.getLastFailedAttempt().getExceptionCause();
}
if (e.getClass().isAssignableFrom(UndeclaredThrowableException.class)) {
} else if (e.getClass().isAssignableFrom(UndeclaredThrowableException.class)) {
UndeclaredThrowableException re = (UndeclaredThrowableException) e;
throwable = re.getUndeclaredThrowable();
} else {
throwable = e;
}
JobLogMetaDTO jobLogMetaDTO = JobTaskConverter.INSTANCE.toJobLogDTO(realJobExecutorDTO);
@ -114,7 +116,7 @@ public class RequestClientActor extends AbstractActor {
}
taskExecuteFailure(realJobExecutorDTO, throwable.getMessage());
SpringContext.getContext().publishEvent(new JobTaskFailAlarmEvent(dispatchJobRequest.getTaskBatchId()));
}
}

View File

@ -1,5 +1,6 @@
package com.aizuda.snailjob.server.job.task.support.prepare.job;
import com.aizuda.snailjob.common.core.context.SpringContext;
import com.aizuda.snailjob.common.core.enums.JobOperationReasonEnum;
import com.aizuda.snailjob.common.core.enums.JobTaskBatchStatusEnum;
import com.aizuda.snailjob.common.core.util.JsonUtil;
@ -9,6 +10,7 @@ import com.aizuda.snailjob.server.job.task.support.BlockStrategy;
import com.aizuda.snailjob.server.job.task.support.JobTaskConverter;
import com.aizuda.snailjob.server.job.task.dto.JobTaskPrepareDTO;
import com.aizuda.snailjob.server.job.task.support.JobTaskStopHandler;
import com.aizuda.snailjob.server.job.task.support.alarm.event.JobTaskFailAlarmEvent;
import com.aizuda.snailjob.server.job.task.support.block.job.BlockStrategyContext;
import com.aizuda.snailjob.server.job.task.support.block.job.JobBlockStrategyFactory;
import com.aizuda.snailjob.server.job.task.support.stop.JobTaskStopFactory;
@ -62,6 +64,7 @@ public class RunningJobPrepareHandler extends AbstractJobPrePareHandler {
stopJobContext.setJobOperationReason(JobOperationReasonEnum.TASK_EXECUTION_TIMEOUT.getReason());
stopJobContext.setNeedUpdateTaskStatus(Boolean.TRUE);
instanceInterrupt.stop(stopJobContext);
SpringContext.getContext().publishEvent(new JobTaskFailAlarmEvent(prepare.getTaskBatchId()));
}
}

View File

@ -1,12 +1,14 @@
package com.aizuda.snailjob.server.job.task.support.stop;
import akka.actor.ActorRef;
import com.aizuda.snailjob.common.core.context.SpringContext;
import com.aizuda.snailjob.common.core.enums.JobTaskBatchStatusEnum;
import com.aizuda.snailjob.common.core.enums.JobTaskStatusEnum;
import com.aizuda.snailjob.server.common.akka.ActorGenerator;
import com.aizuda.snailjob.server.job.task.support.JobTaskConverter;
import com.aizuda.snailjob.server.job.task.dto.JobExecutorResultDTO;
import com.aizuda.snailjob.server.job.task.support.JobTaskStopHandler;
import com.aizuda.snailjob.server.job.task.support.alarm.event.JobTaskFailAlarmEvent;
import com.aizuda.snailjob.template.datasource.persistence.mapper.JobTaskBatchMapper;
import com.aizuda.snailjob.template.datasource.persistence.mapper.JobTaskMapper;
import com.aizuda.snailjob.template.datasource.persistence.po.JobTask;